/**
 * Fetch one Wikipedia article and determine the article to follow next.
 *
 * The page is downloaded through a fixed HTTP proxy. If the response does not
 * begin with "<!DOCTYPE html>" it is handed to gzdecoder() (defined elsewhere;
 * presumably it decompresses a gzip-encoded body -- confirm against its
 * definition) before being parsed.
 *
 * @param string $next_page Article title, appended verbatim to the wiki base URL.
 * @param mixed  $rand      When loosely equal to true, the returned "rand" entry
 *                          holds the page title with spaces replaced by underscores.
 *
 * @return array|false Array with the keys "next" (result of scrape()), "title"
 *                     (result of gettitle()) and "rand" (string or null);
 *                     false when the download fails, when the decoded source
 *                     is false, or when scrape() returns false.
 */
function philosophize($next_page, $rand)
{
    // All articles share the same address prefix; append the specific title.
    $url = "http://en.wikipedia.org/wiki/" . $next_page;

    // Send the request through the HTTP proxy.
    $aContext = ['http' => ['proxy' => 'tcp://10.2.20.18:9090', 'request_fulluri' => true]];
    $cxContext = stream_context_create($aContext);

    // Load the HTML into a string.
    $source = file_get_contents($url, false, $cxContext);

    // Bail out before inspecting the payload: on failure $source is false, and
    // running substr()/gzdecoder() on it would treat the error as page content.
    if ($source === false) {
        return false;
    }

    // Anything that does not start with the HTML doctype goes through gzdecoder().
    if (strcasecmp(substr($source, 0, 15), "<!DOCTYPE html>") != 0) {
        $source = gzdecoder($source);
    }

    // The decoder's result may itself be false, so check again.
    if ($source === false) {
        return false;
    }

    $title = gettitle($source);

    // scrape() picks the next article out of the page source.
    $next = scrape($source, 0);
    if ($next === false) {
        return false;
    }

    return [
        "next" => $next,
        "title" => $title,
        "rand" => $rand == true ? str_replace(' ', '_', $title) : null,
    ];
}
/**
 * Pull the "sd_src_no_ratelimit" URL out of a fetched page body.
 *
 * The captured value has its \uXXXX sequences passed through the
 * replace_unicode_escape_sequence callback (defined elsewhere) and every
 * escaped slash ("\/") replaced by a plain "/".
 *
 * @param string $curl_content Raw page content to search.
 *
 * @return string|null The cleaned-up URL, or null when the key is not present.
 */
function sd_finallink($curl_content)
{
    $pattern = '/"sd_src_no_ratelimit":"([^"]+)"/';
    if (!preg_match($pattern, $curl_content, $found)) {
        return null;
    }

    $unescaped = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        'replace_unicode_escape_sequence',
        $found[1]
    );

    return str_replace('\\/', '/', $unescaped);
}

/**
 * Read the page title from the <title id="pageTitle"> element.
 *
 * Only the text before the first "|" of the element's content is returned.
 *
 * @param string $curl_content Raw page content to search.
 *
 * @return string|null The leading title segment, or null when no title element matches.
 */
function gettitle($curl_content)
{
    $pattern = "/title id=\"pageTitle\">(.+?)<\\/title>/";
    if (!preg_match($pattern, $curl_content, $found)) {
        return null;
    }

    $segments = explode("|", $found[1]);

    return $segments[0];
}

// Resolve both download links and the title from the fetched page, then
// report the outcome as JSON. Success is decided by the SD link alone.
$hdlink = hd_finallink($data);
$sdlink = sd_finallink($data);
$title = gettitle($data);

if ($sdlink == "") {
    $message = [
        'type' => 'failure',
        'message' => 'Error retrieving the download link for the url. Please try again later',
    ];
} else {
    $message = [
        'type' => 'success',
        'title' => $title,
        'hd_download_url' => $hdlink,
        'sd_download_url' => $sdlink,
    ];
}

echo json_encode($message);
} if ($end == '.jp2' || $end == '.tif') { // get basename $base = basename($dfil, $end); $seqdir = getseqdir($base); $dirname = getdirname($base); print "Now working with {$dirname}...\n"; // find seq $s = explode('/', $seqdir); $seq = $s[1]; $newdir = './' . $seqdir; $new = './' . $seqdir . "/" . 'OBJ' . $end; // what is xbase xml of this image $thisxml = getdirname($base) . ".xml"; // get booktitle specific to this image $booktitle = gettitle($thisxml, $meta); // encode entities $booktitle = htmlentities($booktitle, ENT_QUOTES, 'UTF-8'); // make mods.xml $pagexml = <<<EOL <?xml version="1.0" encoding="UTF-8"?> <mods:mods xmlns:mods="http://www.loc.gov/mods/v3" xmlns="http://www.loc.gov/mods/v3"> <mods:titleInfo> <mods:title>{$booktitle} : page {$seq}</mods:title> </mods:titleInfo> </mods:mods> EOL; // switch contexts to fix syntax highlighting $mfile = $seqdir . "/" . "MODS.xml"; print "Writing MODS.xml\n"; file_put_contents($mfile, $pagexml);