Exemplo n.º 1
0
/**
 * Downloads one English Wikipedia article and extracts the next link to follow.
 *
 * The page is requested through a hard-coded HTTP proxy. A body that does not
 * start with "<!DOCTYPE html>" is handed to gzdecoder() (defined elsewhere;
 * presumably it gzip-decodes the payload -- confirm against its definition).
 *
 * @param string $next_page Article title appended to the Wikipedia base URL.
 * @param mixed  $rand      When truthy, the "rand" entry of the result holds the
 *                          page title with spaces replaced by underscores;
 *                          otherwise that entry is null.
 *
 * @return array|false Array with the keys "next", "title" and "rand", or false
 *                     when the download, the decoding step or scrape() fails.
 */
function philosophize($next_page, $rand)
{
    // Base address shared by all articles plus the specific article title.
    $url = "http://en.wikipedia.org/wiki/" . $next_page;

    // Route the request through the proxy; request_fulluri sends the full URL
    // in the request line.
    $aContext = array('http' => array('proxy' => 'tcp://10.2.20.18:9090', 'request_fulluri' => true));
    $cxContext = stream_context_create($aContext);

    $source = file_get_contents($url, false, $cxContext);
    // Bail out before touching the body: file_get_contents() yields false on
    // failure, and that value must never reach the prefix test or the decoder.
    if ($source === false) {
        return false;
    }

    // Anything that does not start with the HTML doctype goes through the decoder.
    if (strncasecmp($source, "<!DOCTYPE html>", 15) !== 0) {
        $source = gzdecoder($source);
        if ($source === false) {
            return false;
        }
    }

    $title = gettitle($source);

    // scrape() (defined elsewhere) returns the next page, or false when none is found.
    $next = scrape($source, 0);
    if ($next === false) {
        return false;
    }

    return ["next" => $next, "title" => $title, "rand" => $rand ? str_replace(' ', '_', $title) : null];
}
Exemplo n.º 2
0
/**
 * Pulls the "sd_src_no_ratelimit" URL out of a fetched page body.
 *
 * The captured value has its \uXXXX escape sequences rewritten by the
 * replace_unicode_escape_sequence callback (defined elsewhere) and its
 * escaped slashes ("\/") turned back into plain slashes.
 *
 * @param string $curl_content Raw page content to search.
 *
 * @return string|null The decoded link, or null when the key is not present.
 */
function sd_finallink($curl_content)
{
    $found = preg_match('/"sd_src_no_ratelimit":"([^"]+)"/', $curl_content, $captured);
    if (!$found) {
        return null;
    }

    $unescaped = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        'replace_unicode_escape_sequence',
        $captured[1]
    );

    return str_replace('\\/', '/', $unescaped);
}
/**
 * Extracts the page title from a fetched page body.
 *
 * Looks for a <title id="pageTitle"> element and returns the text that
 * precedes the first "|" inside it. The text is returned as-is, without
 * trimming.
 *
 * @param string $curl_content Raw page content to search.
 *
 * @return string|null The leading title segment, or null when no such
 *                     title element is found.
 */
function gettitle($curl_content)
{
    $pattern = "/title id=\"pageTitle\">(.+?)<\\/title>/";
    if (!preg_match($pattern, $curl_content, $captured)) {
        return null;
    }

    // Only the first "|"-separated segment is needed, so stop splitting after it.
    $segments = explode("|", $captured[1], 2);

    return $segments[0];
}
// Resolve both download links and the page title from the fetched page body.
$hdlink = hd_finallink($data);
$sdlink = sd_finallink($data);
$title = gettitle($data);

// The SD link decides the outcome: without it the whole lookup counts as failed.
if ($sdlink == "") {
    $message = array(
        'type' => 'failure',
        'message' => 'Error retrieving the download link for the url. Please try again later',
    );
} else {
    $message = array(
        'type' => 'success',
        'title' => $title,
        'hd_download_url' => $hdlink,
        'sd_download_url' => $sdlink,
    );
}

// Emit the result as JSON for the caller.
echo json_encode($message);
Exemplo n.º 3
0
    }
    if ($end == '.jp2' || $end == '.tif') {
        // get basename
        $base = basename($dfil, $end);
        $seqdir = getseqdir($base);
        $dirname = getdirname($base);
        print "Now working with {$dirname}...\n";
        // find seq
        $s = explode('/', $seqdir);
        $seq = $s[1];
        $newdir = './' . $seqdir;
        $new = './' . $seqdir . "/" . 'OBJ' . $end;
        // what is xbase xml of this image
        $thisxml = getdirname($base) . ".xml";
        // get booktitle specific to this image
        $booktitle = gettitle($thisxml, $meta);
        // encode entities
        $booktitle = htmlentities($booktitle, ENT_QUOTES, 'UTF-8');
        // make mods.xml
        $pagexml = <<<EOL
<?xml version="1.0" encoding="UTF-8"?>
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3" xmlns="http://www.loc.gov/mods/v3">
  <mods:titleInfo>
    <mods:title>{$booktitle} : page {$seq}</mods:title>
  </mods:titleInfo>
</mods:mods>
EOL;
        // switch contexts to fix syntax highlighting
        $mfile = $seqdir . "/" . "MODS.xml";
        print "Writing MODS.xml\n";
        file_put_contents($mfile, $pagexml);