Exemple #1
0
 // Re-read the CSV behind $fh row by row. Rows whose third column is empty are
 // counted as timed-out responses and copied (first two columns only) to $fsw
 // so they can be retried; complete rows are re-serialised and, when they match
 // $pattern, written to $fw.
 //
 // Looping on fgetcsv()'s return value (instead of feof()) ends the loop on a
 // read error as well as on EOF, so a failing read cannot spin forever.
 while (($line = fgetcsv($fh, 0, ',', '"')) !== false) {
     // Blank lines come back as array(null); rows without a second column
     // are ignored entirely (they do not update $prevLine either).
     if (!isset($line[1]) || $line[1] === '') {
         continue;
     }
     if (isset($line[2]) && $line[2] === '') {
         // Empty third column: treat the row as a timed-out response.
         $timedoutresponsescount++;
         $sLine = $line[0] . $s . $line[1];
         fwrite($fsw, $sLine . "\n");
     } elseif (!isset($line[4])) {
         // Fewer than five columns: the row is malformed. Report it relative
         // to the last row that was handled successfully, if there is one.
         if ($prevLine !== '') {
             echo "[ERROR] Error parsing {$inputfilename} right after " . $prevLine . "\n";
         }
         $sLine = '';
     } else {
         // fgetcsv() yields strings, so the stored value has to be compared
         // as the string '-1'; a strict comparison with the integer -1 never
         // matches. NOTE(review): -1 is presumably the "lookup failed" marker
         // written by an earlier pass - confirm against pagerank().
         if (!isset($line[5]) || (string) $line[5] === '-1') {
             $line[5] = pagerank('http://' . preg_replace('@http[s]?://@i', '', $line[1]));
         }
         $sLine = preg_replace('@\\t\\n@', "\n", "{$line[0]}{$s}{$line[1]}{$s}\"{$line[2]}\"{$s}\"{$line[3]}\"{$s}\"{$line[4]}\"{$s}{$line[5]}");
         if (preg_match($pattern, $sLine)) {
             fwrite($fw, $sLine . "\n");
         }
     }
     $prevLine = $sLine;
 }
 fclose($fsw);
 fclose($fh);
 if (filesize($swapOutputFile) === 0) {
     echo "No more timed out requests found after repass " . ($i + 1) . ".\n Exiting...\n";
     break;
Exemple #2
0
/**
 * Extracts the title, meta keywords and meta description from an HTML response
 * and appends them as one CSV row to the request's output stream.
 *
 * The row has the form: rank,url,"title","keywords","description",pagerank.
 * It is written only when every character of the row is within the allowed
 * character ranges (see $pattern below).
 *
 * An empty or non-string response is not parsed; the row is then written with
 * empty title/keywords/description fields instead of raising an error.
 *
 * @param string|mixed $response HTML body of the response.
 * @param mixed        $info     Not used by this function.
 * @param object       $request  Object providing ->rank, ->url, ->outputfilehandler
 *                               (writable stream) and optionally ->pagerank; when
 *                               ->pagerank is not set, pagerank($request->url) is called.
 *
 * @return void
 */
function request_callback($response, $info, $request)
{
    $s = ',';
    // CSV output file separator

    // Backslash-escapes every double quote that is not already preceded by a
    // backslash, so the value can be embedded in a double-quoted CSV field.
    $escapeQuotes = static function ($text) {
        return preg_replace('@(?<!\\\\)"@', '\\"', $text);
    };

    $title = '';
    $description = '';
    $keywords = '';

    // DOMDocument::loadHTML() throws a ValueError for an empty source on PHP 8
    // (and "@" cannot suppress exceptions), so skip parsing in that case.
    if (is_string($response) && $response !== '') {
        $doc = new DOMDocument();

        // Collect libxml parse warnings internally instead of silencing them
        // with "@", then restore whatever error mode the caller had.
        $previousErrorMode = libxml_use_internal_errors(true);
        $doc->loadHTML($response);
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);

        $titletags = $doc->getElementsByTagName('title');
        if ($titletags->length > 0) {
            $title = $escapeQuotes($titletags->item(0)->textContent);
        }

        // When several matching meta tags exist, the last one wins.
        foreach ($doc->getElementsByTagName('meta') as $metatag) {
            /** @noinspection PhpUndefinedMethodInspection */
            $name = strtolower($metatag->getAttribute('name'));
            if ($name === 'description') {
                /** @noinspection PhpUndefinedMethodInspection */
                $description = $escapeQuotes($metatag->getAttribute('content'));
            } elseif ($name === 'keywords') {
                /** @noinspection PhpUndefinedMethodInspection */
                $keywords = $escapeQuotes($metatag->getAttribute('content'));
            }
        }
    }

    // This will work fine on concurrency as long as we're not trying to write more than 64KB (see http://www.php.net/manual/en/function.stream-set-write-buffer.php)
    $line = preg_replace('@\\t\\n@', "\n", "{$request->rank}{$s}{$request->url}{$s}\"{$title}\"{$s}\"{$keywords}\"{$s}\"{$description}\"{$s}" . (isset($request->pagerank) ? $request->pagerank : pagerank($request->url)));
    $pattern = '@^[\\x{000a}\\x{000d}\\x{0020}-\\x{007e}\\x{2000}-\\x{27ff}]*$@u';
    //only English (ASCII printable and unicode general extensions) characters
    if (preg_match($pattern, $line)) {
        fwrite($request->outputfilehandler, $line . "\n");
    }
}
Exemple #3
0
 /**
  * Prints the value that pagerank() returns for the given URL.
  *
  * The value is echoed to the output and not returned to the caller.
  *
  * @param mixed $url Passed unchanged to pagerank().
  *
  * @return void
  */
 function showpr($url)
 {
     echo pagerank($url);
 }