/**
 * Requests every collected link in parallel and records the ones that look broken.
 *
 * Links rejected by the NoPseudoUrl validator are not requested. For each requested
 * link the HTTP status code of the response is stored on the link via setStatusCode();
 * links whose status code is not 200 end up in $this->brokenLinks.
 *
 * @param FilterCrawlerProcessEvent $event Event that triggered this handler; not read here.
 *
 * @return void
 */
public function onFinish(FilterCrawlerProcessEvent $event)
 {
     $rollingCurl = new \RollingCurl\RollingCurl();
     $rollingCurl->setSimultaneousLimit(50);
     // kicks out javascript:void(0) and # urls
     $validator = new NoPseudoUrl();
     // Iterate by value: links are objects, so the handle already points at the shared
     // instance. A by-reference loop would leave $link dangling after the loop and turn
     // the elements of $this->links into references.
     foreach ($this->links as $link) {
         if ($validator->isValid($link)) {
             // The link rides along as entry 0 of the request options so the callback
             // can recover it once the response has arrived.
             $rollingCurl->get($link->getLinkHref(), null, [$link]);
         }
     }
     $brokenLinks = [];
     $rollingCurl->setCallback(function (\RollingCurl\Request $request, \RollingCurl\RollingCurl $rollingCurl) use (&$brokenLinks) {
         $link = $request->getOptions()[0];
         echo "checking -> " . $link->getLinkHref() . "\n";
         $link->setStatusCode($request->getResponseInfo()['http_code']);
         // Anything other than a plain 200 counts as broken.
         if ($link->getStatusCode() != 200) {
             $brokenLinks[] = $link;
         }
     });
     $rollingCurl->execute();
     $this->brokenLinks = $brokenLinks;
 }
Esempio n. 2
0
<?php

require __DIR__ . '/../src/RollingCurl/RollingCurl.php';
require __DIR__ . '/../src/RollingCurl/Request.php';
// Fetch several pages in parallel and print the <title> of each one as it completes.
$rollingCurl = new \RollingCurl\RollingCurl();
$rollingCurl
    ->get('http://yahoo.com')
    ->get('http://google.com')
    ->get('http://hotmail.com')
    ->get('http://msn.com')
    ->get('http://reddit.com')
    ->setCallback(function (\RollingCurl\Request $request, \RollingCurl\RollingCurl $rollingCurl) {
        // Lazy quantifier + "s" modifier: the title may span several lines, and a page
        // can contain more than one </title> (e.g. inline SVG), so stop at the first one.
        if (preg_match('#<title\b[^>]*>(.*?)</title>#is', $request->getResponseText(), $out)) {
            $title = trim($out[1]);
        } else {
            $title = '[No Title Tag Found]';
        }
        echo "Fetch complete for (" . $request->getUrl() . ") {$title} " . PHP_EOL;
    })
    ->execute();
Esempio n. 3
0
<?php

require __DIR__ . '/../src/RollingCurl/RollingCurl.php';
require __DIR__ . '/../src/RollingCurl/Request.php';
// Fetch Google result pages for "curl" in parallel and collect the result links.
$rollingCurl = new \RollingCurl\RollingCurl();
// Queue the result pages at offsets 0, 10, ..., 500.
for ($i = 0; $i <= 500; $i += 10) {
    // https://www.google.com/search?q=curl&start=10
    $rollingCurl->get('https://www.google.com/search?q=curl&start=' . $i);
}
// Maps the "q" parameter of each result link to the link's text.
$results = [];
$start = microtime(true);
echo "Fetching..." . PHP_EOL;
$rollingCurl->setCallback(function (\RollingCurl\Request $request, \RollingCurl\RollingCurl $rollingCurl) use (&$results) {
    if (preg_match_all('#<h3 class="r"><a href="([^"]+)">(.*)</a></h3>#iU', $request->getResponseText(), $out)) {
        foreach ($out[1] as $idx => $url) {
            // parse_url() yields null (no query string) or false (malformed URL) here;
            // neither may be handed to parse_str(), so skip such links.
            $query = parse_url($url, PHP_URL_QUERY);
            if (!is_string($query)) {
                continue;
            }
            parse_str($query, $params);
            if (isset($params['q'])) {
                $results[$params['q']] = strip_tags($out[2][$idx]);
            }
        }
    }
    echo "Fetch complete for (" . $request->getUrl() . ")" . PHP_EOL;
})->setSimultaneousLimit(10)->execute();
echo "...done in " . (microtime(true) - $start) . PHP_EOL;
echo "All results: " . PHP_EOL;
print_r($results);
Esempio n. 4
0
<?php

require __DIR__ . '/../src/RollingCurl/RollingCurl.php';
require __DIR__ . '/../src/RollingCurl/Request.php';
// using this library to do a single request is a bit silly, but it will work.
$rollingCurl = new \RollingCurl\RollingCurl();
$rollingCurl
    ->get('http://google.com')
    ->setCallback(function (\RollingCurl\Request $request, \RollingCurl\RollingCurl $rollingCurl) {
        // Lazy quantifier + "s" modifier: the title may span several lines, and a page
        // can contain more than one </title> (e.g. inline SVG), so stop at the first one.
        if (preg_match('#<title\b[^>]*>(.*?)</title>#is', $request->getResponseText(), $out)) {
            $title = trim($out[1]);
        } else {
            $title = '[No Title Tag Found]';
        }
        echo "Fetch complete for (" . $request->getUrl() . ") {$title} " . PHP_EOL;
    })
    ->execute();
Esempio n. 5
0
<?php

require __DIR__ . '/vendor/autoload.php';
// Downloads every mp3 of a soundboard.com board (name given as first CLI argument)
// into the "sounds" directory next to this script.
if (!isset($argv[1])) {
    echo 'No board name given (i.e. "Southpark_Sounds_3").';
    die(1);
}
$savePath = __DIR__ . '/sounds/';
$boardName = $argv[1];
// The board name comes from the command line; encode it so it cannot alter the query string.
$json = file_get_contents('http://www.soundboard.com/handler/gettrackjson.ashx?boardname=' . rawurlencode($boardName));
// file_get_contents() returns false on failure; treat that like an empty track list.
$entries = $json === false ? null : json_decode($json, true);
if (empty($entries) || !is_array($entries)) {
    echo sprintf('No sounds could be found for board "%s"', $boardName);
    die(1);
}
// file_put_contents() does not create directories, so make sure the target exists.
// The second is_dir() covers a directory created concurrently between check and mkdir().
if (!is_dir($savePath) && !mkdir($savePath, 0777, true) && !is_dir($savePath)) {
    echo sprintf('Could not create directory "%s"', $savePath);
    die(1);
}
$rollingCurl = new \RollingCurl\RollingCurl();
// Maps each mp3 URL to the title that becomes its file name.
$fileNameMap = [];
foreach ($entries as $entry) {
    // Skip malformed entries instead of raising undefined-index notices.
    if (!isset($entry['mp3'], $entry['title'])) {
        continue;
    }
    $rollingCurl->get($entry['mp3']);
    $fileNameMap[$entry['mp3']] = $entry['title'];
}
$rollingCurl->setCallback(function (\RollingCurl\Request $request, \RollingCurl\RollingCurl $rollingCurl) use ($fileNameMap, $savePath) {
    $fileName = $fileNameMap[$request->getUrl()];
    $info = $request->getResponseInfo();
    if (!isset($info['content_type']) || 'audio/mpeg' !== $info['content_type']) {
        echo sprintf('File "%s" is no audio/mpeg file.%s', $fileName, PHP_EOL);
        return;
    }
    // The title is remote data: strip path separators so it cannot escape $savePath.
    $fileNameExt = str_replace(['/', '\\', "\0"], '_', $fileName) . '.mp3';
    if (file_put_contents($savePath . $fileNameExt, $request->getResponseText()) === false) {
        echo sprintf('Could not save: %s%s', $fileNameExt, PHP_EOL);
        return;
    }
    echo sprintf('Downloaded: %s%s', $fileNameExt, PHP_EOL);
})->setSimultaneousLimit(3)->execute();