/**
 * Requests every collected link in parallel and records the ones that do
 * not answer with HTTP 200 in $this->brokenLinks.
 *
 * Each link gets its status code set via setStatusCode() as its response
 * arrives, and a "checking -> <href>" line is echoed per response.
 *
 * @param FilterCrawlerProcessEvent $event Not read by this method.
 */
public function onFinish(FilterCrawlerProcessEvent $event)
{
    $rollingCurl = new \RollingCurl\RollingCurl();
    $rollingCurl->setSimultaneousLimit(50);

    // kicks out javascript:void(0) and # urls
    $validator = new NoPseudoUrl();

    // Queue one GET per valid link. Links are objects, so they are passed as
    // plain handles: iterating by reference would turn every element of
    // $this->links into a PHP reference and leave $link dangling afterwards.
    foreach ($this->links as $link) {
        if (!$validator->isValid($link)) {
            continue;
        }

        // The link travels with the request as element 0 of its options so
        // the callback can find it again.
        // NOTE(review): confirm RollingCurl does not forward this key to cURL.
        $rollingCurl->get($link->getLinkHref(), null, array($link));
    }

    $brokenLinks = array();

    $rollingCurl->setCallback(
        function (\RollingCurl\Request $request, \RollingCurl\RollingCurl $rollingCurl) use (&$brokenLinks) {
            $link = $request->getOptions()[0];
            echo "checking -> " . $link->getLinkHref() . "\n";

            $link->setStatusCode($request->getResponseInfo()['http_code']);

            // Loose comparison kept on purpose: the return type of
            // getStatusCode() is not visible from here.
            if ($link->getStatusCode() != 200) {
                $brokenLinks[] = $link;
            }
        }
    );

    $rollingCurl->execute();

    $this->brokenLinks = $brokenLinks;
}
<?php

require __DIR__ . '/../src/RollingCurl/RollingCurl.php';
require __DIR__ . '/../src/RollingCurl/Request.php';

// Fetches a handful of well-known sites and prints the <title> of each
// response as it completes.

$sites = [
    'http://yahoo.com',
    'http://google.com',
    'http://hotmail.com',
    'http://msn.com',
    'http://reddit.com',
];

$rollingCurl = new \RollingCurl\RollingCurl();
foreach ($sites as $site) {
    $rollingCurl->get($site);
}

// Invoked once per finished request.
$printTitle = function (\RollingCurl\Request $request, \RollingCurl\RollingCurl $rollingCurl) {
    $title = preg_match("#<title>(.*)</title>#i", $request->getResponseText(), $out)
        ? $out[1]
        : '[No Title Tag Found]';

    echo "Fetch complete for (" . $request->getUrl() . ") {$title} " . PHP_EOL;
};

$rollingCurl->setCallback($printTitle);
$rollingCurl->execute();
<?php

require __DIR__ . '/../src/RollingCurl/RollingCurl.php';
require __DIR__ . '/../src/RollingCurl/Request.php';

// Fetches the Google result pages for "curl" (start=0..500 in steps of 10),
// ten requests at a time, collects the result links with their titles and
// prints them together with the elapsed time.

$rollingCurl = new \RollingCurl\RollingCurl();

for ($i = 0; $i <= 500; $i += 10) {
    // e.g. https://www.google.com/search?q=curl&start=10
    $rollingCurl->get('https://www.google.com/search?q=curl&start=' . $i);
}

$results = array();
$start = microtime(true);

echo "Fetching..." . PHP_EOL;

$rollingCurl
    ->setCallback(function (\RollingCurl\Request $request, \RollingCurl\RollingCurl $rollingCurl) use (&$results) {
        // $out[1] holds the hrefs, $out[2] the matching anchor contents.
        if (preg_match_all('#<h3 class="r"><a href="([^"]+)">(.*)</a></h3>#iU', $request->getResponseText(), $out)) {
            foreach ($out[1] as $idx => $url) {
                // parse_url() yields null (no query string) or false
                // (malformed URL); passing that on to parse_str() is
                // deprecated since PHP 8.1, and such a link has no "q"
                // parameter to collect anyway.
                $query = parse_url($url, PHP_URL_QUERY);
                if (!is_string($query)) {
                    continue;
                }

                parse_str($query, $params);
                if (isset($params['q'])) {
                    // Keyed by target URL, so a repeated link overwrites
                    // its earlier entry.
                    $results[$params['q']] = strip_tags($out[2][$idx]);
                }
            }
        }

        echo "Fetch complete for (" . $request->getUrl() . ")" . PHP_EOL;
    })
    ->setSimultaneousLimit(10)
    ->execute();

echo "...done in " . (microtime(true) - $start) . PHP_EOL;
echo "All results: " . PHP_EOL;
print_r($results);
<?php

require __DIR__ . '/../src/RollingCurl/RollingCurl.php';
require __DIR__ . '/../src/RollingCurl/Request.php';

// Minimal example: a single request. The library is overkill for this,
// but it works just the same.

// Prints the page title (or a placeholder) for the finished request.
$reportTitle = function (\RollingCurl\Request $request, \RollingCurl\RollingCurl $rollingCurl) {
    $found = preg_match("#<title>(.*)</title>#i", $request->getResponseText(), $out);
    $title = $found ? $out[1] : '[No Title Tag Found]';

    echo "Fetch complete for (" . $request->getUrl() . ") {$title} " . PHP_EOL;
};

$rollingCurl = new \RollingCurl\RollingCurl();
$rollingCurl->get('http://google.com');
$rollingCurl->setCallback($reportTitle);
$rollingCurl->execute();
<?php

require __DIR__ . '/vendor/autoload.php';

// Downloads every track of a soundboard.com board into ./sounds/, three
// requests at a time. The board name is taken from the first CLI argument.

if (!isset($argv[1])) {
    echo 'No board name given (i.e. "Southpark_Sounds_3").';
    die(1);
}

$savePath = __DIR__ . '/sounds/';
$boardName = $argv[1];

// Make sure the target directory exists before anything is downloaded;
// file_put_contents() does not create missing directories.
if (!is_dir($savePath) && !mkdir($savePath, 0777, true) && !is_dir($savePath)) {
    echo sprintf('Could not create directory "%s"', $savePath);
    die(1);
}

// The board name comes from the command line, so encode it before it goes
// into the query string.
$json = file_get_contents(
    'http://www.soundboard.com/handler/gettrackjson.ashx?boardname=' . rawurlencode($boardName)
);

// A failed download (false) is handled like an empty track list.
$entries = $json === false ? null : json_decode($json, true);

if (empty($entries) || !is_array($entries)) {
    echo sprintf('No sounds could be found for board "%s"', $boardName);
    die(1);
}

$rollingCurl = new \RollingCurl\RollingCurl();

// Maps each mp3 URL to the file name (without extension) it is saved under.
$fileNameMap = [];

foreach ($entries as $entry) {
    // Skip malformed entries instead of failing on a missing key.
    if (!isset($entry['mp3'], $entry['title'])) {
        continue;
    }

    $rollingCurl->get($entry['mp3']);

    // The title is remote data: replace path separators and NUL bytes so it
    // cannot point outside of $savePath.
    $fileNameMap[$entry['mp3']] = str_replace(['/', '\\', "\0"], '_', (string) $entry['title']);
}

$rollingCurl->setCallback(function (\RollingCurl\Request $request, \RollingCurl\RollingCurl $rollingCurl) use ($fileNameMap, $savePath) {
    $fileName = $fileNameMap[$request->getUrl()];
    $info = $request->getResponseInfo();

    // Only responses reported as audio/mpeg are written to disk.
    if (!isset($info['content_type']) || 'audio/mpeg' !== $info['content_type']) {
        echo sprintf('File "%s" is no audio/mpeg file.%s', $fileName, PHP_EOL);
        return;
    }

    $fileNameExt = $fileName . '.mp3';

    // Report a failed write instead of claiming success.
    if (file_put_contents($savePath . $fileNameExt, $request->getResponseText()) === false) {
        echo sprintf('Could not save: %s%s', $fileNameExt, PHP_EOL);
        return;
    }

    echo sprintf('Downloaded: %s%s', $fileNameExt, PHP_EOL);
})->setSimultaneousLimit(3)->execute();