/**
 * Checks every collected link with a parallel GET request and records the broken ones.
 *
 * Each entry of $this->links that passes the NoPseudoUrl validator is queued on a
 * RollingCurl instance limited to 50 simultaneous requests. When a response arrives,
 * its HTTP status code is stored on the link; every link whose status code is not 200
 * is collected and finally assigned to $this->brokenLinks.
 *
 * Side effects: performs HTTP requests and echoes one "checking -> <href>" line per
 * completed request.
 *
 * @param FilterCrawlerProcessEvent $event Triggering event; it is not read by this method.
 */
public function onFinish(FilterCrawlerProcessEvent $event)
 {
     $rollingCurl = new \RollingCurl\RollingCurl();
     $rollingCurl->setSimultaneousLimit(50);

     // rejects pseudo URLs such as javascript:void(0)
     $validator = new NoPseudoUrl();

     // Iterate by value: links are objects, so the handle already points at the
     // shared instance. The previous by-reference loop left $link bound to the
     // last element of $this->links after the loop ended.
     foreach ($this->links as $link) {
         if (!$validator->isValid($link)) {
             continue;
         }

         // The link is carried in the request's options array so the callback
         // can recover it from the finished request.
         $rollingCurl->get($link->getLinkHref(), null, array($link));
     }

     $brokenLinks = array();
     $rollingCurl->setCallback(function (\RollingCurl\Request $request, \RollingCurl\RollingCurl $rollingCurl) use (&$brokenLinks) {
         $link = $request->getOptions()[0];
         echo "checking -> " . $link->getLinkHref() . "\n";
         $link->setStatusCode($request->getResponseInfo()['http_code']);

         // Loose comparison kept on purpose: the type returned by getStatusCode()
         // is not visible from here (NOTE(review): switch to !== once confirmed int).
         if ($link->getStatusCode() != 200) {
             $brokenLinks[] = $link;
         }
     });

     $rollingCurl->execute();
     $this->brokenLinks = $brokenLinks;
 }
Esempio n. 2
0
 /**
  * Verifies that NoPseudoUrl accepts http and https links (with a fragment)
  * and rejects javascript: and callto: pseudo URLs.
  */
 public function testIsValid()
 {
     $noPseudoUrl = new NoPseudoUrl();

     // Ordinary web addresses must be accepted, fragment included.
     $acceptedHrefs = array(
         'http://www.google.com/js#test',
         'https://www.google.com/js#test',
     );
     foreach ($acceptedHrefs as $acceptedHref) {
         $this->assertTrue($noPseudoUrl->isValid(new Link($acceptedHref)));
     }

     // Pseudo URLs must be refused.
     $rejectedHrefs = array(
         'javascript:void(0)',
         'callto:213129',
     );
     foreach ($rejectedHrefs as $rejectedHref) {
         $this->assertFalse($noPseudoUrl->isValid(new Link($rejectedHref)));
     }
 }