/**
 * Pushes three links through a QueueManager that has a YoloValidator
 * attached and checks what can be popped afterwards.
 *
 * The assertions expect exactly one link (the ".../yolo" one) to come back
 * out, followed by `false` once nothing is left to pop.
 * NOTE(review): the filtering rule itself lives in YoloValidator, which is
 * not visible here — presumably it only accepts URLs containing "yolo".
 */
public function testPushAndPopWithAValidator()
{
    $queueManager = new QueueManager(new ArrayQueue(), new ArrayStore());
    $queueManager->addValidator(new YoloValidator());

    // Links are pushed in this exact order; only the middle one is expected back.
    $urls = [
        'http://codebuster.de/blub/test',
        'http://codebuster.de/yolo',
        'http://codebuster.de/test',
    ];
    foreach ($urls as $url) {
        $queueManager->push($this->newLink($url, 'codebuster.de'));
    }

    $firstPopped = $queueManager->pop();
    $this->assertEquals('http://codebuster.de/yolo', $firstPopped->getLinkHref());

    // A second pop must report that nothing else is queued.
    $this->assertFalse($queueManager->pop());
}
/**
 * Builds a DomainCrawler whose HTTP layer is fully mocked.
 *
 * Every `get` call on the mocked Guzzle client returns the same mocked
 * response, which reports status code 200 and hands back the given body.
 * The crawler's queue manager is configured with a FuckThatLink validator.
 *
 * @param Body $body Body returned by the mocked response's getBody().
 *
 * @return DomainCrawler Crawler wired to the mocked downloader.
 */
private function createCrawler(Body $body)
{
    // Queue side: in-memory queue and store, plus the validator used by these tests.
    $queueManager = new QueueManager(new ArrayQueue(), new ArrayStore());
    $queueManager->addValidator(new FuckThatLink());

    // HTTP side: a canned response served for any GET request.
    $fakeResponse = \Mockery::mock('\\GuzzleHttp\\Message\\FutureResponse');
    $fakeResponse
        ->shouldReceive('getBody')
        ->andReturn($body)
        ->shouldReceive('getStatusCode')
        ->andReturn(200);

    $fakeClient = \Mockery::mock('\\GuzzleHttp\\Client');
    $fakeClient->shouldReceive('get')->andReturn($fakeResponse);

    $pageDownloader = new PageDownloader();
    $pageDownloader->setClient($fakeClient);

    $domainCrawler = new DomainCrawler($queueManager, new LinkFinder());
    $domainCrawler->setDownloader($pageDownloader);

    return $domainCrawler;
}
<?php

/*
 * Command-line entry point for the domain crawler.
 *
 * Arguments:
 *   1. {domain}        the domain handed to DomainCrawler::crawl() (required)
 *   2. {time to wait}  forwarded unchanged to DomainCrawler::setWaitTime() (optional)
 *
 * Without a domain argument a short usage line is printed instead.
 */

require 'bootstrap/autoload.php';

use WP\Crawler\LinkFinder;
use WP\Crawler\DomainCrawler;
use WP\Crawler\Queue\QueueManager;
use WP\Crawler\Queue\ArrayQueue;
use WP\Crawler\Queue\Store\ArrayStore;
use WP\Crawler\Queue\Validator\ValidFileExtension;
use WP\Crawler\Queue\Validator\NoPseudoUrl;
use WP\Crawler\Event\LogSubscriber;
use WP\Crawler\Event\BrokenLinkFinderSubscriber;
use Symfony\Component\EventDispatcher\EventDispatcher;

// Guard clause: nothing to crawl without a domain, so show the usage and stop.
if (!isset($argv[1])) {
    echo "\n", "Usage " . $argv[0] . ' {domain} {time to wait}' . "\n";

    return;
}

$targetDomain = $argv[1];

// In-memory queue and store, with two validators registered on the manager.
$queueManager = new QueueManager(new ArrayQueue(), new ArrayStore());
$queueManager
    ->addValidator(new NoPseudoUrl())
    ->addValidator(new ValidFileExtension());

$domainCrawler = new DomainCrawler($queueManager, new LinkFinder());

// The optional second argument is passed through as-is (still a string).
if (isset($argv[2])) {
    $domainCrawler->setWaitTime($argv[2]);
}

$eventDispatcher = $domainCrawler->getEventDispatcher();
$eventDispatcher->addSubscriber(new LogSubscriber());
// Opt-in, currently disabled:
// $eventDispatcher->addSubscriber(new BrokenLinkFinderSubscriber);

$domainCrawler->crawl($targetDomain);