/**
 * Pushes three links into a validator-equipped QueueManager and checks what
 * can be popped back out.
 *
 * The first pop must yield the "/yolo" link; the second pop must return
 * false. Presumably YoloValidator only lets URLs containing "yolo" through -
 * confirm against the validator class, which is defined elsewhere.
 */
public function testPushAndPopWithAValidator()
 {
     $queueManager = new QueueManager(new ArrayQueue(), new ArrayStore());
     $queueManager->addValidator(new YoloValidator());

     // Push order matters for the assertions below, so keep it fixed.
     $hrefs = [
         'http://codebuster.de/blub/test',
         'http://codebuster.de/yolo',
         'http://codebuster.de/test',
     ];
     foreach ($hrefs as $href) {
         $queueManager->push($this->newLink($href, 'codebuster.de'));
     }

     $firstPopped = $queueManager->pop();
     $this->assertEquals('http://codebuster.de/yolo', $firstPopped->getLinkHref());

     // Nothing else is expected to be available after the single accepted link.
     $this->assertFalse($queueManager->pop());
 }
 /**
  * Builds a DomainCrawler whose downloader uses a mocked Guzzle client.
  *
  * The mocked client answers get() with a mocked FutureResponse whose
  * getBody() returns the given body and whose getStatusCode() returns 200.
  * The crawler's queue manager is backed by an ArrayQueue and an ArrayStore
  * and has a FuckThatLink validator attached.
  *
  * @param Body $body body the mocked response returns from getBody()
  *
  * @return DomainCrawler crawler wired to the mocked downloader
  */
 private function createCrawler(Body $body)
 {
     // Fake HTTP response serving the supplied body with status 200.
     $fakeResponse = \Mockery::mock('\\GuzzleHttp\\Message\\FutureResponse');
     $fakeResponse->shouldReceive('getBody')->andReturn($body);
     $fakeResponse->shouldReceive('getStatusCode')->andReturn(200);

     // Fake HTTP client handing back the fake response from get().
     $fakeClient = \Mockery::mock('\\GuzzleHttp\\Client');
     $fakeClient->shouldReceive('get')->andReturn($fakeResponse);

     $pageDownloader = new PageDownloader();
     $pageDownloader->setClient($fakeClient);

     $queueManager = new QueueManager(new ArrayQueue(), new ArrayStore());
     $queueManager->addValidator(new FuckThatLink());

     $domainCrawler = new DomainCrawler($queueManager, new LinkFinder());
     $domainCrawler->setDownloader($pageDownloader);

     return $domainCrawler;
 }
Beispiel #3
0
<?php

require 'bootstrap/autoload.php';
use WP\Crawler\LinkFinder;
use WP\Crawler\DomainCrawler;
use WP\Crawler\Queue\QueueManager;
use WP\Crawler\Queue\ArrayQueue;
use WP\Crawler\Queue\Store\ArrayStore;
use WP\Crawler\Queue\Validator\ValidFileExtension;
use WP\Crawler\Queue\Validator\NoPseudoUrl;
use WP\Crawler\Event\LogSubscriber;
use WP\Crawler\Event\BrokenLinkFinderSubscriber;
use Symfony\Component\EventDispatcher\EventDispatcher;
// Command-line entry point.
//
// Usage: php <script> {domain} {time to wait}
//   {domain}       required; handed to DomainCrawler::crawl().
//   {time to wait} optional; handed unchanged to DomainCrawler::setWaitTime().
//
// Exits with status 1 (after printing a message) when the domain is missing
// or empty, or when the wait time is not a non-negative number, so that shell
// callers can detect the failure.
$usage = "\n" . "Usage " . $argv[0] . ' {domain} {time to wait}' . "\n";

if (!isset($argv[1]) || $argv[1] === '') {
    echo $usage;
    exit(1);
}

if (isset($argv[2]) && (!is_numeric($argv[2]) || $argv[2] < 0)) {
    // Reject bad values up front instead of letting them reach the crawler.
    echo "\n" . 'Invalid time to wait: ' . $argv[2] . $usage;
    exit(1);
}

$domain = $argv[1];

$manager = new QueueManager(new ArrayQueue(), new ArrayStore());
$manager->addValidator(new NoPseudoUrl())->addValidator(new ValidFileExtension());

$crawler = new DomainCrawler($manager, new LinkFinder());
if (isset($argv[2])) {
    // Passed through as given; the value was validated as numeric above.
    $crawler->setWaitTime($argv[2]);
}

$dispatcher = $crawler->getEventDispatcher();
$dispatcher->addSubscriber(new LogSubscriber());
// Optional second subscriber, currently switched off:
/* $dispatcher->addSubscriber(new BrokenLinkFinderSubscriber); */

$crawler->crawl($domain);