/**
 * Checks the given page against this object's blacklist.
 *
 * @param mixed $page Candidate to check; anything that is not a Page is rejected.
 *
 * @return mixed The result of the blacklist's contains() call for the page
 *               (presumably a boolean; confirm against the blacklist type).
 *
 * @throws \InvalidArgumentException When $page is not a Page instance.
 */
public function isSatisfiedBy($page)
{
    if ($page instanceof Page) {
        return $this->blacklist->contains($page);
    }

    throw new \InvalidArgumentException('Expected Scanner\\Entity\\Page');
}
/**
 * Collects a Page for every `a` element matched by this object's crawler.
 *
 * Each matched node is wrapped in a Link together with this object's URI;
 * the URI reported by that Link is used to create the new Page, which is
 * handed this object's client before being added to the result.
 *
 * @return PagesCollection New Collection with all found links
 */
public function findLinkedPages()
{
    $linkedPages = new PagesCollection();

    foreach ($this->getCrawler()->filter('a') as $anchor) {
        $target = new Link($anchor, $this->uri);

        $linkedPage = new Page($target->getUri());
        $linkedPage->setClient($this->client);

        $linkedPages->add($linkedPage);
    }

    return $linkedPages;
}
/**
 * Walks the link graph starting from a copy of the configured start pages.
 *
 * Pages are taken off the work queue one at a time. For a page that has not
 * been handled yet, every linked page that is neither handled already nor
 * rejected by the page specification is queued. The page taken off the queue
 * is then recorded as handled, whether or not its links were followed.
 *
 * @return PagesCollection All spidered pages
 */
public function spider()
{
    $queue = clone $this->startpages;
    $visited = new PagesCollection();
    $specification = $this->getPageSpecification();

    while (count($queue) > 0) {
        $page = $queue->pop();
        $alreadyVisited = $visited->contains($page);

        if (!$alreadyVisited) {
            foreach ($page->findLinkedPages() as $candidate) {
                // Skip pages that were handled before or that the specification rejects.
                if ($visited->contains($candidate) || !$specification->isSatisfiedBy($candidate)) {
                    continue;
                }

                $queue->add($candidate);
            }
        }

        $visited->add($page);
    }

    return $visited;
}
/**
 * Two distinct Page instances that share a URI must be treated as the same
 * entry by PagesCollection::contains().
 *
 * Only $page1 is placed in the collection. If $page2 were inserted as well,
 * contains($page2) would succeed even under identity comparison and the
 * test would prove nothing about URI-based equality.
 *
 * @test
 */
public function UriDeterminesEquality()
{
    $page1 = new Page('http://example/foo');
    $page2 = new Page('http://example/foo');

    $collection = new PagesCollection();
    $collection->add($page1);

    $this->assertTrue(
        $collection->contains($page2),
        "When a Page with the same URI already exists in the Collection, contains() should return true"
    );
}