Exemple #1
0
 /**
  * Build a Link object for one crawled anchor.
  *
  * The 'href' entry of $linkData is handed to AbsoluteUrlDeriver together
  * with the origin's href; the deriver's result, cast to string, becomes the
  * new Link's href. (Presumably this resolves a relative href against the
  * page it was found on -- confirm against the deriver's documentation.)
  *
  * @param array  $linkData Must contain the keys 'href' and 'text'.
  * @param Domain $domain   Stored on the new link via setOriginDomain().
  * @param Link   $origin   Link the anchor was found on; stored via setOrigin().
  *
  * @return Link The newly created, fully populated link.
  */
 public function getLink(array $linkData, Domain $domain, Link $origin)
 {
     $resolver = new \webignition\AbsoluteUrlDeriver\AbsoluteUrlDeriver(
         $linkData['href'],
         $origin->getLinkHref()
     );
     $absoluteHref = (string) $resolver->getAbsoluteUrl();

     $created = new Link($absoluteHref);
     $created->setOriginDomain($domain);
     $created->setLinkText($linkData['text']);
     $created->setOrigin($origin);

     return $created;
 }
 /**
  * Accept a link only when its href does not match $this->evil_pattern.
  *
  * preg_match() yields 1 on a match, 0 on no match and false on failure;
  * only the explicit 0 counts as valid, so a regex failure also rejects
  * the link.
  *
  * @param Link $link Link whose href is tested.
  *
  * @return bool True when the pattern produced zero matches.
  */
 public function isValid(Link $link)
 {
     $matchCount = preg_match($this->evil_pattern, $link->getLinkHref());

     return 0 === $matchCount;
 }
Exemple #3
0
 /**
  * getHash() is expected to equal the SHA-1 digest of the href the Link
  * was constructed with.
  */
 public function testGetHash()
 {
     // Arrange
     $href = 'yolo';
     $expectedHash = sha1($href);
     $subject = new Link($href);

     // Act
     $actualHash = $subject->getHash();

     // Assert
     $this->assertEquals($expectedHash, $actualHash);
 }
 /**
  * Accept a link when its href contains the substring 'yolo' anywhere.
  *
  * @param Link $link Link whose href is searched.
  *
  * @return bool True when 'yolo' occurs in the href (including at offset 0).
  */
 public function isValid(Link $link)
 {
     $offset = strpos($link->getLinkHref(), 'yolo');

     // strpos() returns int 0 for a match at the start, so compare strictly
     // against false rather than relying on truthiness.
     return false !== $offset;
 }
 /**
  * Accept a link only when its href is exactly 'http://dmoz.com/'.
  *
  * @param Link $link Link whose href is compared.
  *
  * @return bool True on a strict (type and value) match.
  */
 public function isValid(Link $link)
 {
     $acceptedHref = 'http://dmoz.com/';

     return $acceptedHref === $link->getLinkHref();
 }
 /**
  * Extract the links from an origin's HTML, queue each of them, and
  * announce the whole batch.
  *
  * Each raw entry returned by the finder is turned into a Link through the
  * link factory and immediately passed to pushLinkToQueue(). After the loop
  * a single FoundLinksEvent carrying every created link (an empty array
  * when none were found) is dispatched under CrawlerEvents::onFoundLinks.
  *
  * @param Link  $origin  Link whose HTML is scanned; also passed to the
  *                       factory as the origin of every created link.
  * @param mixed $process Taken by reference and forwarded unchanged to
  *                       FoundLinksEvent; this method itself neither reads
  *                       nor writes it.
  *
  * @return void
  */
 private function findLinksAndAddToQueue(Link $origin, &$process)
 {
     $discovered = array();

     foreach ($this->finder->getLinks($origin->getHtml()) as $rawLink) {
         $candidate = $this->linkFactory->getLink($rawLink, $this->domain, $origin);
         $this->pushLinkToQueue($candidate);
         $discovered[] = $candidate;
     }

     $event = new FoundLinksEvent($discovered, $process);
     $this->dispatcher->dispatch(CrawlerEvents::onFoundLinks, $event);
 }