/**
 * Creates a Link for one entry of extracted link data.
 *
 * The entry's 'href' is handed to the AbsoluteUrlDeriver together with the
 * href of the origin link, and the derived absolute URL (cast to string)
 * becomes the new link's href.
 *
 * @param array  $linkData Link data; the 'href' and 'text' keys are read.
 * @param Domain $domain   Stored on the new link as its origin domain.
 * @param Link   $origin   The link on which this entry was found.
 *
 * @return Link
 */
public function getLink(array $linkData, Domain $domain, Link $origin)
{
    $urlDeriver = new \webignition\AbsoluteUrlDeriver\AbsoluteUrlDeriver(
        $linkData['href'],
        $origin->getLinkHref()
    );
    $absoluteUrl = (string) $urlDeriver->getAbsoluteUrl();

    $created = new Link($absoluteUrl);
    $created->setOriginDomain($domain);
    $created->setLinkText($linkData['text']);
    $created->setOrigin($origin);

    return $created;
}
/**
 * Accepts a link only when its href does not match the configured pattern.
 *
 * preg_match() returns 0 for "no match"; a match (1) or a PCRE error
 * (false) both make the link invalid.
 *
 * @param Link $link The link whose href is checked.
 *
 * @return bool True when the pattern does not match the href.
 */
public function isValid(Link $link)
{
    $matchCount = preg_match($this->evil_pattern, $link->getLinkHref());

    return 0 === $matchCount;
}
/**
 * Checks that getHash() returns the SHA-1 digest of the href the link was
 * constructed with.
 *
 * assertSame() is used so the comparison is strict on both type and value;
 * a loose comparison could treat two different numeric-looking digest
 * strings as equal.
 */
public function testGetHash()
{
    $href = 'yolo';
    $link = new Link($href);

    $this->assertSame(sha1($href), $link->getHash());
}
/**
 * Accepts a link only when its href contains the substring 'yolo'.
 *
 * @param Link $link The link whose href is checked.
 *
 * @return bool True when 'yolo' occurs anywhere in the href.
 */
public function isValid(Link $link)
{
    // strpos() yields an offset (possibly 0) on success and false otherwise,
    // so the result must be compared strictly against false.
    $position = strpos($link->getLinkHref(), 'yolo');

    return false !== $position;
}
/**
 * Accepts a link only when its href is exactly 'http://dmoz.com/'.
 *
 * @param Link $link The link whose href is checked.
 *
 * @return bool True when the href is strictly identical to the expected URL.
 */
public function isValid(Link $link)
{
    $href = $link->getLinkHref();

    return 'http://dmoz.com/' === $href;
}
/**
 * Extracts the links from the origin's HTML, queues each of them and
 * announces the full set through an onFoundLinks event.
 *
 * Each entry returned by the finder is turned into a Link by the link
 * factory (using this crawler's domain and the given origin) and pushed to
 * the queue straight away; the event is dispatched once after the loop.
 *
 * @param Link  $origin  The link whose HTML is scanned.
 * @param mixed $process Passed by reference and forwarded to FoundLinksEvent.
 *
 * @return void
 */
private function findLinksAndAddToQueue(Link $origin, &$process)
{
    $found = $this->finder->getLinks($origin->getHtml());

    $collected = array();
    foreach ($found as $data) {
        $candidate = $this->linkFactory->getLink($data, $this->domain, $origin);
        $collected[] = $candidate;
        $this->pushLinkToQueue($candidate);
    }

    $event = new FoundLinksEvent($collected, $process);
    $this->dispatcher->dispatch(CrawlerEvents::onFoundLinks, $event);
}