/**
 * Creates a Link from raw link data, resolving its href against $origin.
 *
 * The 'href' entry of $linkData is handed to AbsoluteUrlDeriver together with
 * the href of $origin; the derived absolute URL, cast to string, becomes the
 * href of the new Link.
 *
 * @param array  $linkData Must provide the keys 'href' and 'text'.
 * @param Domain $domain   Stored on the new link as its origin domain.
 * @param Link   $origin   Supplies the base href and is stored as the new link's origin.
 *
 * @return Link The newly built link.
 */
public function getLink(array $linkData, Domain $domain, Link $origin)
{
    $urlDeriver = new \webignition\AbsoluteUrlDeriver\AbsoluteUrlDeriver(
        $linkData['href'],
        $origin->getLinkHref()
    );
    $absoluteHref = (string) $urlDeriver->getAbsoluteUrl();

    $derivedLink = new Link($absoluteHref);
    $derivedLink->setOriginDomain($domain);
    $derivedLink->setLinkText($linkData['text']);
    $derivedLink->setOrigin($origin);

    return $derivedLink;
}
/**
 * Accepts a link only when its href yields no match for $this->evil_pattern.
 *
 * @param Link $link Link whose href is tested.
 *
 * @return bool True when preg_match() reports exactly 0 matches. False when
 *              the pattern matches, and also when preg_match() itself returns
 *              false (its error result), because the comparison is strict.
 */
public function isValid(Link $link)
{
    $matchCount = preg_match($this->evil_pattern, $link->getLinkHref());

    // Strict comparison on purpose: a false (error) result must not count as "no match".
    return 0 === $matchCount;
}
/**
 * Accepts a link only when its href is exactly 'http://dmoz.com/'.
 *
 * @param Link $link Link whose href is compared.
 *
 * @return bool True on a strict (type and value) match, false otherwise.
 */
public function isValid(Link $link)
{
    $href = $link->getLinkHref();

    return 'http://dmoz.com/' === $href;
}
/**
 * Accepts a link only when its href contains the substring 'yolo'.
 *
 * @param Link $link Link whose href is searched.
 *
 * @return bool True when 'yolo' occurs anywhere in the href (including at
 *              offset 0), false when it does not occur.
 */
public function isValid(Link $link)
{
    $offset = strpos($link->getLinkHref(), 'yolo');

    // strpos() returns false only when the needle is absent; offset 0 is a hit.
    if (false === $offset) {
        return false;
    }

    return true;
}
/**
 * Downloads the page behind $link and announces the outcome via the dispatcher.
 *
 * The href of $link is passed to $this->downloader together with
 * $this->download_tries. When the downloader throws a DownloadException, the
 * link's status code is set to 69 and the response stays null. In either case
 * a FilterPageResponseEvent carrying the link, the response and $process is
 * dispatched under CrawlerEvents::onPageDownload before returning.
 *
 * @param Link  $link    Link to download.
 * @param mixed $process Taken by reference and handed on to the event.
 *
 * @return mixed The downloader's response, or null when a DownloadException was caught.
 */
private function downloadPage(Link $link, &$process)
{
    try {
        $response = $this->downloader->download($link->getLinkHref(), $this->download_tries);
    } catch (DownloadException $e) {
        // Failed download: no response to report, flag the link instead.
        $response = null;
        $link->setStatusCode(69);
    }

    $event = new FilterPageResponseEvent($link, $response, $process);
    $this->dispatcher->dispatch(CrawlerEvents::onPageDownload, $event);

    return $response;
}