Called when the crawler has crawled the given url.
public hasBeenCrawled ( $url, $response, $foundOnUrl ) : void

| Parameter | Type |
| --- | --- |
| $url | |
| $response | Psr\Http\Message\ResponseInterface \| null |
| $foundOnUrl | |
| return | void |
/**
 * Crawl the given url.
 *
 * The url is skipped when the crawl profile rejects it or when it has
 * already been crawled. Otherwise the observer is notified before and after
 * the request, the url is recorded as crawled, and the links in the response
 * body are followed only when the url is on the same host as the base url.
 *
 * @param \Spatie\Crawler\Url $url
 */
protected function crawlUrl(Url $url)
{
    if (!$this->crawlProfile->shouldCrawl($url)) {
        return;
    }

    if ($this->hasAlreadyCrawled($url)) {
        return;
    }

    $this->crawlObserver->willCrawl($url);

    try {
        $response = $this->client->request('GET', (string) $url);
    } catch (RequestException $exception) {
        // The exception may or may not carry a response; a missing one is
        // handled by the falsy check further down.
        $response = $exception->getResponse();
    }

    $this->crawlObserver->hasBeenCrawled($url, $response);

    $this->crawledUrls->push($url);

    if (!$response) {
        return;
    }

    if ($url->host === $this->baseUrl->host) {
        // Cast the body to a string rather than calling getContents():
        // getContents() only returns what is left after the current stream
        // position, so it yields an empty string when the observer above has
        // already read the body. The string cast reads a seekable stream
        // from its beginning.
        $this->crawlAllLinks((string) $response->getBody());
    }
}
/**
 * Report the outcome of a request to the crawl observer.
 *
 * The pending url stored in the crawl queue at the given index is looked up
 * so that both the url and the url it was found on can be passed along with
 * the response.
 *
 * @param ResponseInterface|null $response
 * @param int $index
 */
protected function handleResponse($response, int $index)
{
    $pendingUrl = $this->crawlQueue->getPendingUrlAtIndex($index);

    $this->crawlObserver->hasBeenCrawled(
        $pendingUrl->url,
        $response,
        $pendingUrl->foundOnUrl
    );
}