hasBeenCrawled() 공개 메소드

Called when the crawler has crawled the given URL.
public hasBeenCrawled ( Url $url, Psr\Http\Message\ResponseInterface | null $response, Url $foundOnUrl = null ) : void
$url Url
$response Psr\Http\Message\ResponseInterface | null
$foundOnUrl Url
리턴 void
예제 #1
0
파일: Crawler.php 프로젝트: spatie/crawler
 /**
  * Crawl the given url.
  *
  * Does nothing when the crawl profile rejects the url or when it has already
  * been crawled. Otherwise the observer is notified before the request, a GET
  * request is sent, the observer is notified with the outcome, and the url is
  * recorded as crawled. When a response is available and the url is on the
  * same host as the base url, the response body is handed to crawlAllLinks().
  *
  * @param \Spatie\Crawler\Url $url
  */
 protected function crawlUrl(Url $url)
 {
     if (!$this->crawlProfile->shouldCrawl($url)) {
         return;
     }
     if ($this->hasAlreadyCrawled($url)) {
         return;
     }
     $this->crawlObserver->willCrawl($url);
     try {
         $response = $this->client->request('GET', (string) $url);
     } catch (RequestException $exception) {
         // The exception may not carry a response; the null case is handled below.
         $response = $exception->getResponse();
     }
     $this->crawlObserver->hasBeenCrawled($url, $response);
     $this->crawledUrls->push($url);
     if (!$response) {
         return;
     }
     if ($url->host !== $this->baseUrl->host) {
         return;
     }
     $body = $response->getBody();
     // The observer received this response above and may have read the body.
     // getContents() only returns what is left from the current position, so
     // rewind first (when the stream allows it) to hand over the full body.
     if ($body->isSeekable()) {
         $body->rewind();
     }
     $this->crawlAllLinks($body->getContents());
 }
예제 #2
0
파일: Crawler.php 프로젝트: spatie/crawler
 /**
  * Notify the crawl observer about the response received for the pending
  * url stored at the given index of the crawl queue.
  *
  * @param ResponseInterface|null $response
  * @param int $index
  */
 protected function handleResponse($response, int $index)
 {
     $pending = $this->crawlQueue->getPendingUrlAtIndex($index);

     $this->crawlObserver->hasBeenCrawled(
         $pending->url,
         $response,
         $pending->foundOnUrl
     );
 }