/**
 * Tell whether the given URL is already tracked by this queue.
 *
 * A bare Url is first wrapped with CrawlUrl::create(); the result is then
 * looked up in the pending collection and, failing that, in the processed one.
 *
 * @param CrawlUrl|Url $crawlUrl
 *
 * @return bool True when the URL is found in either collection.
 */
public function has($crawlUrl): bool
{
    $candidate = $crawlUrl instanceof Url
        ? CrawlUrl::create($crawlUrl)
        : $crawlUrl;

    // Short-circuits: the processed collection is only consulted when the
    // pending collection does not contain the URL.
    return $this->contains($this->pending, $candidate)
        || $this->contains($this->processed, $candidate);
}
/**
 * Extract every link from the given HTML and queue the ones worth crawling.
 *
 * A link is queued only when it has a crawlable scheme, is accepted by the
 * crawl profile after normalization, and is not already in the crawl queue.
 *
 * @param string $html       Markup the links are extracted from.
 * @param Url    $foundOnUrl Passed to CrawlUrl::create() together with each queued link.
 */
protected function addAllLinksToCrawlQueue(string $html, Url $foundOnUrl)
{
    collect($this->extractAllLinks($html))
        ->filter(function (Url $url) {
            return $url->hasCrawlableScheme();
        })
        ->map(function (Url $url) {
            return $this->normalizeUrl($url);
        })
        ->filter(function (Url $url) {
            return $this->crawlProfile->shouldCrawl($url);
        })
        ->each(function (Url $url) use ($foundOnUrl) {
            // Collection stages run eagerly, one full pass per stage. Checking
            // the queue in a separate reject() pass would let a link that occurs
            // several times in the same document pass the check every time,
            // because none of the copies has been added yet. Checking right
            // before each add lets later copies see the earlier one.
            // NOTE(review): relies on add() making the URL visible to has() - confirm.
            if ($this->crawlQueue->has($url)) {
                return;
            }

            $this->crawlQueue->add(CrawlUrl::create($url, $foundOnUrl));
        });
}