/**
 * Crawl every eligible link contained in the given html.
 *
 * The links returned by getAllLinks() are processed in four separate
 * passes: urls reported as email, tel or javascript urls are dropped,
 * the remainder is normalized, the crawl profile decides which of them
 * may be crawled, and finally each approved url is handed to crawlUrl().
 *
 * @param string $html
 */
protected function crawlAllLinks($html)
{
    $links = collect($this->getAllLinks($html));

    // Keep only urls that are none of: email, tel, javascript.
    $candidates = $links->filter(function (Url $link) {
        return ! $link->isEmailUrl()
            && ! $link->isTelUrl()
            && ! $link->isJavascript();
    });

    $normalized = $candidates->map(function (Url $link) {
        return $this->normalizeUrl($link);
    });

    $approved = $normalized->filter(function (Url $link) {
        return $this->crawlProfile->shouldCrawl($link);
    });

    // Crawling starts only after the complete list has been approved.
    $approved->each(function (Url $link) {
        $this->crawlUrl($link);
    });
}
/**
 * Add every crawlable link found in the given html to the crawl queue.
 *
 * The links returned by extractAllLinks() are limited to those for which
 * Url::hasCrawlableScheme() is true, normalized, and then offered to the
 * crawl profile. Each remaining url that the crawl queue does not already
 * hold is added as a CrawlUrl created from the url and $foundOnUrl.
 *
 * @param string $html
 * @param Url $foundOnUrl The url passed along to CrawlUrl::create() for every queued link.
 */
protected function addAllLinksToCrawlQueue(string $html, Url $foundOnUrl)
{
    $allLinks = $this->extractAllLinks($html);

    collect($allLinks)
        ->filter(function (Url $url) {
            return $url->hasCrawlableScheme();
        })
        ->map(function (Url $url) {
            return $this->normalizeUrl($url);
        })
        ->filter(function (Url $url) {
            return $this->crawlProfile->shouldCrawl($url);
        })
        ->each(function (Url $url) use ($foundOnUrl) {
            // The queue is consulted right before adding. Collection stages
            // run over the whole list before the next stage starts, so a
            // separate reject() pass would finish before anything is added
            // and a link occurring twice in the same html would be queued
            // twice.
            if ($this->crawlQueue->has($url)) {
                return;
            }

            $this->crawlQueue->add(CrawlUrl::create($url, $foundOnUrl));
        });
}