Ejemplo n.º 1
0
 /**
  * Crawl every eligible link found in the given HTML.
  *
  * The links are processed stage by stage: URLs that report themselves as
  * e-mail, telephone or javascript links are dropped, the remaining URLs
  * are normalized, the crawl profile decides which of them may be crawled,
  * and every approved URL is finally handed to crawlUrl().
  *
  * @param string $html Markup from which the links are extracted.
  */
 protected function crawlAllLinks($html)
 {
     $links = collect($this->getAllLinks($html));

     // Keep only links that are none of: e-mail, telephone, javascript.
     $fetchable = $links->filter(function (Url $link) {
         return ! $link->isEmailUrl() && ! $link->isTelUrl() && ! $link->isJavascript();
     });

     $normalized = $fetchable->map(function (Url $link) {
         return $this->normalizeUrl($link);
     });

     // The crawl profile has the final say on what gets crawled.
     $approved = $normalized->filter(function (Url $link) {
         return $this->crawlProfile->shouldCrawl($link);
     });

     foreach ($approved as $link) {
         $this->crawlUrl($link);
     }
 }
Ejemplo n.º 2
0
 /**
  * Add every crawlable link found in the given HTML to the crawl queue.
  *
  * Links without a crawlable scheme are discarded, the rest are normalized
  * and offered to the crawl profile; each approved URL that is not yet in
  * the queue is added as a CrawlUrl that remembers the page it was found on.
  *
  * @param string $html       Markup from which the links are extracted.
  * @param Url    $foundOnUrl URL of the page the links were found on; passed
  *                           to CrawlUrl::create() for every queued link.
  */
 protected function addAllLinksToCrawlQueue(string $html, Url $foundOnUrl)
 {
     $candidates = collect($this->extractAllLinks($html))
         ->filter(function (Url $url) {
             return $url->hasCrawlableScheme();
         })
         ->map(function (Url $url) {
             return $this->normalizeUrl($url);
         })
         ->filter(function (Url $url) {
             return $this->crawlProfile->shouldCrawl($url);
         });

     foreach ($candidates as $url) {
         // Check the queue at insertion time rather than in a separate
         // pipeline stage: collection stages run eagerly over the whole
         // list, so a URL that appears twice in the same page would pass
         // an up-front "already queued?" check both times and be added
         // twice.
         if ($this->crawlQueue->has($url)) {
             continue;
         }

         $this->crawlQueue->add(CrawlUrl::create($url, $foundOnUrl));
     }
 }