/**
 * Crawl a single webpage, persist a snapshot of the result, and enqueue
 * any newly discovered internal links as Webpage records.
 *
 * Before crawling, the page's status code is pre-set to STATUS_CODE_CRASH
 * and saved: if the crawler itself crashes the process, the sentinel value
 * remains in the database so the failure is detectable afterwards. It is
 * overwritten with the real status code once the crawl completes.
 *
 * @param Webpage $webpage the page to crawl
 * @param string  $mode    'normal' performs the crawl; 'debug' stops right
 *                         after the crash sentinel is saved (dry run)
 *
 * @return bool false when the page does not need crawling, true otherwise
 */
public function crawlWebPage(Webpage $webpage, string $mode = 'normal'): bool
{
    if (!$webpage->needsCrawl()) {
        return false;
    }

    // Crash sentinel: persisted up-front so a hard crash during the crawl
    // leaves a detectable marker (see docblock).
    $webpage->last_status_code = Webpage::STATUS_CODE_CRASH;
    $webpage->save();

    $url = $webpage->url;

    // Debug mode: exercise everything up to (but not including) the crawl.
    if ($mode === 'debug') {
        return true;
    }

    // Depth 1: fetch only this page, collecting the links it contains.
    $crawler = new \Arachnid\Crawler($url, 1);
    $crawler->traverse();

    $html = $crawler->getHtml();
    $binary = $crawler->getBinary();
    $statusCode = $crawler->getStatusCode();

    // Keep only the internal links this site should actually track.
    $links = $this->filterLinks($crawler->getInternalLinks(), $webpage);

    $snapshot = Snapshot::create([
        'html' => $html,
        'binary' => $binary,
        'webpage_id' => $webpage->id,
        'status_code' => $statusCode,
    ]);
    // Diff against the previous snapshot and record any change.
    $snapshot->processChange();

    // Register newly discovered pages; firstOrCreate avoids duplicates.
    foreach ($links as $link) {
        Webpage::firstOrCreate([
            'url' => $link,
            'site_id' => $webpage->site_id,
        ]);
    }

    // Replace the crash sentinel with the real outcome.
    $webpage->crawlcount++;
    $webpage->last_status_code = $statusCode;
    $webpage->save();

    return true;
}