コード例 #1
0
 /**
  * Crawl a single web page, store a snapshot of the response and register
  * the internal links found on it as web pages of the same site.
  *
  * @param Webpage $webpage Page to crawl. Nothing happens when
  *                         $webpage->needsCrawl() is falsy.
  * @param string  $mode    'debug' performs a dry run: nothing is fetched and
  *                         nothing is written. Any other value crawls normally.
  *
  * @return bool false when the page does not need crawling, true otherwise
  *              (including the debug dry run).
  */
 public function crawlWebPage(Webpage $webpage, $mode = 'normal')
 {
     if (!$webpage->needsCrawl()) {
         return false;
     }

     // Dry run: return before anything is persisted. This check used to sit
     // after the crash marker below was saved, which left every page touched
     // in debug mode recorded as crashed. Strict comparison so that only the
     // exact string 'debug' selects this mode.
     if ($mode === 'debug') {
         return true;
     }

     // Persist a crash marker up front: if the crawl kills the process, the
     // page keeps this status. On success it is overwritten further down.
     $webpage->last_status_code = Webpage::STATUS_CODE_CRASH;
     $webpage->save();

     // NOTE(review): the second argument is presumably the crawl depth limit — confirm.
     $crawler = new \Arachnid\Crawler($webpage->url, 1);
     $crawler->traverse();

     $html = $crawler->getHtml();
     $binary = $crawler->getBinary();
     $links = $this->filterLinks($crawler->getInternalLinks(), $webpage);
     $statusCode = $crawler->getStatusCode();

     $snapshot = Snapshot::create([
         'html' => $html,
         'binary' => $binary,
         'webpage_id' => $webpage->id,
         'status_code' => $statusCode,
     ]);
     $snapshot->processChange();

     // Make sure every filtered internal link exists as a web page of the
     // same site; firstOrCreate leaves already-known URLs alone.
     foreach ($links as $link) {
         Webpage::firstOrCreate(['url' => $link, 'site_id' => $webpage->site_id]);
     }

     // Replace the crash marker with the real outcome of this crawl.
     $webpage->crawlcount++;
     $webpage->last_status_code = $statusCode;
     $webpage->save();

     return true;
 }