コード例 #1
0
 /**
  * Execute the console command.
  *
  * Creates a Crawler record for the URL passed as the `url` command argument.
  *
  * NOTE(review): $client is no longer used now that the unreachable inline
  * fetch/parse statements that followed an unconditional `return` have been
  * removed; the parameter is kept so the method signature stays unchanged.
  *
  * @param Client $client HTTP client (currently unused).
  * @return void
  */
 public function handle(Client $client)
 {
     Crawler::create(['url' => $this->argument('url')]);
 }
コード例 #2
0
 /**
  * Crawl every link returned by getLinks($id).
  *
  * Each link's URL is fetched through $this->client, then:
  *  - a 404 response deletes the link;
  *  - any other non-200 response flags the link as not crawled, increments
  *    its failed_tries and logs the status;
  *  - a 200 response is processed inside a database transaction: when
  *    isProduct() accepts the URL the page is handed to a Spider, and every
  *    anchor whose normalized URL is not yet present in CrawlerTable is
  *    stored through $this->crawler.
  *
  * Exceptions raised while handling one link are logged (and, for processing
  * errors, recorded on the link) and the loop moves on to the next link.
  * After the loop the 'crawler' session value is decremented by one.
  *
  * @param mixed $id Forwarded unchanged to getLinks().
  *                  NOTE(review): presumably identifies the batch to crawl - confirm.
  * @return void
  */
 public function crawl($id)
 {
     $links = $this->getLinks($id);
     $uuid = uniqid();
     Log::info("Starting to crawl {$uuid}... " . Carbon::now()->toDateTimeString());
     $counter = 0;
     foreach ($links as $link) {
         try {
             $response = $this->client->get($link->url);
             $status = $response->getStatusCode();
             if ($status == 404) {
                 $link->delete();
                 continue;
             }
             if ($status != 200) {
                 $link->ind_crawled = false;
                 $link->failed_tries = $link->failed_tries + 1;
                 $link->save();
                 Log::info("Erro de requisição: STATUS " . $status);
                 continue;
             }
             // Tally for this page only: it is added to the total after the
             // commit, so rows discarded by a rollback are not reported.
             $inserted = 0;
             DB::beginTransaction();
             try {
                 // Cast the body stream explicitly; the DOM crawler works on a string.
                 $html = (string) $response->getBody();
                 $dom = new DomCrawler();
                 $dom->addHtmlContent($html);
                 if ($this->isProduct($link->url)) {
                     $spider = new Spider($link, $dom);
                     $spider->get();
                 }
                 foreach ($dom->filter("a") as $anchor) {
                     $url = $this->normalizeUrl($anchor->getAttribute("href"));
                     if ($url === false) {
                         continue;
                     }
                     // Skip URLs that already have a row.
                     if (CrawlerTable::where('url', '=', $url)->first() !== null) {
                         continue;
                     }
                     try {
                         $this->crawler->create(['url' => $url]);
                         $inserted++;
                     } catch (Exception $e) {
                         // Best effort: one failing insert must not abort the page.
                         Log::info($e->getMessage());
                     }
                 }
                 $link->failed_tries = 0;
                 $link->error_log = null;
                 $link->save();
                 DB::commit();
                 $counter += $inserted;
             } catch (Exception $e) {
                 DB::rollBack();
                 Log::info($e->getMessage() . " / Url: " . $link->url);
                 Log::info($e->getTraceAsString());
                 // Saved after the rollback so the failure record itself persists.
                 $link->ind_crawled = false;
                 $link->failed_tries = $link->failed_tries + 1;
                 $link->error_log = $e->getMessage() . PHP_EOL . PHP_EOL . $e->getTraceAsString();
                 $link->save();
             } finally {
                 // Drop the references to the parsed page before the next iteration.
                 $dom = null;
                 $html = null;
             }
         } catch (Exception $e) {
             // Include the cause and the URL instead of logging a bare prefix.
             Log::info("Houve um erro na requisição: " . $e->getMessage() . " / Url: " . $link->url);
         }
     }
     Log::info("Crawling finished {$uuid}, Links inseridos: {$counter}!");
     session()->set('crawler', session()->get('crawler') - 1);
 }