Пример #1
0
 /**
  * Automatically crawl pending URLs, driven by the URL records stored in MySQL.
  *
  * Runs at most 1000 iterations per call. Each iteration fetches one
  * not-yet-collected URL record, downloads the page, queues every link found
  * on it, extracts the page content and stores it. The loop stops early as
  * soon as no record with a non-empty URL is returned.
  *
  * A failure while processing one record is written back to that record
  * (request_status = 3 plus the exception message in `desc`) and does not
  * abort the rest of the batch.
  *
  * @return bool Always true.
  */
 public function autoCollect()
 {
     // Crawling a batch can take a long time; lift the script execution limit.
     set_time_limit(0);
     $countNum = 1000;
     $commonCurl = new CommonCurl();
     for ($i = 0; $i < $countNum; $i++) {
         $crawlerUrl = $this->getNotCollectCrawlerUrl();
         if (empty($crawlerUrl['url'])) {
             // Nothing left to collect (or the record has no usable URL).
             break;
         }
         try {
             $htmlString = $commonCurl->baseCurl($crawlerUrl['url']);
             // Presumably normalises the page encoding to UTF-8 (helper not visible here).
             $htmlString = $commonCurl->formatEncodingUtf8($htmlString);
             $urls = $this->findA($htmlString);
             if (!empty($urls)) {
                 $this->addCrawlerUrls($crawlerUrl['url'], $urls);
             }
             $content = $this->findContent($htmlString);
             // Store the content BEFORE flagging the record as done. Otherwise a
             // failure here would leave the row saved with analyze_status = 2
             // while the catch block flips request_status to 3, i.e. a record
             // that claims to be analysed but has no stored content.
             $this->addCrawlerUrlContent($crawlerUrl['id'], $content);
             // Status 2 appears to mean "completed" for both phases; confirm
             // against the status definitions of the crawler URL model.
             $crawlerUrl->request_status = 2;
             $crawlerUrl->analyze_status = 2;
             $crawlerUrl->save();
         } catch (Exception $exp) {
             // Request failed: record the failure and its reason on the row.
             $crawlerUrl->request_status = 3;
             $crawlerUrl->desc = $exp->getMessage();
             $crawlerUrl->save();
         }
     }
     return true;
 }