示例#1
0
文件: Operation.php 项目: comaw/pars
 /**
  * Relation to the ParsSettings record this model points at.
  *
  * The link pairs ParsSettings' `id` column with this model's `pars` attribute.
  *
  * @return \yii\db\ActiveQuery
  */
 public function getPars0()
 {
     // Key: attribute on the related class; value: attribute on this model.
     $link = ['id' => 'pars'];
     $relatedClass = ParsSettings::className();

     return $this->hasOne($relatedClass, $link);
 }
示例#2
0
 /**
  * Runs one scraping pass for the oldest pending ParsSettings record.
  *
  * Steps, as visible in this method:
  *  1. Pick the first settings row (lowest id) with operation = 'no' and
  *     status = 'active', and flag it with operation = 'yes'.
  *  2. Build the Yelp search URL from the row's search / city / state fields.
  *  3. Walk the result pages in offsets of 10 and hand each page's links to
  *     runInFileLink().
  *  4. Read the links back via getInFileLink(), fetch every business page and
  *     store it through Operation::addNew() / OperationCategoryJoin::addNew().
  *  5. Mark the settings row as finished.
  *
  * The script is terminated with exit when there is no pending row or when
  * the first results page reports fewer than one result.
  * NOTE(review): in the second case the row stays at operation = 'yes' /
  * status = 'active' and will not be picked up again — confirm this is intended.
  *
  * @return void
  */
 public function actionIndex()
 {
     Curl::cookieFile(true);

     $settings = ParsSettings::find()
         ->where("operation = 'no' AND status = 'active'", [])
         ->orderBy("id asc")
         ->one();
     if (!$settings) {
         exit;
     }

     // Flag the row as taken before any network work starts.
     $settings->operation = 'yes';
     $settings->save();
     $parsId = $settings->id;

     // Resulting shape, e.g.:
     // http://www.yelp.com/search?find_desc=dentist&find_loc=Dallas,+TX
     $baseUrl = 'http://www.yelp.com/search?find_desc=' . urlencode($settings->search);
     if ($settings->city) {
         $baseUrl .= '&find_loc=' . urlencode($settings->city);
         if ($settings->state) {
             $baseUrl .= ',' . urlencode(' ' . $settings->state);
         }
     } elseif ($settings->state) {
         $baseUrl .= '&find_loc=' . urlencode($settings->state);
     }
     $this->runInFile($baseUrl, 'txt', 'lastUrl');

     // The first results page is only used to learn the total number of hits.
     $firstPage = Curl::get($baseUrl . '&start=0');
     $max = (int) Operation::maxResult($firstPage);
     if ($max < 1) {
         exit;
     }
     $this->runInFile($max, 'txt', 'lastMax');
     unset($firstPage);

     // Empty payload with the third argument set to true.
     // NOTE(review): presumably this resets the link file for $parsId — confirm
     // against runInFileLink().
     $this->runInFileLink('', $parsId, true);

     // "start" is a result offset that advances 10 at a time, so it has to be
     // bounded by the total result count. Bounding it by the page count
     // (ceil($max / 10)) skipped roughly nine out of every ten result pages.
     for ($offset = 0; $offset < $max; $offset += 10) {
         $html = Curl::get($baseUrl . '&start=' . $offset);
         $pageLinks = Operation::allLinks($html);
         if (is_array($pageLinks) && sizeof($pageLinks) > 0) {
             $pageLinks = Operation::urlCorrect($pageLinks);
             $this->runInFileLink(join(PHP_EOL, $pageLinks), $parsId);
         }
         // Random pause of 0.5 to 2 seconds between requests.
         usleep(rand(50, 200) * 10000);
     }
     unset($html, $pageLinks);

     $links = explode(PHP_EOL, $this->getInFileLink($parsId));
     foreach ($links as $link) {
         $link = trim($link);
         if (!$link) {
             continue;
         }

         // Stored links are site-relative; prefix the host before fetching.
         $page = Curl::get('http://www.yelp.com' . $link);
         $params = Operation::gerParams($page);
         // Called before the name check, as in the original flow, in case
         // forId() has side effects of its own.
         $categoriesIds = OperationCategory::forId($params['categories']);
         if (!$params['Operation']['name']) {
             continue;
         }

         $params['Operation']['pars'] = $parsId;
         $itemId = Operation::addNew($params);
         if ($itemId && $categoriesIds) {
             OperationCategoryJoin::addNew($itemId, $categoriesIds);
         }

         // Random pause of 0.5 to 2 seconds between requests.
         usleep(rand(50, 200) * 10000);
     }

     $this->delInFileLink($parsId);

     $settings->operation = 'yes';
     $settings->status = 'finished';
     $settings->save();

     // Count of lines read back from the link file, blank lines included.
     $this->runInFile(sizeof($links), 'html', 'finish');
 }