/**
 * Relation to the parser settings record referenced by this model.
 *
 * The related ParsSettings row is the one whose `id` equals this
 * model's `pars` attribute.
 *
 * @return \yii\db\ActiveQuery
 */
public function getPars0()
{
    $relatedClass = ParsSettings::className();
    $link = ['id' => 'pars'];

    return $this->hasOne($relatedClass, $link);
}
/**
 * Runs one pending Yelp scraping job from start to finish.
 *
 * Picks the lowest-id ParsSettings row with operation = 'no' and
 * status = 'active', flags it as in progress, walks every page of the
 * search results and appends the extracted links to the job's link file,
 * then fetches each linked page, stores the parsed record together with
 * its categories, removes the link file and marks the job as finished.
 *
 * Returns without further work when there is no pending job or when the
 * search reports no results.
 *
 * @return void
 */
public function actionIndex()
{
    Curl::cookieFile(true);

    $settings = ParsSettings::find()
        ->where("operation = 'no' AND status = 'active'", [])
        ->orderBy("id asc")
        ->one();
    if (!$settings) {
        // Nothing to do. Return instead of exit so the framework can
        // finish the request/command normally.
        return;
    }

    // Claim the job so it is not selected again by the query above.
    $settings->operation = 'yes';
    $settings->save();
    $parsId = $settings->id;

    // Build the search URL, e.g.
    // http://www.yelp.com/search?find_desc=dentist&find_loc=Dallas,+TX
    $baseUrl = 'http://www.yelp.com/search?find_desc=' . urlencode($settings->search) . '';
    if ($settings->city) {
        $baseUrl .= '&find_loc=' . urlencode($settings->city);
        if ($settings->state) {
            // The leading space is encoded as "+", giving "City,+ST".
            $baseUrl .= ',' . urlencode(' ' . $settings->state);
        }
    } elseif ($settings->state) {
        $baseUrl .= '&find_loc=' . urlencode($settings->state);
    }
    $this->runInFile($baseUrl, 'txt', 'lastUrl');

    // The first page is only used to read the total number of results.
    $firstPage = Curl::get($baseUrl . '&start=0');
    $max = (int) Operation::maxResult($firstPage);
    if ($max < 1) {
        // NOTE(review): the job stays operation = 'yes' / status = 'active'
        // here and will not be picked up again - confirm this is intended.
        return;
    }
    $this->runInFile($max, 'txt', 'lastMax');

    // Start the link file with empty content.
    // NOTE(review): presumably the third argument truncates the file - confirm.
    $this->runInFileLink('', $parsId, true);

    // Phase 1: collect links from every result page. "start" is a result
    // offset that advances by one page of results per request, so the loop
    // must run over the total result count, not over the number of pages
    // (the previous bound, ceil($max / 10), skipped ~90% of the pages).
    $perPage = 10;
    for ($offset = 0; $offset < $max; $offset += $perPage) {
        $html = Curl::get($baseUrl . '&start=' . $offset);
        $pageLinks = Operation::allLinks($html);
        if (is_array($pageLinks) && count($pageLinks) > 0) {
            $pageLinks = Operation::urlCorrect($pageLinks);
            $this->runInFileLink(join(PHP_EOL, $pageLinks), $parsId);
        }

        // Random 0.5-2.0 s pause between requests.
        usleep(rand(50, 200) * 10000);
    }
    // Release the last page's HTML before the second phase.
    unset($firstPage, $html, $pageLinks);

    // Phase 2: fetch every collected link and store the parsed data.
    $links = explode(PHP_EOL, $this->getInFileLink($parsId));
    foreach ($links as $link) {
        $link = trim($link);
        if (!$link) {
            continue;
        }

        // Stored links are site-relative; prepend the host.
        $page = Curl::get('http://www.yelp.com' . $link);
        $params = Operation::gerParams($page);
        $categoriesIds = OperationCategory::forId($params['categories']);
        if (!$params['Operation']['name']) {
            // Skip pages that did not yield a name.
            continue;
        }

        $params['Operation']['pars'] = $parsId;
        $itemId = Operation::addNew($params);
        if ($itemId && $categoriesIds) {
            OperationCategoryJoin::addNew($itemId, $categoriesIds);
        }

        // Random 0.5-2.0 s pause between requests.
        usleep(rand(50, 200) * 10000);
    }

    $this->delInFileLink($parsId);

    $settings->operation = 'yes';
    $settings->status = 'finished';
    $settings->save();

    // Records the number of lines read from the link file (blank lines included).
    $this->runInFile(count($links), 'html', 'finish');
}