/**
 * Register a URL in the spider queue unless an entry with the same hash exists.
 *
 * The lookup key is md5($url), stored in the `hash_url` column.
 *
 * @param string $url URL to enqueue.
 * @return array ['code' => 200, 'data' => ...]; `data` is the existing
 *               SpiderQueue record when the URL was already queued, and an
 *               empty array when a new record was just created.
 */
public static function add($url)
{
    $urlHash = md5($url);

    // Already queued: hand back the stored record instead of inserting again.
    $existing = SpiderQueue::findOne(['hash_url' => $urlHash]);
    if ($existing) {
        return ['code' => 200, 'data' => $existing];
    }

    $timestamp = date("Y-m-d H:i:s");

    $entry = new SpiderQueue();
    $entry->url = $url;
    $entry->hash_url = $urlHash;
    $entry->status = -2; // the status value actionRobot() selects on
    $entry->updated_time = $timestamp;
    $entry->created_time = $timestamp;
    // NOTE(review): 0 is a falsy first argument — presumably "skip validation"; confirm.
    // The result of save() is not inspected, so a failed insert still reports code 200.
    $entry->save(0);

    return ['code' => 200, 'data' => []];
}
/**
 * Process the oldest queue entry whose status is -2: crawl its URL and store
 * the result as a blog post.
 *
 * Status values written by this method:
 *   -1  written before any work is done on the entry,
 *    0  written when the host is missing or not in SpiderService::$allow_hosts,
 *       when the crawler returns a falsy result, or when save2blog() returns
 *       a falsy post id,
 *    1  written together with post_id on success.
 *
 * @return void
 */
public function actionRobot()
{
    $date_now = date("Y-m-d H:i:s");

    $queue_list = SpiderQueue::find()
        ->where(['status' => -2])
        ->orderBy("id asc")
        ->limit(1)
        ->all();
    if (!$queue_list) {
        $this->echoLog("{$date_now} -- no data");
        return;
    }

    // Host => crawler suffix; the method actually invoked is "crawl_<suffix>".
    $route_mapping = SpiderService::$allow_hosts;

    // Single place for the failure write that was previously repeated inline.
    // NOTE(review): 0 is a falsy first argument — presumably "skip validation"; confirm.
    $markFailed = static function ($item) {
        $item->status = 0;
        $item->update(0);
    };

    foreach ($queue_list as $_info) {
        $_info->status = -1;
        $_info->update(0);

        // parse_url() returns false for seriously malformed URLs and leaves
        // out 'host' for relative or scheme-less ones. Reading ['host']
        // unguarded raised a notice/warning there (an exception if the
        // runtime escalates PHP errors) and left the entry at status -1.
        $tmp_url_info = parse_url($_info['url']);
        $tmp_host = (is_array($tmp_url_info) && isset($tmp_url_info['host']))
            ? $tmp_url_info['host']
            : null;

        if ($tmp_host === null || !isset($route_mapping[$tmp_host])) {
            $this->echoLog("-------queue_id:{$_info['id']},date:{$date_now},not allow host url:{$_info['url']}----------");
            $markFailed($_info);
            continue;
        }

        $this->echoLog("-------queue_id:{$_info['id']},date:{$date_now},url:{$_info['url']}----------");

        $tmp_action = $route_mapping[$tmp_host];
        $ret = call_user_func_array([$this, "crawl_{$tmp_action}"], [$_info['url']]);
        if (!$ret) {
            $markFailed($_info);
            continue;
        }

        $post_id = $this->save2blog($ret['content'], $ret['title'], $_info['url']);
        if (!$post_id) {
            $markFailed($_info);
            continue;
        }

        $_info->post_id = $post_id;
        $_info->status = 1;
        $_info->update(0);
    }
}