/**
 * Debug action: prints a greeting, then dumps the number of pending spider rows.
 *
 * Echoes "welcome to " followed by $name, counts the MvSpider rows whose
 * status is 0, var_dump()s that count and terminates the script.
 *
 * @param string $name text appended to the greeting
 */
public function actionSend($name)
{
    echo "welcome to " . $name;

    // The previous version called limit() with no argument here. limit()
    // requires the limit value, so the call failed before the count could
    // run; a row count does not need a limit, so the call is simply dropped.
    $pendingCount = MvSpider::find()
        ->where(['status' => 0])
        ->orderBy('id')
        ->count();

    var_dump($pendingCount);
    die;
}
/**
 * Fetch Douban data via the API.
 *
 * Walks the MvSpider rows whose status is 0 in ascending id order, 40 at a
 * time. For each row:
 *  - if an MvBasic row with the same douban_id already exists, the spider row
 *    is marked done (status = 1) and skipped;
 *  - otherwise the movie is fetched with ApiDouban::getMovie(), normalised by
 *    formatMovie(), stored as a new MvBasic row (including the pinyin forms of
 *    the titles), linked to its genres and countries, and the spider row is
 *    marked done.
 * Progress and failures are echoed to the output.
 */
public function actionDouban()
{
    $api = new ApiDouban();
    $pageSize = 40;

    // Page by "id greater than the last one seen" instead of by OFFSET.
    // Rows leave the status = 0 result set as soon as they are processed, so
    // an increasing OFFSET would jump over rows that are still pending.
    $lastId = null;
    while (true) {
        $query = MvSpider::find()->where(['status' => 0]);
        if ($lastId !== null) {
            $query->andWhere(['>', 'id', $lastId]);
        }
        $rows = $query->orderBy('id asc')->limit($pageSize)->asArray()->all();
        if (empty($rows)) {
            break;
        }

        foreach ($rows as $v) {
            $lastId = $v['id'];
            $subjectId = $v['subject_id'];

            if (MvBasic::findOne(['douban_id' => $subjectId])) {
                echo 'doubanID:', $subjectId, " exists \n";
                MvSpider::updateAll(['status' => 1], ['subject_id' => $subjectId]);
                continue;
            }

            $rawData = $api->getMovie($subjectId);
            if (!$rawData) {
                echo 'doubanApi:', $subjectId, " empty \n";
                continue;
            }

            $infoData = $this->formatMovie($rawData);
            if (is_array($infoData)) {
                // MvBasic: copy every formatted field onto the new record.
                $movie = new MvBasic();
                foreach ($infoData as $field => $value) {
                    $movie->{$field} = $value;
                }

                // Pinyin forms of the titles.
                if (isset($infoData['title'])) {
                    $pyTitle = PinyinUtil::getFullPy($infoData['title']);
                    $movie->py_title = PinyinUtil::subPinyin($pyTitle);
                }
                if (isset($infoData['original_title'])) {
                    $pyOriginalTitle = PinyinUtil::getFullPy($infoData['original_title']);
                    $movie->py_original_title = PinyinUtil::subPinyin($pyOriginalTitle);
                }

                // Only link genres/countries when the movie row really exists;
                // otherwise the link rows would carry an empty basic_id.
                $basicId = $movie->save() ? $movie->primaryKey : 0;
                if (!$basicId) {
                    echo 'doubanSave:', $subjectId, " failed \n";
                } else {
                    // Genres are stored as conditions with cond_type 1.
                    $genres = (isset($rawData['genres']) && is_array($rawData['genres']))
                        ? $rawData['genres']
                        : [];
                    foreach ($genres as $genre) {
                        $condId = $this->findOrCreateConditionId($genre, 1);
                        if (!$condId) {
                            continue;
                        }
                        if (!models\MvBasicType::findOne(['basic_id' => $basicId, 'cate_id' => $condId])) {
                            $typeLink = new models\MvBasicType();
                            $typeLink->basic_id = $basicId;
                            $typeLink->cate_id = $condId;
                            $typeLink->save();
                        }
                    }

                    // Countries are stored as conditions with cond_type 2.
                    $countries = (isset($rawData['countries']) && is_array($rawData['countries']))
                        ? $rawData['countries']
                        : [];
                    foreach ($countries as $country) {
                        $condId = $this->findOrCreateConditionId($country, 2);
                        if (!$condId) {
                            continue;
                        }
                        if (!models\MvBasicCountry::findOne(['basic_id' => $basicId, 'country_id' => $condId])) {
                            $countryLink = new models\MvBasicCountry();
                            $countryLink->basic_id = $basicId;
                            $countryLink->country_id = $condId;
                            $countryLink->save();
                        }
                    }

                    // Mark the spider row as processed.
                    MvSpider::updateAll(['status' => 1], ['subject_id' => $subjectId]);
                }
            }

            // Throttle API calls by half a second. sleep() only accepts whole
            // seconds (0.5 is truncated to 0), so usleep() is used instead.
            usleep(500000);
        }
    }
}

/**
 * Returns the id of the MvBasicCondition row with the given name and
 * cond_type, creating the row first when it does not exist yet.
 *
 * @param string $name     condition name (a genre or country name)
 * @param int    $condType value stored in cond_type (1 = genre, 2 = country)
 * @return mixed the row's primary key; empty when a new row could not be saved
 */
private function findOrCreateConditionId($name, $condType)
{
    $existing = models\MvBasicCondition::find()
        ->where(['name' => $name, 'cond_type' => $condType])
        ->asArray()
        ->one();
    if (!empty($existing)) {
        return $existing['id'];
    }

    $condition = new models\MvBasicCondition();
    $condition->cond_type = $condType;
    $condition->name = $name;
    $condition->update_time = time();
    $condition->save();

    return $condition->primaryKey;
}
/**
 * Links unassigned MvSourceTemp rows to movies by searching Douban for their title.
 *
 * Walks the MvSourceTemp rows matching "basic_id=0 and type!=0" in ascending
 * id order, 40 at a time. Each row's title is searched with
 * ApiDouban::searchpj(). For every search result:
 *  - the Douban subject id is extracted from the result's url;
 *  - a subject that is not yet in MvSpider is queued there (status 20,
 *    from_tag "search");
 *  - a subject that already has an MvBasic row is compared with the temp row;
 *    when the year is equal and the titles are 100% similar, an MvSource row
 *    is created, the temp row's basic_id is set, and the movie's update_time
 *    and source_num are refreshed.
 * Progress is echoed to the output; the loop sleeps one second after every
 * temp row and after every page.
 */
public function actionMarksource()
{
    $db_api = new ApiDouban();
    $pageSize = 40;

    // Page by "id greater than the last one seen" instead of by OFFSET.
    // Linked rows drop out of the basic_id=0 result set, so an increasing
    // OFFSET would jump over rows that have not been looked at yet.
    $lastId = null;
    while (true) {
        $query = models\MvSourceTemp::find()->where('basic_id=0 and type!=0');
        if ($lastId !== null) {
            $query->andWhere(['>', 'id', $lastId]);
        }
        $tempSourceData = $query->orderBy('id asc')->limit($pageSize)->asArray()->all();
        if (empty($tempSourceData)) {
            break;
        }

        foreach ($tempSourceData as $v) {
            $lastId = $v['id'];
            echo "tempSource: {$v['title']} \n";

            // Douban search by title.
            $searchRs = $db_api->searchpj($v['title']);
            if (!empty($searchRs)) {
                foreach ($searchRs as $s) {
                    // Extract the subject id from the result url.
                    $reg = '/subject\\/(\\d+?)\\//';
                    preg_match($reg, $s['url'], $match);
                    $subject_id = isset($match['1']) ? $match['1'] : 0;
                    echo "douban Search: {$subject_id} {$s['title']}\n";
                    if (!$subject_id) {
                        echo "douban Subject_id empty: {$subject_id} {$s['title']}\n";
                        continue;
                    }

                    // Unknown subject: queue it in MvSpider and move on.
                    $inSpider = models\MvSpider::find()->where(['subject_id' => $subject_id])->exists();
                    if (!$inSpider) {
                        $spider = new models\MvSpider();
                        $spider->subject_id = $subject_id;
                        $spider->title = $s['title'];
                        $spider->from_tag = 'search';
                        $spider->status = 20;
                        $spider->create_time = time();
                        if ($spider->save()) {
                            echo "spider save {$subject_id} \n\n";
                        } else {
                            echo "spider save failed {$subject_id} \n\n";
                        }
                        continue;
                    }

                    // Known subject: it can only be linked once it has an MvBasic row.
                    $basicRs = models\MvBasic::find()->where(['douban_id' => $subject_id])->asArray()->one();
                    if (empty($basicRs['id'])) {
                        echo "basicRs empty {$subject_id} \n\n";
                        continue;
                    }
                    echo "basicRs found subject_id: {$subject_id} temp: {$v['id']}-{$v['title']} basic_id: {$basicRs['id']} basic_title: {$basicRs['title']} \n";

                    // Compare year first, then title.
                    if ($v['year'] != $s['year']) {
                        echo "year: {$v['year']} vs {$s['year']} temp: {$v['id']}-{$v['title']} {$subject_id} \n\n";
                        continue;
                    }
                    similar_text($v['title'], $s['title'], $percent);
                    echo "vs Percent: {$percent} \n";
                    if ($percent < 100) {
                        echo "vs Percent: {$percent} < 100 temp: {$v['id']}-{$v['title']} {$subject_id} \n\n";
                        continue;
                    }

                    // Exact match: create the MvSource row.
                    $now = time();
                    $source = new models\MvSource();
                    $source->site_id = 10;
                    $source->basic_id = $basicRs['id'];
                    $source->type = $v['type'];
                    $source->name = $v['name'];
                    $source->download_url = $v['download_url'];
                    $source->play_url = $v['play_url'];
                    $source->ext = $v['ext'];
                    $source->update_time = $now;
                    $source->create_time = $now;
                    if (!$source->save()) {
                        // Leave the temp row unlinked so it is not lost when
                        // the source row could not be written.
                        echo "source save failed temp: {$v['id']}-{$v['title']} basic_id: {$basicRs['id']} subject_id: {$subject_id} \n\n";
                        continue;
                    }

                    // Record the link on the temp row.
                    models\MvSourceTemp::updateAll(['basic_id' => $basicRs['id']], ['id' => $v['id']]);

                    // Refresh the movie's update_time and source counter.
                    $source_num = $basicRs['source_num'] + 1;
                    models\MvBasic::updateAll(
                        ['update_time' => $now, 'source_num' => $source_num],
                        ['douban_id' => $subject_id]
                    );
                    echo "vs Percent: {$percent} insert done temp: {$v['id']}-{$v['title']} basic_id: {$basicRs['id']} subject_id: {$subject_id} \n\n";
                }
            }

            // Pause between temp rows.
            sleep(1);
        }

        // Pause between pages.
        sleep(1);
    }
}