/**
 * Generate the pinyin columns of MvBasic rows.
 *
 * Pages (40 rows per page) through the MvBasic rows selected by the hard-coded
 * condition `id=40`. For each row, `title` and `original_title` are passed
 * through PinyinUtil::getFullPy() and then PinyinUtil::subPinyin(), and the
 * results are stored in `py_title` / `py_original_title`.
 *
 * Prints "<save result> <id>" for every row and sleeps one second after each page.
 */
public function actionFixpinyin()
{
    $pageSize = 40;

    $mvBasic = MvBasic::find()->where('id=40')->orderBy('id asc');
    // Count before limit/asArray are applied to the shared query object.
    $total = $mvBasic->count();
    $totalPage = ceil($total / $pageSize);
    $mvBasic->limit($pageSize)->asArray();

    for ($page = 1; $page <= $totalPage; $page++) {
        $offset = ($page - 1) * $pageSize;
        $data = $mvBasic->offset($offset)->all();

        foreach ($data as $v) {
            $basic_id = $v['id'];

            $py_title = PinyinUtil::subPinyin(PinyinUtil::getFullPy($v['title']));
            $py_original_title = PinyinUtil::subPinyin(PinyinUtil::getFullPy($v['original_title']));

            // Reset per row: otherwise a row whose save throws would report an
            // undefined value or the previous row's result.
            $rs = false;

            try {
                // Update the row.
                $py = MvBasic::findOne($basic_id);
                if ($py === null) {
                    // The row disappeared between the page query and this lookup.
                    echo "basic not found: {$basic_id}\n";
                    continue;
                }

                $py->py_title = $py_title;
                $py->py_original_title = $py_original_title;
                $rs = $py->save();
            } catch (ErrorException $e) {
                // Keep going with the next row, but do not hide the failure.
                echo 'save failed: ', $e->getMessage(), "\n";
            }

            echo $rs, ' ', $basic_id, "\n";
        }

        sleep(1);
    }
}
/**
 * Dump movie data to the TSV file /usr/local/sphinx/data/mv_main.tsv.
 *
 * One line is written per MvBasic row with `status=1 and rating>"0.0"`. The
 * columns are the selected MvBasic fields in select order, followed by the
 * space-joined condition names linked through MvBasicType (cond_type 1) and
 * MvBasicCountry (cond_type 2). Tabs are stripped from `directors` and `casts`
 * only; other columns are written as stored.
 *
 * Raises the memory limit to 512M because every table is loaded in full.
 */
public function actionSphinxdata()
{
    ini_set('memory_limit', '512M');
    $dataFile = '/usr/local/sphinx/data/mv_main.tsv';

    // mv_basic_condition: cond_type => [id => name]
    $movieCond = [];
    foreach (models\MvBasicCondition::find()->select('*')->asArray()->all() as $cond) {
        $movieCond[$cond['cond_type']][$cond['id']] = $cond['name'];
    }

    // mv_basic_type: basic_id => list of condition names (cond_type 1)
    $movieType = [];
    foreach (models\MvBasicType::find()->select('*')->asArray()->all() as $link) {
        // Skip links that point at a condition row that does not exist instead
        // of emitting a notice and a blank token.
        if (isset($movieCond['1'][$link['cate_id']])) {
            $movieType[$link['basic_id']][] = $movieCond['1'][$link['cate_id']];
        }
    }

    // mv_basic_country: basic_id => list of condition names (cond_type 2)
    $movieCountry = [];
    foreach (models\MvBasicCountry::find()->select('*')->asArray()->all() as $link) {
        if (isset($movieCond['2'][$link['country_id']])) {
            $movieCountry[$link['basic_id']][] = $movieCond['2'][$link['country_id']];
        }
    }

    // mv_basic
    $movieData = models\MvBasic::find()
        ->select('id,title,py_title,original_title,py_original_title,aka,directors,casts,year,rating,update_time,status')
        ->where('status=1 and rating>"0.0"')
        ->asArray()
        ->all();

    $h = fopen($dataFile, "wb");
    if ($h === false) {
        // Make the failure visible instead of silently writing nothing.
        echo "cannot open {$dataFile} for writing\n";
        return;
    }

    // Iterate by value: each $v is a private copy, so no reference is left
    // dangling after the loop and $movieData itself is not modified.
    foreach ($movieData as $v) {
        $id = $v['id'];
        var_dump($id);

        // A tab inside a value would shift every following column.
        $v['directors'] = str_replace("\t", "", $v['directors']);
        $v['casts'] = str_replace("\t", "", $v['casts']);
        $v['type'] = isset($movieType[$id]) ? implode(' ', $movieType[$id]) : '';
        $v['country'] = isset($movieCountry[$id]) ? implode(' ', $movieCountry[$id]) : '';

        fwrite($h, implode("\t", $v) . "\n");
    }

    fclose($h);
}
/**
 * Update movie base data from Douban.
 *
 * Pages (40 rows per page) through MvBasic rows with `id>=6214`. For every row
 * that has a `douban_id`, the Douban subject page is downloaded, its
 * `<div id="info">` block is extracted and both are handed to pregDoubanData().
 * When that returns data, the matching MvBasic row is updated; after a
 * successful save the type and country links are refreshed through
 * updateMovieType() and updateMovieCountry().
 *
 * After each row that got past the download/parse guards, "douban_id=id" is
 * written to ./last_id.txt and the loop sleeps one second.
 */
public function actionUpdatemovie()
{
    $pageSize = 40;

    $mvBasic = MvBasic::find()->where('id>=6214')->orderBy('id asc');
    // Count before limit/asArray are applied to the shared query object.
    $total = $mvBasic->count();
    $totalPage = ceil($total / $pageSize);
    $mvBasic->limit($pageSize)->asArray();

    for ($page = 1; $page <= $totalPage; $page++) {
        $offset = ($page - 1) * $pageSize;
        $data = $mvBasic->offset($offset)->all();

        foreach ($data as $v) {
            $basic_id = $v['id'];
            var_dump('doubanId: ' . $v['douban_id'] . ' basicId: ' . $basic_id . "\n");

            if (empty($v['douban_id'])) {
                continue;
            }

            $douban_url = 'http://movie.douban.com/subject/' . $v['douban_id'] . '/';
            $html = HttpClient::curlRequset($douban_url);
            if (empty($html)) {
                continue;
            }
            echo "curl done \n";

            $reg1 = "/<div id=\"info\">[\\s\\S]+?<\\/div>/";
            preg_match($reg1, $html, $info_match);
            // No info block on the page: nothing to parse.
            if (empty($info_match['0'])) {
                continue;
            }
            $html_info = $info_match['0'];

            $movie = $this->pregDoubanData($html, $html_info, $v);
            if ($movie) {
                $mvBasic1 = MvBasic::findOne(['douban_id' => $v['douban_id']]);

                if ($mvBasic1 === null) {
                    // The row vanished between the page query and this lookup.
                    echo "basic row not found for douban_id {$v['douban_id']} \n";
                } else {
                    $py_title = PinyinUtil::subPinyin(PinyinUtil::getFullPy($movie['title']));

                    // Limit the casts string to 255 characters
                    // (presumably the column width -- TODO confirm).
                    $casts = implode(',', $movie['casts']);
                    if (mb_strlen($casts, 'UTF-8') > 255) {
                        $new_casts = mb_substr($casts, 0, 255, 'UTF-8');
                        $new_casts_arr = array_filter(explode(',', $new_casts));
                        $last_cast = end($new_casts_arr);
                        // The cut may fall in the middle of a name. Compare the last
                        // piece with the ORIGINAL cast list; comparing it with the
                        // truncated list it came from could never fail.
                        if (!in_array($last_cast, $movie['casts'], true)) {
                            array_pop($new_casts_arr);
                        }
                        $casts = implode(',', $new_casts_arr);
                    }

                    $mvBasic1->title = $movie['title'];
                    $mvBasic1->py_title = $py_title;
                    $mvBasic1->rating = $movie['rating'];
                    $mvBasic1->directors = implode(',', $movie['directors']);
                    $mvBasic1->writers = implode(',', $movie['writers']);
                    $mvBasic1->casts = $casts;
                    $mvBasic1->countries = implode(',', $movie['countries']);
                    $mvBasic1->languages = implode(',', $movie['languages']);
                    $mvBasic1->pubdates = implode(',', $movie['pubdates']);
                    $mvBasic1->durations = implode(',', $movie['durations']);
                    $mvBasic1->aka = implode(',', $movie['aka']);
                    $rs = $mvBasic1->save();

                    if ($rs && $basic_id) {
                        // Movie type links.
                        $this->updateMovieType($basic_id, $movie['cate']);
                        // Country links.
                        $this->updateMovieCountry($basic_id, $movie['countries']);
                    }

                    var_dump($rs);
                    echo "{$v['title']} \n\n";
                }
            }

            // Record the last processed row.
            file_put_contents('./last_id.txt', $v['douban_id'] . '=' . $v['id']);
            sleep(1);
        }
    }

    echo "\n all done \n";
}
/**
 * Link unassigned MvSourceTemp rows to MvBasic rows via a Douban title search.
 *
 * Processes MvSourceTemp rows with `basic_id=0 and type!=0` in ascending id
 * order, 40 per page. For each row the title is searched through
 * ApiDouban::searchpj() and every search hit is handled as follows:
 *  - the subject id is taken from the hit's `url`; hits without one are skipped;
 *  - if MvSpider has no row for that subject id, one is created
 *    (from_tag "search", status 20);
 *  - otherwise, if MvBasic has a row with that douban_id, the hit's year equals
 *    the temp row's year and similar_text() reports 100% on the titles, a
 *    MvSource row (site_id 10) is inserted, the temp row's basic_id is set and
 *    the MvBasic row's update_time / source_num are bumped.
 *
 * NOTE(review): a temp row can be linked more than once if several hits match;
 * confirm whether the hit loop should stop after the first successful link.
 *
 * Sleeps one second after every temp row and after every page.
 */
public function actionMarksource()
{
    $db_api = new ApiDouban();
    $pageSize = 40;

    // Keyset paging on id. Matched rows get a non-zero basic_id and drop out of
    // the filter, so offset paging over this filter would skip unprocessed rows.
    $lastId = 0;

    while (true) {
        $tempSourceData = models\MvSourceTemp::find()
            ->where('basic_id=0 and type!=0')
            ->andWhere(['>', 'id', $lastId])
            ->orderBy('id asc')
            ->limit($pageSize)
            ->asArray()
            ->all();

        if (empty($tempSourceData)) {
            break;
        }

        // Walk the temp rows of this page.
        foreach ($tempSourceData as $v) {
            $lastId = $v['id'];
            echo "tempSource: {$v['title']} \n";

            // Douban search.
            $searchRs = $db_api->searchpj($v['title']);

            if (!empty($searchRs)) {
                foreach ($searchRs as $s) {
                    // Extract the subject id from the hit's URL.
                    $reg = '/subject\\/(\\d+?)\\//';
                    preg_match($reg, $s['url'], $match);
                    $subject_id = isset($match['1']) ? $match['1'] : 0;

                    echo "douban Search: {$subject_id} {$s['title']}\n";
                    if (!$subject_id) {
                        echo "douban Subject_id empty: {$subject_id} {$s['title']}\n";
                        continue;
                    }

                    // Is the subject already known to the spider?
                    $spiderRs = models\MvSpider::find()->where(['subject_id' => $subject_id])->asArray()->all();
                    if (!$spiderRs) {
                        $spider = new models\MvSpider();
                        $spider->subject_id = $subject_id;
                        $spider->title = $s['title'];
                        $spider->from_tag = 'search';
                        $spider->status = 20;
                        $spider->create_time = time();
                        $spider->save();
                        echo "spider save {$subject_id} \n\n";
                        continue;
                    }

                    // Does a basic row exist for the subject?
                    $basicRs = models\MvBasic::find()->where(['douban_id' => $subject_id])->asArray()->one();
                    if (empty($basicRs['id'])) {
                        echo "basicRs empty {$subject_id} \n\n";
                        continue;
                    }

                    echo "basicRs found subject_id: {$subject_id} temp: {$v['id']}-{$v['title']} basic_id: {$basicRs['id']} basic_title: {$basicRs['title']} \n";

                    // Compare year and title. Loose comparison on purpose: the two
                    // years come from different sources and may differ in type.
                    if ($v['year'] != $s['year']) {
                        echo "year: {$v['year']} vs {$s['year']} temp: {$v['id']}-{$v['title']} {$subject_id} \n\n";
                        continue;
                    }

                    // Same year: require identical titles.
                    similar_text($v['title'], $s['title'], $percent);
                    echo "vs \nPercent: {$percent} \n";
                    if ($percent < 100) {
                        echo "vs Percent: {$percent} < 100 temp: {$v['id']}-{$v['title']} {$subject_id} \n\n";
                        continue;
                    }

                    // Insert into mv_source.
                    $now = time();
                    $source = new models\MvSource();
                    $source->site_id = 10;
                    $source->basic_id = $basicRs['id'];
                    $source->type = $v['type'];
                    $source->name = $v['name'];
                    $source->download_url = $v['download_url'];
                    $source->play_url = $v['play_url'];
                    $source->ext = $v['ext'];
                    $source->update_time = $now;
                    $source->create_time = $now;
                    if (!$source->save()) {
                        // Do not mark the temp row as linked or bump source_num
                        // when nothing was actually inserted.
                        echo "source save failed temp: {$v['id']}-{$v['title']} basic_id: {$basicRs['id']} \n\n";
                        continue;
                    }

                    // Update mv_source_temp [basic_id].
                    models\MvSourceTemp::updateAll(['basic_id' => $basicRs['id']], ['id' => $v['id']]);

                    // Update mv_basic [update_time, source_num].
                    $source_num = $basicRs['source_num'] + 1;
                    models\MvBasic::updateAll(
                        ['update_time' => time(), 'source_num' => $source_num],
                        ['douban_id' => $subject_id]
                    );

                    echo "vs Percent: {$percent} insert done temp: {$v['id']}-{$v['title']} basic_id: {$basicRs['id']} subject_id: {$subject_id} \n\n";
                }
            }

            sleep(1);
        }

        sleep(1);
    }
}