示例#1
0
 /**
  * Regenerate the pinyin columns of MvBasic rows.
  *
  * Pages through the MvBasic rows selected by the hard-coded filter (40 rows
  * per page), recomputes py_title and py_original_title from title and
  * original_title via PinyinUtil, and saves each row. For every row one line
  * "<save result> <id>" is printed; the save result prints as "1" on success
  * and as an empty string on failure. The loop sleeps one second between pages.
  */
 public function actionFixpinyin()
 {
     $mvBasic = MvBasic::find()->where('id=40')->orderBy('id asc');
     $total = $mvBasic->count();
     $pageSize = 40;
     $totalPage = ceil($total / $pageSize);
     $mvBasic->limit($pageSize)->asArray();
     for ($page = 1; $page <= $totalPage; $page++) {
         $offset = ($page - 1) * $pageSize;
         $data = $mvBasic->offset($offset)->all();
         foreach ($data as $v) {
             $basic_id = $v['id'];
             $py_title = PinyinUtil::subPinyin(PinyinUtil::getFullPy($v['title']));
             $py_original_title = PinyinUtil::subPinyin(PinyinUtil::getFullPy($v['original_title']));
             // Reset per row: without this a failed row would report the
             // previous row's result (or an undefined variable on the first row).
             $rs = false;
             try {
                 $py = MvBasic::findOne($basic_id);
                 // The row may have disappeared between the page query and this lookup.
                 if ($py !== null) {
                     $py->py_title = $py_title;
                     $py->py_original_title = $py_original_title;
                     $rs = $py->save();
                 }
             } catch (ErrorException $e) {
                 // Keep going with the next row, but do not hide the failure.
                 echo 'error: ', $e->getMessage(), "\n";
             }
             echo $rs, ' ', $basic_id, "\n";
         }
         sleep(1);
     }
 }
示例#2
0
 /**
  * Export movie rows to the TSV file at /usr/local/sphinx/data/mv_main.tsv.
  *
  * Each MvBasic row with status=1 and a rating above "0.0" becomes one
  * tab-separated line: the selected columns in query order, followed by the
  * space-joined type names and the space-joined country names of that movie.
  * Names are looked up in mv_basic_condition: cond_type '1' is used for the
  * type links and cond_type '2' for the country links.
  */
 public function actionSphinxdata()
 {
     ini_set('memory_limit', '512M');
     $dataFile = '/usr/local/sphinx/data/mv_main.tsv';

     // mv_basic_condition: cond_type => [id => name]
     $movieCondData = models\MvBasicCondition::find()->select('*')->asArray()->all();
     $movieCond = [];
     foreach ($movieCondData as $v) {
         $movieCond[$v['cond_type']][$v['id']] = $v['name'];
     }

     // mv_basic_type: basic_id => [type name, ...]
     $movieTypeData = models\MvBasicType::find()->select('*')->asArray()->all();
     $movieType = [];
     foreach ($movieTypeData as $v) {
         // Skip links that point at a condition row that does not exist;
         // otherwise a null entry would end up in the joined column.
         if (isset($movieCond['1'][$v['cate_id']])) {
             $movieType[$v['basic_id']][] = $movieCond['1'][$v['cate_id']];
         }
     }

     // mv_basic_country: basic_id => [country name, ...]
     $movieCountryData = models\MvBasicCountry::find()->select('*')->asArray()->all();
     $movieCountry = [];
     foreach ($movieCountryData as $v) {
         if (isset($movieCond['2'][$v['country_id']])) {
             $movieCountry[$v['basic_id']][] = $movieCond['2'][$v['country_id']];
         }
     }

     // mv_basic
     $movieData = models\MvBasic::find()->select('id,title,py_title,original_title,py_original_title,aka,directors,casts,year,rating,update_time,status')->where('status=1 and rating>"0.0"')->asArray()->all();

     $h = fopen($dataFile, "wb");
     if ($h === false) {
         echo "cannot open {$dataFile} for writing\n";
         return;
     }
     // Iterate by value: the row is only modified to build its output line,
     // so no reference into $movieData is needed (and none is left dangling).
     foreach ($movieData as $v) {
         $id = $v['id'];
         var_dump($id);
         // Tabs inside these fields would break the column layout.
         $v['directors'] = str_replace("\t", "", $v['directors']);
         $v['casts'] = str_replace("\t", "", $v['casts']);
         $v['type'] = isset($movieType[$id]) ? implode(' ', $movieType[$id]) : '';
         $v['country'] = isset($movieCountry[$id]) ? implode(' ', $movieCountry[$id]) : '';
         fwrite($h, implode("\t", $v) . "\n");
     }
     fclose($h);
 }
示例#3
0
 /**
  * Refresh movie base data from the Douban subject pages.
  *
  * Pages through the MvBasic rows with id >= 6214 (40 rows per page). For each
  * row that has a douban_id, the subject page is downloaded, its "info" block
  * is extracted and handed to pregDoubanData(); the parsed result is written
  * back to the MvBasic row with that douban_id. After a successful save the
  * type and country links are updated as well. Rows that get past the download
  * and extraction steps record "douban_id=id" in ./last_id.txt and are followed
  * by a one-second pause.
  */
 public function actionUpdatemovie()
 {
     $mvBasic = MvBasic::find()->where('id>=6214')->orderBy('id asc');
     $total = $mvBasic->count();
     $pageSize = 40;
     $totalPage = ceil($total / $pageSize);
     $mvBasic->limit($pageSize)->asArray();
     for ($page = 1; $page <= $totalPage; $page++) {
         $offset = ($page - 1) * $pageSize;
         $data = $mvBasic->offset($offset)->all();
         foreach ($data as $v) {
             $basic_id = $v['id'];
             var_dump('doubanId: ' . $v['douban_id'] . '  basicId: ' . $basic_id . "\n");
             if (empty($v['douban_id'])) {
                 continue;
             }
             $douban_url = 'http://movie.douban.com/subject/' . $v['douban_id'] . '/';
             $html = HttpClient::curlRequset($douban_url);
             if (empty($html)) {
                 continue;
             }
             echo "curl done \n";
             $reg1 = "/<div id=\"info\">[\\s\\S]+?<\\/div>/";
             preg_match($reg1, $html, $info_match);
             if (empty($info_match[0])) {
                 // The page has no info block, so there is nothing to parse.
                 continue;
             }
             $html_info = $info_match[0];
             $movie = $this->pregDoubanData($html, $html_info, $v);
             // Only look the record up when parsing produced something to store.
             $mvBasic1 = $movie ? MvBasic::findOne(['douban_id' => $v['douban_id']]) : null;
             if ($mvBasic1 !== null) {
                 $py_title = PinyinUtil::getFullPy($movie['title']);
                 $py_title = PinyinUtil::subPinyin($py_title);

                 // Keep the joined cast list within 255 characters.
                 $casts = implode(',', $movie['casts']);
                 if (mb_strlen($casts, 'UTF-8') > 255) {
                     $new_casts = mb_substr($casts, 0, 255, 'UTF-8');
                     $new_casts_arr = array_filter(explode(',', $new_casts));
                     $last_cast = end($new_casts_arr);
                     // The cut may fall in the middle of a name. Compare the last
                     // fragment with the ORIGINAL names and drop it if it is not
                     // a complete one.
                     if (!in_array($last_cast, $movie['casts'], true)) {
                         array_pop($new_casts_arr);
                     }
                     $casts = implode(',', $new_casts_arr);
                 }

                 $mvBasic1->title = $movie['title'];
                 $mvBasic1->py_title = $py_title;
                 $mvBasic1->rating = $movie['rating'];
                 $mvBasic1->directors = implode(',', $movie['directors']);
                 $mvBasic1->writers = implode(',', $movie['writers']);
                 $mvBasic1->casts = $casts;
                 $mvBasic1->countries = implode(',', $movie['countries']);
                 $mvBasic1->languages = implode(',', $movie['languages']);
                 $mvBasic1->pubdates = implode(',', $movie['pubdates']);
                 $mvBasic1->durations = implode(',', $movie['durations']);
                 $mvBasic1->aka = implode(',', $movie['aka']);
                 $rs = $mvBasic1->save();
                 if ($rs && $basic_id) {
                     // Refresh the type and country link tables for this movie.
                     $this->updateMovieType($basic_id, $movie['cate']);
                     $this->updateMovieCountry($basic_id, $movie['countries']);
                 }
                 var_dump($rs);
                 echo "{$v['title']} \n\n";
             }
             // Remember the last processed row.
             file_put_contents('./last_id.txt', $v['douban_id'] . '=' . $v['id']);
             sleep(1);
         }
     }
     echo "\n all done \n";
 }
示例#4
0
 /**
  * Attach unassigned temporary sources to movies via a Douban title search.
  *
  * Walks the MvSourceTemp rows with basic_id=0 and type!=0 in id order, 40 at
  * a time. Each row's title is searched through ApiDouban::searchpj(). For
  * every search hit the Douban subject id is taken from the hit's url:
  *  - a subject unknown to MvSpider is queued there (from_tag 'search', status 20);
  *  - a known subject that also has an MvBasic row is compared with the temp
  *    row; when the year is equal and similar_text() reports 100% for the
  *    titles, an MvSource row is inserted, the temp row gets the basic_id and
  *    the movie's update_time/source_num are updated.
  * Progress is echoed throughout; the loop sleeps one second after each temp
  * row and after each page.
  */
 public function actionMarksource()
 {
     $db_api = new ApiDouban();
     $pageSize = 40;
     $tempSource = models\MvSourceTemp::find()
         ->where('basic_id=0 and type!=0')
         ->orderBy('id asc')
         ->limit($pageSize)
         ->asArray();

     // Page by "id greater than the last one seen" instead of OFFSET: the loop
     // sets basic_id on matched rows, which removes them from the filter, so an
     // OFFSET-based walk would skip rows that slide into earlier pages.
     $lastId = null;
     while (true) {
         $pageQuery = clone $tempSource;
         if ($lastId !== null) {
             $pageQuery->andWhere(['>', 'id', $lastId]);
         }
         $tempSourceData = $pageQuery->all();
         if (empty($tempSourceData)) {
             break;
         }

         foreach ($tempSourceData as $v) {
             $lastId = $v['id'];
             echo "tempSource: {$v['title']} \n";
             $searchRs = $db_api->searchpj($v['title']);
             if (!empty($searchRs)) {
                 foreach ($searchRs as $s) {
                     // Extract the subject id from the hit's url.
                     $reg = '/subject\\/(\\d+?)\\//';
                     preg_match($reg, $s['url'], $match);
                     $subject_id = isset($match['1']) ? $match['1'] : 0;
                     echo "douban Search: {$subject_id} {$s['title']}\n";
                     if (!$subject_id) {
                         echo "douban Subject_id empty: {$subject_id} {$s['title']}\n";
                         continue;
                     }

                     // Is the subject already known to the spider?
                     $spiderRs = models\MvSpider::find()->where(['subject_id' => $subject_id])->asArray()->all();
                     if (!$spiderRs) {
                         $spider = new models\MvSpider();
                         $spider->subject_id = $subject_id;
                         $spider->title = $s['title'];
                         $spider->from_tag = 'search';
                         $spider->status = 20;
                         $spider->create_time = time();
                         $spider->save();
                         echo "spider save {$subject_id} \n\n";
                         continue;
                     }

                     // Is there a movie for this subject?
                     $basicRs = models\MvBasic::find()->where(['douban_id' => $subject_id])->asArray()->one();
                     if (empty($basicRs['id'])) {
                         echo "basicRs empty {$subject_id} \n\n";
                         continue;
                     }
                     echo "basicRs found subject_id: {$subject_id} temp: {$v['id']}-{$v['title']}  basic_id: {$basicRs['id']} basic_title: {$basicRs['title']} \n";

                     // Compare year first, then title.
                     if ($v['year'] != $s['year']) {
                         echo "year:  {$v['year']} vs {$s['year']}  temp: {$v['id']}-{$v['title']}  {$subject_id}  \n\n";
                         continue;
                     }
                     similar_text($v['title'], $s['title'], $percent);
                     echo "vs Percent: {$percent} \n";
                     if ($percent < 100) {
                         echo "vs Percent: {$percent} < 100  temp: {$v['id']}-{$v['title']}   {$subject_id}  \n\n";
                         continue;
                     }

                     // Insert the mv_source row.
                     $source = new models\MvSource();
                     $source->site_id = 10;
                     $source->basic_id = $basicRs['id'];
                     $source->type = $v['type'];
                     $source->name = $v['name'];
                     $source->download_url = $v['download_url'];
                     $source->play_url = $v['play_url'];
                     $source->ext = $v['ext'];
                     $source->update_time = time();
                     $source->create_time = time();
                     if (!$source->save()) {
                         // Do not mark the temp row or bump the counter for a
                         // source that was never stored.
                         echo "mv_source save failed  temp: {$v['id']}-{$v['title']}  basic_id: {$basicRs['id']}  subject_id: {$subject_id} \n\n";
                         continue;
                     }

                     // Mark the temp row as assigned.
                     models\MvSourceTemp::updateAll(['basic_id' => $basicRs['id']], ['id' => $v['id']]);
                     // Bump the movie's update_time and source counter.
                     $source_num = $basicRs['source_num'] + 1;
                     models\MvBasic::updateAll(['update_time' => time(), 'source_num' => $source_num], ['douban_id' => $subject_id]);
                     echo "vs Percent: {$percent} insert done  temp: {$v['id']}-{$v['title']}  basic_id: {$basicRs['id']}  subject_id: {$subject_id} \n\n";
                 }
             }
             sleep(1);
         }
         sleep(1);
     }
 }