/**
 * Records a page sample in the strayer_evo table and, once a second sample
 * for the same domain / detail ID exists, derives extraction rules from both.
 *
 * Flow, as visible in this method:
 *  - rejects pages whose title/text HTML is too short, that contain more than
 *    10 links counted by own_link_count(), or when the 'fp_open_evo' setting
 *    is not 1;
 *  - builds a "detail ID" from the first text split segment (the longest
 *    digit-free part of it when it contains digits);
 *  - if no other sample (different URL) exists for this domain + detail ID,
 *    stores the current page as a sample (status 0) and reports 'no';
 *  - otherwise builds title/text rules via get_rules(), stores them on the
 *    existing sample (status 1), indexes it, optionally uploads it through
 *    the RPC client, and reports 'ok'.
 *
 * @param array $info Must provide $info['text'] and $info['title'], each with
 *                    'html' and 'key' entries ('key' indexes $this->dom_info_arr).
 *
 * @return array|false|null Array with 'evo_title_info' and 'status' ('no' or 'ok');
 *                          FALSE on most rejections; NULL when $info is empty,
 *                          the HTML is too short, or no text rules could be built.
 */
function evo_set($info)
 {
     global $_G;
     if (!$info) {
         return;
     }
     // Title or content that is too short is not acceptable.
     if (strlen($info['text']['html']) < 200 || strlen($info['title']['html']) < 10) {
         return;
     }
     // More than 10 links pointing back to the own site: reject the page.
     $link_count = own_link_count($info['text']['html'], $this->url);
     if ($link_count > 10) {
         return FALSE;
     }
     $milu_set = pick_common_get();
     if ($milu_set['fp_open_evo'] != 1) {
         return FALSE;
     }
     $text_info = $this->dom_info_arr[$info['text']['key']];
     $title_info = $this->dom_info_arr[$info['title']['key']];
     // 'html' is attached only for get_split_arr() and stripped again below.
     $text_info['html'] = $info['text']['html'];
     $title_info['html'] = $info['title']['html'];
     $info['title_split_arr'] = $this->get_split_arr($title_info);
     $info['text_split_arr'] = $split_arr = $this->get_split_arr($text_info);
     // Drop the bulky markup fields so they are not serialized into the table.
     unset($text_info['outertext'], $text_info['parent']['outertext'], $title_info['outertext'], $title_info['parent']['outertext'], $text_info['html'], $title_info['html']);
     if (strlen($split_arr[0]) < 14) {
         return FALSE;
     }
     pload('F:copyright');
     $host_info = GetHostInfo($this->url);
     $domain = $host_info['host'];
     $domain_hash = md5($domain);
     // When the segment contains digits, keep only its longest digit-free part
     // (the first one wins on ties).
     if (preg_match('/\\d+/', $split_arr[0])) {
         $s_arr = preg_split("/[\\d]+/", $split_arr[0]);
         $split_arr[0] = $s_arr[0];
         foreach ((array) $s_arr as $v) {
             if (strlen($v) > strlen($split_arr[0])) {
                 $split_arr[0] = $v;
             }
         }
     }
     // $title_info is empty here when dom_info_arr had no entry for the title key.
     if (!$title_info) {
         return FALSE;
     }
     $result_info = array('evo_title_info' => $title_info);
     $setarr = array('content_get_type' => 0, 'detail_ID' => $split_arr[0], 'detail_ID_hash' => md5($split_arr[0]), 'detail_ID_test' => $this->url, 'content_rules' => '', 'evo_text_info' => serialize($text_info), 'evo_title_info' => serialize($title_info), 'domain_hash' => $domain_hash, 'domain' => $domain, 'status' => 0, 'dateline' => $_G['timestamp']);
     $setarr = paddslashes($setarr);

     // The URL goes into hand-built SQL below, so it must be escaped first.
     $url_sql = addslashes($this->url);
     $evo_table = DB::table('strayer_evo');
     $match_sql = " WHERE domain_hash='{$domain_hash}' AND detail_ID_hash='" . $setarr['detail_ID_hash'] . "'";

     // Look for a pending sample of the same domain/detail ID taken from a different URL.
     $data_info = DB::fetch_first("SELECT * FROM " . $evo_table . $match_sql . " AND status=0 AND detail_ID_test!='{$url_sql}'");
     $data_info = pstripslashes($data_info);
     if (!$data_info) {
         // No other sample yet: store this page once (skip if this URL is already stored).
         $already_stored = DB::result(DB::query("SELECT COUNT(*) FROM " . $evo_table . $match_sql . " AND status=0 AND detail_ID_test='{$url_sql}'"), 0);
         if (!$already_stored) {
             DB::insert('strayer_evo', $setarr, TRUE);
         }
         $result_info['status'] = 'no';
         return $result_info;
     }

     // A second sample exists: derive rules from the pair.
     $title_rules = $this->get_rules($info, $title_info, $data_info, 'title');
     $text_rules = $this->get_rules($info, $text_info, $data_info, 'text');
     // Delete an earlier finished record to prevent duplicate rule generation
     // when no index exists. Only delete when such a record was actually found.
     $check_info = DB::fetch_first("SELECT * FROM " . $evo_table . $match_sql . " AND status=1");
     if ($check_info) {
         DB::query('DELETE FROM ' . $evo_table . " WHERE id='{$check_info['id']}'");
         DB::query('DELETE FROM ' . DB::table('strayer_searchindex') . " WHERE id='{$check_info['id']}' AND type='34'");
     }
     if ($text_rules) {
         $rule_fields = array('content_get_type' => $text_rules['get_type'], 'content_rules' => $text_rules['rules'], 'theme_get_type' => $title_rules['get_type'], 'theme_rules' => $title_rules['rules']);
         DB::update("strayer_evo", $rule_fields + array('status' => 1), array("id" => $data_info['id']));
         $path_hash = get_path_hash($this->url);
         // Add the search index entry (type 34) for the updated record.
         add_search_index($domain_hash, $path_hash, 34, $data_info['id']);
         $pick_set = get_pick_set();
         if ($pick_set['open_cloud_pick'] == 1) {
             // Cloud picking is enabled: upload the rules to the server.
             $rpcClient = rpcClient();
             foreach ($rule_fields as $field => $value) {
                 $data_info[$field] = $value;
             }
             $client_info = get_client_info();
             $rpcClient->upload_evo_data($data_info, $client_info);
         }
         del_search_index(3);
         $result_info['status'] = 'ok';
         return $result_info;
     }
     // No text rules could be built: falls through and returns NULL.
 }
Beispiel #2
0
 /**
  * Decides whether a picked article is acceptable.
  *
  * Updates the pick status counters first, then runs the validation checks.
  * On the first failed check the matching language message is shown through
  * show_pick_info() with level 'err' and FALSE is returned.
  *
  * @param array $arr Picked article; reads 'title', 'content' and, when
  *                   present, 'evo' and 'evo_title_info'.
  *
  * @return bool TRUE when every check passes, FALSE otherwise.
  */
 function check_article($arr)
 {
     global $_G;
     // NOTE(review): this value is read but never used in this method.
     $evo_rules = $_G['cache']['evn_milu_pick']['evo_rules'];
     if (!$this->temp_arr['have_reply']) {
         $this->temp_arr['have_reply'] = 2;
         $this->get_pick_status(1);
     }
     $this->status_arr['now'] -= 1;
     $this->status_arr = array_merge($this->msg_args, $this->status_arr);

     $reject_key = $this->check_article_reject_key($arr);
     if ($reject_key !== '') {
         show_pick_info(milu_lang($reject_key), 'err', $this->status_arr);
         return FALSE;
     }
     return TRUE;
 }

 /**
  * Runs the article checks in order and returns the language key of the
  * first one that fails, or an empty string when the article passes.
  *
  * @param array $arr Same array that check_article() receives.
  *
  * @return string Language key for milu_lang(), or '' when nothing failed.
  */
 function check_article_reject_key($arr)
 {
     if ($arr['content'] == 'list') {
         return 'is_page_web';
     }

     // Title checks.
     $arr['title'] = trim($arr['title']);
     if (!$arr['title']) {
         return 'no_get_title';
     }
     if (strlen(_striptext(trim($arr['title']))) < 1) {
         return 'title_too_short';
     }
     if (strlen($arr['title']) < $this->min_title_len) {
         return 'so_short_title';
     }

     // Checks that apply only when an 'evo' entry is present and is not 2.
     if (array_key_exists('evo', $arr) && $arr['evo'] != 2) {
         if (!$arr['evo_title_info']) {
             return 'no_article_view';
         }
         // Too many links counted by own_link_count(): treated as a list page.
         if ($arr['evo'] == 0 && own_link_count($arr['content'], $this->now_url) > $this->min_own_link) {
             return 'is_list_page';
         }
     }

     // Content checks.
     $arr['content'] = trim($arr['content']);
     if (!$arr['content']) {
         return 'no_get_content';
     }
     $body_len = strlen($arr['content']);
     if ($body_len < $this->p_arr['article_min_len'] * 2 && $this->p_arr['article_min_len']) {
         return 'data_too_short';
     }
     if ($body_len > 600000) {
         return 'data_too_long';
     }

     // Keyword filtering, title first and then content.
     if ($this->p_arr['keyword_flag'] == 1) {
         // Per field: required-keyword setting and its message,
         // then excluded-keyword setting and its message.
         $keyword_checks = array(
             'title' => array('keyword_title', 'title_must_keyword', 'keyword_title_exclude', 'title_no_must_keyword'),
             'content' => array('keyword_content', 'content_must_keyword', 'keyword_content_exclude', 'content_no_must_keyword'),
         );
         foreach ($keyword_checks as $field => $check) {
             // Must contain.
             if (filter_something($arr[$field], $this->p_arr[$check[0]])) {
                 return $check[1];
             }
             // Must not contain.
             if (!filter_something($arr[$field], $this->p_arr[$check[2]], TRUE)) {
                 return $check[3];
             }
         }
     }
     return '';
 }