/**
 * Request a URL and return the first response header line that starts with
 * "cookie" (case-insensitive).
 *
 * The result is cached in the globals $gcookie / $lastRfurl: a second call
 * with the same URL returns the cached line without reconnecting.
 *
 * A plain HTTP/1.1 GET is sent to port 80 of the host reported by
 * GetHostInfo(). If the connection cannot be opened the script terminates
 * via die(), exactly as before.
 *
 * @param string $gurl URL to request; surrounding whitespace is ignored.
 * @return string The matching header line, or '' when the URL is empty or
 *                no matching line was found.
 */
function RefurlCookie($gurl)
{
    global $gcookie, $lastRfurl;

    $gurl = trim($gurl);
    if (!empty($gcookie) && $lastRfurl == $gurl) {
        return $gcookie;
    }
    $lastRfurl = $gurl;
    if ($gurl == '') {
        return '';
    }

    $urlinfos = GetHostInfo($gurl);
    $ghost = $urlinfos['host'];
    $gquery = $urlinfos['query'];

    $sessionQuery = "GET $gquery HTTP/1.1\r\n";
    $sessionQuery .= "Host: $ghost\r\n";
    $sessionQuery .= "Accept: */*\r\n";
    $sessionQuery .= "User-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\r\n";
    $sessionQuery .= "Connection: Close\r\n\r\n";

    $errno = "";
    $errstr = "";
    $m_fp = fsockopen($ghost, 80, $errno, $errstr, 10) or die($ghost.'<br />');
    fwrite($m_fp, $sessionQuery);

    // Read the response headers line by line.
    $gcookie = "";
    $lnum = 0;
    while (!feof($m_fp)) {
        // fgets() returns false on EOF/timeout; cast so trim() always gets a string.
        $line = trim((string) fgets($m_fp, 256));
        // A blank line ends the header section; also stop after 100 lines.
        // (The counter was never incremented before, so the cap never applied.)
        if ($line == "" || ++$lnum > 100) {
            break;
        }
        // eregi() was removed in PHP 7; preg_match with /i is the equivalent test.
        // NOTE(review): servers normally send "Set-Cookie:", which this pattern
        // does not match - confirm whether "^set-cookie" was intended.
        if (preg_match('/^cookie/i', $line)) {
            $gcookie = $line;
            break;
        }
    }
    fclose($m_fp);

    return $gcookie;
}
/**
 * Ask the cloud (RPC) service for extraction rules or content matching a URL.
 *
 * Flow, as implemented below:
 *  1. Return FALSE when the 'open_cloud_pick' setting is not 1.
 *  2. Return FALSE when a search-index row of type "<get_type>3" for this
 *     domain/path hash is newer than the configured cache time.
 *  3. Call the remote cloud_match_rules(). If it returns an object carrying
 *     a Message, return the localized error string.
 *  4. data_type 1: the reply carries rules. They are applied to $content;
 *     when they yield data they are imported locally and indexed under type
 *     "<get_type>4".
 *     data_type 2: the reply carries the content itself.
 *     Anything else: record a "<get_type>3" index row with data id 0 so the
 *     server is not asked again until that row expires.
 *
 * @param mixed  $get_type Rule kind; the value 3 selects the "evo" code paths.
 * @param string $url      Page URL to match.
 * @param string $content  Page content the returned rules are verified against.
 * @return mixed FALSE when skipped, an error string on RPC error, otherwise
 *               the extracted/returned data (empty array when nothing matched).
 */
function cloud_match_rules($get_type, $url, $content)
{
    global $_G;

    pload('F:fastpick');
    $setting = get_pick_set();
    $pick_config = $_G['cache']['evn_milu_pick']['pick_config'];
    $server_cache_time = $pick_config['index_server_cache_time'];
    if ($get_type == '3') {
        // The index for learned (evo) rules uses a shorter expiry time.
        $server_cache_time = $pick_config['evo_index_server_cache_time'];
    }
    // NOTE(review): the original stored this result in an unused local. The
    // call is kept in case it has side effects - confirm and remove.
    pick_common_get();
    if ($setting['open_cloud_pick'] != 1) {
        return FALSE;
    }

    pload('F:copyright');
    $host_info = GetHostInfo($url);
    $domain_hash = md5($host_info['host']);

    // The path hash is computed on the URL with all digit runs removed.
    $arr_temp = parse_url(preg_replace('/\\d+/', '', $url));
    // parse_url() may return false or omit 'path'; hash the empty string then.
    $path = (is_array($arr_temp) && isset($arr_temp['path'])) ? $arr_temp['path'] : '';
    $path_hash = md5($path);

    // Index type suffix: 3 = server-side result, 4 = local cache.
    $over_dateline = $_G['timestamp'] - $server_cache_time;
    $count = DB::result(DB::query("SELECT COUNT(*) FROM " . DB::table('strayer_searchindex') . " WHERE domain_hash='" . $domain_hash . "' AND path_hash='" . $path_hash . "' AND type='" . $get_type . "3' AND dateline > {$over_dateline}"), 0);
    if ($count) {
        return FALSE;
    }

    $args = array('get_type' => $get_type, 'url' => $url);
    $rpcClient = rpcClient();
    $client_info = get_client_info();
    $re = $rpcClient->cloud_match_rules($args, $client_info);

    // The original test `is_object($re) || $re->Number == 0` read properties
    // on non-objects and was effectively always true. Only an object can
    // carry an error message; everything else is simply treated as an array.
    if (is_object($re) && !empty($re->Message)) {
        return milu_lang('phprpc_error', array('msg' => $re->Message));
    }
    $re = (array) $re;
    $data_type = isset($re['data_type']) ? $re['data_type'] : null;

    $data = array();
    if ($data_type == 1) {
        // The server returned rules: verify them against the page content.
        $rules_info = $re['data'];
        if ($get_type == 3) {
            $data = evo_rules_get_article($content, $rules_info);
        } else {
            $data = rules_get_article($content, $rules_info);
        }
        // Was `$data || $data['content'] && $get_type == 3`, which parses as
        // `$data || (...)`; the right side cannot be truthy when $data is
        // falsy, so this is the same condition without the invalid offset read.
        if ($data) {
            // The rules are valid: store them locally.
            if ($get_type == 3) {
                $data_id = import_evo_data($rules_info);
            } else {
                $data_id = import_fastpick_data($rules_info);
            }
            if ($data_id) {
                // Clear earlier index rows for this domain/path first.
                DB::query('DELETE FROM ' . DB::table('strayer_searchindex') . " WHERE domain_hash='" . $domain_hash . "' AND path_hash='" . $path_hash . "'");
                add_search_index($domain_hash, $path_hash, $get_type . '4', $data_id);
            }
        }
    } elseif ($data_type == 2) {
        // The server returned the content itself.
        $data = $re['data'];
    } else {
        // Nothing found: record that, so the server is not asked again
        // until this index row expires.
        add_search_index($domain_hash, $path_hash, $get_type . '3', 0);
    }

    return $data;
}
/**
 * Record a page sample for rule learning and, once a second sample with the
 * same detail id exists, derive and store extraction rules.
 *
 * Reads $this->url and $this->dom_info_arr, and calls $this->get_split_arr()
 * and $this->get_rules().
 *
 * @param array $info Uses $info['text'] and $info['title'], each holding
 *                    'html' and 'key' (an index into $this->dom_info_arr).
 * @return array|false|null
 *   - null  when $info is empty, the title/text is too short, or a second
 *           sample exists but no text rules could be derived;
 *   - FALSE when the page is rejected (too many self links, evo disabled,
 *           detail id too short, no title info);
 *   - array with 'evo_title_info' and 'status' => 'no' (sample stored, no
 *           rules yet) or 'ok' (rules generated and indexed).
 */
function evo_set($info)
{
    global $_G;

    if (!$info) {
        return;
    }
    // Title and text that are too short are rejected.
    if (strlen($info['text']['html']) < 200 || strlen($info['title']['html']) < 10) {
        return;
    }
    // More than 10 links pointing back to this page disqualify it.
    $link_count = own_link_count($info['text']['html'], $this->url);
    if ($link_count > 10) {
        return FALSE;
    }
    $milu_set = pick_common_get();
    if ($milu_set['fp_open_evo'] != 1) {
        return FALSE;
    }

    $text_info = $this->dom_info_arr[$info['text']['key']];
    $title_info = $this->dom_info_arr[$info['title']['key']];
    $text_info['html'] = $info['text']['html'];
    $title_info['html'] = $info['title']['html'];
    $info['title_split_arr'] = $this->get_split_arr($title_info);
    $info['text_split_arr'] = $split_arr = $this->get_split_arr($text_info);
    // Drop the markup fields before the node info is serialized.
    unset($text_info['outertext'], $text_info['parent']['outertext'], $title_info['outertext'], $title_info['parent']['outertext'], $text_info['html'], $title_info['html']);
    if (!isset($split_arr[0]) || strlen($split_arr[0]) < 14) {
        return FALSE;
    }

    pload('F:copyright');
    $host_info = GetHostInfo($this->url);
    $domain = $host_info['host'];
    $domain_hash = md5($domain);

    // If the id contains digits, keep only the longest digit-free piece
    // (the first one when several pieces share the maximum length).
    if (preg_match('/\\d+/', $split_arr[0])) {
        $s_arr = preg_split("/[\\d]+/", $split_arr[0]);
        $split_arr[0] = $s_arr[0];
        foreach ((array) $s_arr as $k => $v) {
            if (strlen($v) > strlen($split_arr[0])) {
                $split_arr[0] = $v;
            }
        }
    }
    if (!$title_info) {
        return FALSE;
    }

    $result_info = array();
    $result_info['evo_title_info'] = $title_info;
    $setarr = array('content_get_type' => 0, 'detail_ID' => $split_arr[0], 'detail_ID_hash' => md5($split_arr[0]), 'detail_ID_test' => $this->url, 'content_rules' => '', 'evo_text_info' => serialize($text_info), 'evo_title_info' => serialize($title_info), 'domain_hash' => $domain_hash, 'domain' => $domain, 'status' => 0, 'dateline' => $_G['timestamp']);
    $setarr = paddslashes($setarr);

    // The URL comes from crawled pages; escape it before placing it inside
    // a quoted SQL literal (it was interpolated raw before).
    $url_sql = addslashes($this->url);
    $evo_table = DB::table('strayer_evo');
    $pending_where = " WHERE domain_hash='{$domain_hash}' AND detail_ID_hash='" . $setarr['detail_ID_hash'] . "' AND status=0";

    // Look for a pending sample with the same detail id from a different URL.
    $data_info = DB::fetch_first("SELECT * FROM " . $evo_table . $pending_where . " AND detail_ID_test!='{$url_sql}'");
    $data_info = pstripslashes($data_info);

    if (!$data_info) {
        // No other sample yet: store this one unless this URL is already recorded.
        $already_stored = DB::result(DB::query("SELECT COUNT(*) FROM " . $evo_table . $pending_where . " AND detail_ID_test='{$url_sql}'"), 0);
        if (!$already_stored) {
            DB::insert('strayer_evo', $setarr, TRUE);
        }
        $result_info['status'] = 'no';
        return $result_info;
    }

    // A second sample exists: derive rules from the pair.
    $title_rules = $this->get_rules($info, $title_info, $data_info, 'title');
    $text_rules = $this->get_rules($info, $text_info, $data_info, 'text');

    // Remove an earlier finished record so rules are not generated twice
    // when no index exists. Guarded: the DELETEs used to run with an empty
    // id when there was no such record.
    $check_info = DB::fetch_first("SELECT * FROM " . $evo_table . " WHERE domain_hash='{$domain_hash}' AND detail_ID_hash='" . $setarr['detail_ID_hash'] . "' AND status=1");
    if ($check_info) {
        DB::query('DELETE FROM ' . $evo_table . " WHERE id='{$check_info['id']}'");
        DB::query('DELETE FROM ' . DB::table('strayer_searchindex') . " WHERE id='{$check_info['id']}' AND type='34'");
    }

    if ($text_rules) {
        $setarr = array('content_get_type' => $text_rules['get_type'], 'content_rules' => $text_rules['rules'], 'theme_get_type' => $title_rules['get_type'], 'theme_rules' => $title_rules['rules'], 'status' => 1);
        DB::update("strayer_evo", $setarr, array("id" => $data_info['id']));

        // Fixed typo: the hash was assigned to $pash_hash while the undefined
        // $path_hash was passed on, so the index got an empty path hash.
        $path_hash = get_path_hash($this->url);
        // Add the index row; in type 34 the trailing 4 marks a local cache entry.
        add_search_index($domain_hash, $path_hash, 34, $data_info['id']);

        $pick_set = get_pick_set();
        if ($pick_set['open_cloud_pick'] == 1) {
            // Cloud picking is enabled: upload the new rules to the server.
            $rpcClient = rpcClient();
            $data_info['content_get_type'] = $setarr['content_get_type'];
            $data_info['content_rules'] = $setarr['content_rules'];
            $data_info['theme_get_type'] = $setarr['theme_get_type'];
            $data_info['theme_rules'] = $setarr['theme_rules'];
            $client_info = get_client_info();
            $rpcClient->upload_evo_data($data_info, $client_info);
        }
        del_search_index(3);
        $result_info['status'] = 'ok';
        return $result_info;
    }
    // No text rules could be derived: falls through and returns null, as before.
}