/**
 * Extract a single article from an already-fetched page.
 *
 * Resolution order, as implemented below:
 *   1. match_rules(): if it returns an array, the article is extracted
 *      with rules_get_article() and the outcome is passed to write_evo_errlog().
 *   2. Otherwise cloud_match_rules() is consulted.
 *   3. If that yields no 'content' and the 'fp_open_auto' setting equals 1,
 *      HtmlExtractor is used as a last resort.
 *
 * @param string $content Raw page source. Blank input (after trim) returns early.
 * @param string $url     URL the page was fetched from.
 * @param array  $args    Optional settings. Only the 'get_type' key is read; other
 *                        keys are ignored and can no longer clobber $content / $url.
 * @return array|null Article data with an added 'evo_time' entry, or null when
 *                    $content is blank.
 */
function get_single_article($content, $url, $args = array()) {
    // Guard first, so nothing supplied through $args can bypass it.
    if (strlen(trim($content)) < 1) {
        return;
    }

    d_s('evo');

    // Request parameter wins over the caller-supplied value; 1 is the final fallback.
    $default_type = isset($args['get_type']) ? $args['get_type'] : null;
    $get_type = !empty($_GET['get_type']) ? intval($_GET['get_type']) : $default_type;
    if (!$get_type) {
        $get_type = 1;
    }

    $milu_set = pick_common_get();
    $rules_info = match_rules($url, $content, $get_type, 0);

    if (is_array($rules_info)) {
        pload('F:fastpick');
        $data = rules_get_article($content, $rules_info);
        write_evo_errlog($data, $url, $rules_info);
    } else {
        // Fetch rules from the cloud. (Original note: this should be optimised,
        // but no approach had been found yet.)
        $data = (array) cloud_match_rules($get_type, $url, $content);

        $auto_enabled = isset($milu_set['fp_open_auto']) && $milu_set['fp_open_auto'] == 1;
        if (empty($data['content']) && $auto_enabled) {
            // Smart (automatic) extraction is switched on.
            pload('C:HtmlExtractor');
            pload('F:article');

            $he = new HtmlExtractor($content, $url);
            $data = (array) $he->get_text();

            $extracted = isset($data['content']) ? $data['content'] : '';
            $extracted = dz_attach_format($url, $extracted);
            $arr = format_article_imgurl($url, $extracted);
            $data['content'] = $arr['message'];

            // Selectors handed to dom_filter_something() for removal.
            $del_dom_rules = array('div[id*=share]', 'div[class*=page]');
            foreach ($del_dom_rules as $selector) {
                $data['content'] = dom_filter_something($data['content'], $selector, 2);
            }
            unset($data['evo_title_info']);
        }
    }

    if (isset($_GET['type']) && $_GET['type'] == 'bbs') {
        // Forum target: run the content through the media and image BBCode converters.
        $body = isset($data['content']) ? $data['content'] : '';
        $body = media_htmlbbcode($body, $url);
        $data['content'] = img_htmlbbcode($body, $url);
    }

    $data['evo_time'] = d_e(0, 'evo');

    return $data;
}
/**
 * Fetch article data for $this->url / $this->str using learned rules.
 *
 * Lookup order, as implemented below:
 *   1. match_rules() with type 3 (locally learned rules).
 *   2. match_rules() with type 5 (local built-in rules, detail pages only).
 *   3. cloud_match_rules() when neither local lookup returns a non-empty array.
 *
 * When the title is missing, it is filled from $this->get_title()['html'] if
 * that value is non-empty.
 *
 * @return array Extracted data; an empty array when the cloud lookup yields no
 *               'content'. With a local rule, the value returned by
 *               evo_rules_get_article() is passed through (presumably an
 *               array - confirm against that helper).
 */
function evo_get() {
    // Return value is unused here; the call is kept in case it has side effects - TODO confirm.
    pick_common_get();

    // Try rules learned locally first.
    $rules_info = match_rules($this->url, $this->str, 3, 0);
    if (!is_array($rules_info) || !$rules_info) {
        // Then the local built-in rules, searching detail pages only.
        $rules_info = match_rules($this->url, $this->str, 5, 0);
    }

    if (!is_array($rules_info) || !$rules_info) {
        // No local rule: ask the server. Cast so a non-array reply cannot break the index reads below.
        $data = (array) cloud_match_rules(3, $this->url, $this->str);
        if (empty($data['content'])) {
            return array();
        }
        if (empty($data['title'])) {
            $re_title = $this->get_title();
            if (!empty($re_title['html'])) {
                $data['title'] = $re_title['html'];
            }
        }
        return $data;
    }

    $data = evo_rules_get_article($this->str, $rules_info);

    if (empty($data['content'])) {
        // A rule matched but produced no content, so the rule is considered broken: log it.
        pload('F:fastpick');
        write_evo_errlog($data, $this->url, $rules_info);
    }

    if (empty($data['title'])) {
        $re_title = $this->get_title();
        if (!empty($re_title['html'])) {
            $data['title'] = $re_title['html'];
        }
    }

    // Bump the rule's hit counter unless this URL is the rule's own 'detail_ID_test' URL.
    if ($rules_info['detail_ID_test'] != $this->url) {
        DB::update("strayer_evo", array('hit_num' => $rules_info['hit_num'] + 1), array("id" => $rules_info['id']));
    }

    return $data;
}