Example #1
0
/**
 * Extract one article from a fetched page.
 *
 * Resolution order, as implemented below:
 *   1. match_rules() - when it returns an array, the article is parsed with
 *      rules_get_article() and the result is passed to write_evo_errlog().
 *   2. cloud_match_rules() - used when no rule array was matched.
 *   3. HtmlExtractor - used only when step 2 produced no 'content' and the
 *      'fp_open_auto' setting returned by pick_common_get() equals 1.
 *
 * The rule type is taken from $_GET['get_type'] when that is truthy, otherwise
 * from $args['get_type'], otherwise it defaults to 1.
 *
 * @param string $content Raw page markup to extract from.
 * @param string $url     Address the markup was fetched from.
 * @param array  $args    Optional settings; only the 'get_type' key is read.
 *
 * @return mixed Null when $content is blank; otherwise the extracted data with
 *               an added 'evo_time' entry taken from d_e(0, 'evo').
 */
function get_single_article($content, $url, $args = array())
{
    // Blank input: keep the historical bare return (null) so callers that
    // test the result for emptiness continue to work.
    if (strlen(trim($content)) < 1) {
        return;
    }
    d_s('evo'); // presumably starts the 'evo' timer read by d_e() below - confirm

    // Read the single supported option explicitly instead of extract()-ing the
    // whole array, which could silently overwrite $content or $url.
    $get_type = 0;
    if (is_array($args) && isset($args['get_type'])) {
        $get_type = $args['get_type'];
    }
    // A truthy request parameter takes precedence over the caller's option.
    if (!empty($_GET['get_type'])) {
        $get_type = intval($_GET['get_type']);
    }
    if (!$get_type) {
        $get_type = 1;
    }

    $milu_set = pick_common_get();
    $rules_info = match_rules($url, $content, $get_type, 0);
    if (is_array($rules_info)) {
        pload('F:fastpick');
        $data = rules_get_article($content, $rules_info);
        write_evo_errlog($data, $url, $rules_info);
    } else {
        // Fetch rules from the cloud. The original author noted that this step
        // should be optimized but had not yet found a way to do so.
        $data = (array) cloud_match_rules($get_type, $url, $content);
        $auto_enabled = isset($milu_set['fp_open_auto']) && $milu_set['fp_open_auto'] == 1;
        if (empty($data['content']) && $auto_enabled) {
            // Smart extraction is switched on.
            pload('C:HtmlExtractor');
            pload('F:article');
            $he = new HtmlExtractor($content, $url);
            $data = (array) $he->get_text();
            $body = isset($data['content']) ? $data['content'] : null;
            $body = dz_attach_format($url, $body);
            $arr = format_article_imgurl($url, $body);
            $body = $arr['message'];
            // Strip share widgets and pagination containers from the body.
            // NOTE(review): the meaning of the third argument (2) is defined by
            // dom_filter_something(), which is not visible here.
            $del_dom_rules = array('div[id*=share]', 'div[class*=page]');
            foreach ($del_dom_rules as $selector) {
                $body = dom_filter_something($body, $selector, 2);
            }
            $data['content'] = $body;
            unset($data['evo_title_info']);
        }
    }

    // Forum ("bbs") requests get the body run through the two *_htmlbbcode helpers.
    if (isset($_GET['type']) && $_GET['type'] == 'bbs') {
        $body = isset($data['content']) ? $data['content'] : null;
        $body = media_htmlbbcode($body, $url);
        $data['content'] = img_htmlbbcode($body, $url);
    }
    $data['evo_time'] = d_e(0, 'evo');
    return $data;
}
Example #2
0
 /**
  * Extract the article for $this->url from $this->str.
  *
  * Rules are looked up with match_rules() using type 3 (locally learned
  * rules) and then type 5 (built-in rules, detail pages only). When neither
  * lookup yields a non-empty array, cloud_match_rules() is asked instead and
  * its result is returned directly.
  *
  * @return array|mixed An empty array when the cloud lookup yields no
  *                     'content'; otherwise the extracted data, with 'title'
  *                     filled from $this->get_title() when it was missing.
  */
 function evo_get()
 {
     // NOTE(review): the return value was never used in this method; the call
     // is kept in case pick_common_get() has side effects - confirm and remove.
     pick_common_get();

     // Try the locally learned rules first.
     $rules_info = match_rules($this->url, $this->str, 3, 0);
     if (!is_array($rules_info) || !$rules_info) {
         // Fall back to the local built-in rules (search detail pages only).
         $rules_info = match_rules($this->url, $this->str, 5, 0);
     }

     if (!is_array($rules_info) || !$rules_info) {
         // No usable local rule: fetch from the server. The cast keeps the
         // key lookups below safe when the call returns a non-array value.
         $data = (array) cloud_match_rules(3, $this->url, $this->str);
         if (empty($data['content'])) {
             return array();
         }
         if (empty($data['title'])) {
             $re_title = $this->get_title();
             if (!empty($re_title['html'])) {
                 $data['title'] = $re_title['html'];
             }
         }
         return $data;
     }

     $data = evo_rules_get_article($this->str, $rules_info);
     if (empty($data['content'])) {
         // A rule matched but produced no content, so the rule itself is
         // considered broken; record it.
         pload('F:fastpick');
         write_evo_errlog($data, $this->url, $rules_info);
     }
     if (empty($data['title'])) {
         $re_title = $this->get_title();
         // Same guard as the cloud branch: only a non-empty 'html' is used.
         if (!empty($re_title['html'])) {
             $data['title'] = $re_title['html'];
         }
     }

     // Bump the rule's hit counter unless this URL is the rule's own
     // 'detail_ID_test' URL.
     $test_url = isset($rules_info['detail_ID_test']) ? $rules_info['detail_ID_test'] : null;
     if ($test_url != $this->url) {
         $hit_num = isset($rules_info['hit_num']) ? $rules_info['hit_num'] : 0;
         DB::update("strayer_evo", array('hit_num' => $hit_num + 1), array("id" => $rules_info['id']));
     }
     return $data;
 }