Esempio n. 1
0
/**
 * Extract a single article from an already-downloaded page.
 *
 * Extraction sources are tried in this order:
 *  1. the rule returned by match_rules() for this URL/content, applied
 *     through rules_get_article();
 *  2. cloud_match_rules();
 *  3. HtmlExtractor, only when step 2 yielded no 'content' and the
 *     'fp_open_auto' setting returned by pick_common_get() equals 1.
 *
 * When $_GET['type'] is 'bbs' the content is additionally passed through
 * media_htmlbbcode() and img_htmlbbcode().
 *
 * @param string $content Markup of the page to extract from.
 * @param string $url     Address of the page; forwarded to the helpers.
 * @param array  $args    Optional settings. Only 'get_type' is read; it is
 *                        used when $_GET['get_type'] is empty. Other keys
 *                        are ignored (they are no longer extract()-ed into
 *                        the local scope, where they could overwrite
 *                        $content or $url).
 *
 * @return array|null Extracted data plus an 'evo_time' entry, or null when
 *                    $content is blank. NOTE(review): in the rule branch the
 *                    value comes straight from rules_get_article() and is
 *                    presumably an array - confirm.
 */
function get_single_article($content, $url, $args = array())
{
    if (strlen(trim($content)) < 1) {
        return null;
    }

    // d_s()/d_e() bracket the work under the 'evo' label; presumably a
    // timer - confirm against their definitions.
    d_s('evo');

    // Rule type precedence: request parameter, then $args, then 1.
    $get_type = isset($args['get_type']) ? $args['get_type'] : 0;
    if (!empty($_GET['get_type'])) {
        $get_type = intval($_GET['get_type']);
    }
    if (!$get_type) {
        $get_type = 1;
    }

    $milu_set = pick_common_get();
    $rules_info = match_rules($url, $content, $get_type, 0);

    if (is_array($rules_info)) {
        pload('F:fastpick');
        $data = rules_get_article($content, $rules_info);
        write_evo_errlog($data, $url, $rules_info);
    } else {
        // Fetch rules from the cloud. This should be optimized at some
        // point; no good approach has been found yet.
        $data = (array) cloud_match_rules($get_type, $url, $content);

        $auto_extract = isset($milu_set['fp_open_auto']) && $milu_set['fp_open_auto'] == 1;
        if (empty($data['content']) && $auto_extract) {
            // Automatic ("smart") extraction is enabled.
            pload('C:HtmlExtractor');
            pload('F:article');
            $extractor = new HtmlExtractor($content, $url);
            $data = (array) $extractor->get_text();

            $body = isset($data['content']) ? $data['content'] : '';
            $body = dz_attach_format($url, $body);
            $formatted = format_article_imgurl($url, $body);
            $body = $formatted['message'];

            // Selectors handed to dom_filter_something() one at a time.
            foreach (array('div[id*=share]', 'div[class*=page]') as $selector) {
                $body = dom_filter_something($body, $selector, 2);
            }

            $data['content'] = $body;
            unset($data['evo_title_info']);
        }
    }

    if (isset($_GET['type']) && $_GET['type'] == 'bbs') {
        $body = isset($data['content']) ? $data['content'] : '';
        $body = media_htmlbbcode($body, $url);
        $data['content'] = img_htmlbbcode($body, $url);
    }

    $data['evo_time'] = d_e(0, 'evo');
    return $data;
}
Esempio n. 2
0
 /**
  * HtmlExtractor::extract() must return the expected word list for a small
  * HTML document.
  *
  * The expected list is the title's words followed by the body's words,
  * all lowercase and without the trailing full stops.
  */
 public function testExtract()
 {
     // Arrange
     $html = '<html><head><title>Mary is very tall.</title></head><body>She was in the 9th grade.</body></html>';
     $expectedWords = ['mary', 'is', 'very', 'tall', 'she', 'was', 'in', 'the', '9th', 'grade'];
     $subject = new HtmlExtractor();

     // Act
     $actualWords = $subject->extract($html);

     // Assert
     static::assertEquals($expectedWords, $actualWords);
 }