/**
 * AJAX endpoint: download one article URL, extract its title, SEO meta tags
 * and body paragraphs with per-site CSS selectors, and store the result.
 *
 * Reads from POST:
 *  - category: target category id (must be a non-zero integer)
 *  - url:      address of the article to crawl
 *  - sf:       key selecting the per-site selector set (ifeng, qq, weixin, ...)
 *  - scheme:   value stored alongside the URL hash in history_list
 *
 * Responds through $this->ajaxReturn() with 'url' and an HTML status
 * snippet in 'info' (or 'alertinfo' when no category was chosen).
 *
 * NOTE(review): the category id is validated but never written to the
 * crawl_content record — confirm whether it should be stored.
 */
public function crawlcontent()
 {
     $sort_id = I('post.category', '', 'intval');
     $url = I('post.url');
     if (!$sort_id) {
         $json['alertinfo'] = "请选择栏目";
         $this->ajaxReturn($json);
     }
     if (!$url) {
         exit;
     }
     if (IS_AJAX) {
         // CSS selectors per supported site; 'title' is optional and falls back to "h1".
         $sf = array(
             'ifeng' => array('content' => '#main_content p'),
             'qq' => array('content' => '#Cnt-Main-Article-QQ p'),
             'weixin' => array('title' => '#activity-name', 'content' => '#js_content p'),
             'sina' => array('content' => '#artibody p', 'title' => '#artibodyTitle'),
             '163' => array('content' => '#endText p'),
             'toutiao' => array('content' => '.article-content p,.article-content div p'),
         );

         // The md5 of the submitted URL is the de-duplication key in history_list.
         $link_hash = md5($url);
         $check['link'] = $link_hash;
         if (M('history_list')->where($check)->find()) {
             $json['url'] = htmlspecialchars_decode($url);
             $json['info'] = "<span class='pink'>已存在,跳过</span>";
             $this->ajaxReturn($json);
         }

         // Resolve selectors defensively: 'sf' comes from the client and may
         // be missing or name a site that has no entry in $sf.
         $current_sf = I('post.sf');
         $selectors = isset($sf[$current_sf]) ? $sf[$current_sf] : array();
         $title_selector = !empty($selectors['title']) ? $selectors['title'] : "h1";
         $content_selector = !empty($selectors['content']) ? $selectors['content'] : '';

         // NOTE(review): $url is passed to _curl() as received from I(); the
         // response below runs it through htmlspecialchars_decode(), which
         // suggests it may be HTML-escaped here — confirm _curl() copes with that.
         $dom = $this->_curl($url);
         $data = mb_convert_encoding($dom, 'utf-8', 'GBK,UTF-8,ASCII');
         // Prepend an explicit UTF-8 charset declaration to the converted markup.
         $data = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' . $data;
         vendor("HtmlParser/ParserDom");
         $obj = new \ParserDom($data);

         $detail = array();
         try {
             $title = $obj->find($title_selector);
             $keywords = $obj->find('meta[name=keywords]');
             $description = $obj->find('meta[name=description]');
             $content = $content_selector !== '' ? $obj->find($content_selector) : array();
             if ($title) {
                 $detail['title'] = $title[0]->getPlainText();
             }
             if ($keywords) {
                 // Read the keywords meta tag itself (previously this read the description node).
                 $detail['seo_keywords'] = $keywords[0]->getAttr('content');
             }
             if ($description) {
                 $detail['description'] = $description[0]->getAttr('content');
             }
             if ($content) {
                 $detail['content'] = "";
                 foreach ($content as $v) {
                     // Strip href attributes so the stored paragraphs carry no outbound link targets.
                     $detail['content'] .= preg_replace('/href=[\'\\"]?[:\\/\\w#\\.]*[\'\\"]?/i', '', $v->outerHtml());
                 }
             }
         } catch (\Exception $e) {
             // Best-effort extraction: on a parser failure $detail stays partially
             // filled and the content check below decides whether to store it.
             // Fully qualified so the catch also matches inside a namespaced file.
         }
         //$detail['source'] = $current_sf;
         if (!empty($detail['content'])) {
             // Publish the content to the column (category) chosen by the client.
             if (M('crawl_content')->add($detail)) {
                 $history['link'] = $link_hash;
                 $history['scheme'] = I('post.scheme');
                 M('history_list')->add($history);
                 $json['info'] = "<span class='green'>已入库</span>";
             } else {
                 $json['info'] = "<span class='blue'>系统错误</span>";
             }
         } else {
             $json['info'] = "<span class='red'>无内容,跳过</span>";
         }
         $json['url'] = htmlspecialchars_decode($url);
         $this->ajaxReturn($json);
     }
 }
/**
 * Extract values from an HTML fragment according to a list of node specs.
 *
 * Each entry of $node is an array with these keys:
 *  - 'el'      selector passed to ParserDom::find()
 *  - 'name'    key under which the extracted value is stored in each row
 *  - 'attr'    (optional) attribute to read; '' or 'text' reads the plain text
 *  - 'replace' (optional) search value for str_replace() on the extracted value
 *  - 'content' (optional) replacement used together with 'replace'; defaults to ''
 *
 * @param string $html_dom HTML markup to parse
 * @param array  $node     list of node specs as described above
 * @return array rows keyed by match position within each spec's result set;
 *               every row maps spec names to the extracted values
 */
function dom($html_dom, $node = array())
{
    // Load the parser classes only if they are not already defined.
    if (!class_exists('ParserDom')) {
        include_once ROOT_PATH . 'inc/class/dom/ParserInterface.php';
        include_once ROOT_PATH . 'inc/class/dom/ParserAbstract.php';
        include_once ROOT_PATH . 'inc/class/dom/ParserDom.php';
    }
    // Prepend an explicit UTF-8 charset declaration before parsing.
    $html_dom = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>' . $html_dom;
    $dom = new ParserDom($html_dom);
    $arr = array();
    foreach ($node as $v) {
        // Optional keys are read defensively so a minimal spec ('el' + 'name')
        // does not raise "undefined array key" warnings.
        $attr = isset($v['attr']) ? (string) $v['attr'] : '';
        $read_text = ($attr === '' || $attr === 'text');
        $search = !empty($v['replace']) ? $v['replace'] : null;
        $replacement = !empty($v['content']) ? $v['content'] : '';
        $name = isset($v['name']) ? $v['name'] : '';

        foreach ($dom->find($v['el']) as $k1 => $v1) {
            $value = $read_text ? $v1->getPlainText() : $v1->getAttr($attr);
            if ($search !== null) {
                $value = str_replace($search, $replacement, $value);
            }
            // Rows are aligned by match index, so the n-th match of every spec
            // ends up in the same row.
            $arr[$k1][$name] = $value;
        }
    }
    unset($dom);
    return $arr;
}