Exemplo n.º 1
0
 /**
  * Collect one page of list URLs for a collection node and queue unseen article links.
  *
  * Reads `nodeid` and `page` from $_GET. For the requested page of the node's URL
  * list, every entry returned by collection::get_url_lists() that has both a url and
  * a title is looked up in the history table by the md5 of its url; entries not seen
  * before are recorded in the history table and inserted into the content table with
  * status 0. Entries already present are counted in $re.
  *
  * The method renders the 'col_url_list' admin template, which is included in this
  * method's scope and therefore sees its local variables ($page, $total_page, $total,
  * $re, $url, ...).
  */
 public function col_url_list()
 {
     $nodeid = isset($_GET['nodeid']) ? intval($_GET['nodeid']) : showmessage(L('illegal_parameters'), HTTP_REFERER);
     if ($data = $this->db->get_one(array('nodeid' => $nodeid))) {
         pc_base::load_app_class('collection', '', 0);
         $urls = collection::url_list($data);
         // count() on a non-array raises a warning (PHP 7.2+) or TypeError (PHP 8).
         $total_page = is_array($urls) ? count($urls) : 0;
         if ($total_page > 0) {
             $page = isset($_GET['page']) ? intval($_GET['page']) : 0;
             // The requested page may lie outside the list (negative, or the final
             // pass where $page has reached $total_page), so do not index blindly.
             $url_list = isset($urls[$page]) ? $urls[$page] : null;
             $url = $url_list !== null ? collection::get_url_lists($url_list, $data) : array();
             $history_db = pc_base::load_model('collection_history_model');
             $content_db = pc_base::load_model('collection_content_model');
             // Guarded for the same reason as $total_page above.
             $total = is_array($url) ? count($url) : 0;
             $re = 0;
             if (is_array($url) && !empty($url)) {
                 // The site id does not depend on the loop variable; resolve it once.
                 $siteid = $this->get_siteid();
                 foreach ($url as $v) {
                     if (empty($v['url']) || empty($v['title'])) {
                         continue;
                     }
                     $v = new_addslashes($v);
                     $v['title'] = strip_tags($v['title']);
                     // The md5 of the url is the key used to detect already-seen links.
                     $md5 = md5($v['url']);
                     if (!$history_db->get_one(array('md5' => $md5, 'siteid' => $siteid))) {
                         $history_db->insert(array('md5' => $md5, 'siteid' => $siteid));
                         $content_db->insert(array('nodeid' => $nodeid, 'status' => 0, 'url' => $v['url'], 'title' => $v['title'], 'siteid' => $siteid));
                     } else {
                         $re++;
                     }
                 }
             }
             $show_header = $show_dialog = true;
             // Stamp the node's lastdate once the page index has moved past the last list page.
             if ($total_page <= $page) {
                 $this->db->update(array('lastdate' => SYS_TIME), array('nodeid' => $nodeid));
             }
             include $this->admin_tpl('col_url_list');
         } else {
             showmessage(L('not_to_collect'));
         }
     } else {
         showmessage(L('notfound'));
     }
 }
Exemplo n.º 2
0
 /**
  * Test article URL collection for a single node.
  *
  * Looks up the node given by $_GET['nodeid'], asks collection::url_list() for its
  * list pages (called with 1 as the second argument), runs
  * collection::get_url_lists() on each of them and renders the 'public_test' view.
  * Shows the 'notfound' message when the node lookup returns nothing.
  *
  * The view is included in this method's scope, so the local variable names below
  * ($data, $urls, $v, $url, $show_header, $show_dialog) are deliberately unchanged.
  */
 public function public_test()
 {
     if (isset($_GET['nodeid'])) {
         $nodeid = intval($_GET['nodeid']);
     } else {
         $nodeid = showmessage(L('illegal_parameters'), HTTP_REFERER);
     }

     $data = $this->db->getby_nodeid($nodeid);
     if (!$data) {
         showmessage(L('notfound'));
         return;
     }

     Loader::lib('collection:collection', false);
     $urls = collection::url_list($data, 1);
     if (!empty($urls)) {
         // Each pass overwrites $url, so the view sees the result for the last list page only.
         foreach ($urls as $v) {
             $url = collection::get_url_lists($v, $data);
         }
     }

     $show_dialog = true;
     $show_header = true;
     include $this->view('public_test');
 }
Exemplo n.º 3
0
    //print_r($html[1]);exit;
    if (is_array($html)) {
        $html = explode('</div>', $html[1]);
    }
    $data['content'] = str_replace('src="/uploadfile/', 'src="http://www.chinacatholic.org/uploadfile/', $html[0]);
    //移出margin-left属性
    //	$data['content']=str_replace('margin-left: 240px;', '', $data['content']);
    //	$data['content'] = preg_replace('/(<p.+?)style=".+?"(>.+?)/i', "$1$2", $data['content']);
    $data['content'] = preg_replace('/margin-left.*[1,10]px;/', '', $data['content']);
    //print_r($html[0]);exit;
    //	echo($data['content']);
    return $data;
}
@set_time_limit(600);
foreach ($urls as $k => $url_list) {
    $url = collection::get_url_lists($url_list, $cjconfig);
    //var_dump($url );exit;
    if (is_array($url) && !empty($url)) {
        foreach ($url as $v) {
            //if (empty($v['url']) || empty($v['title']) || (strpos($v['url'],'www.chinacatholic.org')<1)) continue;
            if (empty($v['url']) || empty($v['title']) || strpos($v['url'], 'www.chinacatholic.org') < 1) {
                echo '<b>invalid url:' . $v['url'] . '</b><br/>';
                continue;
            }
            //$v = new_addslashes($v);
            $v['url'] = str_replace('/index/id', '', $v['url']);
            $v['title'] = strip_tags($v['title']);
            $md5 = md5($v['url']);
            if (!$db->get_one('id', 'faithlife', " md5url='{$md5}' ")) {
                $cinfo = get_content($v['url']);
                //获取发布时间、作者、来源、内容