/**
 * Collect the article URLs found on one list page of a collection node and
 * queue the ones that have not been seen before.
 *
 * Request parameters (read from $_GET):
 *  - nodeid: required; cast to int. When missing, showmessage() is invoked
 *            with the "illegal_parameters" message.
 *  - page:   optional index into the list pages returned by
 *            collection::url_list(); defaults to 0.
 *
 * For every entry returned by collection::get_url_lists() that has both a
 * URL and a title, the md5 of the (slashed) URL is looked up in the history
 * model for the current site. Unknown URLs are recorded in the history model
 * and inserted into the content model with status 0; known URLs only
 * increment the duplicate counter $re.
 *
 * When $page has reached or passed the number of list pages, the node's
 * `lastdate` is set to SYS_TIME. Finally the `col_url_list` admin template is
 * included.
 *
 * The template is included inside this method's scope, so the local variable
 * names used here ($nodeid, $data, $urls, $total_page, $page, $url_list,
 * $url, $total, $re, ...) are deliberately kept stable.
 */
public function col_url_list()
{
    $nodeid = isset($_GET['nodeid']) ? intval($_GET['nodeid']) : showmessage(L('illegal_parameters'), HTTP_REFERER);

    if ($data = $this->db->get_one(array('nodeid' => $nodeid))) {
        pc_base::load_app_class('collection', '', 0);
        $urls = collection::url_list($data);
        // count() on a non-countable value raises a TypeError on PHP 8.
        $total_page = is_array($urls) ? count($urls) : 0;

        if ($total_page > 0) {
            $page = isset($_GET['page']) ? intval($_GET['page']) : 0;

            // $page comes straight from the request and may be negative or
            // past the last list page (page == $total_page is the request
            // that triggers the lastdate update below). Only fetch when the
            // index really exists instead of reading an undefined offset.
            $url_list = isset($urls[$page]) ? $urls[$page] : null;
            $url = $url_list !== null ? collection::get_url_lists($url_list, $data) : array();

            $history_db = pc_base::load_model('collection_history_model');
            $content_db = pc_base::load_model('collection_content_model');

            // Guarded for the same reason as $total_page above.
            $total = is_array($url) ? count($url) : 0;
            $re = 0;

            if (is_array($url) && !empty($url)) {
                // Loop-invariant: resolve the site id once, not per row.
                $siteid = $this->get_siteid();

                foreach ($url as $v) {
                    if (empty($v['url']) || empty($v['title'])) {
                        continue;
                    }

                    $v = new_addslashes($v);
                    $v['title'] = strip_tags($v['title']);
                    // The hash is taken after new_addslashes(), exactly as
                    // before, so existing history rows keep matching.
                    $md5 = md5($v['url']);

                    if (!$history_db->get_one(array('md5' => $md5, 'siteid' => $siteid))) {
                        $history_db->insert(array('md5' => $md5, 'siteid' => $siteid));
                        $content_db->insert(array(
                            'nodeid' => $nodeid,
                            'status' => 0,
                            'url' => $v['url'],
                            'title' => $v['title'],
                            'siteid' => $siteid,
                        ));
                    } else {
                        // Already in the history table: count as a duplicate.
                        $re++;
                    }
                }
            }

            $show_header = $show_dialog = true;

            if ($total_page <= $page) {
                $this->db->update(array('lastdate' => SYS_TIME), array('nodeid' => $nodeid));
            }

            include $this->admin_tpl('col_url_list');
        } else {
            showmessage(L('not_to_collect'));
        }
    } else {
        showmessage(L('notfound'));
    }
}
/**
 * Test article URL collection for a node.
 *
 * Reads the required `nodeid` from $_GET (cast to int; when missing,
 * showmessage() is invoked with the "illegal_parameters" message), loads the
 * node via getby_nodeid(), asks collection::url_list() for its list pages and
 * runs collection::get_url_lists() on them. The `public_test` view is then
 * included and can read the result from $url.
 *
 * The view is included inside this method's scope, so the local variable
 * names ($nodeid, $data, $urls, $url, ...) are kept stable.
 */
public function public_test()
{
    $nodeid = isset($_GET['nodeid']) ? intval($_GET['nodeid']) : showmessage(L('illegal_parameters'), HTTP_REFERER);

    if ($data = $this->db->getby_nodeid($nodeid)) {
        Loader::lib('collection:collection', false);
        $urls = collection::url_list($data, 1);

        // Defined up front so the view never sees an undefined $url when no
        // list pages come back.
        $url = array();

        if (!empty($urls) && is_array($urls)) {
            // NOTE(review): every iteration overwrites $url, so only the last
            // list page's result reaches the view. Presumably the second
            // argument to url_list() limits it to a single page -- confirm.
            foreach ($urls as $v) {
                $url = collection::get_url_lists($v, $data);
            }
        }

        $show_header = $show_dialog = true;
        include $this->view('public_test');
    } else {
        showmessage(L('notfound'));
    }
}
//print_r($html[1]);exit; if (is_array($html)) { $html = explode('</div>', $html[1]); } $data['content'] = str_replace('src="/uploadfile/', 'src="http://www.chinacatholic.org/uploadfile/', $html[0]); //移出margin-left属性 // $data['content']=str_replace('margin-left: 240px;', '', $data['content']); // $data['content'] = preg_replace('/(<p.+?)style=".+?"(>.+?)/i', "$1$2", $data['content']); $data['content'] = preg_replace('/margin-left.*[1,10]px;/', '', $data['content']); //print_r($html[0]);exit; // echo($data['content']); return $data; } @set_time_limit(600); foreach ($urls as $k => $url_list) { $url = collection::get_url_lists($url_list, $cjconfig); //var_dump($url );exit; if (is_array($url) && !empty($url)) { foreach ($url as $v) { //if (empty($v['url']) || empty($v['title']) || (strpos($v['url'],'www.chinacatholic.org')<1)) continue; if (empty($v['url']) || empty($v['title']) || strpos($v['url'], 'www.chinacatholic.org') < 1) { echo '<b>invalid url:' . $v['url'] . '</b><br/>'; continue; } //$v = new_addslashes($v); $v['url'] = str_replace('/index/id', '', $v['url']); $v['title'] = strip_tags($v['title']); $md5 = md5($v['url']); if (!$db->get_one('id', 'faithlife', " md5url='{$md5}' ")) { $cinfo = get_content($v['url']); //获取发布时间、作者、来源、内容