/**
 * Fetch a page and show, in a pop-up window, the links a link rule extracts from it.
 *
 * All input comes from the request:
 *   - $_REQUEST['type']       extraction mode: 1 = DOM rule, 2 = string rule,
 *                             anything else = automatic detection (evo_get_pagelink)
 *   - $_REQUEST['c']          URL of the page to fetch
 *   - $_REQUEST['b']          the extraction rule (charset-converted with str_iconv)
 *   - $_REQUEST['is_filter']  when 1 (and links were found) the page_url_* options
 *                             below are forwarded to windos_show_link()
 *   - $_REQUEST['page_url_no_other'], ['page_url_contain'], ['page_url_no_contain']
 *   - $_GET['login_cookie']   cookie sent with the fetch
 *   - $_GET['dom_type']       'content_page' selects DOM type 1, otherwise 0
 *
 * The result is emitted through show_pick_window(); nothing is returned.
 */
function system_get_link_test() {
    pload('F:spider');

    $type = format_url($_REQUEST['type']);
    $is_filter = format_url($_REQUEST['is_filter']);
    $url = format_url($_REQUEST['c']);
    // The rule may contain non-ASCII text, so its charset is converted before use.
    $rule = trim(str_iconv(format_url($_REQUEST['b'])));
    $page_url_no_other = format_url($_REQUEST['page_url_no_other']);
    $page_url_contain = format_url($_REQUEST['page_url_contain']);
    $page_url_no_contain = format_url($_REQUEST['page_url_no_contain']);
    $login_cookie = format_cookie($_GET['login_cookie']);
    $dom_type = (isset($_GET['dom_type']) && $_GET['dom_type'] == 'content_page') ? 1 : 0;

    $content = get_contents($url, array('cookie' => $login_cookie));

    // get_contents() signals failure with sentinels: -1 is reported as "unable to pick",
    // -2 as "timed out". Loose comparison is kept deliberately because the helper's
    // exact return type is not visible here.
    $fetch_failed = ($content == -1);
    $fetch_timed_out = !$fetch_failed && ($content == -2);

    // Always defined, so later code never reads an unset variable; extraction is
    // skipped for the error sentinels because its result would be discarded anyway.
    $link_arr = array();
    if (!$fetch_failed && !$fetch_timed_out) {
        if ($type == 1) {
            // DOM rule
            $link_arr = dom_page_link($content, array('page_link_rules' => $rule, 'url' => $url), $dom_type);
        } elseif ($type == 2) {
            // String rule
            $link_arr = string_page_link($content, $rule, $url);
        } else {
            // Automatic detection
            $link_arr = evo_get_pagelink($content, $url);
        }
    }

    // Filter options are only forwarded when filtering is requested and links exist.
    $args = array();
    if ($is_filter == 1 && $link_arr) {
        $args = array(
            'page_url_no_other' => $page_url_no_other,
            'page_url_contain' => $page_url_contain,
            'page_url_no_contain' => $page_url_no_contain,
        );
    }

    if ($fetch_failed) {
        $link_html = milu_lang('unable_pick');
    } elseif ($fetch_timed_out) {
        $link_html = milu_lang('get_time_out');
    } else {
        $link_html = windos_show_link($link_arr, '', array(), $args);
    }

    show_pick_window(milu_lang('get_link_test'), $link_html, array('w' => 620, 'h' => '400', 'f' => 1));
}
/**
 * Work through the URLs queued for crawl level $level.
 *
 * For every URL in $this->now_url_arr the method:
 *   - stops once the pick_num limit (if set) has been passed;
 *   - records progress in $this->pick_cache_data and prints status rows via
 *     show_pick_info();
 *   - at level 1, parses the page and creates either a paginated article, or an
 *     ordinary article, then optionally collects replies;
 *   - at levels above 1, extracts the next set of links from the page and recurses
 *     into robot($level - 1) when any were found.
 * After the loop the level is incremented and restart_robot() is called.
 *
 * NOTE(review): the exact meaning of the p_arr options and of the helper methods is
 * not visible in this block; comments below describe only what the code shows.
 *
 * @param int $level Crawl level to process; 1 is the level whose pages are treated
 *                   as articles.
 * @return void
 */
function robot($level) {
    global $_G;
    // NOTE(review): $pick_config is assigned but never read in this method.
    $pick_config = $_G['cache']['evn_milu_pick']['pick_config'];
    // Set to 1 once del_session_arr() has been called in the success path, so the
    // clean-up at the bottom of the loop body is not repeated.
    $del_flag = 0;
    $this->now_level = $level;
    if (!$this->now_url_arr) {
        $this->restart_robot($this->now_level);
    }
    // Seed the cached URL list for this level if it has none yet.
    if (!$this->pick_cache_data['url_arr'][$this->now_level]) {
        $this->pick_cache_data['url_arr'][$this->now_level] = $this->now_url_arr;
    }
    // NOTE(review): $k is not used inside the loop.
    foreach ((array) $this->now_url_arr as $k => $url) {
        d_s('run');
        // Stop when a pick limit is set and the counter has reached pick_num + 2 or more
        // (the two alternatives together amount to ">=").
        if ($this->p_arr['pick_num'] && $this->i == $this->p_arr['pick_num'] + 2 || $this->p_arr['pick_num'] && $this->i > $this->p_arr['pick_num'] + 2) {
            return;
        }
        $this->pick_cache_data['now_level'] = $this->now_level;
        $this->now_url = $url;
        // Refresh base_url from the current URL's host for url_range_type 3 or when at
        // the configured manyou_max_level.
        if ($this->p_arr['url_range_type'] == 3 || $this->now_level == $this->p_arr['manyou_max_level']) {
            $host_arr = $this->GetHostInfo($url);
            $this->base_url = $host_arr['host'];
        }
        $this->format_url();
        $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $this->i));
        show_pick_info(array(milu_lang('read_link'), $this->now_url), 'url', $show_args);
        $this->i++;
        $this->temp_arr['have_reply'] = 0;
        $this->pick_cache_data['i'] = $this->i;
        // A positive flag means the URL may be visited; any other value is used below
        // to build the 'no_visit_err…' language key.
        $visit_flag = $this->check_visit_url();
        if ($visit_flag > 0) {
            if ($this->now_level == 1) {
                if ($this->p_arr['rules_type'] == 3) { // One-click collection: check whether this URL is an article page
                    if (!$this->check_fastpick_viewurl($this->now_url)) {
                        // NOTE(review): continue skips the rest of the iteration, including
                        // flip() and the del_session_arr() clean-up at the end of the loop body.
                        continue;
                    } else {
                        // Disabled debug statement: exit($this->now_url);
                    }
                }
                $content = $this->parse_page();
                $this->status_arr['now'] = $this->i;
                show_pick_info('', 'success', $this->status_arr);
                // Optional pause (in seconds, as passed to sleep()) after loading a page.
                if ($this->p_arr['stop_time'][0]) {
                    sleep($this->p_arr['stop_time'][0]);
                }
                // $get becomes 1 when the page was handled as a paginated article.
                $get = 0;
                $this->temp_arr['have_page'] = 0;
                if ($this->p_arr['content_page_rules']) { // Paginated article
                    if ($this->p_arr['reply_rules'] || $this->p_arr['reply_is_extend']) { // Replies configured: pagination handling is skipped
                    } else {
                        $content_page_arr = $this->get_content_page($content);
                        if ($content_page_arr) {
                            $get = 1;
                            $this->a++;
                            $this->pick_cache_data['a'] = $this->a;
                            $this->temp_arr['have_page'] = 1;
                            $article_info_arr = $this->page_get_content($content, array(), array(), $content_page_arr);
                            if ($article_info_arr) { // Fetch the remaining (non-body) fields
                                $other_arr = $this->get_article_other($content);
                                $other_arr = $other_arr ? $other_arr : array();
                                $article_info_arr = array_merge($article_info_arr, $other_arr);
                                $this->create_page_article($article_info_arr); // Store the paginated article
                            } else {
                                // Nothing extracted: bump the v_a counter and mirror it in the cache.
                                $this->v_a++;
                                $this->pick_cache_data['v_a'] = $this->v_a;
                            }
                        }
                    }
                }
                if ($get == 0) { // Ordinary (non-paginated) article
                    $ori_title = $this->get_ori_title($content);
                    // Row identifier built from the previous counter value and the current time.
                    $now = '-' . ($this->i - 1) . time();
                    $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $now));
                    show_pick_info(array(milu_lang('read_content'), cutstr($ori_title, 85)), 'left', $show_args);
                    $article_info = $this->get_article($content);
                    $this->status_arr['now'] = $now;
                    show_pick_info('', 'success', $this->status_arr);
                    $article_info = $this->format_article($article_info);
                    $this->get_pick_status();
                    // NOTE(review): status_arr['now'] is recomputed with a fresh time() here while
                    // $show_args below still carries the earlier $now — confirm this is intended.
                    $this->status_arr['now'] = '-' . ($this->i - 1) . time();
                    $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $now));
                    $this->temp_arr['normal_now'] = $now;
                    show_pick_info(array(milu_lang('article'), cutstr(trim($article_info['title']), 85)), 'left', $show_args);
                    if ($this->check_article($article_info)) {
                        $this->create_article($article_info);
                    } else {
                        $this->v_a++;
                        $this->pick_cache_data['v_a'] = $this->v_a;
                    }
                }
                $this->insert_url();
                if ($this->aid || $this->public_info['insert_aid']) {
                    if ($this->p_arr['reply_rules'] || $this->p_arr['reply_is_extend']) { // The article has replies
                        if ($this->p_arr['is_public_del'] == 1 && $this->p_arr['is_auto_public'] == 1 && $this->p_arr['public_type'] != 2) { // Publishing directly, without storing, and not to the forum: no need to collect replies
                        } else {
                            $now = '-' . ($this->i - 1) . time();
                            $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $now));
                            $this->temp_arr['reply_now'] = $now;
                            show_pick_info(array(milu_lang('pick_reply')), 'left', $show_args);
                            // reply_max_num is either "min,max" (a random value in that range is
                            // drawn) or a single number.
                            if (strexists($this->p_arr['reply_max_num'], ',')) {
                                $arr = explode(',', $this->p_arr['reply_max_num']);
                                $this->reply_max_num = rand($arr[0], $arr[1]);
                            } else {
                                $this->reply_max_num = intval($this->p_arr['reply_max_num']);
                            }
                            // oldurl_arr is cleared both before and after reply collection.
                            $this->oldurl_arr = NULL;
                            $reply_arr = $this->page_get_reply($content, array($this->now_url));
                            $reply_arr = sarray_unique($reply_arr); // Remove duplicates
                            $this->create_reply($reply_arr);
                            $this->oldurl_arr = NULL;
                            $this->temp_arr['have_reply'] = 1;
                        }
                    }
                }
            }
            $msg = '';
            $link_count = 0;
            $next_link = array();
            if ($this->now_level > 1) {
                if ($this->p_arr['url_range_type'] == 1 || $this->p_arr['url_range_type'] == 5 || $this->p_arr['rules_type'] == 1) { // Paginated list, multi-level list, or built-in rules
                    if ($this->p_arr['url_range_type'] == 5) {
                        // Pick the rule set for this level out of many_page_list.
                        $key_level = abs($this->now_level - 1 - count($this->p_arr['many_page_list'])) + 1;
                        $rules_arr = $this->p_arr['many_page_list'][$key_level];
                    } else {
                        if ($this->p_arr['url_range_type'] == 1 || $this->p_arr['rules_type'] == 1) {
                            $rules_arr['type'] = $this->p_arr['page_get_type'];
                            $rules_arr['rules'] = $this->p_arr['page_link_rules'];
                        }
                    }
                    $content = $this->parse_page();
                    // Rule type 1 = DOM rule, 2 = string rule, anything else = automatic detection.
                    if ($rules_arr['type'] == 1) {
                        $next_link = dom_page_link($content, array('page_link_rules' => $rules_arr['rules'], 'url' => $this->now_url));
                    } else {
                        if ($rules_arr['type'] == 2) {
                            $next_link = string_page_link($content, trim($rules_arr['rules']), $this->now_url);
                        } else {
                            $next_link = evo_get_pagelink($content, $this->now_url);
                        }
                    }
                    if ($this->p_arr['url_range_type'] == 1 && !$rules_arr['rules']) {
                        $msg = ' : ' . milu_lang('no_set_list_rules');
                    }
                    $link_count = $this->temp_arr['per_num'] = count($next_link);
                    // A rule is set but matched nothing: append the 'check_list_rules' hint.
                    if ($link_count == 0 && $rules_arr['rules']) {
                        $msg = ' : ' . milu_lang('check_list_rules');
                    }
                    $this->get_pick_count();
                } else {
                    if ($this->p_arr['rules_type'] == 3) { // One-click collection
                        $content = $this->parse_page();
                        $next_link = evo_get_pagelink($content, $this->now_url, $this->pick_cache_data['lilely_page']);
                        $link_count = count($next_link);
                    }
                }
                $this->get_pick_status(1);
                show_pick_info(milu_lang('get_link_c', array('c' => $link_count)) . $msg, $link_count > 0 ? 'success' : 'err', $this->status_arr);
                // Queue the extracted links as the URL list of the next lower level.
                if ($next_link) {
                    $this->pick_cache_data['url_arr'][$this->now_level - 1] = $this->now_url_arr = $next_link;
                }
            } else {
                $next_link = $this->now_url_arr = $this->pick_cache_data['url_arr'][$this->now_level];
            }
            if (!$this->flip()) {
                return;
            }
            $this->del_session_arr($this->now_level);
            if (!$this->pick_cache_data['url_arr']) {
                return;
            }
            $del_flag = 1;
            // Descend one level when this page produced links to follow.
            if ($this->now_level > 1 && $next_link) {
                $this->now_level -= 1;
                $this->robot($level - 1);
            }
        } else {
            // URL must not be visited: bump the v_i counter and report the reason.
            $this->v_i++;
            $this->pick_cache_data['v_i'] = $this->v_i;
            $this->get_pick_status(1);
            show_pick_info(milu_lang('no_visit_err' . $visit_flag), 'err', $this->status_arr);
            if (!$this->flip()) {
                return;
            }
        }
        if ($del_flag != 1) {
            $this->del_session_arr($this->now_level);
        }
    }
    // All URLs of this level are done: move up one level and restart from there.
    $this->now_level += 1;
    $this->restart_robot($this->now_level);
}