コード例 #1
0
ファイル: function.pick.php プロジェクト: edmundwong/V604
/**
 * Test a link-extraction rule against one page and show the result in a popup window.
 *
 * All inputs come from the request:
 *   - $_REQUEST['type']                 extraction mode: 1 = DOM rule, 2 = string rule,
 *                                       anything else = automatic detection
 *   - $_REQUEST['is_filter']            1 to forward the page_url_* filter settings to the renderer
 *   - $_REQUEST['c']                    URL of the page to fetch
 *   - $_REQUEST['b']                    extraction rule (charset-converted with str_iconv)
 *   - $_REQUEST['page_url_no_other'],
 *     $_REQUEST['page_url_contain'],
 *     $_REQUEST['page_url_no_contain']  filter settings
 *   - $_GET['login_cookie']             cookie sent with the fetch
 *   - $_GET['dom_type']                 'content_page' selects DOM type 1, anything else 0
 *
 * A fetch result of -1 is reported with the 'unable_pick' message and -2 with the
 * 'get_time_out' message; in both cases no link extraction is attempted.
 *
 * @return void Output is produced through show_pick_window().
 */
function system_get_link_test()
{
    pload('F:spider');
    $type = format_url($_REQUEST['type']);
    $is_filter = format_url($_REQUEST['is_filter']);
    $url = format_url($_REQUEST['c']);
    // The rule may contain Chinese text, so it is converted to the working charset.
    $rule = trim(str_iconv(format_url($_REQUEST['b'])));
    $page_url_no_other = format_url($_REQUEST['page_url_no_other']);
    $page_url_contain = format_url($_REQUEST['page_url_contain']);
    $page_url_no_contain = format_url($_REQUEST['page_url_no_contain']);
    $login_cookie = format_cookie($_GET['login_cookie']);
    $dom_type = (isset($_GET['dom_type']) && $_GET['dom_type'] == 'content_page') ? 1 : 0;

    $content = get_contents($url, array('cookie' => $login_cookie));

    // Loose comparison is kept deliberately: get_contents() is not visible here and
    // may report its sentinels as integers or numeric strings - TODO confirm.
    if ($content == -1) {
        $link_html = milu_lang('unable_pick');
    } elseif ($content == -2) {
        $link_html = milu_lang('get_time_out');
    } else {
        if ($type == 1) {
            // DOM rule
            $link_arr = dom_page_link($content, array('page_link_rules' => $rule, 'url' => $url), $dom_type);
        } elseif ($type == 2) {
            // String rule
            $link_arr = string_page_link($content, $rule, $url);
        } else {
            // Automatic detection
            $link_arr = evo_get_pagelink($content, $url);
        }

        // Always defined, so the renderer never receives an undefined variable.
        $args = array();
        if ($is_filter == 1 && $link_arr) {
            $args = array(
                'page_url_no_other' => $page_url_no_other,
                'page_url_contain' => $page_url_contain,
                'page_url_no_contain' => $page_url_no_contain,
            );
        }
        $link_html = windos_show_link($link_arr, '', array(), $args);
    }

    show_pick_window(milu_lang('get_link_test'), $link_html, array('w' => 620, 'h' => '400', 'f' => 1));
}
コード例 #2
0
ファイル: pick.class.php プロジェクト: edmundwong/V604
 /**
  * Process the URL queue of one crawl level and descend towards level 1.
  *
  * For every URL in $this->now_url_arr the method:
  *   - stops once the configured pick_num limit (plus 2) has been reached;
  *   - checks the URL with check_visit_url(); a non-positive result is counted in
  *     $this->v_i and reported as an error;
  *   - at level 1, parses the page as an article (paginated or ordinary), stores it,
  *     records the URL with insert_url() and optionally collects replies;
  *   - at levels above 1, extracts the next batch of links from the page, stores them
  *     under url_arr[now_level - 1] and recurses one level down.
  *
  * After the loop the level is raised by one and restart_robot() is called.
  * Progress counters and the URL queues are mirrored into $this->pick_cache_data.
  *
  * @param int $level Crawl level to process; level 1 is treated as the article level.
  * @return void
  */
 function robot($level)
 {
     global $_G;
     // NOTE(review): $pick_config is assigned but never read in this method.
     $pick_config = $_G['cache']['evn_milu_pick']['pick_config'];
     // Set to 1 once del_session_arr() has been called in the success path of an iteration.
     $del_flag = 0;
     $this->now_level = $level;
     if (!$this->now_url_arr) {
         $this->restart_robot($this->now_level);
     }
     // Seed the cached queue for this level if it has none yet.
     if (!$this->pick_cache_data['url_arr'][$this->now_level]) {
         $this->pick_cache_data['url_arr'][$this->now_level] = $this->now_url_arr;
     }
     foreach ((array) $this->now_url_arr as $k => $url) {
         d_s('run');
         // Stop when a pick limit is set and the counter has reached pick_num + 2.
         // NOTE(review): the two halves are equivalent to a single ">=" test.
         if ($this->p_arr['pick_num'] && $this->i == $this->p_arr['pick_num'] + 2 || $this->p_arr['pick_num'] && $this->i > $this->p_arr['pick_num'] + 2) {
             return;
         }
         $this->pick_cache_data['now_level'] = $this->now_level;
         $this->now_url = $url;
         // For range type 3, or at the configured maximum roaming level, the base URL
         // is taken from the host of the current URL.
         if ($this->p_arr['url_range_type'] == 3 || $this->now_level == $this->p_arr['manyou_max_level']) {
             $host_arr = $this->GetHostInfo($url);
             $this->base_url = $host_arr['host'];
         }
         $this->format_url();
         $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $this->i));
         show_pick_info(array(milu_lang('read_link'), $this->now_url), 'url', $show_args);
         $this->i++;
         $this->temp_arr['have_reply'] = 0;
         $this->pick_cache_data['i'] = $this->i;
         // A positive flag means the URL may be visited; any other value is used below
         // as the suffix of the 'no_visit_err' language key.
         $visit_flag = $this->check_visit_url();
         if ($visit_flag > 0) {
             if ($this->now_level == 1) {
                 if ($this->p_arr['rules_type'] == 3) {
                     // For one-click picking, check whether this URL is an article page.
                     if (!$this->check_fastpick_viewurl($this->now_url)) {
                         continue;
                     } else {
                         // (debug leftover) exit($this->now_url);
                     }
                 }
                 $content = $this->parse_page();
                 $this->status_arr['now'] = $this->i;
                 show_pick_info('', 'success', $this->status_arr);
                 // Optional pause between page fetches.
                 if ($this->p_arr['stop_time'][0]) {
                     sleep($this->p_arr['stop_time'][0]);
                 }
                 // $get becomes 1 when the page was handled as a paginated article.
                 $get = 0;
                 $this->temp_arr['have_page'] = 0;
                 if ($this->p_arr['content_page_rules']) {
                     // Paginated article
                     if ($this->p_arr['reply_rules'] || $this->p_arr['reply_is_extend']) {
                         // Replies are configured: paginated handling is skipped.
                     } else {
                         $content_page_arr = $this->get_content_page($content);
                         if ($content_page_arr) {
                             $get = 1;
                             $this->a++;
                             $this->pick_cache_data['a'] = $this->a;
                             $this->temp_arr['have_page'] = 1;
                             $article_info_arr = $this->page_get_content($content, array(), array(), $content_page_arr);
                             if ($article_info_arr) {
                                 // Fetch the remaining (non-body) content.
                                 $other_arr = $this->get_article_other($content);
                                 $other_arr = $other_arr ? $other_arr : array();
                                 $article_info_arr = array_merge($article_info_arr, $other_arr);
                                 $this->create_page_article($article_info_arr);
                                 // Stores the paginated article.
                             } else {
                                 $this->v_a++;
                                 $this->pick_cache_data['v_a'] = $this->v_a;
                             }
                         }
                     }
                 }
                 if ($get == 0) {
                     // Ordinary (non-paginated) article
                     $ori_title = $this->get_ori_title($content);
                     // Message id built from the previous counter value and the current timestamp.
                     $now = '-' . ($this->i - 1) . time();
                     $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $now));
                     show_pick_info(array(milu_lang('read_content'), cutstr($ori_title, 85)), 'left', $show_args);
                     $article_info = $this->get_article($content);
                     $this->status_arr['now'] = $now;
                     show_pick_info('', 'success', $this->status_arr);
                     $article_info = $this->format_article($article_info);
                     $this->get_pick_status();
                     // NOTE(review): status_arr['now'] gets a fresh time() here while $show_args
                     // below reuses the earlier $now; the two ids can differ - confirm intent.
                     $this->status_arr['now'] = '-' . ($this->i - 1) . time();
                     $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $now));
                     $this->temp_arr['normal_now'] = $now;
                     show_pick_info(array(milu_lang('article'), cutstr(trim($article_info['title']), 85)), 'left', $show_args);
                     if ($this->check_article($article_info)) {
                         $this->create_article($article_info);
                     } else {
                         $this->v_a++;
                         $this->pick_cache_data['v_a'] = $this->v_a;
                     }
                 }
                 $this->insert_url();
                 if ($this->aid || $this->public_info['insert_aid']) {
                     if ($this->p_arr['reply_rules'] || $this->p_arr['reply_is_extend']) {
                         // The article has replies.
                         if ($this->p_arr['is_public_del'] == 1 && $this->p_arr['is_auto_public'] == 1 && $this->p_arr['public_type'] != 2) {
                             // When publishing directly, without storing, and not to the forum,
                             // there is no need to collect replies.
                         } else {
                             $now = '-' . ($this->i - 1) . time();
                             $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $now));
                             $this->temp_arr['reply_now'] = $now;
                             show_pick_info(array(milu_lang('pick_reply')), 'left', $show_args);
                             // reply_max_num is either a single number or a "min,max" pair
                             // from which a random limit is drawn.
                             if (strexists($this->p_arr['reply_max_num'], ',')) {
                                 $arr = explode(',', $this->p_arr['reply_max_num']);
                                 $this->reply_max_num = rand($arr[0], $arr[1]);
                             } else {
                                 $this->reply_max_num = intval($this->p_arr['reply_max_num']);
                             }
                             $this->oldurl_arr = NULL;
                             $reply_arr = $this->page_get_reply($content, array($this->now_url));
                             $reply_arr = sarray_unique($reply_arr);
                             // De-duplicate the replies.
                             $this->create_reply($reply_arr);
                             $this->oldurl_arr = NULL;
                             $this->temp_arr['have_reply'] = 1;
                         }
                     }
                 }
             }
             $msg = '';
             $link_count = 0;
             $next_link = array();
             if ($this->now_level > 1) {
                 if ($this->p_arr['url_range_type'] == 1 || $this->p_arr['url_range_type'] == 5 || $this->p_arr['rules_type'] == 1) {
                     // Paginated lists and multi-level lists are fetched with the built-in rules.
                     if ($this->p_arr['url_range_type'] == 5) {
                         // Select the many_page_list rule set for the current level.
                         $key_level = abs($this->now_level - 1 - count($this->p_arr['many_page_list'])) + 1;
                         $rules_arr = $this->p_arr['many_page_list'][$key_level];
                     } else {
                         if ($this->p_arr['url_range_type'] == 1 || $this->p_arr['rules_type'] == 1) {
                             $rules_arr['type'] = $this->p_arr['page_get_type'];
                             $rules_arr['rules'] = $this->p_arr['page_link_rules'];
                         }
                     }
                     $content = $this->parse_page();
                     // Rule type 1 = DOM rule, 2 = string rule, anything else = automatic detection.
                     if ($rules_arr['type'] == 1) {
                         $next_link = dom_page_link($content, array('page_link_rules' => $rules_arr['rules'], 'url' => $this->now_url));
                     } else {
                         if ($rules_arr['type'] == 2) {
                             $next_link = string_page_link($content, trim($rules_arr['rules']), $this->now_url);
                         } else {
                             $next_link = evo_get_pagelink($content, $this->now_url);
                         }
                     }
                     if ($this->p_arr['url_range_type'] == 1 && !$rules_arr['rules']) {
                         $msg = ' : ' . milu_lang('no_set_list_rules');
                     }
                     $link_count = $this->temp_arr['per_num'] = count($next_link);
                     if ($link_count == 0 && $rules_arr['rules']) {
                         $msg = ' : ' . milu_lang('check_list_rules');
                     }
                     $this->get_pick_count();
                 } else {
                     if ($this->p_arr['rules_type'] == 3) {
                         // One-click picking
                         $content = $this->parse_page();
                         $next_link = evo_get_pagelink($content, $this->now_url, $this->pick_cache_data['lilely_page']);
                         $link_count = count($next_link);
                     }
                 }
                 $this->get_pick_status(1);
                 show_pick_info(milu_lang('get_link_c', array('c' => $link_count)) . $msg, $link_count > 0 ? 'success' : 'err', $this->status_arr);
                 // The extracted links become the queue of the next lower level.
                 if ($next_link) {
                     $this->pick_cache_data['url_arr'][$this->now_level - 1] = $this->now_url_arr = $next_link;
                 }
             } else {
                 // At level 1 the queue is reloaded from the cache.
                 $next_link = $this->now_url_arr = $this->pick_cache_data['url_arr'][$this->now_level];
             }
             // A falsy result from flip() ends the run; its semantics are not visible here.
             if (!$this->flip()) {
                 return;
             }
             $this->del_session_arr($this->now_level);
             if (!$this->pick_cache_data['url_arr']) {
                 return;
             }
             $del_flag = 1;
             if ($this->now_level > 1 && $next_link) {
                 $this->now_level -= 1;
                 // NOTE(review): the recursion uses the original $level parameter, while
                 // $this->now_level may already have been changed by an earlier recursive
                 // call in this loop - confirm the two are meant to stay in sync.
                 $this->robot($level - 1);
             }
         } else {
             // The URL may not be visited: count it and report the reason.
             $this->v_i++;
             $this->pick_cache_data['v_i'] = $this->v_i;
             $this->get_pick_status(1);
             show_pick_info(milu_lang('no_visit_err' . $visit_flag), 'err', $this->status_arr);
             if (!$this->flip()) {
                 return;
             }
         }
         // NOTE(review): $del_flag is never reset inside the loop, so after the first
         // successful iteration this cleanup is skipped for all later ones.
         if ($del_flag != 1) {
             $this->del_session_arr($this->now_level);
         }
     }
     // Queue exhausted: move one level up and restart from there.
     $this->now_level += 1;
     $this->restart_robot($this->now_level);
 }