Exemplo n.º 1
0
/**
 * Shows a preview window with the links generated from a URL template.
 *
 * Reads `url`, `auto`, `start`, `end` and `step` from $_GET. When the URL does
 * not contain the "(*)" placeholder it is previewed as a single link. Otherwise
 * the numeric range is validated (an error window is shown and the script exits
 * on an invalid range), and either the full list from convert_url_range() is
 * shown (fewer than 9 entries) or only the first four and last four entries,
 * separated by a `0` gap marker that is passed on to windos_show_link().
 *
 * @return void
 */
function url_page_range_test()
{
    $title = milu_lang('get_link_list_test');
    $window = array('w' => 620, 'h' => '400', 'f' => 1);

    $url = rpc_str(isset($_GET['url']) ? $_GET['url'] : null);
    $new_arr = array();

    if (!strexists($url, '(*)')) {
        $new_arr = array($url);
        $count = 1;
    } else {
        $auto = isset($_GET['auto']) ? $_GET['auto'] : 0;
        if ($auto === 'undefined') {
            $auto = 0;
        }
        $start = intval(isset($_GET['start']) ? $_GET['start'] : null);
        $end = intval(isset($_GET['end']) ? $_GET['end'] : null);
        $step = intval(isset($_GET['step']) ? $_GET['step'] : null);

        // Validate BEFORE building the range: range() fails on a zero step and
        // would otherwise allocate an oversized array before the limit check.
        if ($start == $end) {
            show_pick_window($title, milu_lang('start_no_less_end'), $window);
            exit;
        }
        if ($step == 0) {
            show_pick_window($title, milu_lang('step_no_data'), $window);
            exit;
        }
        if ($start > 1677215 || $end > 1677215) {
            show_pick_window($title, milu_lang('long_data'), $window);
            exit;
        }

        // range() derives the direction from start/end; give it a positive
        // stride that fits inside the span so it cannot reject the step.
        $stride = abs($step);
        if ($stride > abs($end - $start)) {
            $range_arr = array($start);
        } else {
            $range_arr = range($start, $end, $stride);
        }
        $count = count($range_arr);
        // Padding width is taken from the last value of the range.
        $max_len = strlen((string) $range_arr[$count - 1]);

        if ($count < 9) {
            $new_arr = convert_url_range(array('url' => $url, 'auto' => $auto, 'start' => $start, 'end' => $end, 'step' => $step));
        } else {
            // Preview positions: first four, a gap (-1), last four. Working with
            // positions instead of values keeps a genuine range value of 0 from
            // being mistaken for the gap marker.
            $positions = array_merge(range(0, 3), array(-1), range($count - 4, $count - 1));
            foreach ($positions as $pos) {
                if ($pos < 0) {
                    // Gap marker between the head and the tail of the list.
                    $new_arr[4] = 0;
                    continue;
                }
                $value = $range_arr[$pos];
                if ($auto) {
                    $value = str_pad($value, $max_len, "0", STR_PAD_LEFT);
                }
                // Keyed by the position inside the full range.
                $new_arr[$pos] = str_replace('(*)', $value, $url);
            }
        }
    }

    $link_html = windos_show_link($new_arr, '', array('count' => $count));
    show_pick_window($title, $link_html, $window);
}
Exemplo n.º 2
0
 /**
  * Prepares the starting URL list and crawl depth for the collector.
  *
  * Depending on $this->p_arr['rules_type'] this fills $this->now_url_arr and
  * $this->max_level (plus a few counters in $this->temp_arr), and finally
  * stores the depth in $this->pick_cache_data['max_level'].
  *
  * @return void
  */
 function get_start_url()
 {
     if ($this->p_arr['rules_type'] == 1) {
         // The collector uses its built-in rules.
         $this->parse_rules();
     } elseif ($this->p_arr['rules_type'] == 2) {
         // Custom rules.
         if ($this->p_arr['url_range_type'] == 1 || $this->p_arr['url_range_type'] == 2) {
             // Collect articles from a paginated list, or from a URL range.
             $args = array(
                 'step' => $this->p_arr['page_url_auto_step'],
                 'start' => $this->p_arr['page_url_auto_start'],
                 'end' => $this->p_arr['page_url_auto_end'],
                 'url' => $this->p_arr['url_page_range'],
                 'auto' => $this->p_arr['page_url_auto'],
             );
             $this->now_url_arr = convert_url_range($args);
             $this->max_level = 2;
             if ($this->p_arr['url_range_type'] == 2) {
                 $this->max_level = 1;
                 $this->temp_arr['per_num'] = 1;
             } else {
                 $this->temp_arr['page_num'] = count($this->now_url_arr);
             }
         } elseif ($this->p_arr['url_range_type'] == 4) {
             // From an RSS address.
             $this->now_url_arr = get_rss_url(2, $this->p_arr['rss_url']);
             $this->max_level = 1;
         } elseif ($this->p_arr['url_range_type'] == 5) {
             // Multi-level list.
             $this->now_url_arr = array($this->p_arr['many_list_start_url']);
             $this->max_level = count($this->p_arr['many_page_list']) + 1;
         }
     } elseif ($this->p_arr['rules_type'] == 3) {
         // One-click collection.
         $start_arr = format_wrap($this->p_arr['manyou_start_url']);
         $this->now_url = $start_arr[0];
         $content = $this->parse_page();
         $rules_info = match_rules($this->now_url, $content, 4, 0);
         if ($rules_info && is_array($rules_info)) {
             $this->pick_cache_data['lilely_page'][] = $this->now_url;
             // NOTE(review): the list stored here is always replaced further
             // down (by the generated range or by $start_arr) - confirm intent.
             if ($rules_info['page_get_type'] == 1) {
                 $this->now_url_arr = dom_page_link($content, array('page_link_rules' => $rules_info['page_link_rules'], 'url' => $this->now_url));
             } else {
                 $this->now_url_arr = string_page_link($content, trim($rules_info['page_link_rules']), $this->now_url);
             }
         }

         // Look for a numeric "page" query parameter in the start URL.
         $page_url_arr = parse_url($this->now_url);
         $url_info = array();
         if (is_array($page_url_arr) && isset($page_url_arr['query'])) {
             parse_str($page_url_arr['query'], $url_info);
         }

         $index_url = $auto = 0;
         if (isset($url_info['page']) && is_numeric($url_info['page'])) {
             // Turn the page number into the "(*)" placeholder.
             $var_url = str_replace('page=' . $url_info['page'], 'page=(*)', $this->now_url);
             $this->pick_cache_data['lilely_page'][] = $this->now_url;
         } else {
             // No page parameter: gather links on the page that are at least
             // 90% similar to the start URL and derive the template from them.
             $page_all_link = (array) $this->parse_page('link', $content);
             $page_all_link = array_filter($page_all_link, 'filter_url_callback');
             $likely_arr = array($this->now_url);
             foreach ($page_all_link as $link) {
                 similar_text($link, $this->now_url, $percent);
                 if ($percent >= 90) {
                     $likely_arr[] = $link;
                 }
             }
             $likely_arr = array_resolve($likely_arr);
             $var_arr = get_url_diff($likely_arr);
             $var_url = $var_arr['url'];
             $index_url = $var_arr['index'];
             $auto = $var_arr['auto'];
             // array_rand() cannot pick from an empty array, so require entries.
             if ($var_url && is_array($likely_arr) && $likely_arr) {
                 $key = array_rand($likely_arr);
                 $this->pick_cache_data['lilely_page'][] = $likely_arr[$key];
             }
         }

         if ($var_url) {
             // $index_url is 0 when the "page=" branch ran; using it (rather than
             // the possibly undefined $var_arr) keeps both branches well-defined.
             $this->now_url_arr = convert_url_range(array('url' => $var_url, 'step' => 1, 'start' => $index_url ? 2 : 1, 'end' => 99, 'auto' => $auto));
             if ($index_url) {
                 array_unshift($this->now_url_arr, $index_url);
             }
         } else {
             $this->now_url_arr = $start_arr;
         }
         // The depth for this mode always comes from the setting, defaulting to 2.
         $this->max_level = $this->p_arr['manyou_max_level'] ? $this->p_arr['manyou_max_level'] : 2;
     }

     // URL filtering is enabled: prepend the extra URLs configured in
     // page_url_other and refresh the page counter.
     if ($this->p_arr['page_fiter'] == 1 && $this->now_url_arr && $this->p_arr['page_url_other']) {
         $this->now_url_arr = array_merge(format_wrap($this->p_arr['page_url_other']), $this->now_url_arr);
         $this->temp_arr['page_num'] = count($this->now_url_arr);
     }
     $this->pick_cache_data['max_level'] = $this->max_level;
 }