/**
 * Render a preview window listing the URLs generated from a "(*)" page pattern.
 *
 * Input is read from $_GET:
 *  - url   : URL, optionally containing the "(*)" placeholder
 *  - auto  : truthy to left-pad numbers with "0" (the literal "undefined" counts as off)
 *  - start : first number of the range
 *  - end   : last number of the range
 *  - step  : increment between numbers
 *
 * If the URL has no "(*)" placeholder it is shown as the single result.
 * Otherwise the numeric inputs are validated (an error window is shown and the
 * script exits on failure) and the range is expanded. Ranges of fewer than
 * 9 entries are expanded in full through convert_url_range(); longer ranges are
 * previewed as the first four and last four URLs with a 0 placeholder entry
 * between them.
 *
 * @return void Output is produced through show_pick_window().
 */
function url_page_range_test() {
    $window_args = array('w' => 620, 'h' => '400', 'f' => 1);
    $url = rpc_str(isset($_GET['url']) ? $_GET['url'] : null);

    if (!strexists($url, '(*)')) {
        $new_arr = array($url);
        $count = 1;
    } else {
        $auto = isset($_GET['auto']) ? $_GET['auto'] : 0;
        if ($auto == 'undefined') {
            $auto = 0;
        }
        $start = intval(isset($_GET['start']) ? $_GET['start'] : 0);
        $end = intval(isset($_GET['end']) ? $_GET['end'] : 0);
        $step = intval(isset($_GET['step']) ? $_GET['step'] : 0);

        // Validate BEFORE calling range(): a zero step or an oversized range
        // must be rejected here rather than reaching range() unchecked.
        if ($start == $end) {
            show_pick_window(milu_lang('get_link_list_test'), milu_lang('start_no_less_end'), $window_args);
            exit;
        }
        if ($step == 0) {
            show_pick_window(milu_lang('get_link_list_test'), milu_lang('step_no_data'), $window_args);
            exit;
        }
        // NOTE(review): only the upper bound is limited; very large negative
        // bounds are not rejected here - confirm whether they can occur.
        if ($start > 1677215 || $end > 1677215) {
            show_pick_window(milu_lang('get_link_list_test'), milu_lang('long_data'), $window_args);
            exit;
        }

        // range() picks the direction from start/end, so only the magnitude of
        // the step matters. A step larger than the whole span yields just the
        // start value; handle that here instead of relying on range(), whose
        // treatment of this case is not uniform across PHP versions.
        $range_step = abs($step);
        if ($range_step > abs($end - $start)) {
            $range_arr = array($start);
        } else {
            $range_arr = range($start, $end, $range_step);
        }
        $count = count($range_arr);
        // Width used for zero-padding: the length of the last number in the range.
        $max_len = strlen((string) $range_arr[$count - 1]);

        if ($count < 9) {
            $new_arr = convert_url_range(array(
                'url' => $url,
                'auto' => $auto,
                'start' => $start,
                'end' => $end,
                'step' => $step,
            ));
        } else {
            // Long range: show the first four and last four URLs, keyed by their
            // position in the range, with a 0 placeholder at key 4 in between.
            // Positions are taken directly instead of being looked up by value,
            // so a genuine page number 0 is no longer mistaken for the placeholder.
            $new_arr = array();
            for ($idx = 0; $idx < 4; $idx++) {
                $num = $auto ? str_pad($range_arr[$idx], $max_len, "0", STR_PAD_LEFT) : $range_arr[$idx];
                $new_arr[$idx] = str_replace('(*)', $num, $url);
            }
            $new_arr[4] = 0;
            for ($idx = $count - 4; $idx < $count; $idx++) {
                $num = $auto ? str_pad($range_arr[$idx], $max_len, "0", STR_PAD_LEFT) : $range_arr[$idx];
                $new_arr[$idx] = str_replace('(*)', $num, $url);
            }
        }
    }

    $link_html = windos_show_link($new_arr, '', array('count' => $count));
    show_pick_window(milu_lang('get_link_list_test'), $link_html, $window_args);
}
/**
 * Determine the initial list of URLs to crawl and the crawl depth.
 *
 * Depending on $this->p_arr['rules_type']:
 *  - 1 (built-in rules): delegates to $this->parse_rules().
 *  - 2 (custom rules): builds $this->now_url_arr from a URL range, an RSS
 *    address or a multi-level list start URL, according to
 *    $this->p_arr['url_range_type'], and sets $this->max_level accordingly.
 *  - 3 (one-click collection): fetches the first start URL, tries to derive a
 *    "(*)" page pattern from it (either from a numeric "page" query parameter
 *    or from similar links found on the page) and expands that pattern into
 *    $this->now_url_arr; falls back to the configured start URLs when no
 *    pattern is found.
 *
 * Afterwards, when URL filtering is enabled, the extra URLs from
 * $this->p_arr['page_url_other'] are prepended. The resulting max level is
 * stored in $this->pick_cache_data['max_level'].
 *
 * @return void
 */
function get_start_url() {
    $rules_type = $this->p_arr['rules_type'];

    if ($rules_type == 1) {
        // The collector uses built-in rules.
        $this->parse_rules();
    } elseif ($rules_type == 2) {
        // Custom rules.
        $range_type = $this->p_arr['url_range_type'];
        if ($range_type == 1 || $range_type == 2) {
            // Collect articles from a paginated list, or from a URL range.
            $args = array();
            $args['step'] = $this->p_arr['page_url_auto_step'];
            $args['start'] = $this->p_arr['page_url_auto_start'];
            $args['end'] = $this->p_arr['page_url_auto_end'];
            $args['url'] = $this->p_arr['url_page_range'];
            $args['auto'] = $this->p_arr['page_url_auto'];
            $this->now_url_arr = convert_url_range($args);
            if ($range_type == 2) {
                $this->max_level = 1;
                $this->temp_arr['per_num'] = 1;
            } else {
                $this->max_level = 2;
                $this->temp_arr['page_num'] = count($this->now_url_arr);
            }
        } elseif ($range_type == 4) {
            // From an RSS address.
            $this->now_url_arr = get_rss_url(2, $this->p_arr['rss_url']);
            $this->max_level = 1;
        } elseif ($range_type == 5) {
            // Multi-level list.
            $this->now_url_arr = array($this->p_arr['many_list_start_url']);
            $this->max_level = count($this->p_arr['many_page_list']) + 1;
        }
    } elseif ($rules_type == 3) {
        // One-click collection.
        $start_arr = format_wrap($this->p_arr['manyou_start_url']);
        $this->now_url = $start_arr[0];
        $content = $this->parse_page();
        $rules_info = match_rules($this->now_url, $content, 4, 0);
        if ($rules_info && is_array($rules_info)) {
            $this->pick_cache_data['lilely_page'][] = $this->now_url;
            if ($rules_info['page_get_type'] == 1) {
                $this->now_url_arr = dom_page_link($content, array(
                    'page_link_rules' => $rules_info['page_link_rules'],
                    'url' => $this->now_url,
                ));
            } else {
                $this->now_url_arr = string_page_link($content, trim($rules_info['page_link_rules']), $this->now_url);
            }
        }

        // The URL may have no query string (or fail to parse at all), so only
        // hand parse_str() a real string.
        $page_url_arr = parse_url($this->now_url);
        $query = (is_array($page_url_arr) && isset($page_url_arr['query'])) ? $page_url_arr['query'] : '';
        $url_info = array();
        parse_str($query, $url_info);
        $page_param = isset($url_info['page']) ? $url_info['page'] : null;

        $index_url = $auto = 0;
        if (is_numeric($page_param)) {
            // The start URL already carries a numeric page parameter: turn it
            // into the "(*)" pattern directly.
            $var_url = str_replace('page=' . $page_param, 'page=(*)', $this->now_url);
            $this->pick_cache_data['lilely_page'][] = $this->now_url;
        } else {
            // Otherwise look for links on the page that are at least 90%
            // similar to the start URL and derive the pattern from them.
            $page_all_link = $this->parse_page('link', $content);
            $page_all_link = is_array($page_all_link) ? array_filter($page_all_link, 'filter_url_callback') : array();
            $likely_arr = array($this->now_url);
            foreach ($page_all_link as $link) {
                similar_text($link, $this->now_url, $percent);
                if ($percent >= 90) {
                    $likely_arr[] = $link;
                }
            }
            $likely_arr = array_resolve($likely_arr);
            $var_arr = get_url_diff($likely_arr);
            $var_url = $var_arr['url'];
            $index_url = $var_arr['index'];
            $auto = $var_arr['auto'];
            if ($var_url && is_array($likely_arr)) {
                $key = array_rand($likely_arr);
                $this->pick_cache_data['lilely_page'][] = $likely_arr[$key];
            }
        }

        if ($var_url) {
            // Use $index_url rather than $var_arr['index']: $var_arr is only
            // defined in the similarity branch above, while $index_url always
            // holds the same value with a safe default of 0.
            $this->now_url_arr = convert_url_range(array(
                'url' => $var_url,
                'step' => 1,
                'start' => $index_url ? 2 : 1,
                'end' => 99,
                'auto' => $auto,
            ));
            if ($index_url) {
                array_unshift($this->now_url_arr, $index_url);
            }
        } else {
            $this->now_url_arr = $start_arr;
        }
        // The configured maximum level always decides the depth for this mode
        // (defaulting to 2), regardless of whether a page pattern was found.
        $this->max_level = $this->p_arr['manyou_max_level'] ? $this->p_arr['manyou_max_level'] : 2;
    }

    // URL filtering is enabled: prepend the additionally configured URLs.
    if ($this->p_arr['page_fiter'] == 1 && $this->now_url_arr) {
        if ($this->p_arr['page_url_other']) {
            $this->now_url_arr = array_merge(format_wrap($this->p_arr['page_url_other']), $this->now_url_arr);
            $this->temp_arr['page_num'] = count($this->now_url_arr);
        }
    }

    $this->pick_cache_data['max_level'] = $this->max_level;
}