/**
 * Fetch a list page and extract its pagination links.
 *
 * Keys read from $rules_arr (each is optional; a missing key is treated as null):
 *  - 'test'         URL to fetch when $start_url is empty
 *  - 'rules'        pagination rule; slashes are stripped before use
 *  - 'login_cookie' forwarded to get_contents()
 *  - 'type'         1 selects dom_page_link(), anything else string_page_link()
 *
 * The values are read explicitly instead of via extract(), so an unexpected
 * key in $rules_arr (e.g. 'start_url' or 'content') can no longer overwrite
 * the function's own variables.
 *
 * @param array  $rules_arr Rule settings, see the key list above.
 * @param string $start_url URL to fetch; falls back to $rules_arr['test'] when empty.
 * @return mixed Result of dom_page_link() or string_page_link().
 */
function many_list_get_page($rules_arr, $start_url = '') {
    $rules_arr = (array) $rules_arr;

    $test_url     = isset($rules_arr['test']) ? $rules_arr['test'] : null;
    $raw_rules    = isset($rules_arr['rules']) ? $rules_arr['rules'] : '';
    $login_cookie = isset($rules_arr['login_cookie']) ? $rules_arr['login_cookie'] : null;
    $type         = isset($rules_arr['type']) ? $rules_arr['type'] : null;

    // An explicit start URL wins over the rule set's test URL.
    $url = $start_url ? $start_url : $test_url;
    $rules = stripslashes($raw_rules);

    $content = get_contents($url, array('login_cookie' => $login_cookie, 'cache' => -1));

    if ($type == 1) {
        // DOM-based extraction.
        return dom_page_link($content, array('page_link_rules' => $rules, 'url_page_range' => $url));
    }

    // String-based extraction.
    return string_page_link($content, $rules, $url);
}
/**
 * Collect the pagination links of a page.
 *
 * First tries a stored rule obtained from match_rules(); when that yields
 * nothing, falls back to a heuristic: take every link accepted by
 * check_fastpick_viewurl(), find the most common URL length, and keep only
 * the links whose length is within 5 characters of it.
 *
 * @param string $content Page HTML.
 * @param string $url     URL the content was fetched from.
 * @param array  $list    Unused; kept for backward compatibility with callers.
 * @return array Pagination links; an empty array when none are found.
 */
function evo_get_pagelink($content, $url, $list = array()) {
    // Initialised up front so the check below never reads an undefined variable.
    $link_arr = array();

    $rules_info = match_rules($url, $content, 4, 0);
    if ($rules_info && is_array($rules_info)) {
        if ($rules_info['page_get_type'] == 1) {
            $link_arr = dom_page_link($content, array('page_link_rules' => $rules_info['page_link_rules'], 'url' => $url));
        } elseif ($rules_info['page_get_type'] == 2) {
            $link_arr = string_page_link($content, trim($rules_info['page_link_rules']), $url);
        }
    }
    if ($link_arr) {
        return $link_arr;
    }

    // Heuristic fallback: no usable rule, so scan every link in the page.
    $base_url = get_base_url($content);
    if (!$base_url) {
        $base_url = $url;
    }
    $link_arr = _striplinks($content, $base_url);
    if (!$link_arr) {
        return array();
    }

    $length_by_key = array();
    foreach ((array) $link_arr as $k => $v_url) {
        if (!check_fastpick_viewurl($v_url, $url)) {
            unset($link_arr[$k]);
            continue;
        }
        $length_by_key[$k] = strlen($v_url);
    }

    // Every link was rejected: nothing to analyse. Without this guard
    // array_count_values() would be handed an undefined variable.
    if (!$length_by_key) {
        return array();
    }

    // The most frequent URL length is taken as the "typical" page-link length.
    $length_freq = array_count_values($length_by_key);
    arsort($length_freq);
    $lengths_by_freq = array_keys($length_freq);
    $view_length = array_shift($lengths_by_freq);

    $link_arr = array_resolve($link_arr);
    foreach ($link_arr as $k => $v) {
        // Drop links whose length deviates too much from the typical one.
        if (abs(strlen($v) - $view_length) > 5) {
            unset($link_arr[$k]);
        }
    }

    return array_filter($link_arr, 'filter_url_callback');
}
/**
 * Extract the content-pagination links from an article page.
 *
 * When $this->p_arr['content_page_get_type'] is 1 the links are selected
 * with the DOM selector in $this->p_arr['content_page_rules']; otherwise
 * the same setting is handed to string_page_link() as a string rule.
 *
 * @param string $content Page HTML.
 * @return array|false In DOM mode: de-duplicated absolute links (an empty
 *                     array when none qualify), or false when the HTML could
 *                     not be parsed. In string mode: whatever
 *                     string_page_link() returns.
 */
function get_content_page($content) {
    if ($this->p_arr['content_page_get_type'] != 1) {
        // String-rule mode.
        return string_page_link($content, $this->p_arr['content_page_rules'], $this->now_url);
    }

    $html = get_htmldom_obj($content);
    if (!$html) {
        return false;
    }

    $item = array();
    foreach ($html->find($this->p_arr['content_page_rules']) as $v) {
        // Matched elements are not guaranteed to carry an href attribute.
        $href = isset($v->attr['href']) ? $v->attr['href'] : null;
        $a_url = $this->format_url($href);
        // Skip empty links, bare anchors and the "previous page" link.
        if (!$a_url || $a_url == '#' || $v->innertext == milu_lang('up_page')) {
            continue;
        }
        $item[] = _expandlinks($a_url, $this->base_url);
    }

    $html->clear();
    unset($html);

    // De-duplicate once after the loop instead of on every iteration, and
    // always return a defined value even when nothing was collected.
    return $item ? sarray_unique($item) : array();
}