Example #1
0
/**
 * Fetch a list page and extract its pagination links.
 *
 * Keys read from $rules_arr:
 *  - 'test'         fallback URL, used when $start_url is empty
 *  - 'rules'        page-link rule; slashes are stripped before use
 *  - 'login_cookie' forwarded to get_contents() as the 'login_cookie' option
 *  - 'type'         1 selects dom_page_link(); any other value selects string_page_link()
 *
 * @param array  $rules_arr rule settings (see the keys above)
 * @param string $start_url URL to fetch; when empty, $rules_arr['test'] is used instead
 * @return mixed the result of dom_page_link() or string_page_link(), returned unchanged
 */
function many_list_get_page($rules_arr, $start_url = '')
{
    // Read only the keys we need rather than extract()-ing the whole array:
    // extract() would let a 'start_url' key overwrite the argument and would
    // inject arbitrary variables into this scope.
    $rules_arr = (array) $rules_arr;
    $test = isset($rules_arr['test']) ? $rules_arr['test'] : null;
    $rules = isset($rules_arr['rules']) ? $rules_arr['rules'] : '';
    $login_cookie = isset($rules_arr['login_cookie']) ? $rules_arr['login_cookie'] : null;
    $type = isset($rules_arr['type']) ? $rules_arr['type'] : 0;

    $url = $start_url ? $start_url : $test;
    $rules = stripslashes($rules);

    // 'cache' => -1 is passed through as in the original call; its meaning is
    // defined by get_contents().
    $content = get_contents($url, array('login_cookie' => $login_cookie, 'cache' => -1));

    // Loose comparison on purpose: the type may arrive as the string '1'.
    if ($type == 1) {
        // DOM (selector) based rule
        return dom_page_link($content, array('page_link_rules' => $rules, 'url_page_range' => $url));
    }

    // String based rule
    return string_page_link($content, $rules, $url);
}
Example #2
0
/**
 * Collect pagination links for a fetched page.
 *
 * First asks match_rules() for a rule set. When one is returned, the links are
 * extracted with dom_page_link() (page_get_type 1) or string_page_link()
 * (page_get_type 2), and a non-empty result is returned as is.
 *
 * Otherwise a heuristic is used: every link found by _striplinks() is kept only
 * if check_fastpick_viewurl() accepts it, the most frequent URL length among the
 * survivors is determined, links whose length differs from it by more than 5
 * characters are dropped, and the rest is filtered through filter_url_callback.
 *
 * @param string $content page HTML
 * @param string $url     URL the content was fetched from
 * @param mixed  $list    unused; kept so existing callers keep working
 * @return array list of links (empty array when nothing usable is found); the
 *               rule-based branch returns the helper's result unchanged
 */
function evo_get_pagelink($content, $url, $list = array())
{
    // Initialised up front so the check below never reads an undefined variable
    // when no rule matches or the rule type is unknown.
    $link_arr = array();

    $rules_info = match_rules($url, $content, 4, 0);
    if ($rules_info && is_array($rules_info)) {
        $page_get_type = isset($rules_info['page_get_type']) ? $rules_info['page_get_type'] : 0;
        $page_link_rules = isset($rules_info['page_link_rules']) ? $rules_info['page_link_rules'] : '';
        if ($page_get_type == 1) {
            $link_arr = dom_page_link($content, array('page_link_rules' => $page_link_rules, 'url' => $url));
        } elseif ($page_get_type == 2) {
            $link_arr = string_page_link($content, trim($page_link_rules), $url);
        }
    }
    if ($link_arr) {
        return $link_arr;
    }

    // Heuristic fallback: resolve links against the document base URL if the
    // page declares one, otherwise against the page URL.
    $base_url = get_base_url($content);
    $base_url = $base_url ? $base_url : $url;
    $link_arr = _striplinks($content, $base_url);
    if (!$link_arr) {
        return array();
    }

    $link_arr = (array) $link_arr;
    $length_of = array();
    foreach ($link_arr as $k => $v_url) {
        if (!check_fastpick_viewurl($v_url, $url)) {
            unset($link_arr[$k]);
            continue;
        }
        $length_of[$k] = strlen($v_url);
    }

    // Every link was rejected: there is no dominant length to compute, and
    // array_count_values() must not be handed an undefined value.
    if (!$length_of) {
        return array();
    }

    // The most frequent URL length is taken as the reference length.
    $value_count_arr = array_count_values($length_of);
    arsort($value_count_arr);
    $value_count_arr = array_keys($value_count_arr);
    $view_lenth = array_shift($value_count_arr);

    $link_arr = array_resolve($link_arr);
    foreach ($link_arr as $k => $v) {
        // Links much longer or shorter than the reference length are dropped.
        if (abs(strlen($v) - $view_lenth) > 5) {
            unset($link_arr[$k]);
        }
    }

    return array_filter($link_arr, 'filter_url_callback');
}
Example #3
0
 /**
  * Collect the pagination links of a content page.
  *
  * When $this->p_arr['content_page_get_type'] is 1 the rule in
  * $this->p_arr['content_page_rules'] is used as a DOM selector: each matched
  * element's href is normalised with format_url(), expanded against
  * $this->base_url and the resulting list is de-duplicated. Otherwise the rule
  * is handed to string_page_link() together with $this->now_url.
  *
  * @param string $content page HTML
  * @return array|false links found (empty array when the selector matched no
  *                     usable link), or false when the HTML could not be parsed;
  *                     the string branch returns string_page_link()'s result unchanged
  */
 function get_content_page($content)
 {
     // Defined up front so the DOM branch never returns an undefined variable.
     $re_arr = array();
     if ($this->p_arr['content_page_get_type'] == 1) {
         $html = get_htmldom_obj($content);
         if (!$html) {
             return false;
         }
         $item = array();
         foreach ($html->find($this->p_arr['content_page_rules']) as $v) {
             // Matched elements are not guaranteed to carry an href attribute.
             $href = isset($v->attr['href']) ? $v->attr['href'] : null;
             $a_url = $this->format_url($href);
             // Skip empty links, bare anchors and the link whose text equals
             // milu_lang('up_page') (presumably the "previous page" label - confirm).
             if (!$a_url || $a_url == '#' || $v->innertext == milu_lang('up_page')) {
                 continue;
             }
             $item[] = _expandlinks($a_url, $this->base_url);
         }
         $html->clear();
         unset($html);
         // De-duplicate once, after the loop, instead of on every iteration.
         if ($item) {
             $re_arr = sarray_unique($item);
         }
     } else {
         // String based rule
         $re_arr = string_page_link($content, $this->p_arr['content_page_rules'], $this->now_url);
     }
     return $re_arr;
 }