示例#1
0
/**
 * Scan the page at $url for internal links and queue the ones that are not
 * yet recorded for $site_id in the ve123_sites_temp table.
 *
 * Relies on the project helpers cmi(), _striplinks(), _expandlinks() and
 * fen_link(), and on the global $db object (query/fetch_array/insert).
 *
 * @param string $url     Page whose links are collected.
 * @param mixed  $site_id Value matched against / written to the site_id column.
 * @return void
 */
function Updan_zhua($url, $site_id)
{
    global $db;

    // cmi() is defined elsewhere; its result is indexed by the requested URL
    // (presumably the fetched HTML -- confirm against cmi()).
    $lrp = cmi($url);

    // Extract the URLs from the HTML code.
    $links = _striplinks($lrp[$url]);
    // Complete the URLs (expanded relative to $url).
    $links = _expandlinks($links, $url);
    // Separate internal links from external links.
    $fen_link = fen_link($links, $url);

    // The key must be a quoted string: a bare `nei` is an undefined constant
    // (fatal Error on PHP 8). A missing bucket is treated as "no internal links".
    $nei_link = isset($fen_link['nei']) ? (array) $fen_link['nei'] : array();
    // Drop duplicate internal links and reindex.
    $nei_link = array_values(array_unique($nei_link));
    if (!$nei_link) {
        // Nothing to queue, so the lookup query below is unnecessary.
        return;
    }

    // Read every URL already stored in ve123_sites_temp for this site_id.
    // NOTE(review): $site_id is concatenated into the SQL text; if it can come
    // from untrusted input it should be escaped/parameterised via the $db API.
    $new_temp = array();
    $query = $db->query("select url from ve123_sites_temp where site_id='" . $site_id . "'");
    while ($row = $db->fetch_array($query)) {
        $new_temp[] = $row['url'];
    }

    // Compare with the internal links to obtain the set difference...
    $cha_temp = array_diff($nei_link, $new_temp);
    // ...and insert each new URL into ve123_sites_temp.
    foreach ($cha_temp as $value) {
        $arral = array('url' => $value, 'site_id' => $site_id, 'no_id' => 0);
        $db->insert("ve123_sites_temp", $arral);
    }
}
示例#2
0
/**
 * Collect the page links contained in $content.
 *
 * If match_rules() yields a rule set for $url, the links are taken from
 * dom_page_link() (page_get_type == 1) or string_page_link()
 * (page_get_type == 2). When that produces nothing, every link found by
 * _striplinks() is checked with check_fastpick_viewurl(), and only the links
 * whose string length is within 5 characters of the most common length are
 * kept, then passed through filter_url_callback.
 *
 * @param string $content HTML source to scan.
 * @param string $url     URL the content was loaded from.
 * @param array  $list    Unused by this function; kept for caller compatibility.
 * @return array Links found; an empty array when there are none.
 */
function evo_get_pagelink($content, $url, $list = array())
{
    // Initialised up front so the check below never reads an undefined
    // variable when no rule matches.
    $link_arr = array();

    $rules_info = match_rules($url, $content, 4, 0);
    if ($rules_info && is_array($rules_info)) {
        if ($rules_info['page_get_type'] == 1) {
            $link_arr = dom_page_link($content, array('page_link_rules' => $rules_info['page_link_rules'], 'url' => $url));
        } elseif ($rules_info['page_get_type'] == 2) {
            $link_arr = string_page_link($content, trim($rules_info['page_link_rules']), $url);
        }
    }
    if ($link_arr) {
        return $link_arr;
    }

    // Fallback: harvest every link, resolved against the document's base URL
    // (or the page URL when get_base_url() returns nothing).
    $base_url = get_base_url($content);
    $base_url = $base_url ? $base_url : $url;
    $link_arr = _striplinks($content, $base_url);
    if (!$link_arr) {
        return array();
    }

    // Record the string length of every link that passes the view-URL check.
    $c_arr = array();
    foreach ((array) $link_arr as $k => $v_url) {
        if (!check_fastpick_viewurl($v_url, $url)) {
            unset($link_arr[$k]);
            continue;
        }
        $c_arr[$k] = strlen($v_url);
    }
    if (!$c_arr) {
        // Every candidate was rejected; array_count_values() must not be
        // called without an array (TypeError on PHP 8).
        return array();
    }

    // Most frequent URL length among the surviving links.
    $value_count_arr = array_count_values($c_arr);
    arsort($value_count_arr);
    $value_count_arr = array_keys($value_count_arr);
    $view_lenth = array_shift($value_count_arr);

    // array_resolve() is a project helper defined elsewhere.
    $link_arr = array_resolve($link_arr);
    foreach ($link_arr as $k => $v) {
        // Discard links whose length differs from the dominant one by more than 5.
        if (abs(strlen($v) - $view_lenth) > 5) {
            unset($link_arr[$k]);
        }
    }

    return array_filter($link_arr, 'filter_url_callback');
}
示例#3
0
 /**
  * Return the content, or the links, of the page at $this->now_url.
  *
  * The supplied $content (or, when $this->cache_time > 0, a load_cache() hit)
  * is used when available; otherwise the page is downloaded. In both paths
  * $this->base_url is set from get_base_url(), falling back to
  * $this->now_url.
  *
  * @param string $type    'content' to get the page body, 'link' to get its links.
  * @param string $content Optional page body that takes precedence over cache and download.
  * @return mixed The page body or link list; null when no transport function
  *               is available or when $type is unknown on the cached path.
  */
 function parse_page($type = 'content', $content = '')
 {
     $this->now_url = cnurl($this->now_url);

     // Supplied-content / cache path.
     if ($this->cache_time > 0 && ($message = load_cache($this->now_url)) || $content) {
         if ($content) {
             $message = $content;
         }
         $this->base_url = get_base_url($message);
         if (!$this->base_url) {
             $this->base_url = $this->now_url;
         }
         if ($type == 'content') {
             return $message;
         }
         if ($type == 'link') {
             return _striplinks($message, $this->base_url);
         }
         // Unknown $type: nothing to return on this path.
         return null;
     }

     // Download path.
     $time_out = $this->pick_set['time_out'] ? $this->pick_set['time_out'] : 15;
     $error = milu_lang('unable_pick');

     if (!function_exists('fsockopen') && !function_exists('pfsockopen')) {
         // No socket functions: file_get_contents() is the only option left.
         if (!function_exists('file_get_contents')) {
             show_pick_info($error, 'exit', $this->msg_args);
             return;
         }
         // This fallback returns the converted body regardless of $type and
         // does not touch base_url or the cache.
         $content = file_get_contents($this->now_url);
         $content = str_iconv($content);
         return $content;
     }

     if (!$this->snoopy) {
         require_once PICK_DIR . '/lib/Snoopy.class.php';
         // The order of these settings must not be changed arbitrarily.
         $this->snoopy = new Snoopy();
         $this->snoopy->maxredirs = $this->p_arr['max_redirs'] ? $this->p_arr['max_redirs'] : 3;
         $this->snoopy->expandlinks = TRUE;
         // Whether redirects to other domains are allowed.
         $this->snoopy->offsiteok = TRUE;
         $this->snoopy->maxframes = 3;
         // Without this setting some pages cannot be fetched. The header is
         // absent under CLI/cron, so guard the lookup to avoid an
         // undefined-index warning.
         $this->snoopy->agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
         $this->snoopy->referer = $this->now_url;
         $this->snoopy->rawheaders["COOKIE"] = $this->p_arr['login_cookie'];
         $this->snoopy->read_timeout = $time_out;
     }

     if ($type == 'content') {
         $this->snoopy->results = get_contents($this->now_url, array('cookie' => $this->p_arr['login_cookie'], 'max_redirs' => $this->p_arr['max_redirs'], 'time_out' => $time_out, 'cache' => $this->cache_time));
     } elseif ($type == 'link') {
         // The outcome is read from $this->snoopy->results below; the boolean
         // return value was never acted upon.
         $this->snoopy->fetchlinks($this->now_url);
     }

     $this->base_url = get_base_url($this->snoopy->results);
     if (!$this->base_url) {
         $this->base_url = $this->now_url;
     }
     if ($this->snoopy->results) {
         cache_data($this->now_url, $this->snoopy->results, $this->cache_time);
     }
     return $this->snoopy->results;
 }