/**
 * Fetch a page and queue its not-yet-recorded internal links for a site.
 *
 * The page is fetched with cmi(), its links are extracted with _striplinks(),
 * passed through _expandlinks() and split by fen_link(). Every internal link
 * (the 'nei' bucket) that is not already stored in `ve123_sites_temp` for
 * $site_id is inserted there with no_id = 0.
 *
 * @param string $url     Page to fetch; also used as the key into cmi()'s result.
 * @param mixed  $site_id Site identifier stored alongside every queued URL.
 *
 * @return void
 */
function Updan_zhua($url, $site_id)
{
    global $db;

    $lrp = cmi($url);
    // cmi() may not have an entry for this URL; fall back to an empty document
    // instead of reading an undefined index.
    $html = isset($lrp[$url]) ? $lrp[$url] : '';

    // Extract the URLs from the HTML code.
    $links = _striplinks($html);
    // Complete the URLs (the base URL is passed in for that purpose).
    $links = _expandlinks($links, $url);
    // Separate internal links from external links.
    $fen_link = fen_link($links, $url);

    // The key must be a quoted string: a bare `nei` is an undefined constant.
    $internal = isset($fen_link['nei']) ? (array) $fen_link['nei'] : array();
    // Drop duplicate internal URLs and reindex.
    $nei_link = array_values(array_unique($internal));
    if (!$nei_link) {
        // Nothing to queue, so the lookup below would be wasted work.
        return;
    }

    // Read every URL already stored in ve123_sites_temp for this site.
    // NOTE(review): $site_id is concatenated into the SQL unescaped; if it can
    // ever come from untrusted input, escape/cast it with whatever the $db
    // wrapper provides.
    $new_temp = array();
    $query = $db->query("select url from ve123_sites_temp where site_id='" . $site_id . "'");
    while ($row = $db->fetch_array($query)) {
        $new_temp[] = $row['url'];
    }

    // Internal links that are not stored yet (set difference).
    $cha_temp = array_diff($nei_link, $new_temp);

    // Insert the difference into ve123_sites_temp.
    foreach ($cha_temp as $value) {
        $arral = array('url' => $value, 'site_id' => $site_id, 'no_id' => 0);
        $db->insert("ve123_sites_temp", $arral);
    }
}
/**
 * Collect the pagination links of a page.
 *
 * First tries the rule returned by match_rules(): page_get_type 1 delegates to
 * dom_page_link(), page_get_type 2 to string_page_link(). If that yields
 * nothing, falls back to a heuristic: extract every link with _striplinks(),
 * keep those accepted by check_fastpick_viewurl(), find the most frequent URL
 * length among them, and keep only links whose length is within 5 characters
 * of it. The survivors are finally filtered through filter_url_callback.
 *
 * @param string $content HTML of the page.
 * @param string $url     URL of the page; used for rule matching and as the
 *                        fallback base URL.
 * @param array  $list    Unused; kept so existing callers remain valid.
 *
 * @return array Links found (keys are whatever the helpers produce); empty
 *               array when nothing is found.
 */
function evo_get_pagelink($content, $url, $list = array())
{
    // Initialised up front: no rule, or an unknown page_get_type, would
    // otherwise leave this variable undefined when it is tested below.
    $link_arr = array();

    $rules_info = match_rules($url, $content, 4, 0);
    if ($rules_info && is_array($rules_info)) {
        if ($rules_info['page_get_type'] == 1) {
            $link_arr = dom_page_link($content, array('page_link_rules' => $rules_info['page_link_rules'], 'url' => $url));
        } elseif ($rules_info['page_get_type'] == 2) {
            $link_arr = string_page_link($content, trim($rules_info['page_link_rules']), $url);
        }
    }
    if ($link_arr) {
        return $link_arr;
    }

    // Heuristic fallback: work from every link on the page.
    $base_url = get_base_url($content);
    $base_url = $base_url ? $base_url : $url;
    $link_arr = _striplinks($content, $base_url);
    if (!$link_arr) {
        return array();
    }

    // Length of every accepted link, keyed like $link_arr. Starts as an empty
    // array so array_count_values() never receives null when all links are
    // rejected.
    $lengths = array();
    foreach ((array) $link_arr as $k => $v_url) {
        if (!check_fastpick_viewurl($v_url, $url)) {
            unset($link_arr[$k]);
            continue;
        }
        $lengths[$k] = strlen($v_url);
    }

    // Most frequent length first; null when no link was accepted.
    $length_counts = array_count_values($lengths);
    arsort($length_counts);
    $ranked_lengths = array_keys($length_counts);
    $common_length = array_shift($ranked_lengths);

    $link_arr = array_resolve($link_arr);
    foreach ($link_arr as $k => $v) {
        // Drop links whose length differs from the dominant one by more than 5.
        if (abs(strlen($v) - $common_length) > 5) {
            unset($link_arr[$k]);
        }
    }

    return array_filter($link_arr, 'filter_url_callback');
}
/**
 * Return the current page ($this->now_url) as raw content or as a link list.
 *
 * Source of the page, in order of preference:
 *  1. $content if given, otherwise a cached copy from load_cache() when
 *     $this->cache_time > 0;
 *  2. file_get_contents() when neither fsockopen nor pfsockopen exists;
 *  3. get_contents() for type 'content', or Snoopy::fetchlinks() for type 'link'.
 *
 * Side effects visible here: $this->now_url is normalised through cnurl(),
 * $this->base_url is set from get_base_url() (falling back to now_url) on the
 * cached and Snoopy paths, $this->snoopy is created lazily, and a non-empty
 * fetched result is passed to cache_data().
 *
 * @param string $type    'content' or 'link'.
 * @param string $content Already-fetched page body; when non-empty no fetch is done.
 *
 * @return mixed Page body or link list; null when no fetch method is available
 *               or when $type is unrecognised on the cached/supplied path.
 */
function parse_page($type = 'content', $content = '')
{
    $this->now_url = cnurl($this->now_url);

    // `&&` binds tighter than `||`: (cache enabled AND cache hit) OR content supplied.
    if ($this->cache_time > 0 && ($message = load_cache($this->now_url)) || $content) {
        if ($content) {
            // Caller-supplied content takes precedence over the cached copy.
            $message = $content;
        }
        $this->base_url = get_base_url($message);
        if (!$this->base_url) {
            $this->base_url = $this->now_url;
        }
        if ($type == 'content') {
            return $message;
        }
        if ($type == 'link') {
            return _striplinks($message, $this->base_url);
        }
        // Any other $type yields null, as before.
        return;
    }

    $time_out = $this->pick_set['time_out'] ? $this->pick_set['time_out'] : 15;
    $error = milu_lang('unable_pick');

    $has_socket = function_exists('fsockopen') || function_exists('pfsockopen');
    if (!$has_socket && !function_exists('file_get_contents')) {
        // No way to fetch anything at all.
        show_pick_info($error, 'exit', $this->msg_args);
        return;
    }
    if (!$has_socket) {
        // file_get_contents is known to exist here (checked just above), so
        // the former second check for it was unreachable and has been removed.
        $content = file_get_contents($this->now_url);
        $content = str_iconv($content);
        return $content;
    }

    if (!$this->snoopy) {
        require_once PICK_DIR . '/lib/Snoopy.class.php';
        // The order of these settings must not be changed arbitrarily.
        $this->snoopy = new Snoopy();
        $this->snoopy->maxredirs = $this->p_arr['max_redirs'] ? $this->p_arr['max_redirs'] : 3;
        $this->snoopy->expandlinks = TRUE;
        $this->snoopy->offsiteok = TRUE; // whether redirects to other domains are allowed
        $this->snoopy->maxframes = 3;
        // Without an agent some pages cannot be fetched. The header is absent
        // under CLI/cron, so guard the lookup instead of reading an undefined index.
        $this->snoopy->agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
        $this->snoopy->referer = $this->now_url;
        $this->snoopy->rawheaders["COOKIE"] = $this->p_arr['login_cookie'];
        $this->snoopy->read_timeout = $time_out;
    }

    if ($type == 'content') {
        $this->snoopy->results = get_contents($this->now_url, array('cookie' => $this->p_arr['login_cookie'], 'max_redirs' => $this->p_arr['max_redirs'], 'time_out' => $time_out, 'cache' => $this->cache_time));
    } elseif ($type == 'link') {
        // The return value was never acted upon; the outcome is read from
        // $this->snoopy->results below.
        $this->snoopy->fetchlinks($this->now_url);
    }

    $this->base_url = get_base_url($this->snoopy->results);
    if (!$this->base_url) {
        $this->base_url = $this->now_url;
    }
    if ($this->snoopy->results) {
        cache_data($this->now_url, $this->snoopy->results, $this->cache_time);
    }

    return $this->snoopy->results;
}