/**
 * Extract a single article (title/content/...) from an already downloaded page.
 *
 * Resolution order:
 *   1. a local rule matched by match_rules();
 *   2. otherwise the result of cloud_match_rules();
 *   3. otherwise, when the cloud result has no content and the 'fp_open_auto'
 *      setting equals 1, the HtmlExtractor fallback.
 *
 * @param string $content Raw HTML of the page.
 * @param string $url     URL the page was fetched from.
 * @param array  $args    Optional settings; only 'get_type' is read here.
 *                        NOTE(review): $args is still passed through extract()
 *                        for backward compatibility, so a key named 'content'
 *                        or 'url' would overwrite the parameters above.
 *
 * @return array|null NULL when $content is blank, otherwise the article data
 *                    with an extra 'evo_time' entry taken from d_e(0, 'evo').
 */
function get_single_article($content, $url, $args = array()) {
    extract($args);

    if (strlen(trim($content)) < 1) {
        return;
    }

    d_s('evo');

    // A non-empty request parameter wins over the value supplied through $args.
    // empty()/isset() keep the original truthiness rules without touching
    // undefined indexes or variables.
    if (!empty($_GET['get_type'])) {
        $get_type = intval($_GET['get_type']);
    } elseif (!isset($get_type)) {
        $get_type = 0;
    }
    if (!$get_type) {
        $get_type = 1;
    }

    $milu_set = pick_common_get();
    $rules_info = match_rules($url, $content, $get_type, 0);

    if (is_array($rules_info)) {
        // A local rule matched: let it extract the article.
        pload('F:fastpick');
        $data = rules_get_article($content, $rules_info);
        write_evo_errlog($data, $url, $rules_info);
    } else {
        // Download rules from the cloud (original author's note: this could be
        // optimised, no approach found yet).
        $data = (array) cloud_match_rules($get_type, $url, $content);

        $auto_enabled = isset($milu_set['fp_open_auto']) && $milu_set['fp_open_auto'] == 1;
        if (empty($data['content']) && $auto_enabled) {
            // Automatic ("smart") extraction is switched on.
            pload('C:HtmlExtractor');
            pload('F:article');
            $he = new HtmlExtractor($content, $url);
            $data = (array) $he->get_text();
            $data['content'] = dz_attach_format($url, $data['content']);
            $arr = format_article_imgurl($url, $data['content']);
            $data['content'] = $arr['message'];

            // Strip blocks whose id contains "share" or whose class contains "page".
            $del_dom_rules = array('div[id*=share]', 'div[class*=page]');
            foreach ($del_dom_rules as $selector) {
                $data['content'] = dom_filter_something($data['content'], $selector, 2);
            }
            unset($data['evo_title_info']);
        }
    }

    // Forum targets get the HTML converted through the two *_htmlbbcode helpers.
    if (isset($_GET['type']) && $_GET['type'] == 'bbs') {
        $data['content'] = media_htmlbbcode($data['content'], $url);
        $data['content'] = img_htmlbbcode($data['content'], $url);
    }

    $data['evo_time'] = d_e(0, 'evo');
    return $data;
}
/**
 * Fetch the avatar of one remote user and save it as the avatar of a local user.
 *
 * The following variables are read but never assigned in this function, so they
 * have to arrive as keys of $args (via extract()): now_get, avatar_get_uid,
 * all_get_time, avatar_web_url, avata_jump_num, get_count, success_count, uid,
 * cover_avatar.
 *
 * When the remote uid cannot be used (no HTTP client, download failed, or the
 * image looks like a default avatar) the function advances now_get and
 * avatar_get_uid by one and calls itself again with $i + 1.
 *
 * @param array $args Job state, see the key list above.
 * @param int   $i    Number of consecutive retries so far; the script exits
 *                    once it exceeds 49.
 *
 * @return array|false On success the array from get_show_arr() merged with
 *                     'content', 'avatar_get_uid', 'now_get', 'success_count'
 *                     and 'get_count'; FALSE when the avatar directory or a
 *                     file cannot be written, or when the big/small variant
 *                     cannot be downloaded.
 */
function get_web_avatar($args, $i = 0) {
    d_s();
    $info = $args;

    // Stop after 50 consecutive unusable uids.
    if ($i > 49) {
        show_pick_info(milu_lang('alway_no_get'), 'show_err');
        exit;
    }

    extract($args);

    $_SESSION['avatar_get']['now_get'] = $info['now_get'] = $now_get;
    $_SESSION['avatar_get']['avatar_get_uid'] = $info['avatar_get_uid'] = $avatar_get_uid;
    $_SESSION['avatar_get']['all_get_time'] = $all_get_time;

    $avatar = get_avatar($avatar_get_uid, 'middle');
    $icon_url = $avatar_web_url . $avatar;
    show_pick_info(array(milu_lang('the_uid'), ' <a target="_blank" href="' . $icon_url . '">' . $avatar_get_uid . '</a>' . milu_lang('the_avatar')), 'left', array('li_no_end' => 1, 'no_border' => 1, 'now' => $now_get));

    $snoopy_args = array();
    $snoopy_obj = get_snoopy_obj($snoopy_args);

    // Work out whether this uid has to be skipped, and with which message.
    $skip_msg_key = '';
    if (!$snoopy_obj) {
        $skip_msg_key = 'no_get_avatar';
        // get_show_arr() has not run on this path, so $show_arr only exists if
        // the caller supplied it through $args; fall back to an empty array
        // instead of passing an undefined variable to show_pick_info().
        $show_arr = isset($show_arr) ? $show_arr : array();
        $info['all_get_time'] = $all_get_time;
    } else {
        // This download is the "middle" sized avatar.
        $img_re = get_img_content($icon_url, $snoopy_obj);
        $show_arr = get_show_arr($now_get, $success_count, $get_count, $all_get_time);
        $info['all_get_time'] = $show_arr['all_get_time'];

        if (!$img_re) {
            $skip_msg_key = 'avatar_no_exists';
        } elseif (strlen($img_re) == 3972 || strlen($img_re) < 1000) {
            // 3972 bytes is the size of the Discuz default avatar.
            $skip_msg_key = 'user_no_set_avatar';
        }
    }

    if ($skip_msg_key !== '') {
        show_pick_info(milu_lang($skip_msg_key), 'err', $show_arr);
        $info['now_get']++;
        $info['avatar_get_uid']++;
        avatar_page_jump($now_get, $avata_jump_num, $get_count);
        return get_web_avatar($info, $i + 1);
    }

    // A usable avatar was downloaded.
    $now_time = time();
    $show_arr['show_js'] = 'show_icon(\'' . $show_arr['now'] . '\');';
    show_pick_info('<img width="48" height="48" style="margin:5px 0;float:right;" src="' . $icon_url . '">', 'success', $show_arr);
    show_pick_info(array(milu_lang('the_uid_set'), '<a target="_blank" href="home.php?mod=space&uid=' . $uid . '&do=profile">' . $uid . '</a>' . milu_lang('the_user_set_avatar')), 'left', array('li_no_end' => 1, 'no_border' => 1, 'now' => '-' . $show_arr['now'] . $now_time));

    // The order must not change: the "middle" image is already in $img_re.
    $size_arr = array('middle', 'big', 'small');

    // Create the avatar directory.
    // NOTE(review): $size is not assigned before this call unless it came in
    // through $args; the value is passed on unchanged (NULL when absent).
    // Confirm whether create_avatar_dir() needs a second argument at all.
    $create_re = create_avatar_dir($uid, isset($size) ? $size : null);
    if (!$create_re) {
        show_pick_info(milu_lang('avatar_dir_no_wirte'), 'err', $show_arr);
        return FALSE;
    }

    foreach ($size_arr as $size) {
        if ($size != 'middle') {
            $icon_url = $avatar_web_url . get_avatar($avatar_get_uid, $size);
            $img_re = get_img_content($icon_url, $snoopy_obj);
            // Bail out before touching the existing file: writing an empty
            // download would delete/truncate the current avatar and then be
            // reported as a directory permission problem.
            if (!$img_re) {
                show_pick_info(milu_lang('avatar_no_exists'), 'err', $show_arr);
                return FALSE;
            }
        }

        $avatar_dir_save = './uc_server/' . get_avatar($uid, $size);
        if ($cover_avatar == 1 && file_exists($avatar_dir_save)) {
            // Overwrite mode: remove the old avatar first.
            @unlink($avatar_dir_save);
        }

        // Write the avatar file.
        $put_re = file_put_contents($avatar_dir_save, $img_re);
        if (!$put_re) {
            show_pick_info(milu_lang('avatar_dir_no_wirte'), 'err', $show_arr);
            return FALSE;
        }
    }

    $success_count++;
    $_SESSION['avatar_get']['success_count'] = $success_count;

    $show_arr = get_show_arr($now_get, $success_count, $get_count, $all_get_time);
    $show_arr['now_get']++;
    $show_arr['avatar_get_uid']++;
    $show_arr['get_count'] = $get_count;

    $show_info_arr = $show_arr;
    $show_info_arr['now'] = '-' . $show_arr['now'] . $now_time;
    show_pick_info(milu_lang('success'), 'success', $show_info_arr);
    avatar_page_jump($now_get, $avata_jump_num, $get_count);

    // 'content' holds the last image fetched by the loop above (the "small" one).
    $arr = array('content' => $img_re, 'avatar_get_uid' => $avatar_get_uid + 1, 'now_get' => $now_get + 1, 'success_count' => $success_count, 'get_count' => $get_count);
    return $show_arr ? array_merge($show_arr, $arr) : $arr;
}
/**
 * Find the extraction rule for the page given in $_GET['url'].
 *
 * A local match from match_rules() is preferred. When there is none,
 * pick_match_coloud_rules() is consulted; a response with data_type == 1
 * carries the rule in its 'data' entry, which is stored through rules_add()
 * before del_search_index(2) is called.
 *
 * @return string 'no' when no usable rule was found, otherwise the JSON
 *                encoding of array(rules_type, rules_hash).
 */
function pick_match_rules() {
    $url = format_url(isset($_GET['url']) ? $_GET['url'] : null);
    d_s();
    $content = get_contents($url);

    $v = match_rules($url, $content, 2, 0);
    if (!$v || !is_array($v)) {
        $v = pick_match_coloud_rules($url);
        // Only index into the response when it really is an array; anything
        // else is handled by the final check below.
        if (is_array($v) && isset($v['data_type']) && $v['data_type'] == 1) {
            pload('F:rules');
            $v = $v['data'];
            rules_add($v);
            del_search_index(2);
        }
    }

    // A cloud response with another data_type is still an array but carries no
    // rule; without the rules_hash check it would be reported to the caller as
    // "[null,null]" instead of 'no'.
    if (!$v || !is_array($v) || !isset($v['rules_hash'])) {
        return 'no';
    }

    $rules_type = isset($v['rules_type']) ? $v['rules_type'] : null;
    return json_encode(array($rules_type, $v['rules_hash']));
}
/**
 * One-shot collection of the article at $_GET['url'].
 *
 * Downloads the page, runs get_single_article() on it, optionally applies the
 * synonym replacement table and the "article source" footer, adds timing
 * information and returns everything as JSON.
 *
 * Request parameters read: 'url' and 'type' (defaults to 'bbs' when empty).
 *
 * @return string JSON encoding of the js_base64_encode()'d article data.
 */
function fast_pick() {
    d_s('f_g');
    d_s('g_t');
    pload('F:spider');

    // NOTE(review): the URL comes straight from the request and is fetched
    // server-side; confirm that access to this entry point is restricted.
    $url = isset($_GET['url']) ? $_GET['url'] : null;
    $content = get_contents($url, array('cache' => -1));
    $get_time = d_e(0, 'g_t');

    $type = !empty($_GET['type']) ? $_GET['type'] : 'bbs';
    $milu_set = pick_common_get();
    $data = (array) get_single_article($content, $url);

    // Synonym replacement is switched on.
    $replace_on = isset($milu_set['fp_word_replace_open']) && $milu_set['fp_word_replace_open'] == 1;
    if ($replace_on && !VIP) {
        $words = get_replace_words();
        // The two-argument form of strtr() needs an array; with anything else
        // it fails and the text would be lost, so only replace when there is
        // a non-empty table.
        if (is_array($words) && $words) {
            if (!empty($data['title'])) {
                $data['title'] = strtr($data['title'], $words);
            }
            if (!empty($data['content'])) {
                $data['content'] = strtr($data['content'], $words);
            }
        }
    }

    // "Article source" is switched on.
    if (isset($milu_set['fp_article_from']) && $milu_set['fp_article_from'] == 1) {
        $data['fromurl'] = $url;
        if ($type == 'bbs' && !empty($data['content'])) {
            $data['content'] .= "[p=30, 2, left]" . milu_lang('article_from') . ':' . $url . "[/p]";
        }
    }

    $data['get_text_time'] = $get_time;
    $data['all_get_time'] = d_e(0, 'f_g');

    $data = js_base64_encode($data);
    return json_encode($data);
}
/**
 * Process every URL queued for one crawl level.
 *
 * Sets $this->now_level to $level and walks $this->now_url_arr. For each URL
 * that check_visit_url() accepts:
 *   - at level 1 the page is parsed and stored as an article (paginated or
 *     ordinary), its URL is recorded with insert_url(), and replies are
 *     collected when reply rules are configured;
 *   - at levels above 1 links are extracted from the page, stored as the URL
 *     list of the level below, and robot() is called again with $level - 1.
 * When the loop finishes, now_level is incremented and restart_robot() is
 * called with the new level.
 *
 * The method returns early (without a value) when the pick_num limit is
 * reached, when flip() returns a falsy value, or when the cached URL map has
 * become empty.
 *
 * @param int $level Level to process; 1 is the article level.
 * @return void
 */
function robot($level) {
    global $_G;
    // Assigned but not referenced again in this method.
    $pick_config = $_G['cache']['evn_milu_pick']['pick_config'];
    // Set to 1 once del_session_arr() has run for the current level, so the
    // call at the bottom of the loop is not repeated.
    $del_flag = 0;
    $this->now_level = $level;
    if (!$this->now_url_arr) {
        $this->restart_robot($this->now_level);
    }
    // Remember the URL list of this level in the cache if it is not there yet.
    if (!$this->pick_cache_data['url_arr'][$this->now_level]) {
        $this->pick_cache_data['url_arr'][$this->now_level] = $this->now_url_arr;
    }
    foreach ((array) $this->now_url_arr as $k => $url) {
        d_s('run');
        // Stop once pick_num is set and the counter i has reached pick_num + 2
        // (the two clauses together amount to i >= pick_num + 2).
        if ($this->p_arr['pick_num'] && $this->i == $this->p_arr['pick_num'] + 2 || $this->p_arr['pick_num'] && $this->i > $this->p_arr['pick_num'] + 2) {
            return;
        }
        $this->pick_cache_data['now_level'] = $this->now_level;
        $this->now_url = $url;
        // Take base_url from the current URL's host for url_range_type 3 or
        // when the current level equals manyou_max_level.
        if ($this->p_arr['url_range_type'] == 3 || $this->now_level == $this->p_arr['manyou_max_level']) {
            $host_arr = $this->GetHostInfo($url);
            $this->base_url = $host_arr['host'];
        }
        $this->format_url();
        $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $this->i));
        show_pick_info(array(milu_lang('read_link'), $this->now_url), 'url', $show_args);
        $this->i++;
        $this->temp_arr['have_reply'] = 0;
        $this->pick_cache_data['i'] = $this->i;
        $visit_flag = $this->check_visit_url();
        if ($visit_flag > 0) {
            if ($this->now_level == 1) {
                if ($this->p_arr['rules_type'] == 3) { // One-click collection: check whether this URL is an article page
                    if (!$this->check_fastpick_viewurl($this->now_url)) {
                        // Skips the rest of this iteration, including the
                        // del_session_arr() call at the bottom of the loop.
                        continue;
                    } else {
                        // (debug leftover) exit($this->now_url);
                    }
                }
                $content = $this->parse_page();
                $this->status_arr['now'] = $this->i;
                show_pick_info('', 'success', $this->status_arr);
                // Optional pause of stop_time[0] seconds after fetching the page.
                if ($this->p_arr['stop_time'][0]) {
                    sleep($this->p_arr['stop_time'][0]);
                }
                // $get becomes 1 when the page was handled as a paginated article.
                $get = 0;
                $this->temp_arr['have_page'] = 0;
                if ($this->p_arr['content_page_rules']) { // Paginated article
                    if ($this->p_arr['reply_rules'] || $this->p_arr['reply_is_extend']) { // Replies configured: pagination handling is skipped
                    } else {
                        $content_page_arr = $this->get_content_page($content);
                        if ($content_page_arr) {
                            $get = 1;
                            $this->a++;
                            $this->pick_cache_data['a'] = $this->a;
                            $this->temp_arr['have_page'] = 1;
                            $article_info_arr = $this->page_get_content($content, array(), array(), 
$content_page_arr);
                            if ($article_info_arr) { // Fetch the other fields
                                $other_arr = $this->get_article_other($content);
                                $other_arr = $other_arr ? $other_arr : array();
                                $article_info_arr = array_merge($article_info_arr, $other_arr);
                                $this->create_page_article($article_info_arr); // Store the paginated article
                            } else {
                                // Nothing extracted: bump the v_a counter.
                                $this->v_a++;
                                $this->pick_cache_data['v_a'] = $this->v_a;
                            }
                        }
                    }
                }
                if ($get == 0) { // Ordinary article
                    $ori_title = $this->get_ori_title($content);
                    // Display key built from the previous counter value and the current time.
                    $now = '-' . ($this->i - 1) . time();
                    $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $now));
                    show_pick_info(array(milu_lang('read_content'), cutstr($ori_title, 85)), 'left', $show_args);
                    $article_info = $this->get_article($content);
                    $this->status_arr['now'] = $now;
                    show_pick_info('', 'success', $this->status_arr);
                    $article_info = $this->format_article($article_info);
                    $this->get_pick_status();
                    // status_arr['now'] is rebuilt with a fresh time(), while
                    // $show_args below reuses the earlier $now.
                    $this->status_arr['now'] = '-' . ($this->i - 1) . time();
                    $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $now));
                    $this->temp_arr['normal_now'] = $now;
                    show_pick_info(array(milu_lang('article'), cutstr(trim($article_info['title']), 85)), 'left', $show_args);
                    if ($this->check_article($article_info)) {
                        $this->create_article($article_info);
                    } else {
                        // Article rejected by check_article(): bump the v_a counter.
                        $this->v_a++;
                        $this->pick_cache_data['v_a'] = $this->v_a;
                    }
                }
                $this->insert_url();
                // Replies are only collected when an article id is available.
                if ($this->aid || $this->public_info['insert_aid']) {
                    if ($this->p_arr['reply_rules'] || $this->p_arr['reply_is_extend']) { // The article has replies
                        if ($this->p_arr['is_public_del'] == 1 && $this->p_arr['is_auto_public'] == 1 && $this->p_arr['public_type'] != 2) { // Publishing directly, publish-without-storing, and not publishing to the forum: no need to collect replies
                        } else {
                            $now = '-' . ($this->i - 1) . 
time();
                            $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $now));
                            $this->temp_arr['reply_now'] = $now;
                            show_pick_info(array(milu_lang('pick_reply')), 'left', $show_args);
                            // reply_max_num is either "min,max" (a random value in
                            // that range is drawn) or a single number.
                            if (strexists($this->p_arr['reply_max_num'], ',')) {
                                $arr = explode(',', $this->p_arr['reply_max_num']);
                                $this->reply_max_num = rand($arr[0], $arr[1]);
                            } else {
                                $this->reply_max_num = intval($this->p_arr['reply_max_num']);
                            }
                            $this->oldurl_arr = NULL;
                            $reply_arr = $this->page_get_reply($content, array($this->now_url));
                            $reply_arr = sarray_unique($reply_arr); // Remove duplicates
                            $this->create_reply($reply_arr);
                            $this->oldurl_arr = NULL;
                            $this->temp_arr['have_reply'] = 1;
                        }
                    }
                }
            }
            $msg = '';
            $link_count = 0;
            $next_link = array();
            if ($this->now_level > 1) {
                if ($this->p_arr['url_range_type'] == 1 || $this->p_arr['url_range_type'] == 5 || $this->p_arr['rules_type'] == 1) { // Paged list or multi-level list: links are obtained with built-in rules
                    if ($this->p_arr['url_range_type'] == 5) {
                        // Pick the many_page_list entry that belongs to the current level.
                        $key_level = abs($this->now_level - 1 - count($this->p_arr['many_page_list'])) + 1;
                        $rules_arr = $this->p_arr['many_page_list'][$key_level];
                    } else {
                        // url_range_type is not 5 here, so given the outer
                        // condition this inner test always holds.
                        if ($this->p_arr['url_range_type'] == 1 || $this->p_arr['rules_type'] == 1) {
                            $rules_arr['type'] = $this->p_arr['page_get_type'];
                            $rules_arr['rules'] = $this->p_arr['page_link_rules'];
                        }
                    }
                    $content = $this->parse_page();
                    // type 1 -> dom_page_link(), type 2 -> string_page_link(),
                    // anything else -> evo_get_pagelink().
                    if ($rules_arr['type'] == 1) {
                        $next_link = dom_page_link($content, array('page_link_rules' => $rules_arr['rules'], 'url' => $this->now_url));
                    } else {
                        if ($rules_arr['type'] == 2) {
                            $next_link = string_page_link($content, trim($rules_arr['rules']), $this->now_url);
                        } else {
                            $next_link = evo_get_pagelink($content, $this->now_url);
                        }
                    }
                    if ($this->p_arr['url_range_type'] == 1 && !$rules_arr['rules']) {
                        $msg = ' : ' . milu_lang('no_set_list_rules');
                    }
                    $link_count = $this->temp_arr['per_num'] = count($next_link);
                    if ($link_count == 0 && $rules_arr['rules']) {
                        $msg = ' : ' . 
milu_lang('check_list_rules');
                    }
                    $this->get_pick_count();
                } else {
                    if ($this->p_arr['rules_type'] == 3) { // One-click collection
                        $content = $this->parse_page();
                        $next_link = evo_get_pagelink($content, $this->now_url, $this->pick_cache_data['lilely_page']);
                        $link_count = count($next_link);
                    }
                }
                $this->get_pick_status(1);
                show_pick_info(milu_lang('get_link_c', array('c' => $link_count)) . $msg, $link_count > 0 ? 'success' : 'err', $this->status_arr);
                // The extracted links become the URL list of the level below.
                if ($next_link) {
                    $this->pick_cache_data['url_arr'][$this->now_level - 1] = $this->now_url_arr = $next_link;
                }
            } else {
                // Level 1: reload the cached URL list of this level.
                $next_link = $this->now_url_arr = $this->pick_cache_data['url_arr'][$this->now_level];
            }
            if (!$this->flip()) {
                return;
            }
            $this->del_session_arr($this->now_level);
            if (!$this->pick_cache_data['url_arr']) {
                return;
            }
            $del_flag = 1;
            // Descend one level when links were found on a list page.
            if ($this->now_level > 1 && $next_link) {
                $this->now_level -= 1;
                $this->robot($level - 1);
            }
        } else {
            // URL rejected by check_visit_url(): bump the v_i counter and
            // report 'no_visit_err' followed by the returned flag value.
            $this->v_i++;
            $this->pick_cache_data['v_i'] = $this->v_i;
            $this->get_pick_status(1);
            show_pick_info(milu_lang('no_visit_err' . $visit_flag), 'err', $this->status_arr);
            if (!$this->flip()) {
                return;
            }
        }
        if ($del_flag != 1) {
            $this->del_session_arr($this->now_level);
        }
    }
    // All URLs of this level handled: move one level up and restart from there.
    $this->now_level += 1;
    $this->restart_robot($this->now_level);
}