예제 #1
0
/**
 * Extract a single article from an already-downloaded page.
 *
 * Sources are tried in this order:
 *  1. a rule returned by match_rules() (parsed with rules_get_article());
 *  2. otherwise whatever cloud_match_rules() returns;
 *  3. otherwise, when the 'fp_open_auto' setting equals 1, the HtmlExtractor
 *     heuristics, followed by attachment/image URL formatting and removal of
 *     "share" and "page" DIVs.
 *
 * When $_GET['type'] is 'bbs' the content is additionally passed through
 * media_htmlbbcode() and img_htmlbbcode().
 *
 * @param string $content Raw page source.
 * @param string $url     URL the page was fetched from.
 * @param array  $args    Optional settings. Only 'get_type' is read; a truthy
 *                        $_GET['get_type'] takes precedence over it.
 * @return array|null Article data plus an 'evo_time' entry (value of
 *                    d_e(0, 'evo')), or null when $content is blank.
 */
function get_single_article($content, $url, $args = array())
{
    // Read the single supported option explicitly. extract($args) would let a
    // key such as 'content', 'url' or '_G' overwrite the parameters/global.
    $get_type = isset($args['get_type']) ? $args['get_type'] : 0;
    if (strlen(trim($content)) < 1) {
        return;
    }
    d_s('evo');
    if (!empty($_GET['get_type'])) {
        $get_type = intval($_GET['get_type']);
    }
    // Fall back to type 1 when nothing usable was supplied.
    $get_type = $get_type ? $get_type : 1;
    $milu_set = pick_common_get();
    $rules_info = match_rules($url, $content, $get_type, 0);
    if (is_array($rules_info)) {
        pload('F:fastpick');
        $data = rules_get_article($content, $rules_info);
        write_evo_errlog($data, $url, $rules_info);
        // Keys are written into $data below, so make sure it is an array.
        if (!is_array($data)) {
            $data = array();
        }
    } else {
        // Download the rule from the cloud. This should be optimised; no
        // approach has been found yet.
        $data = (array) cloud_match_rules($get_type, $url, $content);
        $auto_enabled = isset($milu_set['fp_open_auto']) && $milu_set['fp_open_auto'] == 1;
        if (empty($data['content']) && $auto_enabled) {
            // Smart extraction is enabled.
            pload('C:HtmlExtractor');
            pload('F:article');
            $he = new HtmlExtractor($content, $url);
            $data = (array) $he->get_text();
            $data['content'] = dz_attach_format($url, $data['content']);
            $arr = format_article_imgurl($url, $data['content']);
            $data['content'] = $arr['message'];
            $del_dom_rules = array('div[id*=share]', 'div[class*=page]');
            foreach ($del_dom_rules as $selector) {
                $data['content'] = dom_filter_something($data['content'], $selector, 2);
            }
            unset($data['evo_title_info']);
        }
    }
    if (isset($_GET['type']) && $_GET['type'] == 'bbs') {
        $body = isset($data['content']) ? $data['content'] : null;
        $body = media_htmlbbcode($body, $url);
        $data['content'] = img_htmlbbcode($body, $url);
    }
    $data['evo_time'] = d_e(0, 'evo');
    return $data;
}
예제 #2
0
/**
 * Download the avatar of one remote user and store it as the avatar of a
 * local user, reporting progress through show_pick_info().
 *
 * The function works on local variables created by extract($args). The body
 * reads $now_get, $avatar_get_uid, $all_get_time, $avatar_web_url,
 * $avata_jump_num, $get_count, $success_count, $uid and $cover_avatar, so
 * these are presumably keys of $args (TODO confirm against the caller).
 *
 * When the avatar cannot be fetched or looks like a default avatar, the
 * function calls itself with 'now_get' and 'avatar_get_uid' incremented.
 * Progress is mirrored into $_SESSION['avatar_get'].
 *
 * @param array $args Job state; see the variable list above.
 * @param int   $i    Recursion depth. Above 49 an error is shown and the
 *                    script exits.
 * @return array|false FALSE when the avatar directory or file cannot be
 *                     written; on success the result of get_show_arr() merged
 *                     with 'content', 'avatar_get_uid', 'now_get',
 *                     'success_count' and 'get_count'.
 */
function get_web_avatar($args, $i = 0)
{
    d_s();
    // Keep an untouched copy of the job state for the recursive retries.
    $info = $args;
    if ($i > 49) {
        // Too many consecutive misses: report and stop the whole script.
        show_pick_info(milu_lang('alway_no_get'), 'show_err');
        exit;
    }
    // NOTE(review): extract() creates/overwrites locals from caller data,
    // including $info and $i if such keys are present.
    extract($args);
    $_SESSION['avatar_get']['now_get'] = $info['now_get'] = $now_get;
    $_SESSION['avatar_get']['avatar_get_uid'] = $info['avatar_get_uid'] = $avatar_get_uid;
    $_SESSION['avatar_get']['all_get_time'] = $all_get_time;
    $avatar = get_avatar($avatar_get_uid, 'middle');
    $icon_url = $avatar_web_url . $avatar;
    show_pick_info(array(milu_lang('the_uid'), ' <a target="_blank" href="' . $icon_url . '">' . $avatar_get_uid . '</a>' . milu_lang('the_avatar')), 'left', array('li_no_end' => 1, 'no_border' => 1, 'now' => $now_get));
    $snoopy_args = array();
    $snoopy_obj = get_snoopy_obj($snoopy_args);
    if (!$snoopy_obj) {
        // NOTE(review): $show_arr is not assigned before this point unless
        // $args contains a 'show_arr' key — confirm.
        show_pick_info(milu_lang('no_get_avatar'), 'err', $show_arr);
        $info['all_get_time'] = $all_get_time;
        $info['now_get']++;
        $info['avatar_get_uid']++;
        avatar_page_jump($now_get, $avata_jump_num, $get_count);
        return get_web_avatar($info, $i + 1);
    }
    $img_re = get_img_content($icon_url, $snoopy_obj);
    // What was fetched is the 'middle' size avatar.
    $show_arr = get_show_arr($now_get, $success_count, $get_count, $all_get_time);
    $info['all_get_time'] = $show_arr['all_get_time'];
    if (!$img_re) {
        // Nothing downloaded: move on to the next remote uid.
        show_pick_info(milu_lang('avatar_no_exists'), 'err', $show_arr);
        $info['now_get']++;
        $info['avatar_get_uid']++;
        avatar_page_jump($now_get, $avata_jump_num, $get_count);
        return get_web_avatar($info, $i + 1);
    }
    if (strlen($img_re) == 3972 || strlen($img_re) < 1000) {
        // 3972 is the size of the default Discuz avatar; such a download (or
        // anything under 1000 bytes) is reported as "no avatar set".
        show_pick_info(milu_lang('user_no_set_avatar'), 'err', $show_arr);
        $info['now_get']++;
        $info['avatar_get_uid']++;
        avatar_page_jump($now_get, $avata_jump_num, $get_count);
        return get_web_avatar($info, $i + 1);
    } else {
        // An avatar was obtained.
        $now_time = time();
        $show_arr['show_js'] = 'show_icon(\'' . $show_arr['now'] . '\');';
        show_pick_info('<img width="48" height="48" style="margin:5px 0;float:right;" src="' . $icon_url . '">', 'success', $show_arr);
        show_pick_info(array(milu_lang('the_uid_set'), '<a target="_blank" href="home.php?mod=space&uid=' . $uid . '&do=profile">' . $uid . '</a>' . milu_lang('the_user_set_avatar')), 'left', array('li_no_end' => 1, 'no_border' => 1, 'now' => '-' . $show_arr['now'] . $now_time));
        $size_arr = array('middle', 'big', 'small');
        // The order must not be changed: 'middle' comes first because its
        // image data is already in $img_re.
        // NOTE(review): $size is only assigned by the foreach below, so it is
        // undefined here unless $args carries a 'size' key — confirm.
        $create_re = create_avatar_dir($uid, $size);
        // Create the avatar directory.
        if (!$create_re) {
            show_pick_info(milu_lang('avatar_dir_no_wirte'), 'err', $show_arr);
            return FALSE;
        }
        foreach ($size_arr as $size) {
            if ($size != 'middle') {
                // Other sizes are downloaded on demand.
                $icon_url = $avatar_web_url . get_avatar($avatar_get_uid, $size);
                $img_re = get_img_content($icon_url, $snoopy_obj);
            }
            $avatar_dir_save = './uc_server/' . get_avatar($uid, $size);
            if ($cover_avatar == 1 && file_exists($avatar_dir_save)) {
                // Overwrite the old avatar.
                @unlink($avatar_dir_save);
            }
            $put_re = file_put_contents($avatar_dir_save, $img_re);
            // Write the avatar; a false or zero-byte result is treated as failure.
            if (!$put_re) {
                show_pick_info(milu_lang('avatar_dir_no_wirte'), 'err', $show_arr);
                return FALSE;
            }
        }
        $success_count++;
        $_SESSION['avatar_get']['success_count'] = $success_count;
        $show_arr = get_show_arr($now_get, $success_count, $get_count, $all_get_time);
        // These two counters are replaced by the values in $arr when the
        // arrays are merged for the return value.
        $show_arr['now_get']++;
        $show_arr['avatar_get_uid']++;
        $show_arr['get_count'] = $get_count;
        $show_info_arr = $show_arr;
        $show_info_arr['now'] = '-' . $show_arr['now'] . $now_time;
        show_pick_info(milu_lang('success'), 'success', $show_info_arr);
        avatar_page_jump($now_get, $avata_jump_num, $get_count);
        // 'content' holds the last image downloaded in the loop ('small').
        $arr = array('content' => $img_re, 'avatar_get_uid' => $avatar_get_uid + 1, 'now_get' => $now_get + 1, 'success_count' => $success_count, 'get_count' => $get_count);
        return $show_arr ? array_merge($show_arr, $arr) : $arr;
    }
}
예제 #3
0
/**
 * Find the picking rule that applies to the page given in $_GET['url'].
 *
 * match_rules() is tried first. When it yields nothing usable,
 * pick_match_coloud_rules() is queried; a response whose 'data_type' is 1
 * carries a rule in 'data', which is stored with rules_add() before the
 * search index is cleared with del_search_index(2).
 *
 * @return string 'no' when no rule with both 'rules_type' and 'rules_hash'
 *                was found, otherwise the JSON array [rules_type, rules_hash].
 */
function pick_match_rules()
{
    $url = format_url(isset($_GET['url']) ? $_GET['url'] : null);
    d_s();
    $content = get_contents($url);
    $rule = match_rules($url, $content, 2, 0);
    if (!$rule || !is_array($rule)) {
        $rule = pick_match_coloud_rules($url);
        // Only index the response when it really is an array; a scalar
        // response must not be treated as a rule container.
        if (is_array($rule) && isset($rule['data_type']) && $rule['data_type'] == 1) {
            pload('F:rules');
            $rule = isset($rule['data']) ? $rule['data'] : null;
            rules_add($rule);
            del_search_index(2);
        }
    }
    if (!$rule || !is_array($rule)) {
        return 'no';
    }
    // A cloud response that did not carry a rule would otherwise be encoded
    // as [null,null] and look like a match to the caller.
    if (!array_key_exists('rules_type', $rule) || !array_key_exists('rules_hash', $rule)) {
        return 'no';
    }
    return json_encode(array($rule['rules_type'], $rule['rules_hash']));
}
예제 #4
0
/**
 * One-shot pick of the page named by $_GET['url'].
 *
 * Downloads the page, extracts the article with get_single_article(),
 * optionally applies the word-replacement table and the "article source"
 * footer according to the plugin settings, and returns the result as JSON.
 *
 * Request parameters read: 'url' and 'type' (defaults to 'bbs').
 *
 * @return string JSON encoding of js_base64_encode() applied to the article
 *                data, which also carries 'get_text_time' and 'all_get_time'.
 */
function fast_pick()
{
    global $_G;
    // 'f_g' is closed at the very end, 'g_t' right after the download
    // (presumably timers — their values end up in the *_time fields).
    d_s('f_g');
    d_s('g_t');
    pload('F:spider');
    $page_url = $_GET['url'];
    $html = get_contents($page_url, array('cache' => -1));
    $fetch_time = d_e(0, 'g_t');
    $target = 'bbs';
    if ($_GET['type']) {
        $target = $_GET['type'];
    }
    $settings = pick_common_get();
    $article = (array) get_single_article($html, $page_url);

    // Word replacement: only when switched on and VIP is falsy.
    $replace_enabled = $settings['fp_word_replace_open'] == 1 && !VIP;
    if ($replace_enabled) {
        $dictionary = get_replace_words();
        foreach (array('title', 'content') as $field) {
            if ($article[$field]) {
                $article[$field] = strtr($article[$field], $dictionary);
            }
        }
    }

    // Record the source URL and, for forum posts, append it to the body.
    if ($settings['fp_article_from'] == 1) {
        $article['fromurl'] = $page_url;
        $append_source = $target == 'bbs' && $article['content'];
        if ($append_source) {
            $article['content'] .= "[p=30, 2, left]" . milu_lang('article_from') . ':' . $page_url . "[/p]";
        }
    }

    $article['get_text_time'] = $fetch_time;
    $article['all_get_time'] = d_e(0, 'f_g');
    return json_encode(js_base64_encode($article));
}
예제 #5
0
 /**
  * Walk the URL queue of one crawl level and descend towards level 1.
  *
  * For every URL in $this->now_url_arr the method checks whether it may be
  * visited, then:
  *  - at level 1 it treats the page as an article page: parses it, stores a
  *    paginated or ordinary article and, when reply rules are configured,
  *    its replies;
  *  - at levels above 1 it treats the page as a list page: extracts the next
  *    links, stores them as the queue of level - 1 and calls itself for
  *    that level.
  * After the loop the level is raised by one and restart_robot() is called.
  *
  * Progress counters (i, a, v_a, v_i) are mirrored into
  * $this->pick_cache_data as they change.
  *
  * @param int $level Crawl level to process; 1 is the article level.
  * @return void Returns early when the pick limit is reached, when flip()
  *              returns a falsy value, or when the cached URL queues are empty.
  */
 function robot($level)
 {
     global $_G;
     // NOTE(review): $pick_config is assigned but never read in this method.
     $pick_config = $_G['cache']['evn_milu_pick']['pick_config'];
     $del_flag = 0;
     $this->now_level = $level;
     if (!$this->now_url_arr) {
         $this->restart_robot($this->now_level);
     }
     // Seed the cached queue of this level on first use.
     if (!$this->pick_cache_data['url_arr'][$this->now_level]) {
         $this->pick_cache_data['url_arr'][$this->now_level] = $this->now_url_arr;
     }
     foreach ((array) $this->now_url_arr as $k => $url) {
         d_s('run');
         // Stop once the counter reaches pick_num + 2; the two alternatives
         // together amount to "pick_num is set and i >= pick_num + 2".
         if ($this->p_arr['pick_num'] && $this->i == $this->p_arr['pick_num'] + 2 || $this->p_arr['pick_num'] && $this->i > $this->p_arr['pick_num'] + 2) {
             return;
         }
         $this->pick_cache_data['now_level'] = $this->now_level;
         $this->now_url = $url;
         if ($this->p_arr['url_range_type'] == 3 || $this->now_level == $this->p_arr['manyou_max_level']) {
             // Use the host of the current URL as the base URL.
             $host_arr = $this->GetHostInfo($url);
             $this->base_url = $host_arr['host'];
         }
         $this->format_url();
         $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $this->i));
         show_pick_info(array(milu_lang('read_link'), $this->now_url), 'url', $show_args);
         $this->i++;
         $this->temp_arr['have_reply'] = 0;
         $this->pick_cache_data['i'] = $this->i;
         // A value above 0 means the URL may be visited; other values select
         // the 'no_visit_err<flag>' message in the else branch below.
         $visit_flag = $this->check_visit_url();
         if ($visit_flag > 0) {
             if ($this->now_level == 1) {
                 if ($this->p_arr['rules_type'] == 3) {
                     // For one-click picking, check whether this URL is an article page.
                     if (!$this->check_fastpick_viewurl($this->now_url)) {
                         continue;
                     } else {
                         //exit($this->now_url);
                     }
                 }
                 $content = $this->parse_page();
                 $this->status_arr['now'] = $this->i;
                 show_pick_info('', 'success', $this->status_arr);
                 if ($this->p_arr['stop_time'][0]) {
                     // Configured pause before processing the page.
                     sleep($this->p_arr['stop_time'][0]);
                 }
                 // $get becomes 1 when the page was handled as a paginated article.
                 $get = 0;
                 $this->temp_arr['have_page'] = 0;
                 if ($this->p_arr['content_page_rules']) {
                     // Paginated article.
                     if ($this->p_arr['reply_rules'] || $this->p_arr['reply_is_extend']) {
                         // Replies are configured: pagination is not handled here.
                     } else {
                         $content_page_arr = $this->get_content_page($content);
                         if ($content_page_arr) {
                             $get = 1;
                             $this->a++;
                             $this->pick_cache_data['a'] = $this->a;
                             $this->temp_arr['have_page'] = 1;
                             $article_info_arr = $this->page_get_content($content, array(), array(), $content_page_arr);
                             if ($article_info_arr) {
                                 // Fetch the other fields.
                                 $other_arr = $this->get_article_other($content);
                                 $other_arr = $other_arr ? $other_arr : array();
                                 $article_info_arr = array_merge($article_info_arr, $other_arr);
                                 $this->create_page_article($article_info_arr);
                                 // Store the paginated article.
                             } else {
                                 $this->v_a++;
                                 $this->pick_cache_data['v_a'] = $this->v_a;
                             }
                         }
                     }
                 }
                 if ($get == 0) {
                     // Ordinary article.
                     $ori_title = $this->get_ori_title($content);
                     // Message id: '-' + index of this URL + current timestamp.
                     $now = '-' . ($this->i - 1) . time();
                     $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $now));
                     show_pick_info(array(milu_lang('read_content'), cutstr($ori_title, 85)), 'left', $show_args);
                     $article_info = $this->get_article($content);
                     $this->status_arr['now'] = $now;
                     show_pick_info('', 'success', $this->status_arr);
                     $article_info = $this->format_article($article_info);
                     $this->get_pick_status();
                     $this->status_arr['now'] = '-' . ($this->i - 1) . time();
                     $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $now));
                     $this->temp_arr['normal_now'] = $now;
                     show_pick_info(array(milu_lang('article'), cutstr(trim($article_info['title']), 85)), 'left', $show_args);
                     if ($this->check_article($article_info)) {
                         $this->create_article($article_info);
                     } else {
                         // Article rejected by check_article(): count it.
                         $this->v_a++;
                         $this->pick_cache_data['v_a'] = $this->v_a;
                     }
                 }
                 $this->insert_url();
                 if ($this->aid || $this->public_info['insert_aid']) {
                     if ($this->p_arr['reply_rules'] || $this->p_arr['reply_is_extend']) {
                         // The article has replies.
                         if ($this->p_arr['is_public_del'] == 1 && $this->p_arr['is_auto_public'] == 1 && $this->p_arr['public_type'] != 2) {
                             // When publishing directly, publishing without storing, and not
                             // publishing to the forum, there is no need to pick replies.
                         } else {
                             $now = '-' . ($this->i - 1) . time();
                             $show_args = array_merge($this->msg_args, array('li_no_end' => 1, 'no_border' => 1, 'now' => $now));
                             $this->temp_arr['reply_now'] = $now;
                             show_pick_info(array(milu_lang('pick_reply')), 'left', $show_args);
                             // 'reply_max_num' is either a single number or a "min,max"
                             // pair from which a random limit is drawn.
                             if (strexists($this->p_arr['reply_max_num'], ',')) {
                                 $arr = explode(',', $this->p_arr['reply_max_num']);
                                 $this->reply_max_num = rand($arr[0], $arr[1]);
                             } else {
                                 $this->reply_max_num = intval($this->p_arr['reply_max_num']);
                             }
                             $this->oldurl_arr = NULL;
                             $reply_arr = $this->page_get_reply($content, array($this->now_url));
                             $reply_arr = sarray_unique($reply_arr);
                             // De-duplicate.
                             $this->create_reply($reply_arr);
                             $this->oldurl_arr = NULL;
                             $this->temp_arr['have_reply'] = 1;
                         }
                     }
                 }
             }
             $msg = '';
             $link_count = 0;
             $next_link = array();
             if ($this->now_level > 1) {
                 if ($this->p_arr['url_range_type'] == 1 || $this->p_arr['url_range_type'] == 5 || $this->p_arr['rules_type'] == 1) {
                     // Paged list or multi-level list: links are obtained with the built-in rules.
                     // NOTE(review): $rules_arr is not reset per iteration, so keys set
                     // for an earlier URL remain visible here.
                     if ($this->p_arr['url_range_type'] == 5) {
                         $key_level = abs($this->now_level - 1 - count($this->p_arr['many_page_list'])) + 1;
                         $rules_arr = $this->p_arr['many_page_list'][$key_level];
                     } else {
                         if ($this->p_arr['url_range_type'] == 1 || $this->p_arr['rules_type'] == 1) {
                             $rules_arr['type'] = $this->p_arr['page_get_type'];
                             $rules_arr['rules'] = $this->p_arr['page_link_rules'];
                         }
                     }
                     $content = $this->parse_page();
                     // Rule type 1: DOM rule; type 2: string rule; anything else:
                     // evo_get_pagelink().
                     if ($rules_arr['type'] == 1) {
                         $next_link = dom_page_link($content, array('page_link_rules' => $rules_arr['rules'], 'url' => $this->now_url));
                     } else {
                         if ($rules_arr['type'] == 2) {
                             $next_link = string_page_link($content, trim($rules_arr['rules']), $this->now_url);
                         } else {
                             $next_link = evo_get_pagelink($content, $this->now_url);
                         }
                     }
                     if ($this->p_arr['url_range_type'] == 1 && !$rules_arr['rules']) {
                         $msg = ' : ' . milu_lang('no_set_list_rules');
                     }
                     $link_count = $this->temp_arr['per_num'] = count($next_link);
                     if ($link_count == 0 && $rules_arr['rules']) {
                         $msg = ' : ' . milu_lang('check_list_rules');
                     }
                     $this->get_pick_count();
                 } else {
                     if ($this->p_arr['rules_type'] == 3) {
                         // One-click picking.
                         $content = $this->parse_page();
                         $next_link = evo_get_pagelink($content, $this->now_url, $this->pick_cache_data['lilely_page']);
                         $link_count = count($next_link);
                     }
                 }
                 $this->get_pick_status(1);
                 show_pick_info(milu_lang('get_link_c', array('c' => $link_count)) . $msg, $link_count > 0 ? 'success' : 'err', $this->status_arr);
                 if ($next_link) {
                     // The links found become the queue of the level below.
                     $this->pick_cache_data['url_arr'][$this->now_level - 1] = $this->now_url_arr = $next_link;
                 }
             } else {
                 $next_link = $this->now_url_arr = $this->pick_cache_data['url_arr'][$this->now_level];
             }
             if (!$this->flip()) {
                 return;
             }
             $this->del_session_arr($this->now_level);
             if (!$this->pick_cache_data['url_arr']) {
                 return;
             }
             // Marks that del_session_arr() already ran for this URL.
             // NOTE(review): the flag is never reset to 0 for later iterations.
             $del_flag = 1;
             if ($this->now_level > 1 && $next_link) {
                 $this->now_level -= 1;
                 // NOTE(review): the argument is derived from $level, not from
                 // $this->now_level; confirm the two stay in step after the
                 // recursive call returns.
                 $this->robot($level - 1);
             }
         } else {
             // URL may not be visited: count it and report the reason.
             $this->v_i++;
             $this->pick_cache_data['v_i'] = $this->v_i;
             $this->get_pick_status(1);
             show_pick_info(milu_lang('no_visit_err' . $visit_flag), 'err', $this->status_arr);
             if (!$this->flip()) {
                 return;
             }
         }
         if ($del_flag != 1) {
             $this->del_session_arr($this->now_level);
         }
     }
     // Queue of this level is finished: go one level up and restart.
     $this->now_level += 1;
     $this->restart_robot($this->now_level);
 }