Ejemplo n.º 1
0
 } else {
     $message = '<table class="table table-bordered">';
     $express_type = '顺丰快递';
     $express_url = $url_frame_sf;
     foreach ($html_array as $element) {
         $message .= '<tr>';
         if ($element->children(1)) {
             //this row has two '<td></td>'s
             $message .= '<td>' . $element->children(0)->innertext . '</td>';
             $message .= '<td>' . $element->children(1)->innertext . '</td>';
         } else {
             //this row has only one '<td></td>'
             if ($element->children(0)->children(0)) {
                 //this is a iFrame
                 $url_china_src = $element->children(0)->children(0)->children(0)->src;
                 $url_frame_china = $express_url . find_num($url_china_src);
                 $message .= '<td colspan="2" >';
                 $message .= '<iframe frameborder="0" scrolling="no" style="overflow:hidden;" src="' . $url_frame_china . '" width="100%" height="260px"></iframe>';
                 $message .= '</td>';
             } else {
                 // this row contains express information in China
                 $china_express_info = $element->children(0)->innertext;
                 $express_type = find_express_name($china_express_info);
                 if ($express_type == '顺丰快递') {
                     $express_url = $url_frame_sf;
                 } else {
                     if ($express_type == '天天快递') {
                         $express_url = $url_frame;
                     }
                 }
                 $message .= '<td colspan="2">' . $china_express_info . '</td>';
Ejemplo n.º 2
0
/**
 * Scans the pages configured for each schema for its keywords and reports the first hit.
 *
 * Per schema, the candidate URLs are taken from $_channels[<channel>]['url'] when the
 * schema has a channel, otherwise from the space-separated $v->url. Every page is
 * downloaded at most once per call: the (format_script-processed) body is written to
 * CACHE_PATH . md5(url) . '.tmp' and re-read from there when the same URL shows up again.
 *
 * The first keyword is located in the page, find_key() is asked about the remaining
 * keywords, and the surrounding markup (a few closing tags to the left and right) is cut
 * out, stripped of all tags except <a>, optionally passed through find_num(), shortened
 * and converted to UTF-8. The first non-empty excerpt of a schema ends the search for
 * that schema.
 *
 * In normal mode a hit switches the schema to status 'off', stores a message and, when
 * the user enabled them, sends e-mail / push notifications. The global $_user is
 * overwritten with the schema owner as a side effect.
 *
 * @param array|Traversable $schemas Schema objects; the code reads channel, url, keywords,
 *                                   max_num, min_num, user_id, schema_id and title.
 * @param mixed             $test    When truthy, the first generated message is returned
 *                                   instead of being stored or sent.
 *
 * @return string|null The message HTML for the first hit in test mode; null otherwise.
 */
function fetch_web($schemas, $test = null)
{
    global $_user, $_channels;
    $updated_file = array();
    foreach ($schemas as $v) {
        // Nothing to do without a page source (channel or url) or without keywords.
        if ((!$v->channel && $v->url == '') || $v->keywords == '') {
            continue;
        }
        $url_arr = $v->channel ? $_channels[$v->channel]['url'] : explode(' ', $v->url);
        foreach ($url_arr as $url_key => $url) {
            $url = mkurl($v->channel, $url_key, $url, $v->url);
            $filename = md5($url) . '.tmp';
            if (in_array($filename, $updated_file, true)) {
                // Already downloaded during this call: reuse the cached copy.
                $html = file_get_contents(CACHE_PATH . $filename);
            } else {
                $context = $v->channel ? mkfp_context($url_key) : null;
                $html = file_get_contents($url, false, $context);
                if ($html !== false) {
                    $html = format_script($html);
                    file_put_contents(CACHE_PATH . $filename, $html);
                    $updated_file[] = $filename;
                }
            }
            if ($html === false) {
                // Download or cache read failed: do not cache or search a non-page.
                continue;
            }

            // Process the keywords: they are converted to the page's encoding so that the
            // byte-wise search below can match them. mbstring reports GB2312 as 'EUC-CN'
            // and GBK as 'CP936'.
            $keywords = $v->keywords;
            $encoding = mb_detect_encoding($html, array('ASCII', 'GB2312', 'GBK', 'UTF-8'));
            if ($encoding == 'EUC-CN') {
                $encoding = 'GB2312';
                $keywords = iconv('UTF-8', 'GBK', $keywords);
            } elseif ($encoding == 'CP936') {
                $keywords = iconv('UTF-8', $encoding, $keywords);
            }
            $keywords = explode(' ', $keywords);
            $first_key = array_shift($keywords);

            // Starts from an empty excerpt for every URL, so a page without a match never
            // works on an undefined or stale value.
            $result = fetch_web_find_excerpt($html, $first_key, $keywords);
            $result = trim(strip_tags($result, '<a>'));

            // Make sure the excerpt contains a number when limits are configured.
            // NOTE(review): find_num() is defined elsewhere; it is assumed to return a
            // falsy value when no suitable number is present - confirm.
            if ($v->max_num != '' || $v->min_num != '') {
                $result = find_num($result, $v->max_num, $v->min_num);
            }
            if (!$result) {
                continue;
            }

            $result = str_replace(array("\n", "\r", "\t", "  "), '', $result);
            if ($encoding != 'UTF-8') {
                // Cut on a character boundary: a plain byte-wise substr() can split a
                // double-byte GBK character, which makes iconv() fail on the tail.
                $result = mb_strcut($result, 0, 400, 'GBK');
                $result = iconv('GBK', 'UTF-8', $result);
            } else {
                $result = mb_substr($result, 0, 400, 'utf8');
            }
            $website = isset($_channels[$v->channel]['website'][$url_key]) ? $_channels[$v->channel]['website'][$url_key] : $url;
            $content = $result . "<a href='{$website}' target='_blank'>查看</a>";
            if ($test) {
                return $content;
            }

            $_user = user::get_one(array('user_id' => $v->user_id));
            schema::update(array('status' => 'off', 'schema_id' => $v->schema_id));
            msg::add(array('schema_id' => $v->schema_id, 'title' => $v->title, 'content' => $content, 'status' => 'new'));
            $email = filter_var($_user->email, FILTER_VALIDATE_EMAIL);
            if ($_user->email_notify == 'on' && $email) {
                mail::send_cron($email, $v->title, $content);
            }
            if ($_user->app_notify == 'on' && $_user->baidu_uid) {
                push::push_message($_user->baidu_uid, $v->title, $result);
            }
            // First hit wins: the remaining URLs of this schema are not inspected.
            break;
        }
    }
}

/**
 * Cuts the markup surrounding the first usable keyword match out of a page.
 *
 * Every occurrence of $first_key is tried in order. For each one find_key() is called
 * with the remaining keywords and the offset just behind the occurrence; the first
 * occurrence for which it returns a truthy value is used.
 * NOTE(review): find_key() is defined elsewhere; its return value is treated here as a
 * byte offset into $html - confirm.
 *
 * The excerpt starts at the fifth '</' before the keyword (or at the start of the page
 * when there are fewer) and ends at the fourth '</' after the offset returned by
 * find_key() (or at the end of the page when there are fewer).
 *
 * @param string $html      Page body, searched byte-wise.
 * @param string $first_key First keyword, already in the page's encoding.
 * @param array  $keywords  Remaining keywords, handed to find_key() unchanged.
 *
 * @return string The raw excerpt, or '' when nothing matched.
 */
function fetch_web_find_excerpt($html, $first_key, $keywords)
{
    $key_len = strlen($first_key);
    if ($key_len === 0) {
        // An empty needle matches at every offset on PHP 8 and would never advance.
        return '';
    }
    $html_len = strlen($html);

    $lpos = strpos($html, $first_key);
    while ($lpos !== false) {
        $rpos = find_key($html, $keywords, $lpos + $key_len);
        if ($rpos) {
            // Get the keyword context: walk left over up to five closing tags.
            $start = $lpos;
            for ($step = 0; $step < 5; $step++) {
                if ($start < 2) {
                    // No room left for another '</'; the negative offset below would
                    // also fall outside the string.
                    $start = 0;
                    break;
                }
                // Negative offset: only matches beginning at or before $start - 2 count.
                $found = strrpos($html, '</', $start - $html_len - 2);
                if ($found === false) {
                    $start = 0;
                    break;
                }
                $start = $found;
            }

            // Walk right over up to four closing tags; stop at the first miss instead
            // of letting a failed search restart from the top of the page.
            $end = $rpos;
            for ($step = 0; $step < 4; $step++) {
                $from = $end + 2;
                $found = $from <= $html_len ? strpos($html, '</', $from) : false;
                if ($found === false) {
                    $end = $html_len;
                    break;
                }
                $end = $found;
            }

            return substr($html, $start, $end - $start);
        }
        $lpos = strpos($html, $first_key, $lpos + $key_len);
    }

    return '';
}