/**
 * Run a query through the given XPath object and render the first result as HTML.
 *
 * @param mixed  $xpath Object exposing evaluate(); presumably a DOMXPath — confirm against callers.
 * @param string $query Expression handed to evaluate() unchanged.
 * @return mixed Whatever dom_to_html() produces for item 0 of the evaluation result
 *               (item 0 is passed through as-is, even when nothing matched).
 */
function find_content($xpath, $query)
{
    $first_node = $xpath->evaluate($query)->item(0);

    return dom_to_html($first_node);
}
/**
 * Build the onebox payload for a bbs.appgame.com forum thread URL.
 *
 * The page is downloaded with a desktop-browser User-Agent header, then the
 * post id, timestamp, title, post text and author avatar are scraped from the
 * markup with regular expressions and a nokogiri CSS selector.
 *
 * Optional pieces degrade gracefully instead of raising undefined-offset
 * notices: a missing timestamp is passed to format_time() as null, a missing
 * title or post body becomes an empty string, and a missing avatar leaves
 * 'image' as an empty string without calling get_redirect_url().
 *
 * @param string $req_url Thread URL; must look like
 *                        http://bbs.appgame.com/thread-<n>-<n>-<n>.html.
 * @return array|false Associative array with the keys onebox, provider_name,
 *                     provider_url, favicon_url, ori_url, title, image, ID,
 *                     description, update_time and create_time; false when the
 *                     URL does not match, the download is empty, or no
 *                     <div id="post_N"> container is found in the page.
 */
function process_bbs_appgame_url($req_url)
{
    // Rely on preg_match()'s return value rather than loosely comparing the
    // captures array against null.
    if (!preg_match('#^http://bbs\\.appgame\\.com/thread-[\\d]+-[\\d]+-[\\d]+\\.html$#us', $req_url)) {
        return false;
    }

    $user_agent = 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36';
    $html = curl_get_content($req_url, $user_agent);
    if (empty($html)) {
        return false;
    }

    // The first <div id="post_N"> supplies the post id that the later
    // selectors (td#postmessage_N, userinfoN) are built from.
    $regex_match = "#<div id=\"post_(\\d+)\">#s";
    if (!preg_match($regex_match, $html, $match)) {
        return false;
    }
    $pid = $match[1];

    // "发表于" is the "posted on" label; the following <span title="..."> holds
    // the timestamp. preg_match() empties $match on a miss, so the capture is
    // only read after a successful match.
    $time_str = preg_match('#发表于[^<]*?<span title="([^"]*?)">.*?</span>#s', $html, $match)
        ? $match[1]
        : null;

    // Page title with the trailing "_<section>_任玩堂..." suffix removed.
    $title = preg_match("#<title>([^<]*?)</title>#s", $html, $match) ? $match[1] : '';
    $title = preg_replace("#_[^_]+_任玩堂.*\$#u", '', $title);

    // mb_detect_encoding() returns false when it cannot decide; passing that
    // on as the source encoding makes mb_convert_encoding() fail, so fall
    // back to UTF-8.
    $encoding = mb_detect_encoding($html);
    if ($encoding === false) {
        $encoding = 'UTF-8';
    }
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', $encoding);

    // Locate the post body cell and flatten it to whitespace-free plain text.
    // NOTE(review): getDom() presumably wraps the matched nodes in a root
    // element, hence firstChild->childNodes->item(0) — confirm against the
    // nokogiri version in use. Every hop is guarded so an unmatched selector
    // yields an empty description instead of a null dereference.
    $saw = new nokogiri($html);
    $target = $saw->get('td#postmessage_' . $pid);
    $dom = $target->getDom();
    $root = is_object($dom) ? $dom->firstChild : null;
    $node = ($root !== null && $root->childNodes !== null) ? $root->childNodes->item(0) : null;
    $content = ($node !== null) ? strip_tags(dom_to_html($node)) : '';
    $content = preg_replace("#[\\s]+#us", '', $content);

    // Author avatar: first <img src> after the showauthor(this, 'userinfoN' hook.
    // The redirect lookup is skipped entirely when no avatar URL was found.
    $user_img = '';
    if (preg_match('#showauthor\\(this, \'userinfo' . $pid . '\'.*?<img .*?src="([^"]+?)"#s', $html, $match)) {
        $user_img = get_redirect_url($match[1]);
    }

    $res = array();
    $res['onebox'] = 'appgame-bbs';
    $res['provider_name'] = '任玩堂论坛';
    $res['provider_url'] = 'http://bbs.appgame.com/';
    $res['favicon_url'] = 'http://www.appgame.com/favicon.ico';
    $res['ori_url'] = $req_url;
    $res['title'] = $title;
    $res['image'] = $user_img;
    $res['ID'] = intval($pid);
    // preg_replace() yields null on a PCRE error; cast so trim() always gets a string.
    $res['description'] = trim((string) $content);
    $res['update_time'] = format_time($time_str);
    $res['create_time'] = $res['update_time'];

    return $res;
}