/**
 * Fetch an article page, follow its "next page" links, and echo the scraped
 * fields as a JSON object.
 *
 * Extracted fields:
 *  - title:   text of the first <h1> element
 *  - time:    result of strtotime() on the text of <div class="time">, or 0
 *             when that element is not found
 *  - short:   text of <div class="b-review">
 *  - content: everything between the two HTML comment markers of the body
 *             pattern, concatenated over all pages, with tags stripped
 *
 * The function prints the JSON document; it does not return a value.
 *
 * @param string $url Address of the first page. Continuation pages are
 *                    addressed by replacing ".html" in this URL with the
 *                    "_N.html" suffix taken from the next-page link.
 * @return void
 */
function getHtml($url)
{
    $curlObj = new Curl();
    $curlObj->setUrl($url);
    $html = $curlObj->run();

    // Title
    $p = '/<h1>(.*?)<\\/h1>/';
    preg_match($p, $html, $match);
    $title = isset($match[1]) ? $match[1] : '';

    // Date
    $p = '/<div class="time">(.*?)<\\/div>/';
    preg_match($p, $html, $match);
    $time = isset($match[1]) ? $match[1] : 0;
    if (!empty($time)) {
        // Turn the year/month/day markers into separators instead of deleting
        // them, so that unpadded dates such as "2016年1月2日" do not collapse
        // into an ambiguous digit run before being handed to strtotime().
        $time = str_replace(array('年', '月'), '-', $time);
        $time = trim(str_replace('日', ' ', $time));
        $time = strtotime($time);
    }

    // Summary
    $p = '/<div class="b-review">(.*?)<\\/div>/';
    preg_match($p, $html, $match);
    $short = isset($match[1]) ? $match[1] : '';

    // Body of the first page
    $bodyPattern = '/<!--文章主体-->([\\s|\\S]*?)<!--原文标题-->/';
    preg_match($bodyPattern, $html, $match);
    $content = isset($match[1]) ? $match[1] : '';

    // Pagination: every page URL is derived from the ORIGINAL url. Rewriting
    // the previous page's URL would accumulate suffixes
    // (foo.html -> foo_2.html -> foo_2_3.html).
    $baseUrl = $url;
    $visited = array($baseUrl => true);
    $nextPattern = '/<a href="[^<]*?(_(\\d+)\\.html)" target="_self" class="page-btn">下一页<\\/a>/';

    while (preg_match($nextPattern, $html, $match)) {
        $pageUrl = str_replace('.html', $match[1], $baseUrl);

        // Stop if a page points at one that was already fetched; otherwise
        // the loop would never terminate.
        if (isset($visited[$pageUrl])) {
            break;
        }
        $visited[$pageUrl] = true;

        $curlObj->setUrl($pageUrl);
        $html = $curlObj->run();

        // Body of the continuation page
        preg_match($bodyPattern, $html, $match);
        $content .= isset($match[1]) ? $match[1] : '';
    }

    $data = array(
        'title' => $title,
        'time' => $time,
        'short' => $short,
        'content' => str_replace(" ", "\n", strip_tags($content)),
    );
    echo json_encode($data);
}
/**
 * Issue a POST request through the Curl helper and hand back its result.
 *
 * @param mixed $url  Target address, forwarded to Curl::setUrl().
 * @param mixed $data Request payload, forwarded to Curl::setPost().
 * @return mixed Whatever Curl::run() returns for this request.
 */
function post($url, $data)
{
    $request = new Curl();

    // Configure the request: destination first, then the POST body.
    $request->setUrl($url);
    $request->setPost($data);

    $response = $request->run();

    return $response;
}
/**
 * Look up the share identifier for the given key via app.qichacha.com.
 *
 * Requests the getShareURL endpoint and returns the text that follows
 * "share/" (up to the next double quote) in the response.
 *
 * @param string $unique Value sent as the "unique" query parameter.
 * @return string The captured identifier, or '' when the response is not a
 *                string or contains no match.
 */
function getId($unique)
{
    // Encode the value so that characters such as "&", "#" or spaces cannot
    // alter the structure of the query string.
    $url = "http://app.qichacha.com/enterprises/new/getShareURL?unique=" . urlencode($unique);

    $curl = new Curl();
    $curl->setUrl($url);
    $resStr = $curl->run();

    // run() is not known to always return a string (e.g. on a failed
    // request); treat anything else as "no identifier found".
    if (!is_string($resStr)) {
        return '';
    }

    if (preg_match('/share\\/(.*?)"/', $resStr, $match)) {
        return $match[1];
    }

    return '';
}
/**
 * Fetch a jianshu.com article page and collect its counters, body HTML and
 * the slugs of users listed in the "likes" of the mark_viewed response.
 *
 * Steps:
 *  1. GET http://www.jianshu.com/p/{name} with random CLIENT-IP /
 *     X-FORWARDED-FOR header values.
 *  2. Read views/comments/likes counters and image_url from the JSON embedded
 *     in the <script data-name='note'> tag.
 *  3. Capture the HTML inside <div class="show-content">.
 *  4. If the <script data-name='uuid'> JSON carries a non-empty uuid, POST
 *     that JSON to /notes/{name}/mark_viewed.json and collect
 *     likes[*].user.slug from the response.
 *
 * @param string $name Article slug as used in the /p/{name} URL.
 * @return array Keys: read_num, comment_num, like_num, image_url (0 when
 *               absent), content (string) and comment_user (list of slugs;
 *               populated from the "likes" entries of the response).
 */
function getArticleInfo($name)
{
    $curl = new Curl();
    $url = "http://www.jianshu.com/p/{$name}";

    // A random dotted-quad value is sent in both client-address headers.
    $ip = rand(1, 255) . '.' . rand(1, 255) . '.' . rand(1, 255) . '.' . rand(1, 255);
    $curl->setUrl($url);
    $head = array("CLIENT-IP:{$ip}", "X-FORWARDED-FOR:{$ip}");
    $curl->setHttpHeader($head);
    $html = $curl->run();

    $data = array();

    // Counters from the embedded "note" JSON
    $p = "/<script type='application\\/json' data-name='note'>\\s*(.*?)\\s*<\\/script>/";
    preg_match($p, $html, $match);
    $note = isset($match[1]) ? json_decode($match[1], true) : array();
    if (!is_array($note)) {
        // json_decode() yields null for malformed JSON; fall back to defaults.
        $note = array();
    }
    $data['read_num'] = isset($note['views_count']) ? $note['views_count'] : 0;
    $data['comment_num'] = isset($note['comments_count']) ? $note['comments_count'] : 0;
    $data['like_num'] = isset($note['likes_count']) ? $note['likes_count'] : 0;
    $data['image_url'] = isset($note['image_url']) ? $note['image_url'] : 0;

    // Article body
    $p = '/<div class="show-content">([\\s\\S]*?)<\\/div>\\s*<\\/div>\\s*<\\/div>\\s*<div class="visitor_edit"/';
    preg_match($p, $html, $match);
    $data['content'] = isset($match[1]) ? $match[1] : '';

    // Users taken from the "likes" list of the mark_viewed response
    $data['comment_user'] = array();
    $p = "/<script type='application\\/json' data-name='uuid'>\\s*(.*?)\\s*<\\/script>/";
    preg_match($p, $html, $match);
    $arr = isset($match[1]) ? json_decode($match[1], true) : array();
    if (!is_array($arr)) {
        $arr = array();
    }

    // The uuid key may be missing when the script tag is absent or malformed.
    $uuid = isset($arr['uuid']) ? $arr['uuid'] : '';
    if (!empty($uuid)) {
        // Address the requested article rather than one fixed slug.
        $curl->setUrl("http://www.jianshu.com/notes/{$name}/mark_viewed.json");
        $curl->setPost($arr);
        $html = $curl->run();
        $temp = is_string($html) ? json_decode($html, true) : null;

        if (is_array($temp) && isset($temp['likes']) && is_array($temp['likes'])) {
            foreach ($temp['likes'] as $value) {
                // Skip entries that do not carry a user slug.
                if (isset($value['user']['slug'])) {
                    $data['comment_user'][] = $value['user']['slug'];
                }
            }
        }
    }

    return $data;
}