/**
 * Scrape the basic details of one shop from its dianping.com detail page.
 *
 * Every key of the returned array is always present. A scraped field whose
 * markup is not found in the page (or every scraped field, when the page body
 * comes back empty) is returned as an empty string rather than triggering
 * undefined-offset errors on the match arrays.
 *
 * @param string $url Path that is appended to "http://www.dianping.com".
 * @return array Keys: name, hour, address_detail, tel, province, city, price.
 */
function getItemdetails($url)
{
    $baseUrl = "http://www.dianping.com";
    $totalUrl = $baseUrl . $url;

    cls_curl::set_gzip(true);
    $content = cls_curl::get($totalUrl);

    // Defaults double as the result for a failed fetch. Province and city are
    // hard-coded to Beijing, exactly as before.
    $dataResult = array(
        'name' => '',
        'hour' => '',
        'address_detail' => '',
        'tel' => '',
        'province' => '北京市',
        'city' => '北京',
        'price' => '',
    );

    if (empty($content) || !is_string($content)) {
        return $dataResult;
    }

    // Shop name: take the whole <h1> element, drop any embedded <a>…</a>
    // element together with its text, then strip the <h1> tags themselves.
    if (preg_match('#<h1 class=\\"shop-name\\">\\s(.*?)\\s</h1>#s', $content, $shopname)) {
        $name = preg_replace("/<(a.*?)>(.*?)<(\\/a.*?)>/si", "", $shopname[0]);
        $name = preg_replace("/<(\\/?h1.*?)>/si", "", $name);
        $dataResult['name'] = $name;
    }

    // Opening hours: text of the first "item" span following the
    // "营业时间:" (business hours) label.
    if (preg_match('#<span class="info-name">营业时间:(.*?)</span>(.*?)<span class="item">\\s(.*?)\\s</span>#s', $content, $hour)) {
        $dataResult['hour'] = $hour[3];
    }

    // Street address: keep the text, strip the div/span/a tags around it.
    if (preg_match('#<div class="expand-info address" itemprop="street-address">\\s(.*?)\\s</div>#s', $content, $address)) {
        $addr = preg_replace("/<(\\/?div.*?)>/si", "", $address[0]);
        $addr = preg_replace("/<(\\/?span.*?)>/si", "", $addr);
        $addr = preg_replace("/<(\\/?a.*?)>/si", "", $addr);
        $dataResult['address_detail'] = $addr;
    }

    // Telephone: strip the p/span tags and remove any <a>…</a> element
    // (including its text) from the block.
    if (preg_match('#<p class="expand-info tel">\\s(.*?)\\s</p>#s', $content, $phone)) {
        $phoneNum = preg_replace("/<(\\/?p.*?)>/si", "", $phone[0]);
        $phoneNum = preg_replace("/<(\\/?span.*?)>/si", "", $phoneNum);
        $phoneNum = preg_replace("/<(a.*?)>(.*?)<(\\/a.*?)>/si", "", $phoneNum);
        $dataResult['tel'] = $phoneNum;
    }

    // Price: text following the "人均:" (per-person average) label.
    if (preg_match('#<span class="item">人均:(.*?)</span>#s', $content, $price)) {
        $dataResult['price'] = $price[1];
    }

    return $dataResult;
}
if ($xml_get_orders->RecordCount() > 0) { while (!$xml_get_orders->EOF) { // echo $xml_get_orders->fields['orders_id'];echo '<hr>'; if (!in_array($xml_get_orders->fields['orders_id'], $xml_array_ordersid)) { // echo 'init it '; $oID = $xml_get_orders->fields['orders_id']; $order = new order($oID); $xml_sou = ajax_sendxml($order, $oID); error_log($xml_get_orders->fields['orders_id'] . '__', 3, $log_file_name); //send id history 非正常处理,log需要另外生成 $xml_sendto = urlencode($xml_sou); // $url = "http://localhost:8012/?xmlfileinfo=" . $xml_sendto;//输出地址 echo $url = "http://flyinglimy.gicp.net/crmsfa/control/websiteOrderReceiver?order=" . $xml_sendto; // header("location:{$url}"); // $c=file_get_contents($url);//改用curl $c = cls_curl::get($url); var_dump($c); echo '--------ok----------'; exit('send finished'); } else { // echo 'has been send'; } $xml_get_orders->MoveNext(); } } die('die to send xml'); } class cls_curl { protected static $timeout = 20; protected static $ch = null;
/**
 * Collect the users that a Zhihu user follows, or is followed by.
 *
 * The user's list page is scraped first, then further users are requested
 * 20 at a time from the "node" endpoint advertised in that page's data-init
 * attribute, until the user count shown on the page has been covered.
 *
 * NOTE: because the required $worker follows it, the 'followees' default of
 * $user_type can never take effect; callers must pass all three arguments
 * (PHP 8 reports this parameter ordering as deprecated). The signature is
 * kept as-is for compatibility with existing callers.
 *
 * @param string $username  Name used in the /people/{username}/ URL.
 * @param string $user_type 'followees' or 'followers'.
 * @param object $worker    Object exposing log($message), used for progress output.
 * @return array Map of username => array('username' => ..., 'nickname' => ...);
 *               an empty array when the list page could not be fetched.
 * @author seatle <*****@*****.**>
 * @created time :2015-07-28 09:46
 */
function get_user_index($username, $user_type = 'followees', $worker)
{
    $url = "http://www.zhihu.com/people/{$username}/{$user_type}";
    set_cookie();
    cls_curl::set_gzip(true);
    $content = cls_curl::get($url);
    if (empty($content)) {
        return array();
    }

    $user_pattern = '#<h2 class="zm-list-content-title"><a data-tip=".*?" href="http://www.zhihu.com/people/(.*?)" class="zg-link" title=".*?">(.*?)</a></h2>#';
    $users = array();

    // With fewer than 20 users nothing can be fetched through AJAX, so the
    // list page itself still has to be parsed here.
    preg_match_all($user_pattern, $content, $out);
    $found_usernames = empty($out[1]) ? array() : $out[1];
    foreach ($found_usernames as $idx => $d_username) {
        $d_nickname = empty($out[2][$idx]) ? '' : $out[2][$idx];
        if (!empty($d_username) && !empty($d_nickname)) {
            $users[$d_username] = array('username' => $d_username, 'nickname' => $d_nickname);
        }
    }

    $keyword = $user_type == 'followees' ? '关注了' : '关注者';
    $worker->log("采集用户 --- " . $username . " --- {$keyword} --- 主页 --- 成功\n");

    // Total number of users displayed next to the keyword label.
    preg_match('#<span class="zg-gray-normal">' . $keyword . '</span><br />\\s<strong>(.*?)</strong><label> 人</label>#', $content, $out);
    $user_count = empty($out[1]) ? 0 : intval($out[1]);

    // Hidden form token that is sent back with every paging request.
    preg_match('#<input type="hidden" name="_xsrf" value="(.*?)"/>#', $content, $out);
    $_xsrf = empty($out[1]) ? '' : trim($out[1]);

    // HTML-entity-encoded JSON describing the paging endpoint and its parameters.
    preg_match('#<div class="zh-general-list clearfix" data-init="(.*?)">#', $content, $out);
    $url_params = empty($out[1]) ? '' : json_decode(html_entity_decode($out[1]), true);

    // Paging needs the token plus both the endpoint name and its parameters;
    // without them the list-page results are all that can be returned.
    if (empty($_xsrf) || !is_array($url_params) || !isset($url_params['nodename'], $url_params['params'])) {
        return $users;
    }

    $url = "http://www.zhihu.com/node/" . $url_params['nodename'];
    $params = $url_params['params'];
    $page_size = 20;

    for ($offset = 0; $offset < $user_count; $offset += $page_size) {
        // Derive the page number from the offset so that log lines stay
        // correct after a failed page (a counter bumped only on success
        // would fall behind).
        $page = (int) ($offset / $page_size) + 1;

        $params['offset'] = $offset;
        $post_data = array('method' => 'next', 'params' => json_encode($params), '_xsrf' => $_xsrf);
        $content = cls_curl::post($url, $post_data);
        if (empty($content)) {
            $worker->log("采集用户 --- " . $username . " --- {$keyword} --- 第{$page}页 --- 失败\n");
            continue;
        }

        $rows = json_decode($content, true);
        if (empty($rows['msg']) || !is_array($rows['msg'])) {
            $worker->log("采集用户 --- " . $username . " --- {$keyword} --- 第{$page}页 --- 失败\n");
            continue;
        }
        $worker->log("采集用户 --- " . $username . " --- {$keyword} --- 第{$page}页 --- 成功\n");

        foreach ($rows['msg'] as $row) {
            // Each entry is matched as an HTML fragment; skip anything that
            // is not a string, or that does not contain a user link.
            if (!is_string($row) || !preg_match($user_pattern, $row, $out)) {
                continue;
            }
            $d_username = empty($out[1]) ? '' : $out[1];
            $d_nickname = empty($out[2]) ? '' : $out[2];
            if (!empty($d_username) && !empty($d_nickname)) {
                $users[$d_username] = array('username' => $d_username, 'nickname' => $d_nickname);
            }
        }
    }

    return $users;
}
<?php date_default_timezone_set('Asia/Shanghai'); ini_set('display_errors', 1); include "config.php"; include "cls_curl.php"; include "db.php"; include "cache.php"; include "worker.php"; include "../rolling/RollingCurl.php"; // an array of URL's to fetch $urls = array("http://www.dianping.com/search/category/2/45/g146p2"); $content = cls_curl::get($urls[0]); print_r($content); // a function that will process the returned responses function request_callback($response, $info, $request) { // parse the page title out of the returned HTML if (preg_match("~<title>(.*?)</title>~i", $response, $out)) { $title = $out[1]; } echo "<b>{$title}</b><br />"; print_r($info); echo "<br>"; echo "<hr>"; } // create a new RollingCurl object and pass it the name of your custom callback function $rc = new RollingCurl("request_callback"); // the window size determines how many simultaneous requests to allow. $rc->window_size = 20; foreach ($urls as $url) {