/**
 * Request citations or bibliography entries for several items from the
 * citation servers in parallel.
 *
 * One POST request is issued per set. The set key is embedded in the request
 * URL as the "setID" query parameter so that the response callback can map
 * each response back to the item it belongs to.
 *
 * @param string $mode  'citation' or 'bib'; selects the request flags and which
 *                      Zotero_Cite processor handles each response
 * @param array  $sets  Map of set key => Zotero_Item (exactly one item per set)
 * @param string $style Citation style name sent to the server
 * @return array Processed responses keyed by "<libraryID>/<key>" of the item;
 *               sets whose request failed are omitted
 * @throws Exception If a set is not a Zotero_Item
 */
public static function multiGetFromCiteServer($mode, $sets, $style = 'chicago-note-bibliography')
{
    require_once "../include/RollingCurl.inc.php";

    $t = microtime(true);

    $setIDs = array();
    $data = array();

    $requestCallback = function ($response, $info) use ($mode, &$setIDs, &$data) {
        if ($info['http_code'] != 200) {
            error_log("WARNING: HTTP {$info['http_code']} from citeserver {$mode} request: " . $response);
            return;
        }

        // Keep the raw body so that it can still be logged if decoding fails
        $decoded = json_decode($response);
        if (!$decoded) {
            error_log("WARNING: Invalid response from citeserver {$mode} request: " . $response);
            return;
        }

        // Recover the set key from the request URL. parse_str() is called with
        // an explicit result array: the single-argument form, which injected
        // $setID into the local scope, is deprecated in PHP 7.2 and removed in 8.0.
        $params = array();
        parse_str((string) parse_url($info['url'], PHP_URL_QUERY), $params);
        if (!isset($params['setID']) || !isset($setIDs[$params['setID']])) {
            error_log("WARNING: Unknown setID in citeserver {$mode} response URL: " . $info['url']);
            return;
        }
        $itemID = $setIDs[$params['setID']];

        if ($mode == 'citation') {
            $data[$itemID] = Zotero_Cite::processCitationResponse($decoded);
        } elseif ($mode == "bib") {
            $data[$itemID] = Zotero_Cite::processBibliographyResponse($decoded);
        }
    };

    $rc = new RollingCurl($requestCallback);
    // Number of simultaneous requests
    $rc->window_size = 20;

    foreach ($sets as $key => $items) {
        $json = self::getJSONFromItems($items);

        // Spread the load by picking a random citation server per request
        $server = Z_CONFIG::$CITATION_SERVERS[array_rand(Z_CONFIG::$CITATION_SERVERS)];

        $url = "http://{$server}/?responseformat=json&style=" . urlencode($style);
        if ($mode == 'citation') {
            $url .= "&citations=1&bibliography=0";
        }
        // Include array position in URL so that the callback can figure
        // out what request this was
        $url .= "&setID=" . urlencode($key);

        // TODO: support multiple items per set, if necessary
        if (!$items instanceof Zotero_Item) {
            throw new Exception("items is not a Zotero_Item");
        }
        $setIDs[$key] = $items->libraryID . "/" . $items->key;

        $request = new RollingCurlRequest($url);
        $request->options = array(
            CURLOPT_POST => 1,
            CURLOPT_POSTFIELDS => $json,
            // Send an empty Expect header with the POST
            CURLOPT_HTTPHEADER => array("Expect:"),
            CURLOPT_CONNECTTIMEOUT => 1,
            CURLOPT_TIMEOUT => 4,
            CURLOPT_HEADER => 0,
            CURLOPT_RETURNTRANSFER => 1
        );
        $rc->add($request);
    }

    $rc->execute();

    error_log(sizeOf($sets) . " {$mode} requests in " . round(microtime(true) - $t, 3));

    return $data;
}
/**
 * Fetch the configured web pages.
 *
 * Queues one request per entry of $this->urls on a RollingCurl instance built
 * around $this->callback, then runs the whole batch.
 *
 * @param int $window_size Maximum number of simultaneous connections for the
 *                         curl batch handle; when empty, $this->window_size
 *                         is used instead
 */
public function fetch_html($window_size = NULL)
{
    $batch = new RollingCurl($this->callback);

    foreach ($this->urls as $target) {
        $batch->add(new RollingCurlRequest($target, 'Get', NULL, NULL, $this->options));
    }

    // An explicit, non-empty argument wins over the instance-level setting
    $concurrency = empty($window_size) ? $this->window_size : $window_size;
    $batch->execute($concurrency);
}
/**
 * Add a single request or a whole request group to this instance.
 *
 * A RollingCurlGroup is asked to add itself via its addToRC() method; anything
 * else is passed on to the parent implementation.
 *
 * @param mixed $request A RollingCurlGroup, or a request accepted by parent::add()
 * @return mixed Whatever addToRC() or parent::add() returns
 */
public function add($request)
{
    if (!($request instanceof RollingCurlGroup)) {
        return parent::add($request);
    }

    return $request->addToRC($this);
}
*/

// A little example that fetches a bunch of sites in parallel and echoes the
// page title and response info for each request.

/**
 * Print the page title and the request info for one completed request.
 *
 * @param string $response Body returned for the request
 * @param array  $info     Request info passed to the callback by RollingCurl
 */
function request_callback($response, $info)
{
    // Default to an empty title: pages without a <title> element would
    // otherwise leave $title undefined and trigger a notice below.
    $title = '';

    // parse the page title out of the returned HTML
    if (preg_match("~<title>(.*?)</title>~i", $response, $out)) {
        $title = $out[1];
    }

    echo "<b>{$title}</b><br />";
    print_r($info);
    echo "<hr>";
}

require "RollingCurl.php";

// single curl request
$rc = new RollingCurl("request_callback");
$rc->request("http://www.msn.com");
$rc->execute();

// another single curl request
$rc = new RollingCurl("request_callback");
$rc->request("http://www.google.com");
$rc->execute();

echo "<hr>";

// top 20 sites according to alexa (11/5/09)
$urls = array(
    "http://www.google.com",
    "http://www.facebook.com",
    "http://www.yahoo.com",
    "http://www.youtube.com",
    "http://www.live.com",
    "http://www.wikipedia.com",
    "http://www.blogger.com",
    "http://www.msn.com",
    "http://www.baidu.com",
    "http://www.yahoo.co.jp",
    "http://www.myspace.com",
    "http://www.qq.com",
    "http://www.google.co.in",
    "http://www.twitter.com",
    "http://www.google.de",
    "http://www.microsoft.com",
    "http://www.google.cn",
    "http://www.sina.com.cn",
    "http://www.wordpress.com",
    "http://www.google.co.uk"
);

$rc = new RollingCurl("request_callback");
// Allow all 20 requests to run at the same time
$rc->window_size = 20;
foreach ($urls as $url) {
    // NOTE(review): assumes RollingCurl.php declares a class named Request — confirm
    $request = new Request($url);
    $rc->add($request);
}
$rc->execute();
/**
 * Fetch a set of URLs once, without following redirects.
 *
 * Depending on $this->method the URLs are fetched in parallel with
 * HttpRequestPool, in parallel with RollingCurl (curl_multi), or one after
 * another with file_get_contents(). Results are stored in
 * $this->requests[<original URL>] (headers, body, effective_url, status_code).
 * When a response is a redirect to a URL accepted by validateURL(), the target
 * is recorded in $this->redirectQueue[<original URL>] rather than fetched here.
 *
 * @param array $urls       URLs to fetch. When $isRedirect is true the array
 *                          key of each entry is used as the original URL and
 *                          the value as the URL to request.
 * @param bool  $isRedirect False: duplicates are removed, every URL is its own
 *                          original URL, and URLs already present in
 *                          $this->requests are not requested again.
 * @return false|null False when an HttpException is caught in the
 *                    HttpRequestPool branch; otherwise no value is returned.
 */
public function fetchAllOnce(array $urls, $isRedirect = false) {
    if (!$isRedirect) {
        $urls = array_unique($urls);
    }
    if (empty($urls)) {
        return;
    }

    //////////////////////////////////////////////////////
    // parallel (HttpRequestPool)
    if ($this->method == self::METHOD_REQUEST_POOL) {
        $this->debug('Starting parallel fetch (HttpRequestPool)');
        try {
            // Work through the URLs in batches of at most maxParallelRequests
            while (count($urls) > 0) {
                $this->debug('Processing set of ' . min($this->maxParallelRequests, count($urls)));
                $subset = array_splice($urls, 0, $this->maxParallelRequests);
                $pool = new HttpRequestPool();
                foreach ($subset as $orig => $url) {
                    if (!$isRedirect) {
                        $orig = $url;
                    }
                    // Drop any redirect previously queued for this original URL
                    unset($this->redirectQueue[$orig]);
                    $this->debug("...{$url}");
                    if (!$isRedirect && isset($this->requests[$url])) {
                        $this->debug("......in memory");
                        /*
                        } elseif ($this->isCached($url)) {
                            $this->debug("......is cached");
                            if (!$this->minimiseMemoryUse) {
                                $this->requests[$url] = $this->getCached($url);
                            }
                        */
                    } else {
                        $this->debug("......adding to pool");
                        $req_url = $this->rewriteHashbangFragment ? $this->rewriteHashbangFragment($url) : $url;
                        $req_url = $this->removeFragment($req_url);
                        $httpRequest = new HttpRequest($req_url, HttpRequest::METH_GET, $this->requestOptions);
                        // send cookies, if we have any
                        if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
                            $this->debug("......sending cookies: {$cookies}");
                            $httpRequest->addHeaders(array('Cookie' => $cookies));
                        }
                        $this->requests[$orig] = array('headers' => null, 'body' => null, 'httpRequest' => $httpRequest);
                        $this->requests[$orig]['original_url'] = $orig;
                        $pool->attach($httpRequest);
                    }
                }
                // did we get anything into the pool?
                if (count($pool) > 0) {
                    $this->debug('Sending request...');
                    try {
                        $pool->send();
                    } catch (HttpRequestPoolException $e) {
                        // do nothing
                    }
                    $this->debug('Received responses');
                    foreach ($subset as $orig => $url) {
                        if (!$isRedirect) {
                            $orig = $url;
                        }
                        //if (!isset($this->requests[$url]['fromCache'])) {
                        // NOTE(review): URLs that took the "in memory" branch above get no
                        // 'httpRequest' entry in this pass, and the entry is removed at the end
                        // of this loop — confirm $request cannot be missing here.
                        $request = $this->requests[$orig]['httpRequest'];
                        //$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader());
                        // getResponseHeader() doesn't return status line, so, for consistency...
                        $this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size'));
                        $this->requests[$orig]['body'] = $request->getResponseBody();
                        $this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url');
                        $this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode();
                        // is redirect?
                        // (&& binds tighter than ||: listed codes, or any code in 308-399)
                        if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) {
                            $redirectURL = $request->getResponseHeader('location');
                            // Locations without an http(s) scheme are resolved against the requested URL
                            if (!preg_match('!^https?://!i', $redirectURL)) {
                                $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
                            }
                            if ($this->validateURL($redirectURL)) {
                                $this->debug('Redirect detected. Valid URL: ' . $redirectURL);
                                // store any cookies
                                $cookies = $request->getResponseHeader('set-cookie');
                                if ($cookies && !is_array($cookies)) {
                                    $cookies = array($cookies);
                                }
                                if ($cookies) {
                                    $this->cookieJar->storeCookies($url, $cookies);
                                }
                                $this->redirectQueue[$orig] = $redirectURL;
                            } else {
                                $this->debug('Redirect detected. Invalid URL: ' . $redirectURL);
                            }
                        }
                        //die($url.' -multi- '.$request->getResponseInfo('effective_url'));
                        $pool->detach($request);
                        // The request object is no longer needed once its response has been copied
                        unset($this->requests[$orig]['httpRequest'], $request);
                        /*
                        if ($this->minimiseMemoryUse) {
                            if ($this->cache($url)) {
                                unset($this->requests[$url]);
                            }
                        }
                        */
                        //}
                    }
                }
            }
        } catch (HttpException $e) {
            $this->debug($e);
            return false;
        }
    } elseif ($this->method == self::METHOD_CURL_MULTI) {
        $this->debug('Starting parallel fetch (curl_multi_*)');
        // Work through the URLs in batches of at most maxParallelRequests
        while (count($urls) > 0) {
            $this->debug('Processing set of ' . min($this->maxParallelRequests, count($urls)));
            $subset = array_splice($urls, 0, $this->maxParallelRequests);
            $pool = new RollingCurl(array($this, 'handleCurlResponse'));
            // Window size equals the batch size
            $pool->window_size = count($subset);
            foreach ($subset as $orig => $url) {
                if (!$isRedirect) {
                    $orig = $url;
                }
                // Drop any redirect previously queued for this original URL
                unset($this->redirectQueue[$orig]);
                $this->debug("...{$url}");
                if (!$isRedirect && isset($this->requests[$url])) {
                    $this->debug("......in memory");
                    /*
                    } elseif ($this->isCached($url)) {
                        $this->debug("......is cached");
                        if (!$this->minimiseMemoryUse) {
                            $this->requests[$url] = $this->getCached($url);
                        }
                    */
                } else {
                    $this->debug("......adding to pool");
                    $req_url = $this->rewriteHashbangFragment ? $this->rewriteHashbangFragment($url) : $url;
                    $req_url = $this->removeFragment($req_url);
                    $headers = array();
                    // send cookies, if we have any
                    if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
                        $this->debug("......sending cookies: {$cookies}");
                        $headers[] = 'Cookie: ' . $cookies;
                    }
                    // The same 'timeout' option is used for both the connect and the total timeout
                    $httpRequest = new RollingCurlRequest($req_url, 'GET', null, $headers, array(CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'], CURLOPT_TIMEOUT => $this->requestOptions['timeout']));
                    $httpRequest->set_original_url($orig);
                    $this->requests[$orig] = array('headers' => null, 'body' => null, 'httpRequest' => $httpRequest);
                    $this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore?
                    $pool->add($httpRequest);
                }
            }
            // did we get anything into the pool?
            if (count($pool) > 0) {
                $this->debug('Sending request...');
                $pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig]
                $this->debug('Received responses');
                foreach ($subset as $orig => $url) {
                    if (!$isRedirect) {
                        $orig = $url;
                    }
                    // $this->requests[$orig]['headers']
                    // $this->requests[$orig]['body']
                    // $this->requests[$orig]['effective_url']
                    $status_code = $this->requests[$orig]['status_code'];
                    // (&& binds tighter than ||: listed codes, or any code in 308-399)
                    if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
                        $redirectURL = $this->requests[$orig]['location'];
                        // Locations without an http(s) scheme are resolved against the requested URL
                        if (!preg_match('!^https?://!i', $redirectURL)) {
                            $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
                        }
                        if ($this->validateURL($redirectURL)) {
                            $this->debug('Redirect detected. Valid URL: ' . $redirectURL);
                            // store any cookies
                            $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
                            if (!empty($cookies)) {
                                $this->cookieJar->storeCookies($url, $cookies);
                            }
                            $this->redirectQueue[$orig] = $redirectURL;
                        } else {
                            $this->debug('Redirect detected. Invalid URL: ' . $redirectURL);
                        }
                    }
                    // die($url.' -multi- '.$request->getResponseInfo('effective_url'));
                    unset($this->requests[$orig]['httpRequest']);
                }
            }
        }
    } else {
        $this->debug('Starting sequential fetch (file_get_contents)');
        $this->debug('Processing set of ' . count($urls));
        foreach ($urls as $orig => $url) {
            if (!$isRedirect) {
                $orig = $url;
            }
            // Drop any redirect previously queued for this original URL
            unset($this->redirectQueue[$orig]);
            $this->debug("...{$url}");
            if (!$isRedirect && isset($this->requests[$url])) {
                $this->debug("......in memory");
                /*
                } elseif ($this->isCached($url)) {
                    $this->debug("......is cached");
                    if (!$this->minimiseMemoryUse) {
                        $this->requests[$url] = $this->getCached($url);
                    }
                */
            } else {
                $this->debug("Sending request for {$url}");
                $this->requests[$orig]['original_url'] = $orig;
                $req_url = $this->rewriteHashbangFragment ? $this->rewriteHashbangFragment($url) : $url;
                $req_url = $this->removeFragment($req_url);
                // send cookies, if we have any
                // (works on a local copy so the Cookie header is not added to $this->httpContext)
                $httpContext = $this->httpContext;
                if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
                    $this->debug("......sending cookies: {$cookies}");
                    $httpContext['http']['header'] .= 'Cookie: ' . $cookies . "\r\n";
                }
                if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) {
                    $this->debug('Received response');
                    // get status code
                    if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\\d+\\.\\d+\\s+(\\d+)!', trim($http_response_header[0]), $match)) {
                        $this->debug('Error: no status code found');
                        // TODO: handle error - no status code
                    } else {
                        $this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false);
                        $this->requests[$orig]['body'] = $html;
                        $this->requests[$orig]['effective_url'] = $req_url;
                        $this->requests[$orig]['status_code'] = $status_code = (int) $match[1];
                        unset($match);
                        // handle redirect
                        if (preg_match('/^Location:(.*?)$/m', $this->requests[$orig]['headers'], $match)) {
                            $this->requests[$orig]['location'] = trim($match[1]);
                        }
                        // (&& binds tighter than ||: listed codes, or any code in 308-399)
                        if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
                            $redirectURL = $this->requests[$orig]['location'];
                            // Locations without an http(s) scheme are resolved against the requested URL
                            if (!preg_match('!^https?://!i', $redirectURL)) {
                                $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
                            }
                            if ($this->validateURL($redirectURL)) {
                                $this->debug('Redirect detected. Valid URL: ' . $redirectURL);
                                // store any cookies
                                $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
                                if (!empty($cookies)) {
                                    $this->cookieJar->storeCookies($url, $cookies);
                                }
                                $this->redirectQueue[$orig] = $redirectURL;
                            } else {
                                $this->debug('Redirect detected. Invalid URL: ' . $redirectURL);
                            }
                        }
                    }
                } else {
                    $this->debug('Error retrieving URL');
                    //print_r($req_url);
                    //print_r($http_response_header);
                    //print_r($html);
                    // TODO: handle error - failed to retrieve URL
                }
            }
        }
    }
}
} else {
    // Tail of a callback whose beginning lies above this excerpt: this branch
    // records a "fail" result for the request.
    $result = "fail\n";
    if ($response[0] == '<') {
        // A body starting with '<' is shown as-is
        $result .= "\t\tReceived: " . $response . "...\n";
    } else {
        // Otherwise only the first 50 bytes are shown, base64-encoded
        $result .= "\t\tReceived: " . base64_encode(substr($response, 0, 50)) . "...\n";
    }
    $result .= "\t\tSent: " . base64_encode(substr($png, 0, 50)) . "...\n";
    $results[$request->index] = $result;
}
});

// Queue one GET request per sample file; every result starts as 'unknown'
// until the callback overwrites it.
for ($x = 1; $x <= $files_appearing; $x++) {
    $results[$x] = 'unknown';
    $request = new RollingCurlRequest('http://' . $username . ':' . $password . '@' . $owncloud_remote . '/webdav' . $testdir . '/sample_' . $x . '.png', 'GET');
    // Tag the request with its sample number so the callback can store the result under it
    $request->index = $x;
    $verify->add($request);
}
$verify->execute($window);

// Report every result; anything other than 'pass' counts as a problem
echo "\nVerification:\n";
foreach ($results as $idx => $result) {
    echo "\t{$idx}: {$result}\n";
    if ($result != 'pass') {
        $hasproblem = true;
    }
}
// NOTE(review): $hasproblem is only assigned true in the loop above — confirm it is initialised earlier in the script
if ($hasproblem) {
    echo "\nREVIEW THE RESULTS ABOVE FOR A POTENTIAL PROBLEM.\n";
} else {
    echo "\nNO OBVIOUS PROBLEMS DETECTED.\n";
}
// Clean up temp images
/**
 * Callback for a fetched profile page: builds one "add friend" POST request
 * for every friend link found in the page and sends them all in parallel.
 *
 * Links from which no user id can be extracted are skipped.
 *
 * @param string $response HTML of the fetched page
 * @param mixed  $info     Request info supplied by the caller (unused)
 * @return void
 */
public function sendRequestCallBack($response, $info = '')
{
    // Pre-process the content: strip the <code ...><!-- ... --></code> wrappers
    $response = preg_replace("/<code[\\s\\S]*><!--/iU", "", $response);
    $response = preg_replace("/--><\\/code>/iU", "", $response);

    // Parse the HTML and collect the data needed to add friends
    \phpQuery::newDocumentHTML($response);
    $pagelet_timeline_main_column = pq("#pagelet_timeline_main_column")->attr("data-gt");
    $pagelet_timeline_main_column = json_decode($pagelet_timeline_main_column);
    // json_decode() yields null when the attribute is missing or malformed
    $profile_owner = isset($pagelet_timeline_main_column->profile_owner)
        ? $pagelet_timeline_main_column->profile_owner
        : null;

    $url = "https://www.facebook.com/ajax/add_friend/action.php?__pc=EXP1%3ADEFAULT";

    $requests = array();
    foreach (pq("div.fsl.fwb.fcb > a") as $value) {
        $data_hovercard = pq($value)->attr("data-hovercard");
        $data_gt = pq($value)->attr("data-gt");
        $data_gt = json_decode($data_gt);

        // Skip links without a usable id. Without this check $matches[1] could
        // be undefined, or still hold the id matched for the previous link.
        $matches = array();
        if (!preg_match("/\\?id=([0-9]*)&/iU", (string) $data_hovercard, $matches) || $matches[1] === '') {
            continue;
        }
        $to_friend = $matches[1];

        // Build the friend request
        $query = array(
            "to_friend" => $to_friend,
            "action" => "add_friend",
            "how_found" => "profile_friends",
            "ref_param" => "pb_friends_tl",
            "link_data[gt][coeff2_registry_key]" => $data_gt->coeff2_registry_key,
            "link_data[gt][coeff2_info]" => $data_gt->coeff2_info,
            "link_data[gt][coeff2_action]" => $data_gt->coeff2_action,
            "link_data[gt][coeff2_pv_signature]" => $data_gt->coeff2_pv_signature,
            "link_data[gt][profile_owner]" => $profile_owner,
            "link_data[gt][ref]" => "timeline:timeline",
            "outgoing_id" => '',
            "logging_location" => '',
            "no_flyout_on_click" => "true",
            "ego_log_data" => '',
            "http_referer" => '',
            "floc" => "friends_tab",
            "__user" => $this->user_id,
            "__a" => "1",
            "fb_dtsg" => $this->token,
            "__rev" => $this->version
        );

        // Start from the shared curl options and turn the request into a POST
        $capt_opts = $this->curl_opts;
        $capt_opts[CURLOPT_POST] = true;
        $capt_opts[CURLOPT_POSTFIELDS] = $query;

        $request = new \RollingCurlRequest($url);
        $request->options = $capt_opts;
        $requests[] = $request;
    }

    if (empty($requests)) {
        return;
    }

    $rc = new \RollingCurl();
    // Never run more than 20 requests at once
    $rc->window_size = min(count($requests), 20);
    foreach ($requests as $value) {
        $rc->add($value);
    }
    $rc->execute();
}
// 'dol.gov',
// 'humanresources.about.com',
// 'shrm.org',
// 'diversityinc.com',
// 'stevepavlina.com/blog/',
// 'osha.gov',
// 'hr.com',
// 'ere.net',
// 'cisin.com',
// 'blr.com',
// 'peopleadmin.com',
// 'wageworks.com',
// 'dalecarnegie.com',
// 'doleta.gov',
// 'mercer.com',
// 'astd.org',
// 'brightscope.com',
// 'tmp.com',
// 'trinet.com',
//];

// DETECT CYCLE!

// Both the scanner and the category helper are constructed from the same apps.json file
$loader = new FileLoader();
$scanner = new Scanner($loader, __DIR__ . '/apps.json');
$category = new Category($loader, __DIR__ . '/apps.json');

require_once __DIR__ . '/lib/RollingCurl.php';

// Queue one worker request per URL; the target URL is passed URL-encoded
// in the "url" query parameter.
// NOTE(review): $urls is defined above this excerpt — presumably the active site list; confirm
$rc = new RollingCurl();
foreach ($urls as $url) {
    $rc->add(new RollingCurlRequest("http://scanner.loc/worker.php?url=" . urlencode($url)));
}
// Run the queued requests, passing 10 as the window size argument
$rc->execute(10);
echo "All request sent";