コード例 #1
0
ファイル: Cite.inc.php プロジェクト: robinpaulson/dataserver
 /**
  * Request citations or bibliographies for several items in parallel from
  * the citation servers and collect the processed responses.
  *
  * One POST request is issued per entry of $sets, each to a randomly chosen
  * server from Z_CONFIG::$CITATION_SERVERS. The array key of the entry is
  * appended to the request URL as "setID" so the response callback can tell
  * which entry a response belongs to.
  *
  * Responses with a non-200 status, an undecodable body, or an unknown
  * setID are logged and left out of the result.
  *
  * @param string $mode  'citation' or 'bib'; results are only stored for these two modes
  * @param array  $sets  Zotero_Item objects (one item per set), keyed arbitrarily
  * @param string $style Style name passed to the citation server
  * @return array Processed responses keyed by "<libraryID>/<item key>"
  * @throws Exception If an entry of $sets is not a Zotero_Item
  */
 public static function multiGetFromCiteServer($mode, $sets, $style = 'chicago-note-bibliography')
 {
     require_once "../include/RollingCurl.inc.php";
     $t = microtime(true);
     $setIDs = array();
     $data = array();
     $requestCallback = function ($response, $info) use ($mode, &$setIDs, &$data) {
         if ($info['http_code'] != 200) {
             error_log("WARNING: HTTP {$info['http_code']} from citeserver {$mode} request: " . $response);
             return;
         }
         // Keep the raw body around: if decoding fails it is the only
         // useful thing to put in the log
         $decoded = json_decode($response);
         if (!$decoded) {
             error_log("WARNING: Invalid response from citeserver {$mode} request: " . $response);
             return;
         }
         // Recover the setID that was appended to the request URL.
         // parse_str() is given an explicit result array; the one-argument
         // form (which created local variables) no longer exists in PHP 8.
         $query = parse_url($info['url'], PHP_URL_QUERY);
         $params = array();
         if (is_string($query)) {
             parse_str($query, $params);
         }
         if (!isset($params['setID']) || !isset($setIDs[$params['setID']])) {
             error_log("WARNING: Unknown setID in citeserver {$mode} response URL: " . $info['url']);
             return;
         }
         $setID = $params['setID'];
         if ($mode == 'citation') {
             $data[$setIDs[$setID]] = Zotero_Cite::processCitationResponse($decoded);
         } elseif ($mode == "bib") {
             $data[$setIDs[$setID]] = Zotero_Cite::processBibliographyResponse($decoded);
         }
     };
     $rc = new RollingCurl($requestCallback);
     // Number of simultaneous requests
     $rc->window_size = 20;
     foreach ($sets as $key => $items) {
         $json = self::getJSONFromItems($items);
         $server = Z_CONFIG::$CITATION_SERVERS[array_rand(Z_CONFIG::$CITATION_SERVERS)];
         $url = "http://{$server}/?responseformat=json&style={$style}";
         if ($mode == 'citation') {
             $url .= "&citations=1&bibliography=0";
         }
         // Include array position in URL so that the callback can figure
         // out what request this was (encoded so that string keys survive
         // the round trip through the query string)
         $url .= "&setID=" . urlencode($key);
         // TODO: support multiple items per set, if necessary
         if (!$items instanceof Zotero_Item) {
             throw new Exception("items is not a Zotero_Item");
         }
         $setIDs[$key] = $items->libraryID . "/" . $items->key;
         $request = new RollingCurlRequest($url);
         $request->options = array(
             CURLOPT_POST => 1,
             CURLOPT_POSTFIELDS => $json,
             // Empty "Expect:" header overrides the one curl would otherwise send
             CURLOPT_HTTPHEADER => array("Expect:"),
             CURLOPT_CONNECTTIMEOUT => 1,
             CURLOPT_TIMEOUT => 4,
             CURLOPT_HEADER => 0,
             CURLOPT_RETURNTRANSFER => 1
         );
         $rc->add($request);
     }
     $rc->execute();
     error_log(sizeOf($sets) . " {$mode} requests in " . round(microtime(true) - $t, 3));
     return $data;
 }
コード例 #2
0
ファイル: CG_Fetch_Html.php プロジェクト: codergma/myspider2
 /**
  * Fetch the pages for every URL in $this->urls through RollingCurl.
  *
  * Each URL becomes one request built with $this->options; responses are
  * delivered to $this->callback.
  *
  * @param int $window_size Maximum number of simultaneous connections for the
  *                         curl batch handle; when empty, $this->window_size
  *                         is used instead
  */
 public function fetch_html($window_size = NULL)
 {
     $rc = new RollingCurl($this->callback);
     foreach ($this->urls as $url) {
         $rc->add(new RollingCurlRequest($url, 'Get', NULL, NULL, $this->options));
     }
     // Explicit argument wins; otherwise fall back to the instance setting
     $limit = empty($window_size) ? $this->window_size : $window_size;
     $rc->execute($limit);
 }
コード例 #3
0
 /**
  * Add a single request, or a RollingCurlGroup, to this instance.
  *
  * A RollingCurlGroup is asked to add itself via addToRC(); anything else is
  * passed straight to the parent implementation.
  *
  * @param mixed $request Request object or RollingCurlGroup
  * @return mixed Whatever addToRC() or the parent add() returns
  */
 public function add($request)
 {
     if (!$request instanceof RollingCurlGroup) {
         return parent::add($request);
     }
     return $request->addToRC($this);
 }
コード例 #4
0
*/
// a little example that fetches a bunch of sites in parallel and echos the page title and response info for each request
/**
 * RollingCurl callback: echo the page title followed by the request info.
 *
 * @param string $response Body returned for the request
 * @param array  $info     Request info passed by RollingCurl, dumped as-is
 */
function request_callback($response, $info)
{
    // Start from an empty title so a response without a <title> element
    // prints an empty heading instead of reading an undefined variable
    $title = '';
    // parse the page title out of the returned HTML
    if (preg_match("~<title>(.*?)</title>~i", $response, $out)) {
        $title = $out[1];
    }
    echo "<b>{$title}</b><br />";
    print_r($info);
    echo "<hr>";
}
require "RollingCurl.php";

// two stand-alone requests, each run on its own RollingCurl instance
foreach (array("http://www.msn.com", "http://www.google.com") as $url) {
    $rc = new RollingCurl("request_callback");
    $rc->request($url);
    $rc->execute();
}
echo "<hr>";

// top 20 sites according to alexa (11/5/09)
$urls = array(
    "http://www.google.com",
    "http://www.facebook.com",
    "http://www.yahoo.com",
    "http://www.youtube.com",
    "http://www.live.com",
    "http://www.wikipedia.com",
    "http://www.blogger.com",
    "http://www.msn.com",
    "http://www.baidu.com",
    "http://www.yahoo.co.jp",
    "http://www.myspace.com",
    "http://www.qq.com",
    "http://www.google.co.in",
    "http://www.twitter.com",
    "http://www.google.de",
    "http://www.microsoft.com",
    "http://www.google.cn",
    "http://www.sina.com.cn",
    "http://www.wordpress.com",
    "http://www.google.co.uk"
);

// fetch the whole list in parallel, up to 20 requests at a time
$rc = new RollingCurl("request_callback");
$rc->window_size = 20;
foreach ($urls as $url) {
    $request = new Request($url);
    $rc->add($request);
}
$rc->execute();
コード例 #5
0
 /**
  * Fetch a batch of URLs once and record each response in $this->requests.
  *
  * Redirects are not followed here: when a response is a redirect with a
  * valid target, the target is stored in $this->redirectQueue (keyed by the
  * original URL) for the caller to process.
  *
  * The transport depends on $this->method:
  *  - self::METHOD_REQUEST_POOL: parallel fetch through HttpRequestPool
  *  - self::METHOD_CURL_MULTI:   parallel fetch through RollingCurl
  *  - anything else:             sequential fetch through file_get_contents()
  *
  * @param array $urls       URLs to fetch. In redirect mode the keys are the
  *                          original URLs and the values are the redirect
  *                          targets to request; otherwise only the values are
  *                          used and each URL is its own key in $this->requests.
  * @param bool  $isRedirect True when $urls is a redirect queue as described above
  * @return null|false False when the HttpRequestPool path throws an
  *                    HttpException; no value otherwise
  */
 public function fetchAllOnce(array $urls, $isRedirect = false)
 {
     if (!$isRedirect) {
         $urls = array_unique($urls);
     }
     if (empty($urls)) {
         return;
     }
     //////////////////////////////////////////////////////
     // parallel (HttpRequestPool)
     if ($this->method == self::METHOD_REQUEST_POOL) {
         $this->debug('Starting parallel fetch (HttpRequestPool)');
         try {
             while (count($urls) > 0) {
                 $this->debug('Processing set of ' . min($this->maxParallelRequests, count($urls)));
                 $subset = array_splice($urls, 0, $this->maxParallelRequests);
                 $pool = new HttpRequestPool();
                 foreach ($subset as $orig => $url) {
                     if (!$isRedirect) {
                         $orig = $url;
                     }
                     unset($this->redirectQueue[$orig]);
                     $this->debug("...{$url}");
                     if (!$isRedirect && isset($this->requests[$url])) {
                         // Already fetched earlier; nothing is attached to the pool
                         // (a cache-lookup branch used to follow here and is disabled)
                         $this->debug("......in memory");
                     } else {
                         $this->debug("......adding to pool");
                         $req_url = $this->rewriteHashbangFragment ? $this->rewriteHashbangFragment($url) : $url;
                         $req_url = $this->removeFragment($req_url);
                         $httpRequest = new HttpRequest($req_url, HttpRequest::METH_GET, $this->requestOptions);
                         // send cookies, if we have any
                         if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
                             $this->debug("......sending cookies: {$cookies}");
                             $httpRequest->addHeaders(array('Cookie' => $cookies));
                         }
                         $this->requests[$orig] = array('headers' => null, 'body' => null, 'httpRequest' => $httpRequest);
                         $this->requests[$orig]['original_url'] = $orig;
                         $pool->attach($httpRequest);
                     }
                 }
                 // did we get anything into the pool?
                 if (count($pool) > 0) {
                     $this->debug('Sending request...');
                     try {
                         $pool->send();
                     } catch (HttpRequestPoolException $e) {
                         // Deliberately ignored: each request is still inspected below
                     }
                     $this->debug('Received responses');
                     foreach ($subset as $orig => $url) {
                         if (!$isRedirect) {
                             $orig = $url;
                         }
                         // URLs that were already in memory were never attached to
                         // this pool and carry no 'httpRequest'; their stored
                         // response must be left alone rather than dereferenced
                         if (!isset($this->requests[$orig]['httpRequest'])) {
                             continue;
                         }
                         $request = $this->requests[$orig]['httpRequest'];
                         // getResponseHeader() doesn't return status line, so, for consistency...
                         $this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size'));
                         $this->requests[$orig]['body'] = $request->getResponseBody();
                         $this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url');
                         $this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode();
                         // is redirect?
                         if ((in_array($status_code, array(300, 301, 302, 303, 307)) || ($status_code > 307 && $status_code < 400)) && $request->getResponseHeader('location')) {
                             $redirectURL = $request->getResponseHeader('location');
                             if (!preg_match('!^https?://!i', $redirectURL)) {
                                 // relative Location header: resolve against the requested URL
                                 $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
                             }
                             if ($this->validateURL($redirectURL)) {
                                 $this->debug('Redirect detected. Valid URL: ' . $redirectURL);
                                 // store any cookies
                                 $cookies = $request->getResponseHeader('set-cookie');
                                 if ($cookies && !is_array($cookies)) {
                                     $cookies = array($cookies);
                                 }
                                 if ($cookies) {
                                     $this->cookieJar->storeCookies($url, $cookies);
                                 }
                                 $this->redirectQueue[$orig] = $redirectURL;
                             } else {
                                 $this->debug('Redirect detected. Invalid URL: ' . $redirectURL);
                             }
                         }
                         $pool->detach($request);
                         unset($this->requests[$orig]['httpRequest'], $request);
                     }
                 }
             }
         } catch (HttpException $e) {
             $this->debug($e);
             return false;
         }
     } elseif ($this->method == self::METHOD_CURL_MULTI) {
         $this->debug('Starting parallel fetch (curl_multi_*)');
         while (count($urls) > 0) {
             $this->debug('Processing set of ' . min($this->maxParallelRequests, count($urls)));
             $subset = array_splice($urls, 0, $this->maxParallelRequests);
             $pool = new RollingCurl(array($this, 'handleCurlResponse'));
             $pool->window_size = count($subset);
             foreach ($subset as $orig => $url) {
                 if (!$isRedirect) {
                     $orig = $url;
                 }
                 unset($this->redirectQueue[$orig]);
                 $this->debug("...{$url}");
                 if (!$isRedirect && isset($this->requests[$url])) {
                     // Already fetched earlier; nothing is added to the pool
                     // (a cache-lookup branch used to follow here and is disabled)
                     $this->debug("......in memory");
                 } else {
                     $this->debug("......adding to pool");
                     $req_url = $this->rewriteHashbangFragment ? $this->rewriteHashbangFragment($url) : $url;
                     $req_url = $this->removeFragment($req_url);
                     $headers = array();
                     // send cookies, if we have any
                     if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
                         $this->debug("......sending cookies: {$cookies}");
                         $headers[] = 'Cookie: ' . $cookies;
                     }
                     $httpRequest = new RollingCurlRequest($req_url, 'GET', null, $headers, array(CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'], CURLOPT_TIMEOUT => $this->requestOptions['timeout']));
                     $httpRequest->set_original_url($orig);
                     $this->requests[$orig] = array('headers' => null, 'body' => null, 'httpRequest' => $httpRequest);
                     $this->requests[$orig]['original_url'] = $orig;
                     // TODO: is this needed anymore?
                     $pool->add($httpRequest);
                 }
             }
             // did we get anything into the pool?
             if (count($pool) > 0) {
                 $this->debug('Sending request...');
                 $pool->execute();
                 // this will call handleCurlResponse() and populate $this->requests[$orig]
                 // (headers, body, effective_url, status_code and, for redirects, location)
                 $this->debug('Received responses');
                 foreach ($subset as $orig => $url) {
                     if (!$isRedirect) {
                         $orig = $url;
                     }
                     $status_code = $this->requests[$orig]['status_code'];
                     if ((in_array($status_code, array(300, 301, 302, 303, 307)) || ($status_code > 307 && $status_code < 400)) && isset($this->requests[$orig]['location'])) {
                         $redirectURL = $this->requests[$orig]['location'];
                         if (!preg_match('!^https?://!i', $redirectURL)) {
                             // relative Location header: resolve against the requested URL
                             $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
                         }
                         if ($this->validateURL($redirectURL)) {
                             $this->debug('Redirect detected. Valid URL: ' . $redirectURL);
                             // store any cookies
                             $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
                             if (!empty($cookies)) {
                                 $this->cookieJar->storeCookies($url, $cookies);
                             }
                             $this->redirectQueue[$orig] = $redirectURL;
                         } else {
                             $this->debug('Redirect detected. Invalid URL: ' . $redirectURL);
                         }
                     }
                     unset($this->requests[$orig]['httpRequest']);
                 }
             }
         }
     } else {
         $this->debug('Starting sequential fetch (file_get_contents)');
         $this->debug('Processing set of ' . count($urls));
         foreach ($urls as $orig => $url) {
             if (!$isRedirect) {
                 $orig = $url;
             }
             unset($this->redirectQueue[$orig]);
             $this->debug("...{$url}");
             if (!$isRedirect && isset($this->requests[$url])) {
                 // Already fetched earlier; no request is sent
                 // (a cache-lookup branch used to follow here and is disabled)
                 $this->debug("......in memory");
             } else {
                 $this->debug("Sending request for {$url}");
                 $this->requests[$orig]['original_url'] = $orig;
                 $req_url = $this->rewriteHashbangFragment ? $this->rewriteHashbangFragment($url) : $url;
                 $req_url = $this->removeFragment($req_url);
                 // send cookies, if we have any
                 // (work on a copy so the Cookie header is not carried over to the next URL)
                 $httpContext = $this->httpContext;
                 if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
                     $this->debug("......sending cookies: {$cookies}");
                     $httpContext['http']['header'] .= 'Cookie: ' . $cookies . "\r\n";
                 }
                 if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) {
                     $this->debug('Received response');
                     // get status code
                     if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\\d+\\.\\d+\\s+(\\d+)!', trim($http_response_header[0]), $match)) {
                         $this->debug('Error: no status code found');
                         // TODO: handle error - no status code
                     } else {
                         $this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false);
                         $this->requests[$orig]['body'] = $html;
                         $this->requests[$orig]['effective_url'] = $req_url;
                         $this->requests[$orig]['status_code'] = $status_code = (int) $match[1];
                         unset($match);
                         // handle redirect
                         if (preg_match('/^Location:(.*?)$/m', $this->requests[$orig]['headers'], $match)) {
                             $this->requests[$orig]['location'] = trim($match[1]);
                         }
                         if ((in_array($status_code, array(300, 301, 302, 303, 307)) || ($status_code > 307 && $status_code < 400)) && isset($this->requests[$orig]['location'])) {
                             $redirectURL = $this->requests[$orig]['location'];
                             if (!preg_match('!^https?://!i', $redirectURL)) {
                                 // relative Location header: resolve against the requested URL
                                 $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
                             }
                             if ($this->validateURL($redirectURL)) {
                                 $this->debug('Redirect detected. Valid URL: ' . $redirectURL);
                                 // store any cookies
                                 $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
                                 if (!empty($cookies)) {
                                     $this->cookieJar->storeCookies($url, $cookies);
                                 }
                                 $this->redirectQueue[$orig] = $redirectURL;
                             } else {
                                 $this->debug('Redirect detected. Invalid URL: ' . $redirectURL);
                             }
                         }
                     }
                 } else {
                     $this->debug('Error retrieving URL');
                     // TODO: handle error - failed to retrieve URL
                 }
             }
         }
     }
 }
コード例 #6
0
    } else {
        $result = "fail\n";
        if ($response[0] == '<') {
            $result .= "\t\tReceived: " . $response . "...\n";
        } else {
            $result .= "\t\tReceived: " . base64_encode(substr($response, 0, 50)) . "...\n";
        }
        $result .= "\t\tSent: " . base64_encode(substr($png, 0, 50)) . "...\n";
        $results[$request->index] = $result;
    }
});
// Queue one GET per sample file (sample_1.png .. sample_<$files_appearing>.png)
// on the $verify RollingCurl instance.
// NOTE(review): $username/$password are placed in the URL without encoding —
// confirm they cannot contain characters such as '@', ':' or '/'.
for ($x = 1; $x <= $files_appearing; $x++) {
    // Placeholder status; the callback above stores its result under
    // $results[$request->index], replacing this value
    $results[$x] = 'unknown';
    $request = new RollingCurlRequest('http://' . $username . ':' . $password . '@' . $owncloud_remote . '/webdav' . $testdir . '/sample_' . $x . '.png', 'GET');
    // Tag the request with its file number so the callback can find its slot in $results
    $request->index = $x;
    $verify->add($request);
}
// Run all queued downloads, passing $window to execute()
$verify->execute($window);
echo "\nVerification:\n";
// Print every result; anything other than exactly 'pass' flags a problem
foreach ($results as $idx => $result) {
    echo "\t{$idx}: {$result}\n";
    if ($result != 'pass') {
        $hasproblem = true;
    }
}
// NOTE(review): $hasproblem is not initialised in this fragment — confirm it is
// set earlier in the script, otherwise it is undefined when every result is 'pass'
if ($hasproblem) {
    echo "\nREVIEW THE RESULTS ABOVE FOR A POTENTIAL PROBLEM.\n";
} else {
    echo "\nNO OBVIOUS PROBLEMS DETECTED.\n";
}
// Clean up temp images
コード例 #7
0
 /**
  * RollingCurl callback: parse a fetched page and send one "add_friend"
  * POST request for every profile link found in it.
  *
  * Links whose data-hovercard attribute does not contain a numeric id are
  * skipped, so no request is sent with an empty target.
  *
  * @param string $response Page body delivered by RollingCurl
  * @param mixed  $info     Request info from RollingCurl (unused)
  * @return void
  */
 public function sendRequestCallBack($response, $info = '')
 {
     // Pre-process the content: strip the <code ...><!-- and --></code>
     // wrappers around the embedded markup
     $response = preg_replace("/<code[\\s\\S]*><!--/iU", "", $response);
     $response = preg_replace("/--><\\/code>/iU", "", $response);
     // Parse the HTML and pull out the data needed for the friend requests
     \phpQuery::newDocumentHTML($response);
     $timelineData = json_decode((string) pq("#pagelet_timeline_main_column")->attr("data-gt"));
     // The attribute may be missing or not valid JSON; fall back to null
     // instead of reading a property of a non-object
     $profile_owner = isset($timelineData->profile_owner) ? $timelineData->profile_owner : null;
     // Read an optional property from a decoded data-gt object
     $field = function ($object, $name) {
         return isset($object->{$name}) ? $object->{$name} : null;
     };
     $requests = array();
     foreach (pq("div.fsl.fwb.fcb > a") as $value) {
         $link = pq($value);
         $data_hovercard = (string) $link->attr("data-hovercard");
         // Without a user id there is nobody to send the request to
         if (!preg_match("/\\?id=([0-9]*)&/iU", $data_hovercard, $matches) || $matches[1] === '') {
             continue;
         }
         $to_friend = $matches[1];
         $data_gt = json_decode((string) $link->attr("data-gt"));
         // Build the friend request
         $query = array(
             "to_friend" => $to_friend,
             "action" => "add_friend",
             "how_found" => "profile_friends",
             "ref_param" => "pb_friends_tl",
             "link_data[gt][coeff2_registry_key]" => $field($data_gt, 'coeff2_registry_key'),
             "link_data[gt][coeff2_info]" => $field($data_gt, 'coeff2_info'),
             "link_data[gt][coeff2_action]" => $field($data_gt, 'coeff2_action'),
             "link_data[gt][coeff2_pv_signature]" => $field($data_gt, 'coeff2_pv_signature'),
             "link_data[gt][profile_owner]" => $profile_owner,
             "link_data[gt][ref]" => "timeline:timeline",
             "outgoing_id" => '',
             "logging_location" => '',
             "no_flyout_on_click" => "true",
             "ego_log_data" => '',
             "http_referer" => '',
             "floc" => "friends_tab",
             "__user" => $this->user_id,
             "__a" => "1",
             "fb_dtsg" => $this->token,
             "__rev" => $this->version
         );
         // Start from the shared curl options and turn the request into a POST
         $capt_opts = $this->curl_opts;
         $url = "https://www.facebook.com/ajax/add_friend/action.php?__pc=EXP1%3ADEFAULT";
         $capt_opts[CURLOPT_POST] = true;
         $capt_opts[CURLOPT_POSTFIELDS] = $query;
         $request = new \RollingCurlRequest($url);
         $request->options = $capt_opts;
         $requests[] = $request;
     }
     if (empty($requests)) {
         return;
     }
     $rc = new \RollingCurl();
     // At most 20 simultaneous requests, fewer when there is less to send
     $rc->window_size = min(count($requests), 20);
     foreach ($requests as $value) {
         $rc->add($value);
     }
     $rc->execute();
 }
コード例 #8
0
//    'dol.gov',
//    'humanresources.about.com',
//    'shrm.org',
//    'diversityinc.com',
//    'stevepavlina.com/blog/',
//    'osha.gov',
//    'hr.com',
//    'ere.net',
//    'cisin.com',
//    'blr.com',
//    'peopleadmin.com',
//    'wageworks.com',
//    'dalecarnegie.com',
//    'doleta.gov',
//    'mercer.com',
//    'astd.org',
//    'brightscope.com',
//    'tmp.com',
//    'trinet.com',
//];
// DETECT CYCLE!
// Build the scanner and category helpers from the same apps.json definition file.
// NOTE(review): neither $scanner nor $category is used in the lines visible
// here — confirm they are needed by code elsewhere in the script.
$loader = new FileLoader();
$scanner = new Scanner($loader, __DIR__ . '/apps.json');
$category = new Category($loader, __DIR__ . '/apps.json');
require_once __DIR__ . '/lib/RollingCurl.php';
$rc = new RollingCurl();
// Queue one worker request per URL; the target URL is passed URL-encoded
// in the "url" query parameter
foreach ($urls as $url) {
    $rc->add(new RollingCurlRequest("http://scanner.loc/worker.php?url=" . urlencode($url)));
}
// Run the queue, passing 10 to execute()
// NOTE(review): presumably the window size (simultaneous requests) — confirm against lib/RollingCurl.php
$rc->execute(10);
echo "All request sent";