Example #1
0
 /**
  * Fetch every configured URL through a RollingCurl batch.
  *
  * One 'Get' request carrying $this->options is queued for each entry of
  * $this->urls, and $this->callback is handed to RollingCurl as the callback.
  *
  * @param int $window_size Maximum number of simultaneous connections of the
  *                         curl multi handle; when empty, $this->window_size
  *                         is used instead
  */
 public function fetch_html($window_size = NULL)
 {
     $batch = new RollingCurl($this->callback);
     foreach ($this->urls as $target) {
         $batch->add(new RollingCurlRequest($target, 'Get', NULL, NULL, $this->options));
     }
     // An empty argument (NULL, 0, '') falls back to the instance-level setting
     $batch->execute(empty($window_size) ? $this->window_size : $window_size);
 }
Example #2
0
 /**
  * Request citations or bibliographies for several items from the citation
  * servers in parallel and collect the processed responses.
  *
  * Each set is POSTed as JSON to a randomly chosen server from
  * Z_CONFIG::$CITATION_SERVERS. The set's array key travels in the URL as
  * "setID" so the response callback can tell which request it belongs to.
  *
  * @param string $mode  'citation' or 'bib'; responses for any other mode are not stored
  * @param array  $sets  Zotero_Item objects keyed by set ID (one item per set)
  * @param string $style Style name sent to the citation server
  *
  * @return array Processed responses keyed by "<libraryID>/<key>"; requests that
  *               failed or returned invalid JSON are logged and omitted
  *
  * @throws Exception if an entry of $sets is not a Zotero_Item
  */
 public static function multiGetFromCiteServer($mode, $sets, $style = 'chicago-note-bibliography')
 {
     require_once "../include/RollingCurl.inc.php";
     $t = microtime(true);
     $setIDs = array();
     $data = array();
     $requestCallback = function ($response, $info) use($mode, &$setIDs, &$data) {
         if ($info['http_code'] != 200) {
             error_log("WARNING: HTTP {$info['http_code']} from citeserver {$mode} request: " . $response);
             return;
         }
         // Keep the raw body in $response so it can still be logged when decoding fails
         $decoded = json_decode($response);
         if (!$decoded) {
             error_log("WARNING: Invalid response from citeserver {$mode} request: " . $response);
             return;
         }
         // Recover the set ID from the request URL. parse_str() must be given a
         // result array: the one-argument form that injected variables into the
         // local scope was removed in PHP 8.
         $params = array();
         parse_str((string) parse_url($info['url'], PHP_URL_QUERY), $params);
         if (!isset($params['setID']) || !isset($setIDs[$params['setID']])) {
             error_log("WARNING: Unknown setID in citeserver {$mode} response URL: " . $info['url']);
             return;
         }
         $itemID = $setIDs[$params['setID']];
         if ($mode == 'citation') {
             $data[$itemID] = Zotero_Cite::processCitationResponse($decoded);
         } elseif ($mode == "bib") {
             $data[$itemID] = Zotero_Cite::processBibliographyResponse($decoded);
         }
     };
     $rc = new RollingCurl($requestCallback);
     // Number of simultaneous requests
     $rc->window_size = 20;
     foreach ($sets as $key => $items) {
         $json = self::getJSONFromItems($items);
         $server = Z_CONFIG::$CITATION_SERVERS[array_rand(Z_CONFIG::$CITATION_SERVERS)];
         $url = "http://{$server}/?responseformat=json&style={$style}";
         if ($mode == 'citation') {
             $url .= "&citations=1&bibliography=0";
         }
         // Include array position in URL so that the callback can figure
         // out what request this was (encoded so non-numeric keys round-trip)
         $url .= "&setID=" . urlencode((string) $key);
         // TODO: support multiple items per set, if necessary
         if (!$items instanceof Zotero_Item) {
             throw new Exception("items is not a Zotero_Item");
         }
         $setIDs[$key] = $items->libraryID . "/" . $items->key;
         $request = new RollingCurlRequest($url);
         $request->options = array(
             CURLOPT_POST => 1,
             CURLOPT_POSTFIELDS => $json,
             // Empty "Expect:" header overrides the one curl would otherwise send
             CURLOPT_HTTPHEADER => array("Expect:"),
             CURLOPT_CONNECTTIMEOUT => 1,
             CURLOPT_TIMEOUT => 4,
             CURLOPT_HEADER => 0,
             CURLOPT_RETURNTRANSFER => 1,
         );
         $rc->add($request);
     }
     $rc->execute();
     error_log(sizeOf($sets) . " {$mode} requests in " . round(microtime(true) - $t, 3));
     return $data;
 }
 /**
  * Run the queued requests, doing nothing when the queue is empty.
  *
  * @param int|null $window_size Passed through unchanged to the parent implementation
  *
  * @return mixed false when no requests are queued, otherwise the result of parent::execute()
  */
 public function execute($window_size = null)
 {
     $queued = count($this->requests);
     return $queued == 0 ? false : parent::execute($window_size);
 }
Example #4
0
// a little example that fetches a bunch of sites in parallel and echos the page title and response info for each request
/**
 * Echo the page title and the curl transfer info for one finished request.
 *
 * @param string $response Body returned for the request
 * @param array  $info     Transfer information for the request, printed with print_r()
 */
function request_callback($response, $info)
{
    // Default to an empty title: pages without a <title> element (or failed
    // requests with an empty body) would otherwise leave $title undefined.
    $title = '';
    // parse the page title out of the returned HTML
    if (preg_match("~<title>(.*?)</title>~i", $response, $out)) {
        $title = $out[1];
    }
    echo "<b>{$title}</b><br />";
    print_r($info);
    echo "<hr>";
}
require "RollingCurl.php";
// Two independent single-URL runs, each on a fresh RollingCurl instance
foreach (array("http://www.msn.com", "http://www.google.com") as $single_url) {
    $rc = new RollingCurl("request_callback");
    $rc->request($single_url);
    $rc->execute();
}
echo "<hr>";
// top 20 sites according to alexa (11/5/09)
$urls = array(
    "http://www.google.com",
    "http://www.facebook.com",
    "http://www.yahoo.com",
    "http://www.youtube.com",
    "http://www.live.com",
    "http://www.wikipedia.com",
    "http://www.blogger.com",
    "http://www.msn.com",
    "http://www.baidu.com",
    "http://www.yahoo.co.jp",
    "http://www.myspace.com",
    "http://www.qq.com",
    "http://www.google.co.in",
    "http://www.twitter.com",
    "http://www.google.de",
    "http://www.microsoft.com",
    "http://www.google.cn",
    "http://www.sina.com.cn",
    "http://www.wordpress.com",
    "http://www.google.co.uk",
);
// Queue all of them on one instance, with up to 20 requests in flight at once
$rc = new RollingCurl("request_callback");
$rc->window_size = 20;
foreach ($urls as $url) {
    $rc->add(new Request($url));
}
$rc->execute();
 /**
  * Overload of execute() that validates the thread count, records debug
  * messages and times the run before delegating to the parent.
  *
  * @access public
  *
  * @throws AngryCurlException when $window_size is neither empty nor a positive integer
  *
  * @param int $window_size Max number of simultaneous connections
  *
  * @return string|bool Whatever parent::execute() returns
  */
 public function execute($window_size = null)
 {
     # validate $window_size and pick the matching debug message
     if ($window_size == null) {
         $threads_msg = " (!) Default threads amount value (5) is used";
     } elseif ($window_size > 0 && is_int($window_size)) {
         $threads_msg = " * Threads set to:\t{$window_size}";
     } else {
         throw new AngryCurlException(" (!) Wrong threads amount in execute():\t{$window_size}");
     }
     self::add_debug_msg($threads_msg);
     self::add_debug_msg(" * Starting connections");
     # time the delegated run
     $started_at = microtime(true);
     $outcome = parent::execute($window_size);
     $elapsed = microtime(true) - $started_at;
     self::add_debug_msg(" * Finished in " . round($elapsed, 2) . "s");
     return $outcome;
 }
Example #6
0
        $result = "fail\n";
        if ($response[0] == '<') {
            $result .= "\t\tReceived: " . $response . "...\n";
        } else {
            $result .= "\t\tReceived: " . base64_encode(substr($response, 0, 50)) . "...\n";
        }
        $result .= "\t\tSent: " . base64_encode(substr($png, 0, 50)) . "...\n";
        $results[$request->index] = $result;
    }
});
// Queue one GET per uploaded sample file; every slot starts out as 'unknown'
// until the verification callback overwrites it.
for ($x = 1; $x <= $files_appearing; $x++) {
    $results[$x] = 'unknown';
    $sample_url = "http://{$username}:{$password}@{$owncloud_remote}/webdav{$testdir}/sample_{$x}.png";
    $request = new RollingCurlRequest($sample_url, 'GET');
    // Remember which sample this request belongs to
    $request->index = $x;
    $verify->add($request);
}
$verify->execute($window);
// Report every slot and flag the run if any of them is not 'pass'
echo "\nVerification:\n";
foreach ($results as $index => $outcome) {
    echo "\t{$index}: {$outcome}\n";
    if ($outcome != 'pass') {
        $hasproblem = true;
    }
}
echo $hasproblem
    ? "\nREVIEW THE RESULTS ABOVE FOR A POTENTIAL PROBLEM.\n"
    : "\nNO OBVIOUS PROBLEMS DETECTED.\n";
// Clean up temp images
randpng(0);
 /**
  * Callback for a fetched profile page: extract the friend links it contains
  * and send one "add friend" POST request per link through RollingCurl.
  *
  * Links whose data-hovercard attribute carries no numeric id are skipped,
  * since there is no target to send the request to.
  *
  * @param string $response HTML of the fetched page
  * @param mixed  $info     Transfer info supplied by the caller (unused)
  *
  * @return void
  */
 public function sendRequestCallBack($response, $info = '')
 {
     // Pre-process the content: strip the <code ...><!-- and --></code>
     // wrappers so the markup inside them becomes part of the parsed document
     $response = preg_replace("/<code[\\s\\S]*><!--/iU", "", $response);
     $response = preg_replace("/--><\\/code>/iU", "", $response);
     // Parse the HTML and collect the data needed to add friends
     \phpQuery::newDocumentHTML($response);
     $column = json_decode((string) pq("#pagelet_timeline_main_column")->attr("data-gt"));
     // The attribute may be missing or not valid JSON; fall back to null
     $profile_owner = isset($column->profile_owner) ? $column->profile_owner : null;
     $url = "https://www.facebook.com/ajax/add_friend/action.php?__pc=EXP1%3ADEFAULT";
     $requests = array();
     foreach (pq("div.fsl.fwb.fcb > a") as $value) {
         $anchor = pq($value);
         $data_hovercard = (string) $anchor->attr("data-hovercard");
         // Without an id there is nobody to befriend: skip instead of reading
         // an undefined match and posting an empty "to_friend"
         if (!preg_match("/\\?id=([0-9]*)&/iU", $data_hovercard, $matches) || $matches[1] === '') {
             continue;
         }
         $to_friend = $matches[1];
         $data_gt = json_decode((string) $anchor->attr("data-gt"));
         // Build the friend-request form
         $query = array(
             "to_friend" => $to_friend,
             "action" => "add_friend",
             "how_found" => "profile_friends",
             "ref_param" => "pb_friends_tl",
             "link_data[gt][coeff2_registry_key]" => isset($data_gt->coeff2_registry_key) ? $data_gt->coeff2_registry_key : null,
             "link_data[gt][coeff2_info]" => isset($data_gt->coeff2_info) ? $data_gt->coeff2_info : null,
             "link_data[gt][coeff2_action]" => isset($data_gt->coeff2_action) ? $data_gt->coeff2_action : null,
             "link_data[gt][coeff2_pv_signature]" => isset($data_gt->coeff2_pv_signature) ? $data_gt->coeff2_pv_signature : null,
             "link_data[gt][profile_owner]" => $profile_owner,
             "link_data[gt][ref]" => "timeline:timeline",
             "outgoing_id" => '',
             "logging_location" => '',
             "no_flyout_on_click" => "true",
             "ego_log_data" => '',
             "http_referer" => '',
             "floc" => "friends_tab",
             "__user" => $this->user_id,
             "__a" => "1",
             "fb_dtsg" => $this->token,
             "__rev" => $this->version,
         );
         // Start from the shared curl options and turn the request into a POST
         $capt_opts = $this->curl_opts;
         $capt_opts[CURLOPT_POST] = true;
         $capt_opts[CURLOPT_POSTFIELDS] = $query;
         $request = new \RollingCurlRequest($url);
         $request->options = $capt_opts;
         $requests[] = $request;
     }
     if (empty($requests)) {
         return;
     }
     $rc = new \RollingCurl();
     // At most 20 simultaneous requests, fewer when the batch is smaller
     $rc->window_size = min(count($requests), 20);
     foreach ($requests as $value) {
         $rc->add($value);
     }
     $rc->execute();
 }
Example #8
0
 /**
  * Fetch several URLs in parallel, run each URL's callback on its response
  * and store every response in the cache.
  *
  * @param array $requests List of arrays with keys 'url' (string) and 'func'
  *                        (callable invoked with the response body, or an
  *                        empty value for none)
  *
  * @return array Response bodies keyed by URL (empty when $requests is empty)
  */
 public static function curl_multi_get($requests)
 {
     cache::$__LinkToFunc = array();
     cache::$__TempResult = array();
     cache::$__ApiTime = microtime(true);
     $rc = new RollingCurl(function ($response, $info, $request) {
         $url = $request->url;
         if ($response == null) {
             // No fallback fetch is attempted; the empty result is only logged
             cache::log("CURL RESULT EMPTY for url: " . $url . " with HTTP code " . $info['http_code']);
         }
         // Cast before measuring: strlen(null) is deprecated as of PHP 8.1
         cache::log(sprintf("Got multi result for %s in %s sec, len %s", $url, number_format(microtime(true) - cache::$__ApiTime, 3), strlen((string) $response)));
         $cbtime = microtime(true);
         // Guarded lookup: a URL without a registered callback is simply skipped
         if (!empty(cache::$__LinkToFunc[$url])) {
             call_user_func(cache::$__LinkToFunc[$url], $response);
         }
         cache::log(sprintf("Ran callback for URL in %s sec", number_format(microtime(true) - $cbtime, 3)));
         cache::$__TempResult[$url] = $response;
         cache::put($url, $response);
     });
     foreach ($requests as $request) {
         cache::$__LinkToFunc[$request['url']] = $request['func'];
         $rc->request($request['url']);
     }
     // Only run the batch when something was queued; an empty list would
     // otherwise ask for a window of 0 simultaneous connections.
     if (count($requests) > 0) {
         $rc->window_size = count($requests);
         $rc->execute();
     }
     cache::log(sprintf("Multi API request took %s sec", number_format(microtime(true) - cache::$__ApiTime, 3)));
     return cache::$__TempResult;
 }
//    'dol.gov',
//    'humanresources.about.com',
//    'shrm.org',
//    'diversityinc.com',
//    'stevepavlina.com/blog/',
//    'osha.gov',
//    'hr.com',
//    'ere.net',
//    'cisin.com',
//    'blr.com',
//    'peopleadmin.com',
//    'wageworks.com',
//    'dalecarnegie.com',
//    'doleta.gov',
//    'mercer.com',
//    'astd.org',
//    'brightscope.com',
//    'tmp.com',
//    'trinet.com',
//];
// DETECT CYCLE!
// Scanner and category lookup both read the same apps.json definition file
$apps_file = __DIR__ . '/apps.json';
$loader = new FileLoader();
$scanner = new Scanner($loader, $apps_file);
$category = new Category($loader, $apps_file);
require_once __DIR__ . '/lib/RollingCurl.php';
// Hand every URL to the worker endpoint, 10 requests at a time
$rc = new RollingCurl();
foreach ($urls as $url) {
    $worker_url = "http://scanner.loc/worker.php?url=" . urlencode($url);
    $rc->add(new RollingCurlRequest($worker_url));
}
$rc->execute(10);
echo "All request sent";