Code Example #1
 public function getAllMetadata()
 {
     // The on_request_done callback (not shown in this excerpt) fills this global array.
     $GLOBALS['metastores'] = array();
     $curl_options = array(CURLOPT_SSL_VERIFYPEER => FALSE, CURLOPT_SSL_VERIFYHOST => FALSE, CURLOPT_USERAGENT => 'Parallel Curl google API request');
     $parallel_curl = new ParallelCurl(count($this->videoResults), $curl_options);
     foreach ($this->videoResults as $id) {
         $search_url = "https://www.googleapis.com/youtube/v3/videos?id=" . $id . "&key=" . API_KEY . "&part=snippet,statistics,contentDetails,recordingDetails";
         $parallel_curl->startRequest($search_url, 'on_request_done', array('id' => $id));
     }
     $parallel_curl->finishAllRequests();
     return $GLOBALS['metastores'];
 }
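The on_request_done callback is not included in this excerpt. A minimal sketch of what it might look like, assuming ParallelCurl's usual ($content, $url, $ch, $user_data) callback signature (the body below is an assumption, not the project's actual code):

// Hypothetical callback sketch; the real implementation is not shown above.
function on_request_done($content, $url, $ch, $user_data)
{
    if (curl_errno($ch) > 0) {
        return; // transport error: skip this video
    }
    // Store the decoded YouTube API response under the video id passed
    // as user data by getAllMetadata().
    $GLOBALS['metastores'][$user_data['id']] = json_decode($content, true);
}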
Code Example #2
 /**
  * Run cURL requests in parallel and return the collected results.
  */
 public function curlResult($urls)
 {
     include_once WEB_PATH . 'lib/parallelCurl.class.php';
     // Maximum number of concurrent requests; optionally taken from the
     // command line ($argv is not in scope inside a method, so it is read
     // via $GLOBALS).
     $max_requests = isset($GLOBALS['argv'][1]) ? $GLOBALS['argv'][1] : 20;
     $curl_options = array(CURLOPT_SSL_VERIFYPEER => FALSE, CURLOPT_SSL_VERIFYHOST => FALSE, CURLOPT_USERAGENT => 'Parallel Curl test script');
     $parallelCurl = new ParallelCurl($max_requests, $curl_options);
     foreach ($urls as $key => $terms) {
         if (empty($terms)) {
             continue;
         }
         // No per-request callback is given; the local ParallelCurl variant
         // is assumed to collect responses into its $result property.
         $parallelCurl->startRequest($terms, '', array($key));
     }
     $parallelCurl->finishAllRequests();
     return $parallelCurl->result;
 }
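An empty-string callback and a $parallelCurl->result property are not part of Pete Warden's stock ParallelCurl, so parallelCurl.class.php is presumably a local variant that collects responses itself. With the stock class, the same effect could be achieved with a named callback that fills an array; a minimal sketch (on_url_done and $results are assumed names, not from the source):

$results = array();

// Hypothetical callback for the stock ParallelCurl class; assumes its
// usual ($content, $url, $ch, $user_data) callback signature.
function on_url_done($content, $url, $ch, $user_data)
{
    global $results;
    list($key) = $user_data;   // the array($key) passed to startRequest()
    $results[$key] = $content; // keep the raw response body per key
}

Each startRequest() call would then pass 'on_url_done' in place of the empty string.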
Code Example #3
$max_requests = $options['maxrequests'];
$organization = $options['organization'];
$email = $options['email'];
if (empty($organization) || empty($email) || strpos($email, '@') === false) {
    die("You need to specify a valid organization and email address (found '{$organization}', '{$email}')\n");
}
$agent = 'Crawler from ' . $organization;
$agent .= ' - contact ' . $email;
$agent .= ' to report any problems with my crawling. Based on code from http://petewarden.typepad.com';
$curl_options = array(CURLOPT_SSL_VERIFYPEER => FALSE, CURLOPT_SSL_VERIFYHOST => FALSE, CURLOPT_FOLLOWLOCATION => TRUE, CURLOPT_USERAGENT => $agent);
$urls_string = file_get_contents($input);
$urls = explode("\n", $urls_string);
$output_handle = fopen($output, 'w');
$parallel_curl = new ParallelCurl($max_requests, $curl_options);
$count = 0;
foreach ($urls as $url) {
    $count += 1;
    if ($count % 100 == 0) {
        error_log("Completed {$count} urls");
    }
    if (!preg_match('@^/company/@', $url)) {
        continue;
    }
    $full_url = 'http://api.crunchbase.com/v/1' . $url . '.js';
    $data = array('output_handle' => $output_handle);
    $parallel_curl->startRequest($full_url, 'on_request_done', $data);
}
// This should be called when you need to wait for the requests to finish.
// This will automatically run on destruct of the ParallelCurl object, so the next line is optional.
$parallel_curl->finishAllRequests();
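The on_request_done callback is again omitted from the excerpt. A plausible sketch that appends each CrunchBase response to the shared output file, assuming ParallelCurl's usual ($content, $url, $ch, $user_data) callback signature (the body is an assumption, not the project's actual code):

// Hypothetical callback sketch; the real implementation is not shown above.
function on_request_done($content, $url, $ch, $data)
{
    $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    if ($httpcode != 200) {
        error_log("Fetch of {$url} failed with HTTP code {$httpcode}");
        return;
    }
    // Write one JSON record per line to the handle passed as user data.
    fwrite($data['output_handle'], $content . "\n");
}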
Code Example #4
function scan_queue($profile_list, $gid)
{
    if (donator_level(20)) {
        $max_requests = 24;
    } else {
        $max_requests = 2;
    }
    $curl_options = array(CURLOPT_RETURNTRANSFER => 1, CURLOPT_CONNECTTIMEOUT => 5, CURLOPT_TIMEOUT => 7, CURLOPT_FOLLOWLOCATION => TRUE);
    $parallel_curl = new ParallelCurl($max_requests, $curl_options);
    foreach ($profile_list as $profile) {
        // App ID 440: Team Fortress 2 (official Web API endpoint).
        if ($gid == 440) {
            $url = 'http://api.steampowered.com/IEconItems_440/GetPlayerItems/v0001/?key=' . AKey() . '&SteamID=' . $profile['steamid'] . '&format=json';
            $parallel_curl->startRequest($url, 'scan_440_single', $profile);
        }
        // App ID 730: CS:GO (community inventory endpoint).
        if ($gid == 730) {
            $url = 'http://steamcommunity.com/profiles/' . $profile['steamid'] . '/inventory/json/730/2';
            $parallel_curl->startRequest($url, 'scan_730_single', $profile);
        }
        // App ID 570: Dota 2 (official Web API endpoint).
        if ($gid == 570) {
            $url = 'http://api.steampowered.com/IEconItems_570/GetPlayerItems/v0001/?key=' . AKey() . '&SteamID=' . $profile['steamid'] . '&format=json';
            $parallel_curl->startRequest($url, 'scan_570_single', $profile);
        }
        //ob_flush();
        //flush();
    }
    // This should be called when you need to wait for the requests to finish.
    // This will automatically run on destruct of the ParallelCurl object, so the next line is optional.
    $parallel_curl->finishAllRequests();
}
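The per-game callbacks (scan_440_single, scan_730_single, scan_570_single) are not shown. A rough sketch of one of them, assuming ParallelCurl's usual ($content, $url, $ch, $user_data) callback signature and the documented GetPlayerItems JSON layout (the per-item processing is illustrative only):

// Hypothetical sketch of scan_440_single; the real implementation is not
// shown above.
function scan_440_single($content, $url, $ch, $profile)
{
    $data = json_decode($content, true);
    if (!isset($data['result']['items'])) {
        return; // private inventory or API error: nothing to scan
    }
    foreach ($data['result']['items'] as $item) {
        // Per-item processing for this profile would go here.
    }
}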
Code Example #5
File: brotips-scrape.php  Project: 4v4t4r/BroCTF
    $bro_tips[$tipID] = array('tip' => $tip, 'count' => $count);
}
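Only the tail of the store_tip callback appears in this excerpt. A rough reconstruction, assuming ParallelCurl's usual ($content, $url, $ch, $user_data) signature; the parsing placeholders are hypothetical, not the project's actual logic:

// Hypothetical reconstruction of store_tip; only its last two lines are
// shown in the excerpt above.
function store_tip($content, $url, $ch, $user_data)
{
    global $bro_tips;
    if (curl_getinfo($ch, CURLINFO_HTTP_CODE) != 200) {
        return; // failed fetch: skip this tip
    }
    $tipID = (int) basename($url); // e.g. 42 from http://www.brotips.com/42
    $tip = '';   // tip text extracted from $content (parsing not shown)
    $count = 0;  // vote count extracted from $content (parsing not shown)
    $bro_tips[$tipID] = array('tip' => $tip, 'count' => $count);
}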
// Set up the parallel cURL wrapper
require_once 'parallelcurl.php';
$curl_options = array(CURLOPT_SSL_VERIFYPEER => FALSE, CURLOPT_SSL_VERIFYHOST => FALSE, CURLOPT_AUTOREFERER => TRUE, CURLOPT_FOLLOWLOCATION => TRUE, CURLOPT_MAXREDIRS => 3, CURLOPT_NOPROGRESS => TRUE);
$maxConcurrent = 10;
$pcurl = new ParallelCurl($maxConcurrent, $curl_options);
// Set to 10 for testing.
$topTip = 1978;
// Perform scraping. Note that there's no way to randomize the wait
// between requests that I can find.  Maybe I'll extend that class?
// Set $i to something > 1500 for testing.
for ($i = 1; $i <= $topTip; ++$i) {
    $pcurl->startRequest("http://www.brotips.com/{$i}", 'store_tip', false);
}
$pcurl->finishAllRequests();
// Since DB4 is being a pile of poo, let's also try CSV as a backup.
$writeTipsCSV = true;
if (!($csvHandle = fopen('brotips.csv', 'w'))) {
    echo "Fffuuuu unable to open brotips.csv file..  ";
    $writeTipsCSV = false;
}
foreach ($bro_tips as $tipID => $tipData) {
    echo "\$tipID: {$tipID}\n";
    print_r($tipData);
    if ($writeTipsCSV) {
        $csvTip = array($tipID, $tipData['count'], $tipData['tip']);
        fputcsv($csvHandle, $csvTip, "\t", '"');
    }
}
// Clean up and close