/**
 * Fetches YouTube Data API v3 metadata for every video id in
 * $this->videoResults, issuing all requests in parallel.
 *
 * Results are accumulated in $GLOBALS['metastores'] by the
 * 'on_request_done' callback (defined elsewhere in this project),
 * because ParallelCurl invokes a plain function callback that has no
 * access to $this.
 *
 * @return array the metadata stores collected by the callback
 */
public function getAllMetadata()
{
    $GLOBALS['metastores'] = array();

    $curl_options = array(
        CURLOPT_SSL_VERIFYPEER => FALSE,
        CURLOPT_SSL_VERIFYHOST => FALSE,
        // BUG FIX: was "CURLOPT_USERAGENT, '...'" (a comma, not =>), which
        // never set the user agent and instead added two meaningless array
        // elements. It must be a key => value pair.
        CURLOPT_USERAGENT      => 'Parallel Curl google API request',
    );

    // One slot per video id, so every request can run concurrently.
    $parallel_curl = new ParallelCurl(count($this->videoResults), $curl_options);

    foreach ($this->videoResults as $id) {
        // NOTE(review): the original constructed an unused
        // "new MetadataStore()" here on every iteration; removed as dead code.
        $search_url = "https://www.googleapis.com/youtube/v3/videos?id=" . $id
            . "&key=" . API_KEY
            . "&part=snippet,statistics,contentDetails,recordingDetails";
        $parallel_curl->startRequest($search_url, 'on_request_done', array('id' => $id));
    }

    // Block until every queued request has completed.
    $parallel_curl->finishAllRequests();

    return $GLOBALS['metastores'];
}
/**
 * Runs a batch of URLs through ParallelCurl (multi-handle cURL) and
 * returns the aggregated responses.
 *
 * @param array $urls         URLs to fetch, keyed; empty entries are skipped.
 * @param int   $max_requests Maximum number of concurrent requests.
 *                            (New optional parameter, default 20, so the
 *                            existing single-argument call sites still work.)
 *
 * @return mixed the ParallelCurl result accumulator
 */
public function curlResult($urls, $max_requests = 20)
{
    include_once WEB_PATH . 'lib/parallelCurl.class.php';

    // BUG FIX: the original read isset($argv[1]) here, but $argv is not a
    // superglobal and is never defined inside a method, so the fallback of
    // 20 was always used. The limit is now an optional parameter instead.

    $curl_options = array(
        CURLOPT_SSL_VERIFYPEER => FALSE,
        CURLOPT_SSL_VERIFYHOST => FALSE,
        // BUG FIX: was "CURLOPT_USERAGENT, '...'" (a comma, not =>), which
        // never actually set the user agent string.
        CURLOPT_USERAGENT      => 'Parallel Curl test script',
    );

    $parallelCurl = new ParallelCurl($max_requests, $curl_options);

    foreach ($urls as $key => $terms) {
        // Skip blank entries so we do not queue empty requests.
        if (empty($terms)) {
            continue;
        }
        $parallelCurl->startRequest($terms, '', array($key));
    }

    // Block until all queued requests have finished.
    $parallelCurl->finishAllRequests();

    return $parallelCurl->result;
}
// ---- Crunchbase company crawler (script body) ----
// Reads company URL paths from $input, fetches each company record from the
// Crunchbase v1 API in parallel, and lets the 'on_request_done' callback
// (defined elsewhere) write results to $output.
$max_requests = $options['maxrequests'];
$organization = $options['organization'];
$email = $options['email'];

// A contactable user agent is required for polite crawling.
if (empty($organization) || empty($email) || !strpos($email, '@')) {
    die("You need to specify a valid organization and email address (found '{$organization}', '{$email}')\n");
}

$agent = 'Crawler from ' . $organization;
$agent .= ' - contact ' . $email;
$agent .= ' to report any problems with my crawling. Based on code from http://petewarden.typepad.com';

$curl_options = array(
    CURLOPT_SSL_VERIFYPEER => FALSE,
    CURLOPT_SSL_VERIFYHOST => FALSE,
    CURLOPT_FOLLOWLOCATION => TRUE,
    CURLOPT_USERAGENT      => $agent,
);

$urls_string = file_get_contents($input);
if ($urls_string === false) {
    // Robustness fix: previously an unreadable input file produced a warning
    // and the script carried on with an empty URL list.
    die("Unable to read input file '{$input}'\n");
}

// BUG FIX: split() was removed in PHP 7 (deprecated since 5.3) and would be
// a fatal "undefined function" error; explode() is the drop-in replacement
// for a plain-string delimiter.
$urls = explode("\n", $urls_string);

$output_handle = fopen($output, 'w');
if ($output_handle === false) {
    // Robustness fix: fail fast instead of passing FALSE to the callback.
    die("Unable to open output file '{$output}' for writing\n");
}

$parallel_curl = new ParallelCurl($max_requests, $curl_options);

$count = 0;
foreach ($urls as $url) {
    $count += 1;
    if ($count % 100 == 0) {
        error_log("Completed {$count} urls");
    }
    // Only /company/ paths map onto the Crunchbase v1 company endpoint.
    if (!preg_match('@^/company/@', $url)) {
        continue;
    }
    $full_url = 'http://api.crunchbase.com/v/1' . $url . '.js';
    $data = array('output_handle' => $output_handle);
    $parallel_curl->startRequest($full_url, 'on_request_done', $data);
}

// This should be called when you need to wait for the requests to finish.
// This will automatically run on destruct of the ParallelCurl object, so the next line is optional.
$parallel_curl->finishAllRequests();
/**
 * Queues one Steam inventory request per profile for the given game id and
 * waits for all of them to complete.
 *
 * Each request is dispatched to a game-specific callback
 * (scan_440_single / scan_730_single / scan_570_single) defined elsewhere.
 *
 * @param array $profile_list profiles, each containing a 'steamid' key
 * @param int   $gid          game id: 440 (TF2), 730 (CS:GO) or 570 (Dota 2)
 */
function scan_queue($profile_list, $gid)
{
    // Donators (level 20+) are granted a larger concurrency budget.
    $max_requests = donator_level(20) ? 24 : 2;

    $curl_options = array(
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_CONNECTTIMEOUT => 5,
        CURLOPT_TIMEOUT        => 7,
        CURLOPT_FOLLOWLOCATION => TRUE,
    );
    $parallel_curl = new ParallelCurl($max_requests, $curl_options);

    foreach ($profile_list as $profile) {
        switch ($gid) {
            case 440:
                $url = 'http://api.steampowered.com/IEconItems_440/GetPlayerItems/v0001/?key=' . AKey() . '&SteamID=' . $profile['steamid'] . '&format=json';
                $parallel_curl->startRequest($url, 'scan_440_single', $profile);
                break;
            case 730:
                $url = 'http://steamcommunity.com/profiles/' . $profile['steamid'] . '/inventory/json/730/2';
                $parallel_curl->startRequest($url, 'scan_730_single', $profile);
                break;
            case 570:
                $url = 'http://api.steampowered.com/IEconItems_570/GetPlayerItems/v0001/?key=' . AKey() . '&SteamID=' . $profile['steamid'] . '&format=json';
                $parallel_curl->startRequest($url, 'scan_570_single', $profile);
                break;
        }
        //ob_flush();
        //flush();
    }

    // This should be called when you need to wait for the requests to finish.
    // This will automatically run on destruct of the ParallelCurl object, so the next line is optional.
    $parallel_curl->finishAllRequests();
}
$bro_tips[$tipID] = array('tip' => $tip, 'count' => $count);
}

// Bring in the parallel cURL wrapper and set conservative transfer options.
require_once 'parallelcurl.php';
$curl_options = array(
    CURLOPT_SSL_VERIFYPEER => FALSE,
    CURLOPT_SSL_VERIFYHOST => FALSE,
    CURLOPT_AUTOREFERER    => TRUE,
    CURLOPT_FOLLOWLOCATION => TRUE,
    CURLOPT_MAXREDIRS      => 3,
    CURLOPT_NOPROGRESS     => TRUE,
);

// Concurrency cap; lower it for testing runs.
$maxConcurrent = 10;
$pcurl = new ParallelCurl($maxConcurrent, $curl_options);

// Highest tip id to scrape. Set to something > 1500 when testing.
$topTip = 1978;

// Scrape every tip page; the 'store_tip' callback collects the results.
// Note: ParallelCurl provides no hook to randomize the delay between
// requests — extending the class would be needed for that.
$tipNumber = 1;
while ($tipNumber <= $topTip) {
    $pcurl->startRequest("http://www.brotips.com/{$tipNumber}", 'store_tip', false);
    ++$tipNumber;
}
$pcurl->finishAllRequests();

// DB4 has been unreliable, so also dump everything to CSV as a backup.
$writeTipsCSV = True;
$csvHandle = fopen('brotips.csv', 'w');
if (!$csvHandle) {
    echo "Fffuuuu unable to open brotips.csv file.. ";
    $writeTipsCSV = False;
}

foreach ($bro_tips as $tipID => $tipData) {
    echo "\$tipID: {$tipID}\n";
    print_r($tipData);
    if ($writeTipsCSV) {
        fputcsv($csvHandle, array($tipID, $tipData['count'], $tipData['tip']), "\t", '"');
    }
}
// Clean up and close