/**
 * Perform an HTTP request with cURL and return the response body.
 *
 * Redirects are followed, a random User-Agent from UAgent::random() is sent,
 * the request times out after 10 seconds, and every response header line is
 * handed to Common::curlResponseHeaderCallback.
 *
 * @param String $url form URL
 * @param String $type submit method; 'post' sends $data as the request body,
 *                     any other value performs a plain GET
 * @param String or Array $data POST payload. An array is joined into
 *                     "key=value&key=value" WITHOUT URL-encoding (callers must
 *                     encode values themselves); a string is sent as-is
 * @param Array $headers request header lines ("Name: value"); '' sends none
 * @param Array or String $cookie cookie pairs ("name=value"); an array is
 *                     joined with ';', a string is sent as-is, '' sends none
 * @return String the response body, or false when curl_exec() fails
 */
public static function getData($url, $type = 'get', $data = '', $headers = '', $cookie = '')
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_USERAGENT, UAgent::random());
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    curl_setopt($ch, CURLOPT_HEADERFUNCTION, array('Common', "curlResponseHeaderCallback"));

    if ($type == 'post') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
        if (is_array($data)) {
            curl_setopt($ch, CURLOPT_POST, true);
            // Collect the pairs and join them once, so the body never ends
            // with a dangling '&' (the previous rtrim() result was discarded).
            $pairs = array();
            foreach ($data as $key => $value) {
                $pairs[] = $key . '=' . $value;
            }
            curl_setopt($ch, CURLOPT_POSTFIELDS, implode('&', $pairs));
        } else {
            curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
        }
    }

    if ($headers != '') {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    }

    if ($cookie != '') {
        // implode() takes the separator first; the reversed argument order was
        // deprecated in PHP 7.4 and throws a TypeError on PHP 8.
        $cookieHeader = is_array($cookie) ? implode(';', $cookie) : (string) $cookie;
        curl_setopt($ch, CURLOPT_COOKIE, $cookieHeader);
    }

    $result = curl_exec($ch);
    // Release the handle explicitly; before PHP 8 it is a resource that
    // otherwise stays open until the end of the request.
    curl_close($ch);

    return $result;
}
/**
 * Perform an HTTP request with cURL and return the response body.
 *
 * Redirects are followed, gzip-compressed responses are requested and
 * decoded, a random User-Agent from UAgent::random() is sent, and every
 * response header line is handed to Common::curlResponseHeaderCallback.
 *
 * @param String $url form URL
 * @param String $type submit method; 'post' sends $data as the request body,
 *                     any other value performs a plain GET
 * @param String or Array $data POST payload. An array is joined into
 *                     "key=value&key=value" WITHOUT URL-encoding (callers must
 *                     encode values themselves); a string is sent as-is
 * @param Array $headers request header lines ("Name: value"); '' sends none
 * @param Array or String $cookie cookie pairs ("name=value"); an array is
 *                     joined with ';', a string is sent as-is, '' sends none
 * @param String $referer value for the Referer header; '' sends none
 * @param Integer $timeout maximum request time in seconds; '' means 10
 * @return String the response body, or false when curl_exec() fails
 */
public function getData($url, $type = 'get', $data = '', $headers = '', $cookie = '', $referer = '', $timeout = '')
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    // Keep the outgoing request headers available through curl_getinfo().
    curl_setopt($ch, CURLINFO_HEADER_OUT, true);
    curl_setopt($ch, CURLOPT_ENCODING, "gzip");
    curl_setopt($ch, CURLOPT_USERAGENT, UAgent::random());
    curl_setopt($ch, CURLOPT_TIMEOUT, ($timeout == '') ? 10 : $timeout);
    curl_setopt($ch, CURLOPT_HEADERFUNCTION, array('Common', "curlResponseHeaderCallback"));

    if ($type == 'post') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
        if (is_array($data)) {
            curl_setopt($ch, CURLOPT_POST, true);
            // Collect the pairs and join them once, so the body never ends
            // with a dangling '&' (the previous rtrim() result was discarded).
            $pairs = array();
            foreach ($data as $key => $value) {
                $pairs[] = $key . '=' . $value;
            }
            curl_setopt($ch, CURLOPT_POSTFIELDS, implode('&', $pairs));
        } else {
            curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
        }
    }

    if ($headers != '') {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    }

    if ($cookie != '') {
        // implode() takes the separator first; the reversed argument order was
        // deprecated in PHP 7.4 and throws a TypeError on PHP 8.
        $cookieHeader = is_array($cookie) ? implode(';', $cookie) : (string) $cookie;
        curl_setopt($ch, CURLOPT_COOKIE, $cookieHeader);
    }

    if ($referer != '') {
        curl_setopt($ch, CURLOPT_REFERER, $referer);
    }

    $result = curl_exec($ch);
    curl_close($ch);

    return $result;
}
// Crawl script: runs an EbayCrawler over an ebay.es search for $keyword and
// prints a short process report for each crawled start URL.
// NOTE(review): $keyword and $lb are not defined in this script; presumably
// they are supplied by whatever includes it - confirm against the caller.

// Crawling can take long, so lift the execution time limit.
set_time_limit(0);
ini_set('display_errors', '1');

// Include libraries
include "ebayClass.php";

$i = 0;
$LIMIT_I = 5000; // upper bound on the number of start URLs processed

$crawler = new EbayCrawler();
$crawler->obeyRobotsTxt(true);
// Only receive HTML documents.
$crawler->addContentTypeReceiveRule("#text/html#");
// Skip links to images, scripts and feeds.
$crawler->addURLFilterRule("#\\.(jpg|jpeg|gif|png|js|rss|xml|atom|feed)\$# i");
// Small page limit (4) for testing.
$crawler->setPageLimit(4);

// Each row carries the search keyword in slot 2. Guard the read so a missing
// $keyword does not print a warning into the report (display_errors is on).
$URLS = array(array('', '', isset($keyword) ? $keyword : ''));

while (isset($URLS[$i]) && $i < $LIMIT_I) {
    $row = $URLS[$i];
    // Turn the keyword into the ebay.es search URL.
    // NOTE(review): the keyword is concatenated as-is; it is assumed to be
    // URL-safe already - confirm against the caller.
    $row[2] = "http://www.ebay.es/sch/i.html?_sacat=0&_nkw=" . $row[2];
    $crawler->setURL($row[2]);

    // Use a fresh user agent string for every crawl.
    $UAGENT = new UAgent();
    $crawler->setUserAgentString($UAGENT->random_uagent());

    $crawler->go();

    // REPORTING
    $report = $crawler->getProcessReport();
    echo "Links followed: " . $report->links_followed . " " . PHP_EOL;
    echo "Documents received: " . $report->files_received . " " . PHP_EOL;
    echo "Process runtime: " . $report->process_runtime . " sec" . PHP_EOL;
    // Optional extra line break; emits nothing (and no warning) when $lb is unset.
    echo isset($lb) ? $lb : '';
    flush();
    unset($report);

    //sleep(rand(1,3));
    $i++;
}
<?php
/**
 * User Agent Generator - Test
 *
 * Prints 100 user agent strings produced by UAgent::random(), one per line.
 *
 * @version 1.0
 * @link https://github.com/Dreyer/random-uagent
 * @author Dreyer
 */

// Report every error while testing.
error_reporting(E_ALL);
ini_set('display_errors', 1);

require 'uagent.php';

// Alternative calls that can be swapped in for UAgent::random() below:
#$ua = UAgent::generate();
#$ua = UAgent::generate( 'chrome', 'mac', array( 'en-US' ) );
#$ua = UAgent::generate( 'firefox', 'mac', array( 'en-US' ) );
#$ua = UAgent::generate( 'iexplorer', 'win', array( 'en-GB' ) );

// Optional tally of how often each string occurs:
#$occurrences = array();

$remaining = 100;
while ($remaining-- > 0) {
    $ua = UAgent::random();
    #$occurrences[$ua] = ( isset( $occurrences[$ua] ) ? $occurrences[$ua] + 1 : 1 );
    echo $ua, PHP_EOL;
}

#var_dump( $occurrences );