/**
  * Fetch a URL with cURL and return the response body.
  *
  * Redirects are followed, a user agent obtained from UAgent::random() is sent,
  * the transfer times out after 10 seconds, and every response header line is
  * handed to Common::curlResponseHeaderCallback.
  *
  * @param String $url URL to request
  * @param String $type 'post' sends $data as the request body; any other value leaves cURL's default method
  * @param String or Array $data POST body: a string is sent as-is, an array is URL-encoded into key=value pairs
  * @param Array or String $headers request header lines ("Name: value"); '' sends none
  * @param Array or String $cookie cookie pairs ("name=value"), joined with ';' when an array; '' sends none
  * @return String the response body, or false when the transfer fails
  */
 public static function getData($url, $type = 'get', $data = '', $headers = '', $cookie = '')
 {
     $ch = curl_init();
     curl_setopt($ch, CURLOPT_URL, $url);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
     curl_setopt($ch, CURLOPT_USERAGENT, UAgent::random());
     curl_setopt($ch, CURLOPT_TIMEOUT, 10);
     curl_setopt($ch, CURLOPT_HEADERFUNCTION, array('Common', "curlResponseHeaderCallback"));
     if ($type == 'post') {
         curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
         if (is_array($data)) {
             // CURLOPT_POST is a boolean switch, not a field count.
             curl_setopt($ch, CURLOPT_POST, true);
             // Encode keys/values and join with '&' (no trailing separator), so
             // values containing '&', '=' or spaces cannot corrupt the body.
             $data = http_build_query($data, '', '&');
         }
         curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
     }
     if (!empty($headers)) {
         // CURLOPT_HTTPHEADER requires an array; wrap a single header string.
         curl_setopt($ch, CURLOPT_HTTPHEADER, (array) $headers);
     }
     if (!empty($cookie)) {
         // implode() takes the separator first; the reversed order is rejected by PHP 8.
         $cookie_string = is_array($cookie) ? implode(';', $cookie) : (string) $cookie;
         curl_setopt($ch, CURLOPT_COOKIE, $cookie_string);
     }
     $result = curl_exec($ch);
     // Release the handle explicitly (needed on PHP < 8, harmless afterwards).
     curl_close($ch);
     return $result;
 }
 /**
  * Fetch a URL with cURL and return the response body.
  *
  * Redirects are followed, gzip-encoded responses are requested and decoded,
  * a user agent obtained from UAgent::random() is sent, and every response
  * header line is handed to Common::curlResponseHeaderCallback.
  *
  * @param String $url URL to request
  * @param String $type 'post' sends $data as the request body; any other value leaves cURL's default method
  * @param String or Array $data POST body: a string is sent as-is, an array is URL-encoded into key=value pairs
  * @param Array or String $headers request header lines ("Name: value"); '' sends none
  * @param Array or String $cookie cookie pairs ("name=value"), joined with ';' when an array; '' sends none
  * @param String $referer value for the Referer header; '' sends none
  * @param Integer $timeout transfer timeout in seconds; '' falls back to 10
  * @return String the response body, or false when the transfer fails
  */
 public function getData($url, $type = 'get', $data = '', $headers = '', $cookie = '', $referer = '', $timeout = '')
 {
     $ch = curl_init();
     curl_setopt($ch, CURLOPT_URL, $url);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
     // Record the outgoing request headers so curl_getinfo() can report them.
     curl_setopt($ch, CURLINFO_HEADER_OUT, true);
     curl_setopt($ch, CURLOPT_ENCODING, "gzip");
     curl_setopt($ch, CURLOPT_USERAGENT, UAgent::random());
     curl_setopt($ch, CURLOPT_TIMEOUT, $timeout == '' ? 10 : $timeout);
     curl_setopt($ch, CURLOPT_HEADERFUNCTION, array('Common', "curlResponseHeaderCallback"));
     if ($type == 'post') {
         curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
         if (is_array($data)) {
             // CURLOPT_POST is a boolean switch, not a field count.
             curl_setopt($ch, CURLOPT_POST, true);
             // Encode keys/values and join with '&' (no trailing separator), so
             // values containing '&', '=' or spaces cannot corrupt the body.
             $data = http_build_query($data, '', '&');
         }
         curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
     }
     if (!empty($headers)) {
         // CURLOPT_HTTPHEADER requires an array; wrap a single header string.
         curl_setopt($ch, CURLOPT_HTTPHEADER, (array) $headers);
     }
     if (!empty($cookie)) {
         // implode() takes the separator first; the reversed order is rejected by PHP 8.
         $cookie_string = is_array($cookie) ? implode(';', $cookie) : (string) $cookie;
         curl_setopt($ch, CURLOPT_COOKIE, $cookie_string);
     }
     if ($referer != '') {
         curl_setopt($ch, CURLOPT_REFERER, $referer);
     }
     $result = curl_exec($ch);
     curl_close($ch);
     return $result;
 }
Example #3
0
// Crawl eBay search result pages for a keyword and print a short report per run.
set_time_limit(0);
ini_set('display_errors', '1');
#Include libraries
include "ebayClass.php";
// $lb separates the per-URL reports. Nothing visible here defines it, so fall
// back to a newline unless the included file (or earlier code) already set it.
if (!isset($lb)) {
    $lb = PHP_EOL;
}
$i = 0;
$LIMIT_I = 5000;
$crawler = new EbayCrawler();
$crawler->obeyRobotsTxt(true);
$crawler->addContentTypeReceiveRule("#text/html#");
$crawler->addURLFilterRule("#\\.(jpg|jpeg|gif|png|js|rss|xml|atom|feed)\$# i");
// Page limit kept small (4) for testing
$crawler->setPageLimit(4);
// NOTE(review): $keyword is not defined in the visible code; it is presumably
// set before this point - confirm. It is appended to the search URL unencoded.
$URLS = array(array('', '', $keyword));
// Process queued rows until the list is exhausted or the hard cap is reached.
while (isset($URLS[$i]) && $i < $LIMIT_I) {
    $row = $URLS[$i];
    $row[2] = "http://www.ebay.es/sch/i.html?_sacat=0&_nkw=" . $row[2];
    $crawler->setURL($row[2]);
    // Use a fresh random user agent for every crawl run.
    $UAGENT = new UAgent();
    $crawler->setUserAgentString($UAGENT->random_uagent());
    $crawler->go();
    # REPORTING
    $report = $crawler->getProcessReport();
    echo "Links followed: " . $report->links_followed . " " . PHP_EOL;
    echo "Documents received: " . $report->files_received . " " . PHP_EOL;
    echo "Process runtime: " . $report->process_runtime . " sec" . PHP_EOL;
    echo $lb;
    flush();
    unset($report);
    //sleep(rand(1,3));
    $i++;
}
Example #4
0
<?php

/**
 * User Agent Generator - Test
 * @version 1.0
 * @link https://github.com/Dreyer/random-uagent
 * @author Dreyer
 */
// Report every error and show it on screen while exercising the generator.
error_reporting(E_ALL);
ini_set('display_errors', 1);
require 'uagent.php';

// Emit 100 random user-agent strings, one per line.
// Variants left disabled in the original sample, for targeted checks:
//   UAgent::generate();
//   UAgent::generate('chrome', 'mac', array('en-US'));
//   UAgent::generate('firefox', 'mac', array('en-US'));
//   UAgent::generate('iexplorer', 'win', array('en-GB'));
// A per-string occurrence tally plus var_dump() was also sketched there.
$remaining = 100;
while ($remaining-- > 0) {
    echo UAgent::random(), PHP_EOL;
}