Beispiel #1
0
 /**
  * Launch a Google search and return the parsed result page.
  *
  * The request is sent with the configured user agent and an Accept-Language header,
  * follows redirects, and optionally goes through the given proxy.
  *
  * @param string|null $searchTerm the string to search. When null, the term previously
  *                                set with ->searchTerm($search) is used
  * @param \GoogleUrl\SimpleProxyInterface|null $proxy optional proxy used for the request.
  *                                When it reports no proxy type, "http" is used
  *
  * @return GoogleDOM the Google DOMDocument
  * @throws Exception when no search term is given and the "q" param is empty
  * @throws \GoogleUrl\Exception\ProxyException when curl reports CURLE_COULDNT_RESOLVE_PROXY
  * @throws \GoogleUrl\Exception\CurlException when the HTTP query fails for any other reason
  * @throws \GoogleUrl\Exception\CaptachaException google detected us as a bot
  */
 public function search($searchTerm = null, \GoogleUrl\SimpleProxyInterface $proxy = null)
 {
     /**======================
        * CHANGE SEARCH IF NEEDED
         ========================*/
     if (null !== $searchTerm) {
         $this->searchTerm($searchTerm);
     } elseif ('' === (string) $this->param("q")) {
         // explicit emptiness test: the former "!strlen(...) > 0" only worked because
         // "!" binds tighter than ">", and it passed a possibly-null value to strlen()
         throw new Exception("Nothing to Search");
     }
     /**=========
        * INIT CURL
         =========*/
     $c = new \GoogleUrl\Curl();
     $c->url = $this->__toString();
     /**==========
        * DO HEADERS
         ===========*/
     // let's be redirected if needed
     $c->followLocation();
     // use a true user agent, maybe better for true results
     $c->useragent = $this->userAgent;
     // accept-language to make sure google uses the same language as asked
     $c->HTTPHEADER = array("Accept-Language: " . $this->acceptLangage);
     /**=========
        * SET PROXY
          =========*/
     if ($proxy) {
         $c->proxy = $proxy->getIp();
         $c->proxyport = $proxy->getPort();
         $login = $proxy->getLogin();
         if ($login) {
             // credentials are "login" or "login:password" when a password is set
             $auth = $login;
             $psw = $proxy->getPassword();
             if ($psw) {
                 $auth .= ":" . $psw;
             }
             $c->proxyuserpwd = $auth;
         }
         $proxyType = $proxy->getProxyType();
         $c->proxytype = $proxyType ? $proxyType : "http";
     }
     /**========
        * EXECUTE
         =========*/
     $r = $c->exec();
     if (false === $r) {
         $errno = $c->errno();
         $message = "HTTP query failled [curl-error : {$errno} - " . $c->error() . " ] for the following URL : " . $this;
         // an unresolvable proxy gets its own exception type so callers can tell it apart
         if (CURLE_COULDNT_RESOLVE_PROXY == $errno) {
             throw new \GoogleUrl\Exception\ProxyException($message);
         }
         throw new \GoogleUrl\Exception\CurlException($message);
     }
     /**===============
        * CREATE DOCUMENT
         ================*/
     $doc = new GoogleDOM($this->param("q"), $this->getUrl(), $this->getPage(), $this->param(self::PARAM_NBRESULTS));
     // silence libxml warnings while parsing, then restore the setting that was active
     // before this call instead of forcing it to false (libxml_use_internal_errors()
     // returns the previous value)
     $previousLibxmlSetting = libxml_use_internal_errors(true);
     $doc->loadHTML($r);
     libxml_clear_errors();
     libxml_use_internal_errors($previousLibxmlSetting);
     if ($doc->isCaptcha()) {
         throw new \GoogleUrl\Exception\CaptachaException();
     }
     return $doc;
 }
Beispiel #2
0
 /**
  * Create an instance of the called class from a simple proxy.
  *
  * The ip, port, login, password and proxy type are read from the given proxy and
  * passed to the constructor, followed by the remaining arguments unchanged.
  *
  * @param \GoogleUrl\SimpleProxyInterface $proxy the proxy to read the connection settings from
  * @param mixed $lastRun    forwarded as-is to the constructor
  * @param mixed $nextDelay  forwarded as-is to the constructor
  * @param mixed $delayCount forwarded as-is to the constructor
  * @param mixed $locked     forwarded as-is to the constructor
  *
  * @return static
  */
 public static function fromSimpleProxy(\GoogleUrl\SimpleProxyInterface $proxy, $lastRun = 0, $nextDelay = 0, $delayCount = 0, $locked = false)
 {
     $ip = $proxy->getIp();
     $port = $proxy->getPort();
     $login = $proxy->getLogin();
     $password = $proxy->getPassword();
     $type = $proxy->getProxyType();

     return new static($ip, $port, $login, $password, $type, $lastRun, $nextDelay, $delayCount, $locked);
 }
Beispiel #3
0
 /**
  * Build the string identifier of a proxy, formatted as "ip:port".
  *
  * @param \GoogleUrl\SimpleProxyInterface $proxy the proxy to identify
  *
  * @return string the proxy ip and port joined by a colon
  */
 private function __id(\GoogleUrl\SimpleProxyInterface $proxy)
 {
     $ip = $proxy->getIp();
     $port = $proxy->getPort();

     return "{$ip}:{$port}";
 }