/**
 * Launch a Google search and return the parsed result page.
 *
 * @param string|null $searchTerm the string to search. When null, the term
 *                                previously set with ->searchTerm($search)
 *                                (the "q" parameter) is used instead.
 * @param \GoogleUrl\SimpleProxyInterface|null $proxy optional proxy used to
 *                                proxify the request (ip, port, optional
 *                                login/password and proxy type).
 *
 * @return GoogleDOM the Google DOMDocument
 * @throws Exception when no search term is given and none was set before
 * @throws \GoogleUrl\Exception\ProxyException when curl could not resolve the proxy
 * @throws \GoogleUrl\Exception\CurlException when the HTTP query failed for any other reason
 * @throws \GoogleUrl\Exception\CaptachaException google detected us as a bot
 */
public function search($searchTerm = null, \GoogleUrl\SimpleProxyInterface $proxy = null)
{
    // Change the search term if one was given, otherwise make sure one is already set.
    if (null !== $searchTerm) {
        $this->searchTerm($searchTerm);
    } elseif ('' === (string) $this->param("q")) {
        // Explicit emptiness check: the former "!strlen(...) > 0" only worked
        // because of operator precedence and passed null to strlen().
        throw new Exception("Nothing to Search");
    }

    // Init curl.
    $c = new \GoogleUrl\Curl();
    $c->url = $this->__toString();

    // Let's be redirected if needed.
    $c->followLocation();

    // Use a true user agent, maybe better for true results.
    $c->useragent = $this->userAgent;

    // Accept-Language makes sure google uses the same language as asked.
    $c->HTTPHEADER = array("Accept-Language: " . $this->acceptLangage);

    // Set the proxy, with optional "login" or "login:password" credentials.
    if ($proxy) {
        $c->proxy = $proxy->getIp();
        $c->proxyport = $proxy->getPort();

        $login = $proxy->getLogin();
        if ($login) {
            $auth = $login;
            $psw = $proxy->getPassword();
            if ($psw) {
                $auth .= ":" . $psw;
            }
            $c->proxyuserpwd = $auth;
        }

        // Fall back to a plain http proxy when no type is provided.
        $proxyType = $proxy->getProxyType();
        $c->proxytype = $proxyType ? $proxyType : "http";
    }

    // Execute the request.
    $r = $c->exec();

    if (false === $r) {
        $errno = $c->errno();
        $message = "HTTP query failled [curl-error : {$errno} - " . $c->error() . " ] for the following URL : " . $this;

        // A proxy that cannot be resolved gets its own exception type so that
        // callers can tell it apart from other curl failures.
        if (CURLE_COULDNT_RESOLVE_PROXY == $errno) {
            throw new \GoogleUrl\Exception\ProxyException($message);
        }

        throw new \GoogleUrl\Exception\CurlException($message);
    }

    // Create the document.
    $doc = new GoogleDOM($this->param("q"), $this->getUrl(), $this->getPage(), $this->param(self::PARAM_NBRESULTS));

    // Silence libxml warnings while parsing the HTML, then put the libxml error
    // handling back in the state the caller had, instead of forcing it to off.
    $previousLibxmlState = libxml_use_internal_errors(true);
    $doc->loadHTML($r);
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlState);

    if ($doc->isCaptcha()) {
        throw new \GoogleUrl\Exception\CaptachaException();
    }

    return $doc;
}
/**
 * Build an instance of the called class from a simple proxy.
 *
 * The ip, port, login, password and proxy type are read from $proxy; the
 * remaining arguments are handed to the constructor unchanged.
 *
 * @param \GoogleUrl\SimpleProxyInterface $proxy the proxy to copy the connection settings from
 * @param mixed $lastRun    forwarded as-is to the constructor (default 0)
 * @param mixed $nextDelay  forwarded as-is to the constructor (default 0)
 * @param mixed $delayCount forwarded as-is to the constructor (default 0)
 * @param mixed $locked     forwarded as-is to the constructor (default false)
 *
 * @return static
 */
public static function fromSimpleProxy(
    \GoogleUrl\SimpleProxyInterface $proxy,
    $lastRun = 0,
    $nextDelay = 0,
    $delayCount = 0,
    $locked = false
) {
    // Read the connection settings in the same order the constructor expects them.
    $ip = $proxy->getIp();
    $port = $proxy->getPort();
    $login = $proxy->getLogin();
    $password = $proxy->getPassword();
    $type = $proxy->getProxyType();

    return new static($ip, $port, $login, $password, $type, $lastRun, $nextDelay, $delayCount, $locked);
}
/**
 * Returns the proxy string identifier, formatted as "ip:port".
 *
 * @param \GoogleUrl\SimpleProxyInterface $proxy the proxy to identify
 *
 * @return string
 */
private function __id(\GoogleUrl\SimpleProxyInterface $proxy)
{
    $ip = $proxy->getIp();
    $port = $proxy->getPort();

    return "{$ip}:{$port}";
}