/**
 * Create a Curl object for the given URL, pre-configured for this client.
 *
 * The object is set to return the transfer as a string, is handed
 * $this->cookie for both reading and storing cookies, and is given a
 * fixed Firefox user agent string.
 *
 * @param string $url URL passed to the Curl constructor
 * @return Curl the configured Curl object
 */
public function getCurl($url)
{
    $userAgent = "Mozilla/5.0 (X11; Linux x86_64; rv:22.0) Gecko/20100101 Firefox/22.0";

    $handle = new Curl($url);
    $handle->setopt(CURLOPT_RETURNTRANSFER, true);

    // The same cookie value is used as both the source and the destination.
    $handle->readCookies($this->cookie);
    $handle->storeCookies($this->cookie);

    $handle->setUserAgent($userAgent);

    return $handle;
}
/**
 * Find the result page on which the item identified by $this->nid appears.
 *
 * The page at $url is fetched and its body is searched for the literal
 * text nid="<nid>". When the text is absent, the "next page" link is
 * extracted from the body and followed recursively after a random pause
 * of 2 to 4 seconds. The HTTP status code and progress messages are echoed.
 *
 * @param string $url URL of the page to inspect
 * @param int    $i   number of the page that $url represents (starts at 1)
 * @return int|null the page number on which the item was found; -1 when the
 *                  item was not found by page 20 or no next-page link could
 *                  be extracted; null when the response status is not 200
 */
public function getPage($url, $i = 1)
{
    $curl = new Curl();
    // The user agent has to be configured before the request is issued,
    // otherwise it cannot apply to this request.
    $curl->setUserAgent($this->getUserAgent());
    $curl->get($url, array());
    echo $curl->http_status_code . "\n";

    if (200 != $curl->http_status_code) {
        return null;
    }

    $body = $curl->response;
    $findme = 'nid="' . $this->nid . '"';

    // strpos() returns 0 for a match at the very start of the body, so the
    // result must be compared against false rather than tested for truthiness.
    if (strpos($body, $findme) !== false) {
        return $i;
    }

    // Give up once the page limit has been reached.
    if ($i >= 20) {
        return -1;
    }

    $nextPagePattern = "/<\\/a><a href=\"\\/(.*?)\" class=\"page-next\" trace='srp_select_pagedown'>/i";
    preg_match_all($nextPagePattern, $body, $match);

    // empty() avoids an undefined-offset notice when nothing matched.
    if (empty($match[1][0])) {
        print_r($match);
        return -1;
    }

    $url = $this->taobaoSearchBaseUrl . $match[1][0];

    // Pause between page requests.
    $sleepSecond = rand(2, 4);
    sleep($sleepSecond);

    $i++;
    echo $i . " not found\n";

    return $this->getPage($url, $i);
}
/**
 * Checks that the user agent set on the Curl object equals the value
 * returned by $this->server() for the HTTP_USER_AGENT key.
 *
 * NOTE(review): $this->server() presumably performs a GET request against
 * a test endpoint and returns the requested server variable - confirm
 * against the helper's definition.
 */
public function testUserAgent()
{
    $expected = Curl::USER_AGENT;
    $this->curl->setUserAgent($expected);

    $query = array(
        'test' => 'server',
        'key' => 'HTTP_USER_AGENT',
    );
    $reported = $this->server('GET', $query);

    $this->assertEquals($expected, $reported);
}
/**
 * Load the log page for one date, either from the local copy or remotely.
 *
 * The page is downloaded when $reload is set, when no local copy exists in
 * $this->originalDataDir, or when $date is later than "now minus two days".
 * A downloaded page is saved locally and returned only if the response's
 * "Status-Code" header equals 200. In every other case the local copy is
 * read, provided it exists.
 *
 * @param string $date   date of the log; dashes are replaced by slashes to
 *                       build the remote path (presumably Y-m-d - confirm)
 * @param bool   $reload force a download even when a local copy exists
 * @return string|null page content, or NULL when nothing could be loaded
 */
private function loadData($date, $reload = FALSE)
{
    $this->log("load-data-start", $date);

    $cacheFile = $this->originalDataDir . "/" . $date . ".html";

    $requested = new DateTime($date);
    $threshold = new DateTime();
    $threshold->sub(new DateInterval("P2D"));

    $content = NULL;

    $mustDownload = $reload
        || !file_exists($cacheFile)
        || $requested->getTimestamp() > $threshold->getTimestamp();

    if ($mustDownload) {
        $curl = new \Curl();
        $curl->setUserAgent("Mozilla/5.0 (compatible; NetteJabberLogParser/" . static::VERSION . "; +http://nettejabber.jdem.cz)");

        // Request headers, applied in this order.
        // NOTE(review): the HTTP_* names look like server variable keys
        // rather than wire header names - confirm what setHeader() expects.
        $headers = array(
            "HTTP_ACCEPT" => "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
            "HTTP_ACCEPT_LANGUAGE" => "cs-cz,cs,en-us;q=0.7,en;q=0.3",
            "HTTP_ACCEPT_ENCODING" => "gzip,deflate",
            "HTTP_ACCEPT_CHARSET" => "windows-1250,utf-8;q=0.7,*;q=0.7",
            "HTTP_KEEP_ALIVE" => 300,
            "HTTP_CONNECTION" => "keep-alive",
        );
        foreach ($headers as $name => $value) {
            $curl->setHeader($name, $value);
        }

        $remoteUrl = "http://nezmar.jabbim.cz/logs/nette@conf.netlab.cz/" . str_replace("-", "/", $date) . ".html";
        $response = $curl->get($remoteUrl);

        // Only a response reporting status 200 is stored and returned.
        if ($response->getHeader("Status-Code") == 200) {
            file_put_contents($cacheFile, $response->getBody());
            $content = $response->getBody();
        }
    } elseif (file_exists($cacheFile)) {
        $content = file_get_contents($cacheFile);
    }

    $this->log("load-data-end", $date);

    return $content;
}
<?php

// Example script: send a request with a custom User-Agent string and dump
// whatever the Curl object's get() call returns.
require_once '../src/Curl.php';

$userAgent = 'O mal raramente ataca o cauteloso. Com seus ouvidos escuta e com seus olhos observa. Assim explora todo homem prudente.';
$endpoint = 'http://httpbin.org/user-agent';

$curl = new Curl();

// get() is chained onto the return value of setUserAgent().
$result = $curl
    ->setUserAgent($userAgent)
    ->get($endpoint);

var_dump($result);