/**
 * Perform the given HTTP request with cURL and return the response.
 *
 * Requests whose URI starts with "file:///" are not sent over the network;
 * the response is built by HttpResponse::fromFile() instead.
 *
 * For all other URIs redirects are followed, the transfer is limited to
 * 10 seconds, and for "https" URIs the peer certificate and host name are
 * verified. If a basic auth user is set on the request, an
 * "Authorization: Basic ..." header is set on the request before sending.
 *
 * @param HttpRequest $request the request to send
 *
 * @return HttpResponse response carrying status code, headers and body
 *
 * @throws OutgoingHttpRequestException if cURL reports an error
 */
public static function makeRequest(HttpRequest $request)
{
    $requestUri = $request->getRequestUri()->getUri();

    // if the request is towards a file URL, return the response constructed
    // from file
    if (0 === strpos($requestUri, "file:///")) {
        return HttpResponse::fromFile($requestUri);
    }

    $httpResponse = new HttpResponse();

    $curlChannel = curl_init();
    curl_setopt($curlChannel, CURLOPT_URL, $requestUri);
    curl_setopt($curlChannel, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($curlChannel, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curlChannel, CURLOPT_TIMEOUT, 10);

    if ($request->getRequestMethod() === "POST") {
        curl_setopt($curlChannel, CURLOPT_POST, 1);
        curl_setopt($curlChannel, CURLOPT_POSTFIELDS, $request->getContent());
    }

    $basicAuthUser = $request->getBasicAuthUser();
    $basicAuthPass = $request->getBasicAuthPass();
    if (NULL !== $basicAuthUser) {
        $request->setHeader("Authorization", "Basic " . base64_encode($basicAuthUser . ":" . $basicAuthPass));
    }

    // Including HTTP headers in request
    $headers = $request->getHeaders(TRUE);
    if (!empty($headers)) {
        curl_setopt($curlChannel, CURLOPT_HTTPHEADER, $headers);
    }

    // Connect to SSL/TLS server, validate certificate and host
    if ($request->getRequestUri()->getScheme() === "https") {
        curl_setopt($curlChannel, CURLOPT_SSL_VERIFYPEER, 1);
        curl_setopt($curlChannel, CURLOPT_SSL_VERIFYHOST, 2);
    }

    // Callback to extract all the HTTP headers from the response...
    // In order to really correctly parse HTTP headers one would have to look at RFC 2616...
    curl_setopt($curlChannel, CURLOPT_HEADERFUNCTION, function ($curlChannel, $header) use ($httpResponse) {
        // Ignore Status-Line (RFC 2616, section 6.1); the minor version is
        // optional so that "HTTP/2 200" style lines are recognised as well
        if (0 !== preg_match('|^HTTP/\\d+(?:\\.\\d+)? [1-5]\\d\\d|', $header)) {
            return strlen($header);
        }

        // Only deal with header lines that contain a colon
        if (strpos($header, ":") !== FALSE) {
            // Split on the FIRST colon only: values such as URLs or dates
            // contain colons themselves and must not be truncated
            list($key, $value) = explode(":", trim($header), 2);
            $httpResponse->setHeader(trim($key), trim($value));
        }

        // cURL expects the number of bytes handled, otherwise it aborts
        return strlen($header);
    });

    $output = curl_exec($curlChannel);

    // Collect everything we need from the handle, then release it before
    // deciding whether to throw, so the handle is closed on the error path too
    $errorNumber = curl_errno($curlChannel);
    $errorMessage = curl_error($curlChannel);
    $statusCode = curl_getinfo($curlChannel, CURLINFO_HTTP_CODE);
    curl_close($curlChannel);

    if (0 !== $errorNumber) {
        throw new OutgoingHttpRequestException($errorMessage);
    }

    $httpResponse->setStatusCode($statusCode);
    $httpResponse->setContent($output);

    return $httpResponse;
}
/**
 * Fetch a URL over HTTP unless it is unchanged since the last crawl.
 *
 * If the stored elasticsearch document records a redirect location, that
 * location is fetched instead. The "accept" header is built from the
 * supported types of all given actions. Unless $force is set, an
 * If-Modified-Since header is sent based on the stored "processed" time.
 *
 * @param string  $url     URL to fetch
 * @param array   $actions Action class names or objects exposing a static
 *                         $supportedTypes array
 * @param boolean $force   When true, no If-Modified-Since header is sent
 *
 * @return Retrieved|false Retrieved HTTP response and elasticsearch document,
 *                         or false when the server answered 304 Not Modified
 *
 * @throws \Exception When the response status is neither 200 nor 304
 */
public function fetch($url, $actions, $force = false)
{
    $esDoc = $this->es->get($url);
    if (isset($esDoc->status->location) && $esDoc->status->location != '') {
        //TODO: what if location redirects change?
        $url = $esDoc->status->location;
        $esDoc = $this->es->get($url);
    }

    // Accumulate the types of ALL actions, not just the last one
    $types = array();
    foreach ($actions as $action) {
        $types = array_merge($types, $action::$supportedTypes);
    }
    $types = array_unique($types);

    $req = new HttpRequest($url);
    $req->setHeader('accept', implode(',', $types));

    if (!$force && $esDoc
        && isset($esDoc->status->processed)
        && $esDoc->status->processed != ''
    ) {
        $nCrawlTime = strtotime($esDoc->status->processed);
        // Skip the conditional header when the stored time is unparseable
        if ($nCrawlTime !== false) {
            // HTTP-date must end in the literal "GMT"; gmdate('r') would
            // produce a "+0000" offset instead
            $req->setHeader(
                'If-Modified-Since: '
                . gmdate('D, d M Y H:i:s', $nCrawlTime) . ' GMT'
            );
        }
    }

    $res = $req->send();
    if ($res->getStatus() === 304) {
        //not modified since last time, so don't crawl again
        Log::info("Not modified since last fetch");
        return false;
    } else {
        if ($res->getStatus() !== 200) {
            throw new \Exception("Response code is not 200 but " . $res->getStatus() . ", stopping");
        }
    }

    // The request may have been redirected; continue with the final URL
    $effUrl = Helper::removeAnchor($res->getEffectiveUrl());
    if ($effUrl != $url) {
        $this->storeRedirect($url, $effUrl);
        $url = $effUrl;
        $esDoc = $this->es->get($url);
    }

    //FIXME: etag, hash on content

    $retrieved = new Retrieved();
    $retrieved->httpRes = $res;
    $retrieved->esDoc = $esDoc;
    $retrieved->url = $url;

    return $retrieved;
}
/**
 * Adds an "Authorization" header to the request and afterwards
 * increments the internal counter.
 *
 * The header value is computed by getValueRepresentation() from the
 * request method and the request target, with the request parameters
 * and the URL parameters appended as a query string.
 *
 * @param   peer.http.HttpRequest $request
 */
public function sign(HttpRequest $request)
{
    $target = $request->target;

    // Collect request parameters first, then URL parameters
    $query = array();
    if (is_array($request->parameters)) {
        $query = array_merge($query, $request->parameters);
    }
    if ($request->getUrl()->hasParams()) {
        $query = array_merge($query, $request->getUrl()->getParams());
    }

    // Append "k=v" pairs joined by "&"; values are used verbatim
    if (count($query) > 0) {
        $pairs = array();
        foreach ($query as $name => $value) {
            $pairs[] = $name . '=' . $value;
        }
        $target .= '?' . implode('&', $pairs);
    }

    $authorization = new Header(
        'Authorization',
        $this->getValueRepresentation($request->method, $target)
    );
    $request->setHeader('Authorization', $authorization);

    // Increase internal counter
    $this->counter++;
}
/**
 * Setting the same header twice must leave only the value set last
 * in the generated request string.
 */
public function duplicateHeader()
{
    $expected = "GET / HTTP/1.1\r\nConnection: close\r\nHost: example.com\r\nX-Binford: 61000\r\n\r\n";

    $request = new HttpRequest(new URL('http://example.com/'));
    foreach (array(6100, 61000) as $value) {
        $request->setHeader('X-Binford', $value);
    }

    $this->assertEquals($expected, $request->getRequestString());
}
/**
 * Replaces the "cookie" header(s) of the given request with the cookies
 * applicable to its host and path.
 *
 * Cookies are taken from this object and, when the request is a different
 * object, from the request as well. They are written in groups of up to
 * three, each group joined by "; " into one "cookie" header.
 *
 * @param HttpRequest $req
 */
public function applyCookie($req)
{
    $host = $req->getHeader('host');
    $path = $req->getUrlParam('path');

    // gather cookies from this object and, if distinct, from the request
    $toSend = $this->fetchCookieToSend($host, $path);
    if ($req !== $this) {
        $toSend = array_merge($toSend, $req->fetchCookieToSend($host, $path));
    }

    // clear the existing header, then add the cookies three at a time
    $req->setHeader('cookie', null);
    $values = array_values($toSend);
    for ($offset = 0, $total = count($values); $offset < $total; $offset += 3) {
        $req->addHeader('cookie', implode('; ', array_slice($values, $offset, 3)));
    }
}
/**
 * Signs the HTTP request by setting its "Authorization" header to the
 * value returned by getValueRepresentation().
 *
 * @param   peer.http.HttpRequest $request
 */
public function sign(HttpRequest $request)
{
    $authorization = new Header('Authorization', $this->getValueRepresentation());
    $request->setHeader('Authorization', $authorization);
}