/**
 * A header value that itself contains a colon (here a URL) must be returned
 * in full under its header name by AmberNetworkUtils::extract_headers().
 */
public function testHeadersParseValueHasColon() {
  // The heredoc body and its closing marker stay at column 0: any leading
  // whitespace would become part of the raw header text being parsed.
  $raw = <<<EOD
HTTP/1.1 200 OK
Content-type: text/html
Accept-Ranges: bytes
ETag: "1805770918"
Last-Modified: Tue, 20 Nov 2012 02:11:45 GMT
Content-Length: 611
nnCoection: close
Date: Thu, 29 May 2014 20:53:13 GMT
Connection: Keep-alive
Content-Location: http://somwhere.else
EOD;
  $headers = AmberNetworkUtils::extract_headers($raw);
  // assertEquals() takes the expected value first, then the actual one, so
  // that a failure message reports the two values the right way round.
  $this->assertEquals("http://somwhere.else", $headers["Content-Location"]);
}
/**
 * The raw response spells the header "Content-type", while the lookup below
 * uses "Content-Type": the value must be retrievable under that key from
 * the result of AmberNetworkUtils::extract_headers().
 */
public function testHeadersParseCaseSensitive() {
  // The heredoc body and its closing marker stay at column 0: any leading
  // whitespace would become part of the raw header text being parsed.
  $raw = <<<EOD
HTTP/1.1 200 OK
Content-type: text/html
Accept-Ranges: bytes
ETag: "1805770918"
Last-Modified: Tue, 20 Nov 2012 02:11:45 GMT
Content-Length: 611
nnCoection: close
Date: Thu, 29 May 2014 20:53:13 GMT
Connection: Keep-alive
EOD;
  $headers = AmberNetworkUtils::extract_headers($raw);
  // assertEquals() takes the expected value first, then the actual one, so
  // that a failure message reports the two values the right way round.
  $this->assertEquals("text/html", $headers["Content-Type"]);
}
/**
 * Open a single URL and return its headers, its body and the cURL transfer
 * information. Redirects are handled here, rather than by using
 * CURLOPT_FOLLOWLOCATION: HTTP redirect responses and meta redirects
 * (as detected by find_meta_redirect()) are followed, up to 5 requests.
 * Adapted from http://slopjong.de/2012/03/31/curl-follow-locations-with-safe_mode-enabled-or-open_basedir-set/
 *
 * @param $url string of resource to download
 * @param $additional_options array of cURL options (CURLOPT_* => value);
 *   these take precedence over the defaults set in this function
 * @param $follow_redirects bool when FALSE only one request is made, and a
 *   redirect response is returned as received
 * @return array|bool array with the keys "headers" (result of
 *   extract_headers() on the header text), "body" (response text after the
 *   headers) and "info" (curl_getinfo() data of the last request), or FALSE
 *   if cURL could not be set up or the redirect limit was used up
 */
public static function open_single_url($url, $additional_options = array(), $follow_redirects = TRUE) {
  $options = array(
    CURLOPT_FAILONERROR => TRUE,
    CURLOPT_FOLLOWLOCATION => FALSE,
    CURLOPT_CONNECTTIMEOUT => 5,
    CURLOPT_TIMEOUT => 10,
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_HEADER => TRUE,
    CURLOPT_USERAGENT => AmberNetworkUtils::get_user_agent_string(),
    CURLOPT_ENCODING => '',
  );
  // HTTP status codes that are followed as redirects.
  $redirect_codes = array(301, 302, 303, 307, 308);
  $max_redirects = 5;

  $ch = curl_init($url);
  if ($ch === FALSE) {
    // Without a handle there is nothing to configure, execute or close.
    error_log(join(":", array(__FILE__, __METHOD__, "Error initializing CURL", $url)));
    return FALSE;
  }

  try {
    if (curl_setopt_array($ch, $additional_options + $options) === FALSE) {
      throw new RuntimeException(join(":", array(__FILE__, __METHOD__, "Error setting CURL options", $url, curl_error($ch))));
    }

    $current_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
    do {
      curl_setopt($ch, CURLOPT_URL, $current_url);
      $response = curl_exec($ch);
      $response_info = curl_getinfo($ch);

      if (in_array((int) $response_info['http_code'], $redirect_codes, TRUE)) {
        $target = $response_info['redirect_url'];
      } else {
        $target = AmberNetworkUtils::find_meta_redirect($response);
        if (!$target) {
          break; // Not a redirect, so we're done
        }
      }

      // A target without a scheme is relative to the URL that was just
      // requested, which is not necessarily the URL we started with.
      $current_url = self::resolve_redirect_url($current_url, $target);
    } while (--$max_redirects && $follow_redirects);

    curl_close($ch);
  } catch (RuntimeException $e) {
    error_log($e->getMessage());
    curl_close($ch);
    return FALSE;
  }

  if (!$max_redirects) {
    return FALSE; // We ran out of redirects without getting a result
  }

  /* Split into header and body */
  $header_size = $response_info['header_size'];
  $header = substr($response, 0, $header_size - 1);
  $body = substr($response, $header_size);
  $headers = AmberNetworkUtils::extract_headers($header);
  return array("headers" => $headers, "body" => $body, "info" => $response_info);
}

/**
 * Turn a redirect target into an absolute URL, using the URL that issued
 * the redirect as the base.
 *
 * @param $base_url string URL of the request that produced the redirect
 * @param $target string redirect target, absolute or relative
 * @return string URL to request next
 */
private static function resolve_redirect_url($base_url, $target) {
  // A target that already carries an http(s) scheme is used as it is.
  if (preg_match("/^https?:/i", $target)) {
    return $target;
  }

  $base = parse_url($base_url);
  if (!is_array($base) || !isset($base['scheme'], $base['host'])) {
    // The base cannot be taken apart, so join both parts with one slash.
    return rtrim($base_url, "/") . "/" . ltrim($target, "/");
  }

  // Protocol-relative target ("//host/path"): only the scheme is inherited.
  if (substr($target, 0, 2) === "//") {
    return $base['scheme'] . ":" . $target;
  }

  $authority = $base['scheme'] . "://";
  if (isset($base['user'])) {
    $authority .= $base['user'] . (isset($base['pass']) ? ":" . $base['pass'] : "") . "@";
  }
  $authority .= $base['host'] . (isset($base['port']) ? ":" . $base['port'] : "");

  // Root-relative target ("/path"): replaces the whole path of the base.
  if (substr($target, 0, 1) === "/") {
    return $authority . $target;
  }

  // Anything else is relative to the directory of the base path.
  $path = isset($base['path']) ? $base['path'] : "/";
  $last_slash = strrpos($path, "/");
  $directory = ($last_slash === FALSE) ? "/" : substr($path, 0, $last_slash + 1);
  return $authority . $directory . $target;
}