/**
 * Parses the raw fetch result into headers and body and stores the outcome on the request.
 *
 * Steps, in order:
 *  - read the HTTP status code and content type from the request's curl handle;
 *  - for text-like content types, re-encode the content to UTF-8 (when mbstring is
 *    available) and, for XML, rewrite the encoding declaration to match;
 *  - strip every leading "HTTP..." header block from $content, keeping only the last one;
 *  - parse that header block into a normalized name => value map;
 *  - store status code, status message, content type, headers, body and size on $request.
 *
 * A missing status code is reported as '404'; a curl error is reported as '500' with the
 * curl error text as the body.
 *
 * @param RemoteContentRequest $request request whose curl handle produced $content; receives the parsed result
 * @param string $content raw result content (header block(s) followed by the body)
 */
protected function parseResult(RemoteContentRequest $request, $content) {
  $headers = '';
  $body = '';
  $httpCode = curl_getinfo($request->handle, CURLINFO_HTTP_CODE);
  $contentType = curl_getinfo($request->handle, CURLINFO_CONTENT_TYPE);
  // The content type may be absent; use a string copy for the substring checks only,
  // the original value is still what gets stored on the request.
  $contentTypeText = (string) $contentType;

  // Attempt to convert all text'ish responses to UTF-8, especially the xml and json parsers
  // get upset if invalid UTF-8 is encountered
  $textTypes = array('text', 'html', 'json', 'xml', 'atom');
  $isTextType = false;
  foreach ($textTypes as $textType) {
    if (strpos($contentTypeText, $textType) !== false) {
      $isTextType = true;
      break;
    }
  }
  // Determined independently of the loop above: types such as "text/xml" or
  // "application/xhtml+xml" match 'text'/'html' first and would otherwise never be seen as XML.
  $isXml = strpos($contentTypeText, 'xml') !== false;

  if ($isTextType && function_exists('mb_convert_encoding')) {
    // Try to find the declared charset, first from a "charset=" token, then from an
    // xml declaration ("encoding=...")
    $charset = 'UTF-8';
    $matchedCharset = array();
    if (preg_match("/charset\\s*=\\s*([^\"' >]*)/ix", $content, $matchedCharset) === 1
        || preg_match("/encoding\\s*=\\s*[\\'\"]([^\"' >]*)/ix", $content, $matchedCharset) === 1) {
      $candidate = trim($matchedCharset[1]);
      if (($pos = strpos($candidate, "\n")) !== false) {
        // the match can run across a line break; only the first line is the charset
        $candidate = trim(substr($candidate, 0, $pos));
      }
      // the capture does not stop at parameter separators ("charset=utf-8; ...")
      $candidate = rtrim($candidate, ';,');
      if ($candidate !== '') {
        $charset = $candidate;
      }
    }

    // The charset comes from remote content, so it may be anything. mb_convert_encoding
    // rejects unknown encoding names, so only use the declared charset when mbstring
    // knows it (by name or alias); otherwise treat the content as UTF-8.
    $isSupported = false;
    foreach (mb_list_encodings() as $encodingName) {
      if (strcasecmp($encodingName, $charset) === 0) {
        $isSupported = true;
        break;
      }
      $aliases = mb_encoding_aliases($encodingName);
      if (is_array($aliases)) {
        foreach ($aliases as $alias) {
          if (strcasecmp($alias, $charset) === 0) {
            $isSupported = true;
            break 2;
          }
        }
      }
    }
    $sourceCharset = $isSupported ? $charset : 'UTF-8';

    // the xml and json parsers get very upset if there are invalid UTF-8 sequences in the
    // string, by recoding it any bad chars will be filtered out
    $content = mb_convert_encoding($content, 'UTF-8', $sourceCharset);

    // if the declared charset is not UTF-8 we now rewrite any xml declarations to match
    if ($isXml && strtoupper($charset) !== 'UTF-8') {
      // preg_quote: the charset is untrusted text and must not be interpreted as regex syntax
      $pattern = '/encoding=\\s*([\'"])' . preg_quote($charset, '/') . '\\s*\\1/i';
      $rewritten = preg_replace($pattern, 'encoding="UTF-8"', $content);
      if ($rewritten !== null) {
        $content = $rewritten;
      }
    }
  }

  // on redirects and such we get multiple headers back from curl it seems, we really only
  // want the last one
  while (strncmp($content, 'HTTP', 4) === 0 && ($headerEnd = strpos($content, "\r\n\r\n")) !== false) {
    $headers = substr($content, 0, $headerEnd);
    // cast: substr() yields false instead of '' for an empty remainder on older PHP versions
    $content = $body = (string) substr($content, $headerEnd + 4);
  }

  // Normalize header names to Capitalized-Dash-Separated form; lines without a
  // "name:" prefix (such as the status line) are skipped.
  $parsedHeaders = array();
  foreach (explode("\n", $headers) as $header) {
    $colonPos = strpos($header, ':');
    if ($colonPos) {
      $key = trim(substr($header, 0, $colonPos));
      $key = str_replace(' ', '-', ucwords(str_replace('-', ' ', $key)));
      $parsedHeaders[$key] = trim(substr($header, $colonPos + 1));
    }
  }

  if (!$httpCode) {
    $httpCode = '404';
  }
  if (curl_errno($request->handle)) {
    $httpCode = '500';
    $body = 'Curl error: ' . curl_error($request->handle);
  }

  $request->setHttpCode($httpCode);
  $request->setHttpCodeMsg($this->resolveHttpCode($httpCode));
  $request->setContentType($contentType);
  $request->setResponseHeaders($parsedHeaders);
  $request->setResponseContent($body);
  $request->setResponseSize(strlen($content));
}
/**
 * Parses the raw fetch result into headers and body and stores the outcome on the request.
 *
 * Steps, in order:
 *  - read the HTTP status code and content type from the request's curl handle;
 *  - for text-like content types, re-encode the content to UTF-8 (when mbstring is available);
 *  - strip every leading "HTTP..." header block from $content, keeping only the last one;
 *  - parse that header block into a normalized name => value map;
 *  - store status code, status message, content type, headers, body and size on $request.
 *
 * A missing status code is reported as '404'.
 *
 * @param RemoteContentRequest $request request whose curl handle produced $content; receives the parsed result
 * @param string $content raw result content (header block(s) followed by the body)
 */
private function parseResult(RemoteContentRequest $request, $content) {
  $headers = '';
  $body = '';
  $httpCode = curl_getinfo($request->handle, CURLINFO_HTTP_CODE);
  $contentType = curl_getinfo($request->handle, CURLINFO_CONTENT_TYPE);
  // The content type may be absent; use a string copy for the substring checks only,
  // the original value is still what gets stored on the request.
  $contentTypeText = (string) $contentType;

  // Attempt to convert all text'ish responses to UTF-8, especially the xml and json parsers
  // get upset if invalid UTF-8 is encountered
  $textTypes = array('text', 'html', 'json', 'xml', 'atom');
  $isTextType = false;
  foreach ($textTypes as $textType) {
    if (strpos($contentTypeText, $textType) !== false) {
      $isTextType = true;
      break;
    }
  }

  if ($isTextType && function_exists('mb_convert_encoding')) {
    $charset = 'UTF-8';
    // Match into a separate variable: preg_match() always overwrites its third argument,
    // so reusing $charset would replace the default with an (empty) array on no match.
    $matchedCharset = array();
    if (preg_match("/charset\\s*=\\s*([^\"' >]*)/ix", $content, $matchedCharset) === 1) {
      $candidate = trim($matchedCharset[1]);
      if (($pos = strpos($candidate, "\n")) !== false) {
        // the match can run across a line break; only the first line is the charset
        $candidate = trim(substr($candidate, 0, $pos));
      }
      // the capture does not stop at parameter separators ("charset=utf-8; ...")
      $candidate = rtrim($candidate, ';,');
      if ($candidate !== '') {
        $charset = $candidate;
      }
    }

    // The charset comes from remote content, so it may be anything. mb_convert_encoding
    // rejects unknown encoding names, so only use the declared charset when mbstring
    // knows it (by name or alias); otherwise treat the content as UTF-8.
    $isSupported = false;
    foreach (mb_list_encodings() as $encodingName) {
      if (strcasecmp($encodingName, $charset) === 0) {
        $isSupported = true;
        break;
      }
      $aliases = mb_encoding_aliases($encodingName);
      if (is_array($aliases)) {
        foreach ($aliases as $alias) {
          if (strcasecmp($alias, $charset) === 0) {
            $isSupported = true;
            break 2;
          }
        }
      }
    }
    if (!$isSupported) {
      $charset = 'UTF-8';
    }

    // the xml and json parsers get very upset if there are invalid UTF-8 sequences in the
    // string, by recoding it any bad chars will be filtered out
    $content = mb_convert_encoding($content, 'UTF-8', $charset);
  }

  // on redirects and such we get multiple headers back from curl it seems, we really only
  // want the last one
  while (strncmp($content, 'HTTP', 4) === 0 && ($headerEnd = strpos($content, "\r\n\r\n")) !== false) {
    $headers = substr($content, 0, $headerEnd);
    // cast: substr() yields false instead of '' for an empty remainder on older PHP versions
    $content = $body = (string) substr($content, $headerEnd + 4);
  }

  // Normalize header names to Capitalized-Dash-Separated form; lines without a
  // "name:" prefix (such as the status line) are skipped.
  $parsedHeaders = array();
  foreach (explode("\n", $headers) as $header) {
    $colonPos = strpos($header, ':');
    if ($colonPos) {
      $key = trim(substr($header, 0, $colonPos));
      $key = str_replace(' ', '-', ucwords(str_replace('-', ' ', $key)));
      $parsedHeaders[$key] = trim(substr($header, $colonPos + 1));
    }
  }

  if (!$httpCode) {
    $httpCode = '404';
  }

  $request->setHttpCode($httpCode);
  $request->setHttpCodeMsg($this->resolveHttpCode($httpCode));
  $request->setContentType($contentType);
  $request->setResponseHeaders($parsedHeaders);
  $request->setResponseContent($body);
  $request->setResponseSize(strlen($content));
}