/**
 * Get the character set declared in the markup of the current web page.
 *
 * Lookup order:
 *  1. The "content" attribute of content-type style meta elements, tried
 *     selector by selector; the first one whose value parses as a media
 *     type carrying a "charset" parameter wins.
 *  2. The "charset" attribute of a meta[charset] element.
 *
 * Side effect: $this->isContentTypeMalformed is reset to false on entry and
 * set to true only when the charset is taken from a meta[name=Content-Type]
 * element (as opposed to an http-equiv one).
 *
 * @return string|null The declared character set, or null if none is found
 */
public function getCharacterSet()
{
    $this->isContentTypeMalformed = false;

    // Selector => whether a charset found through it marks the content type as malformed.
    $metaContentTypeSelectors = array(
        'meta[http-equiv=Content-Type]' => false,
        'meta[http-equiv=content-type]' => false,
        'meta[name=Content-Type]' => true,
    );

    foreach ($metaContentTypeSelectors as $metaContentTypeSelector => $isMalformed) {
        // Stays null when no element matches the selector; when several match,
        // the value from the last element visited by each() is kept.
        $contentTypeString = null;

        // Error suppression is retained from the original implementation so that
        // callers see no new warnings from the DOM query.
        @$this->getDomQuery($metaContentTypeSelector)->each(
            function ($index, \DOMElement $domElement) use (&$contentTypeString) {
                $contentTypeString = $domElement->getAttribute('content');
            }
        );

        if (!is_string($contentTypeString)) {
            continue;
        }

        $mediaTypeParser = new InternetMediaTypeParser();
        $mediaTypeParser->setIgnoreInvalidAttributes(true);
        $mediaTypeParser->setAttemptToRecoverFromInvalidInternalCharacter(true);

        try {
            /* @var $mediaType InternetMediaType */
            $mediaType = $mediaTypeParser->parse($contentTypeString);

            if ($mediaType->hasParameter('charset')) {
                $this->isContentTypeMalformed = $isMalformed;

                return (string) $mediaType->getParameter('charset')->getValue();
            }
        } catch (\webignition\InternetMediaType\Parser\TypeParserException $typeParserException) {
            // Occurs when we can't parse the in-markup content type.
            // Ignore such exceptions to treat this as having no in-markup content type
            // and fall through to the next selector.
        }
    }

    // Fall back to a meta element with a charset attribute.
    $charsetString = '';
    @$this->getDomQuery('meta[charset]')->each(
        function ($index, \DOMElement $domElement) use (&$charsetString) {
            $charsetString = $domElement->getAttribute('charset');
        }
    );

    // An empty attribute value is treated the same as no declaration.
    if ($charsetString !== '') {
        return $charsetString;
    }

    return null;
}
/**
 * Parse the content-type header of an HTTP response into a media type object.
 *
 * The parser is configured to ignore invalid attributes and to attempt
 * recovery from invalid internal characters before parsing.
 *
 * @param HttpResponse $response Response whose "content-type" header is parsed
 * @return \webignition\InternetMediaType\InternetMediaType
 */
private function getContentTypeFromResponse(HttpResponse $response)
{
    $parser = new InternetMediaTypeParser();
    $parser->setAttemptToRecoverFromInvalidInternalCharacter(true);
    $parser->setIgnoreInvalidAttributes(true);

    $contentTypeHeader = $response->getHeader('content-type');

    return $parser->parse($contentTypeHeader);
}