/**
 * Checks XmlParser::getEncodingFromXmlTag() against several XML prologs.
 *
 * Each fixture pairs the expected return value with the raw document head
 * passed to the parser. The fixtures cover single- and double-quoted
 * attributes, a mixed-case encoding name (expected back in lower case) and
 * a prolog without any encoding attribute (expected to yield an empty string).
 */
public function testGetEncodingFromXmlTag()
{
    $fixtures = array(
        array('utf-8', "<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet"),
        array('utf-8', '<?xml version="1.0" encoding="UTF-8"?><feed xml:'),
        array('windows-1251', '<?xml version="1.0" encoding="Windows-1251"?><rss version="2.0">'),
        array('', "<?xml version='1.0'?><?xml-stylesheet"),
    );

    // Same assertions, same order as a hand-unrolled list: the first mismatch aborts the test.
    foreach ($fixtures as $fixture) {
        list($expectedEncoding, $documentHead) = $fixture;

        $this->assertEquals($expectedEncoding, XmlParser::getEncodingFromXmlTag($documentHead));
    }
}
/**
 * Build the parser from raw feed content.
 *
 * The encoding announced in the XML declaration is read first, then the
 * declaration is stripped and the remaining content is passed through
 * Encoding::convert(). The XML-declared encoding is used when it is
 * non-empty; otherwise the HTTP encoding is used. Finally the item
 * post-processor is created and the content generator and content filter
 * processors are registered on it.
 *
 * @param string $content       Feed content
 * @param string $http_encoding HTTP encoding (headers)
 * @param string $fallback_url  Fallback url when the feed provide relative or broken url
 */
public function __construct($content, $http_encoding = '', $fallback_url = '')
{
    $this->fallback_url = $fallback_url;

    // Read the declared encoding before the XML tag is removed from the content.
    $declared_encoding = XmlParser::getEncodingFromXmlTag($content);

    // Strip XML tag to avoid multiple encoding/decoding in the next XML processing
    $this->content = Filter::stripXmlTag($content);

    $log_line = get_called_class() . ': HTTP Encoding "' . $http_encoding . '" ; XML Encoding "' . $declared_encoding . '"';
    Logger::setMessage($log_line);

    // Encode everything in UTF-8; the XML declaration wins over the HTTP header when it is truthy.
    if ($declared_encoding) {
        $source_encoding = $declared_encoding;
    } else {
        $source_encoding = $http_encoding;
    }

    $this->content = Encoding::convert($this->content, $source_encoding);

    // NOTE(review): $this->config is read here but never assigned in this constructor;
    // presumably it is a property default or set elsewhere - confirm.
    $this->itemPostProcessor = new ItemPostProcessor($this->config);
    $this->itemPostProcessor->register(new ContentGeneratorProcessor($this->config));
    $this->itemPostProcessor->register(new ContentFilterProcessor($this->config));
}
/**
 * Build the parser from raw feed content.
 *
 * A DateParser is created first. The encoding announced in the XML
 * declaration is then read, the declaration is stripped and the remaining
 * content is passed through Encoding::convert(). The XML-declared encoding
 * is used when it is non-empty; otherwise the HTTP encoding is used. The
 * converted content is finally passed through Filter::normalizeData().
 *
 * @access public
 * @param string $content       Feed content
 * @param string $http_encoding HTTP encoding (headers)
 * @param string $fallback_url  Fallback url when the feed provide relative or broken url
 */
public function __construct($content, $http_encoding = '', $fallback_url = '')
{
    $this->date = new DateParser();
    $this->fallback_url = $fallback_url;

    // Read the declared encoding before the XML tag is removed from the content.
    $declared_encoding = XmlParser::getEncodingFromXmlTag($content);

    // Strip XML tag to avoid multiple encoding/decoding in the next XML processing
    $this->content = Filter::stripXmlTag($content);

    $log_line = get_called_class() . ': HTTP Encoding "' . $http_encoding . '" ; XML Encoding "' . $declared_encoding . '"';
    Logger::setMessage($log_line);

    // Encode everything in UTF-8; the XML declaration wins over the HTTP header when it is truthy.
    if ($declared_encoding) {
        $source_encoding = $declared_encoding;
    } else {
        $source_encoding = $http_encoding;
    }

    $converted = Encoding::convert($this->content, $source_encoding);
    $this->content = $converted;

    // Workarounds
    $this->content = Filter::normalizeData($this->content);
}