示例#1
0
 /**
  * Parse an XML document with the expat-style xml extension.
  *
  * A leading byte order mark is removed and, when the document starts with an
  * XML declaration, that declaration is rewritten so that its encoding
  * attribute carries the caller-supplied $encoding. Element and character
  * data events are dispatched to this object's tag_open, tag_close and cdata
  * methods. After parsing, current_line, current_column and current_byte are
  * updated; on failure error_code and error_string are set as well.
  *
  * @param string $data     XML document; passed by reference and modified in place
  * @param string $encoding Character encoding of $data
  * @return bool True when parsing succeeded, false otherwise
  */
 function parse(&$data, $encoding)
 {
     // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
     if (strtoupper($encoding) == 'US-ASCII') {
         $this->encoding = 'UTF-8';
     } else {
         $this->encoding = $encoding;
     }

     // Strip BOM. The marks are written as hex escapes so that the source
     // file stays valid text and the comparisons survive re-encoding.
     // The four-byte marks are tested first because the UTF-32LE mark
     // begins with the UTF-16LE mark.
     // UTF-32 Big Endian BOM
     if (substr($data, 0, 4) === "\x00\x00\xFE\xFF") {
         $data = substr($data, 4);
     }
     // UTF-32 Little Endian BOM
     elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00") {
         $data = substr($data, 4);
     }
     // UTF-16 Big Endian BOM
     elseif (substr($data, 0, 2) === "\xFE\xFF") {
         $data = substr($data, 2);
     }
     // UTF-16 Little Endian BOM
     elseif (substr($data, 0, 2) === "\xFF\xFE") {
         $data = substr($data, 2);
     }
     // UTF-8 BOM
     elseif (substr($data, 0, 3) === "\xEF\xBB\xBF") {
         $data = substr($data, 3);
     }

     // Rewrite a leading XML declaration ("<?xml" followed by whitespace)
     // so that it advertises the encoding we were told the data is in.
     if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\t\n\r ") && ($pos = strpos($data, '?>')) !== false) {
         $declaration = new SimplePie_XML_Declaration_Parser(substr($data, 5, $pos - 5));
         if ($declaration->parse()) {
             $data = substr($data, $pos + 2);
             $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . ($declaration->standalone ? 'yes' : 'no') . '"?>' . $data;
         } else {
             $this->error_string = 'SimplePie bug! Please report this!';
             return false;
         }
     }

     // Work around libxml bug: replace the predefined named entities with
     // the equivalent numeric character references before parsing.
     $data = str_replace('&lt;', '&#60;', $data);
     $data = str_replace('&gt;', '&#62;', $data);
     $data = str_replace('&amp;', '&#38;', $data);
     $data = str_replace('&apos;', '&#39;', $data);
     $data = str_replace('&quot;', '&#34;', $data);

     $return = true;

     // Create the parser
     $xml = xml_parser_create_ns($this->encoding, $this->separator);
     xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1);
     xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0);
     xml_set_object($xml, $this);
     xml_set_character_data_handler($xml, 'cdata');
     xml_set_element_handler($xml, 'tag_open', 'tag_close');

     // Parse!
     if (!xml_parse($xml, $data, true)) {
         $this->error_code = xml_get_error_code($xml);
         $this->error_string = xml_error_string($this->error_code);
         $return = false;
     }

     // Record where the parser stopped, whether or not it succeeded
     $this->current_line = xml_get_current_line_number($xml);
     $this->current_column = xml_get_current_column_number($xml);
     $this->current_byte = xml_get_current_byte_index($xml);
     xml_parser_free($xml);
     return $return;
 }
示例#2
0
 /**
  * Parse an XML document, preferring the xml extension and falling back to XMLReader.
  *
  * A leading byte order mark is removed and, when the document starts with an
  * XML declaration, that declaration is rewritten so that its encoding
  * attribute carries the caller-supplied $encoding. Element and character
  * data events are delivered to this object's tag_open, tag_close and cdata
  * methods by whichever parser is used.
  *
  * On failure error_code and error_string are set. The xml extension path
  * also records current_line, current_column and current_byte; the XMLReader
  * path records current_line and current_column only when an error occurred.
  *
  * @param string $data     XML document; passed by reference and modified in place
  * @param string $encoding Character encoding of $data
  * @return bool True when parsing succeeded, false otherwise
  */
 public function parse(&$data, $encoding)
 {
     // Every US-ASCII character is also a UTF-8 character, so parse as UTF-8
     if (strtoupper($encoding) === 'US-ASCII') {
         $this->encoding = 'UTF-8';
     } else {
         $this->encoding = $encoding;
     }

     // Strip BOM. The marks are written as hex escapes so that the source
     // file stays valid text and the comparisons survive re-encoding.
     // The four-byte marks are tested first because the UTF-32LE mark
     // begins with the UTF-16LE mark.
     // UTF-32 Big Endian BOM
     if (substr($data, 0, 4) === "\x00\x00\xFE\xFF") {
         $data = substr($data, 4);
     }
     // UTF-32 Little Endian BOM
     elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00") {
         $data = substr($data, 4);
     }
     // UTF-16 Big Endian BOM
     elseif (substr($data, 0, 2) === "\xFE\xFF") {
         $data = substr($data, 2);
     }
     // UTF-16 Little Endian BOM
     elseif (substr($data, 0, 2) === "\xFF\xFE") {
         $data = substr($data, 2);
     }
     // UTF-8 BOM
     elseif (substr($data, 0, 3) === "\xEF\xBB\xBF") {
         $data = substr($data, 3);
     }

     // Rewrite a leading XML declaration ("<?xml" followed by whitespace)
     // so that it advertises the encoding we were told the data is in.
     if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\t\n\r ") && ($pos = strpos($data, '?>')) !== false) {
         $declaration = new SimplePie_XML_Declaration_Parser(substr($data, 5, $pos - 5));
         if ($declaration->parse()) {
             $data = substr($data, $pos + 2);
             $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . ($declaration->standalone ? 'yes' : 'no') . '"?>' . $data;
         } else {
             $this->error_string = 'SimplePie bug! Please report this!';
             return false;
         }
     }

     $return = true;

     // Probe once (static, so the result is reused by later calls) whether
     // the xml extension yields character data for an element whose content
     // is an entity reference. NOTE(review): presumably this guards against
     // a libxml build that drops entities -- confirm against project history.
     static $xml_is_sane = null;
     if ($xml_is_sane === null) {
         $parser_check = xml_parser_create();
         xml_parse_into_struct($parser_check, '<foo>&amp;</foo>', $values);
         xml_parser_free($parser_check);
         $xml_is_sane = isset($values[0]['value']);
     }

     if ($xml_is_sane) {
         // Event-driven parse through the xml extension
         $xml = xml_parser_create_ns($this->encoding, $this->separator);
         xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1);
         xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0);
         xml_set_object($xml, $this);
         xml_set_character_data_handler($xml, 'cdata');
         xml_set_element_handler($xml, 'tag_open', 'tag_close');
         if (!xml_parse($xml, $data, true)) {
             $this->error_code = xml_get_error_code($xml);
             $this->error_string = xml_error_string($this->error_code);
             $return = false;
         }
         $this->current_line = xml_get_current_line_number($xml);
         $this->current_column = xml_get_current_column_number($xml);
         $this->current_byte = xml_get_current_byte_index($xml);
         xml_parser_free($xml);
         return $return;
     } else {
         // Fallback: walk the document with XMLReader and call the same
         // handlers by hand. Start from a clean libxml error state so the
         // check after the loop only sees errors from this document.
         libxml_clear_errors();
         $xml = new XMLReader();
         $xml->xml($data);
         while (@$xml->read()) {
             switch ($xml->nodeType) {
                 case constant('XMLReader::END_ELEMENT'):
                     // Namespaced names are passed as URI + separator + local name
                     if ($xml->namespaceURI !== '') {
                         $tagName = $xml->namespaceURI . $this->separator . $xml->localName;
                     } else {
                         $tagName = $xml->localName;
                     }
                     $this->tag_close(null, $tagName);
                     break;
                 case constant('XMLReader::ELEMENT'):
                     // Read this before moving onto the attributes, which
                     // repositions the reader away from the element node
                     $empty = $xml->isEmptyElement;
                     if ($xml->namespaceURI !== '') {
                         $tagName = $xml->namespaceURI . $this->separator . $xml->localName;
                     } else {
                         $tagName = $xml->localName;
                     }
                     $attributes = array();
                     while ($xml->moveToNextAttribute()) {
                         if ($xml->namespaceURI !== '') {
                             $attrName = $xml->namespaceURI . $this->separator . $xml->localName;
                         } else {
                             $attrName = $xml->localName;
                         }
                         $attributes[$attrName] = $xml->value;
                     }
                     $this->tag_open(null, $tagName, $attributes);
                     // An empty element produces no END_ELEMENT node, so close it here
                     if ($empty) {
                         $this->tag_close(null, $tagName);
                     }
                     break;
                 case constant('XMLReader::TEXT'):
                 case constant('XMLReader::CDATA'):
                     $this->cdata(null, $xml->value);
                     break;
             }
         }
         if ($error = libxml_get_last_error()) {
             $this->error_code = $error->code;
             $this->error_string = $error->message;
             $this->current_line = $error->line;
             $this->current_column = $error->column;
             return false;
         } else {
             return true;
         }
     }
 }
示例#3
0
 /**
  * Detect XML encoding, as per XML 1.0 Appendix F.1
  *
  * A byte order mark is checked first. Without one, the leading bytes are
  * matched against the characters "<?xml" as they appear in UTF-32BE,
  * UTF-32LE, UTF-16BE, UTF-16LE and ASCII-compatible encodings. When a
  * declaration is found and parses, its declared encoding is listed first,
  * followed by the encoding implied by the byte pattern.
  *
  * @todo Add support for EBCDIC
  * @param string $data XML data
  * @return array Possible encodings
  */
 public static function xml_encoding($data)
 {
     $encoding = array();

     // UTF-32 Big Endian BOM
     if (substr($data, 0, 4) === "\x00\x00\xFE\xFF") {
         $encoding[] = 'UTF-32BE';
     }
     // UTF-32 Little Endian BOM (tested before UTF-16LE, whose mark is a prefix of this one)
     elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00") {
         $encoding[] = 'UTF-32LE';
     }
     // UTF-16 Big Endian BOM
     elseif (substr($data, 0, 2) === "\xFE\xFF") {
         $encoding[] = 'UTF-16BE';
     }
     // UTF-16 Little Endian BOM
     elseif (substr($data, 0, 2) === "\xFF\xFE") {
         $encoding[] = 'UTF-16LE';
     }
     // UTF-8 BOM
     elseif (substr($data, 0, 3) === "\xEF\xBB\xBF") {
         $encoding[] = 'UTF-8';
     }
     // UTF-32 Big Endian Without BOM: "<?xml" at four bytes per character
     elseif (substr($data, 0, 20) === "\x00\x00\x00\x3C\x00\x00\x00\x3F\x00\x00\x00\x78\x00\x00\x00\x6D\x00\x00\x00\x6C") {
         // Search for the declaration terminator in the same encoding
         if ($pos = strpos($data, "\x00\x00\x00\x3F\x00\x00\x00\x3E")) {
             $parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32BE', 'UTF-8'));
             if ($parser->parse()) {
                 $encoding[] = $parser->encoding;
             }
         }
         $encoding[] = 'UTF-32BE';
     }
     // UTF-32 Little Endian Without BOM
     elseif (substr($data, 0, 20) === "\x3C\x00\x00\x00\x3F\x00\x00\x00\x78\x00\x00\x00\x6D\x00\x00\x00\x6C\x00\x00\x00") {
         if ($pos = strpos($data, "\x3F\x00\x00\x00\x3E\x00\x00\x00")) {
             $parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32LE', 'UTF-8'));
             if ($parser->parse()) {
                 $encoding[] = $parser->encoding;
             }
         }
         $encoding[] = 'UTF-32LE';
     }
     // UTF-16 Big Endian Without BOM: "<?xml" at two bytes per character
     elseif (substr($data, 0, 10) === "\x00\x3C\x00\x3F\x00\x78\x00\x6D\x00\x6C") {
         if ($pos = strpos($data, "\x00\x3F\x00\x3E")) {
             // The encoded prefix is 10 bytes long, so the declaration body
             // spans offsets 10 up to (not including) $pos
             $parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 10, $pos - 10), 'UTF-16BE', 'UTF-8'));
             if ($parser->parse()) {
                 $encoding[] = $parser->encoding;
             }
         }
         $encoding[] = 'UTF-16BE';
     }
     // UTF-16 Little Endian Without BOM
     elseif (substr($data, 0, 10) === "\x3C\x00\x3F\x00\x78\x00\x6D\x00\x6C\x00") {
         if ($pos = strpos($data, "\x3F\x00\x3E\x00")) {
             // Same 10-byte prefix as the big endian case
             $parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 10, $pos - 10), 'UTF-16LE', 'UTF-8'));
             if ($parser->parse()) {
                 $encoding[] = $parser->encoding;
             }
         }
         $encoding[] = 'UTF-16LE';
     }
     // US-ASCII (or superset)
     elseif (substr($data, 0, 5) === "\x3C\x3F\x78\x6D\x6C") {
         if ($pos = strpos($data, "\x3F\x3E")) {
             $parser = new SimplePie_XML_Declaration_Parser(substr($data, 5, $pos - 5));
             if ($parser->parse()) {
                 $encoding[] = $parser->encoding;
             }
         }
         $encoding[] = 'UTF-8';
     }
     // Fallback to UTF-8
     else {
         $encoding[] = 'UTF-8';
     }
     return $encoding;
 }
示例#4
0
	/**
	 * Detect XML encoding, as per XML 1.0 Appendix F.1
	 *
	 * A byte order mark is checked first. Without one, the leading bytes are
	 * matched against the characters "<?xml" as they appear in UTF-32BE,
	 * UTF-32LE, UTF-16BE, UTF-16LE and ASCII-compatible encodings. When a
	 * declaration is found and parses, its declared encoding is listed first,
	 * followed by the encoding implied by the byte pattern.
	 *
	 * @todo Add support for EBCDIC
	 * @param string $data XML data
	 * @return array Possible encodings
	 */
	public static function xml_encoding($data)
	{
		$encoding = array();

		// UTF-32 Big Endian BOM
		if (substr($data, 0, 4) === "\x00\x00\xFE\xFF")
		{
			$encoding[] = 'UTF-32BE';
		}
		// UTF-32 Little Endian BOM (tested before UTF-16LE, whose mark is a prefix of this one)
		elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00")
		{
			$encoding[] = 'UTF-32LE';
		}
		// UTF-16 Big Endian BOM
		elseif (substr($data, 0, 2) === "\xFE\xFF")
		{
			$encoding[] = 'UTF-16BE';
		}
		// UTF-16 Little Endian BOM
		elseif (substr($data, 0, 2) === "\xFF\xFE")
		{
			$encoding[] = 'UTF-16LE';
		}
		// UTF-8 BOM
		elseif (substr($data, 0, 3) === "\xEF\xBB\xBF")
		{
			$encoding[] = 'UTF-8';
		}
		// UTF-32 Big Endian Without BOM: "<?xml" at four bytes per character
		elseif (substr($data, 0, 20) === "\x00\x00\x00\x3C\x00\x00\x00\x3F\x00\x00\x00\x78\x00\x00\x00\x6D\x00\x00\x00\x6C")
		{
			if ($pos = strpos($data, "\x00\x00\x00\x3F\x00\x00\x00\x3E"))
			{
				$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32BE', 'UTF-8'));
				if ($parser->parse())
				{
					$encoding[] = $parser->encoding;
				}
			}
			$encoding[] = 'UTF-32BE';
		}
		// UTF-32 Little Endian Without BOM
		elseif (substr($data, 0, 20) === "\x3C\x00\x00\x00\x3F\x00\x00\x00\x78\x00\x00\x00\x6D\x00\x00\x00\x6C\x00\x00\x00")
		{
			if ($pos = strpos($data, "\x3F\x00\x00\x00\x3E\x00\x00\x00"))
			{
				$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32LE', 'UTF-8'));
				if ($parser->parse())
				{
					$encoding[] = $parser->encoding;
				}
			}
			$encoding[] = 'UTF-32LE';
		}
		// UTF-16 Big Endian Without BOM: "<?xml" at two bytes per character
		elseif (substr($data, 0, 10) === "\x00\x3C\x00\x3F\x00\x78\x00\x6D\x00\x6C")
		{
			if ($pos = strpos($data, "\x00\x3F\x00\x3E"))
			{
				// The encoded prefix is 10 bytes long, so the declaration body
				// spans offsets 10 up to (not including) $pos
				$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 10, $pos - 10), 'UTF-16BE', 'UTF-8'));
				if ($parser->parse())
				{
					$encoding[] = $parser->encoding;
				}
			}
			$encoding[] = 'UTF-16BE';
		}
		// UTF-16 Little Endian Without BOM
		elseif (substr($data, 0, 10) === "\x3C\x00\x3F\x00\x78\x00\x6D\x00\x6C\x00")
		{
			if ($pos = strpos($data, "\x3F\x00\x3E\x00"))
			{
				// Same 10-byte prefix as the big endian case
				$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 10, $pos - 10), 'UTF-16LE', 'UTF-8'));
				if ($parser->parse())
				{
					$encoding[] = $parser->encoding;
				}
			}
			$encoding[] = 'UTF-16LE';
		}
		// US-ASCII (or superset)
		elseif (substr($data, 0, 5) === "\x3C\x3F\x78\x6D\x6C")
		{
			if ($pos = strpos($data, "\x3F\x3E"))
			{
				$parser = new SimplePie_XML_Declaration_Parser(substr($data, 5, $pos - 5));
				if ($parser->parse())
				{
					$encoding[] = $parser->encoding;
				}
			}
			$encoding[] = 'UTF-8';
		}
		// Fallback to UTF-8
		else
		{
			$encoding[] = 'UTF-8';
		}
		return $encoding;
	}
示例#5
0
	/**
	 * Feed an XML document through the xml extension's event parser.
	 *
	 * Removes a leading byte order mark, rewrites a leading XML declaration so
	 * that its encoding attribute carries the caller-supplied $encoding, and
	 * then parses the result, routing events to this object's tag_open,
	 * tag_close and cdata methods. current_line, current_column and
	 * current_byte are updated after the parse; error_code and error_string
	 * are set when it fails.
	 *
	 * @param string $data     XML document; passed by reference and modified in place
	 * @param string $encoding Character encoding of $data
	 * @return bool True when parsing succeeded, false otherwise
	 */
	function parse(&$data, $encoding)
	{
		// US-ASCII is a subset of UTF-8, so such input is parsed as UTF-8
		$this->encoding = (strtoupper($encoding) === 'US-ASCII') ? 'UTF-8' : $encoding;

		// Byte order marks to strip, in test order. The four-byte marks come
		// first because the UTF-32LE mark begins with the UTF-16LE mark.
		$byte_order_marks = array(
			"\x00\x00\xFE\xFF", // UTF-32 Big Endian
			"\xFF\xFE\x00\x00", // UTF-32 Little Endian
			"\xFE\xFF",         // UTF-16 Big Endian
			"\xFF\xFE",         // UTF-16 Little Endian
			"\xEF\xBB\xBF",     // UTF-8
		);
		foreach ($byte_order_marks as $mark)
		{
			$mark_length = strlen($mark);
			if (substr($data, 0, $mark_length) === $mark)
			{
				// At most one mark is removed
				$data = substr($data, $mark_length);
				break;
			}
		}

		// A declaration is "<?xml", one whitespace character, and a terminator somewhere after
		$has_declaration = substr($data, 0, 5) === '<?xml'
			&& strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20")
			&& ($pos = strpos($data, '?>')) !== false;
		if ($has_declaration)
		{
			$declaration = new SimplePie_XML_Declaration_Parser(substr($data, 5, $pos - 5));
			if (!$declaration->parse())
			{
				$this->error_string = 'SimplePie bug! Please report this!';
				return false;
			}

			// Replace the original declaration with one naming the supplied encoding
			$remainder = substr($data, $pos + 2);
			$version = $declaration->version;
			$standalone = $declaration->standalone ? 'yes' : 'no';
			$data = '<?xml version="' . $version . '" encoding="' . $encoding . '" standalone="' . $standalone . '"?>' . $remainder;
		}

		// Set up a namespace-aware parser that calls back into this object
		$parser = xml_parser_create_ns($this->encoding, $this->separator);
		xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);
		xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
		xml_set_object($parser, $this);
		xml_set_character_data_handler($parser, 'cdata');
		xml_set_element_handler($parser, 'tag_open', 'tag_close');

		// Parse the whole document in a single, final chunk
		$succeeded = true;
		if (!xml_parse($parser, $data, true))
		{
			$succeeded = false;
			$this->error_code = xml_get_error_code($parser);
			$this->error_string = xml_error_string($this->error_code);
		}

		// Record where the parser stopped, whether or not it succeeded
		$this->current_line = xml_get_current_line_number($parser);
		$this->current_column = xml_get_current_column_number($parser);
		$this->current_byte = xml_get_current_byte_index($parser);
		xml_parser_free($parser);

		return $succeeded;
	}