Пример #1
0
	/**
	 * Parses an XML string into a CUpdatesXMLDocument tree.
	 *
	 * The text is cleaned first (DOCTYPE, processing-instruction headers and
	 * comments are removed; "version" and "encoding" are copied from the
	 * headers onto the document), then scanned tag by tag. Every start tag
	 * and every CDATA section becomes a CUpdatesXMLNode appended to the
	 * children of the node that is currently open; text that precedes a "<"
	 * is stored in the content of the currently open node.
	 *
	 * Namespace prefixes ("ns:name") are dropped from element names.
	 *
	 * @param string $strXMLText XML source. Taken by reference and MODIFIED:
	 *                           on return it holds the cleaned text.
	 * @return CUpdatesXMLDocument|false The document, or false (after printing
	 *                           a message) when a closing tag does not match
	 *                           the open element, or a tag or CDATA section
	 *                           is not terminated.
	 */
	function &__parse(&$strXMLText)
	{
		$TagStack = array();

		// This function returns by reference, so failures must return a
		// variable rather than the literal false.
		$parseFailed = false;

		$oXMLDocument = new CUpdatesXMLDocument();

		// strip the !doctype
		$strXMLText = preg_replace("%<\!DOCTYPE.*?>%is", "", $strXMLText);

		// get document version and encoding from header
		preg_match_all("#<\?(.*?)\?>#i", $strXMLText, $arXMLHeader_tmp);
		foreach ($arXMLHeader_tmp[0] as $strXMLHeader_tmp)
		{
			preg_match_all("/([a-zA-Z:]+=\".*?\")/i", $strXMLHeader_tmp, $arXMLParam_tmp);
			foreach ($arXMLParam_tmp[0] as $strXMLParam_tmp)
			{
				// Each match looks like name="value"; after the split the
				// second part still carries the closing quote, dropped below.
				$arXMLAttribute_tmp = explode("=\"", $strXMLParam_tmp);
				$paramValue = substr($arXMLAttribute_tmp[1], 0, strlen($arXMLAttribute_tmp[1]) - 1);

				if ($arXMLAttribute_tmp[0] == "version")
					$oXMLDocument->version = $paramValue;
				elseif ($arXMLAttribute_tmp[0] == "encoding")
					$oXMLDocument->encoding = $paramValue;
			}
		}

		// strip header
		$strXMLText = preg_replace("#<\?.*?\?>#", "", $strXMLText);

		// strip comments
		$strXMLText = CUpdatesXML::__stripComments($strXMLText);

		// root aliases the children array, so nodes added later show up in both
		$oXMLDocument->root = &$oXMLDocument->children;
		$currentNode = &$oXMLDocument;

		$pos = 0;
		$endTagPos = 0;
		$textLength = strlen($strXMLText);
		while ($pos < $textLength)
		{
			if (substr($strXMLText, $pos, 1) === "<")
			{
				if (substr($strXMLText, $pos, 9) === "<![CDATA[")
				{
					// CDATA is handled before any tag parsing: its text may
					// contain ">" or name="value" pairs that are not markup.
					$cdataStart = $pos + 9;
					$cdataEnd = strpos($strXMLText, "]]>", $cdataStart);
					if ($cdataEnd === false)
					{
						print("Error parsing XML, unterminated CDATA section");
						return $parseFailed;
					}

					// new CDATA node
					unset($subNode);
					$subNode = new CUpdatesXMLNode();
					$subNode->name = "cdata-section";
					$subNode->content = substr($strXMLText, $cdataStart, $cdataEnd - $cdataStart);

					$currentNode->children[] = &$subNode;

					// continue scanning after "]]>"; $endTagPos marks its ">"
					$pos = $cdataEnd;
					$endTagPos = $cdataEnd + 2;
				}
				else
				{
					$endTagPos = strpos($strXMLText, ">", $pos);
					if ($endTagPos === false)
					{
						print("Error parsing XML, unterminated tag");
						return $parseFailed;
					}

					// tag name with attributes
					$tagName = substr($strXMLText, $pos + 1, $endTagPos - ($pos + 1));

					// check if it's an endtag </tagname>
					if (substr($tagName, 0, 1) === "/")
					{
						// whitespace is allowed before the closing bracket
						$tagName = rtrim(substr($tagName, 1), " \t\r\n");

						// strip out namespace; nameSpace:Name
						$colonPos = strpos($tagName, ":");
						if ($colonPos > 0)
							$tagName = substr($tagName, $colonPos + 1);

						// a closing tag with nothing open cannot match anything
						if (count($TagStack) == 0)
						{
							print("Error parsing XML, unmatched tags $tagName");
							return $parseFailed;
						}

						$lastNodeArray = array_pop($TagStack);
						$lastTag = $lastNodeArray["TagName"];

						// step back up to the parent of the element being closed
						$lastNode = &$lastNodeArray["ParentNodeObject"];

						unset($currentNode);
						$currentNode = &$lastNode;

						// compare as strings so numeric-looking names are not
						// treated as equal numbers
						if ((string)$lastTag !== (string)$tagName)
						{
							print("Error parsing XML, unmatched tags $tagName");
							return $parseFailed;
						}
					}
					else
					{
						// The element name ends at the first whitespace
						// character; 0 means "no attribute part".
						$tagNameEnd = strcspn($tagName, " \t\r\n");
						if ($tagNameEnd >= strlen($tagName))
							$tagNameEnd = 0;

						if ($tagNameEnd > 0)
							$justName = substr($tagName, 0, $tagNameEnd);
						else
							$justName = $tagName;

						// strip out namespace; nameSpace:Name
						$colonPos = strpos($justName, ":");
						if ($colonPos > 0)
							$justName = substr($justName, $colonPos + 1);

						// remove trailing / from the name if exists
						if (substr($justName, -1) === "/")
							$justName = substr($justName, 0, strlen($justName) - 1);

						// normal start tag
						unset($subNode);
						$subNode = new CUpdatesXMLNode();
						$subNode->name = $justName;

						$currentNode->children[] = &$subNode;

						// find attributes
						if ($tagNameEnd > 0)
						{
							$attributePart = substr($tagName, $tagNameEnd);

							$attr = CUpdatesXML::__parseAttributes($attributePart);
							if ($attr != false)
								$subNode->attributes = $attr;
						}

						// unless it's a oneliner <tagname />, the new element
						// stays open: remember its parent and descend into it
						if (substr($tagName, -1) !== "/")
						{
							array_push($TagStack,
								array("TagName" => $justName, "ParentNodeObject" => &$currentNode));

							unset($currentNode);
							$currentNode = &$subNode;
						}
					}
				}
			}

			$pos = strpos($strXMLText, "<", $pos + 1);

			if ($pos === false)
			{
				// end of document
				$pos = $textLength;
			}
			else
			{
				// text between the end of the last tag and the next "<"
				$tagContent = substr($strXMLText, $endTagPos + 1, $pos - ($endTagPos + 1));

				if (!$this->TrimWhiteSpace || trim($tagContent) !== "")
				{
					// convert special chars; &amp; goes last so that an
					// escaped entity such as &amp;lt; is not decoded twice
					$tagContent = str_replace(
						array("&gt;", "&lt;", "&apos;", "&quot;", "&amp;"),
						array(">", "<", "'", '"', "&"),
						$tagContent
					);

					$currentNode->content = $tagContent;
				}
			}
		}

		return $oXMLDocument;
	}