Ejemplo n.º 1
0
 /**
  * Parses XML text into a tree of node objects.
  *
  * The text is first reduced: comments, the DOCTYPE declaration and all
  * <?...?> processing instructions are removed (version and encoding are
  * read from the latter and stored on the document). The remainder is then
  * tokenized on "<" and every token is handled as an end tag, a CDATA
  * section or a start/self-closing tag.
  *
  * @param string $strXMLText XML source. Passed by reference and modified
  *                           in place by the stripping steps described above.
  * @return CDataXMLDocument|false The document object, or false when no tag
  *                                can be found at the start of the text or a
  *                                closing tag does not match the open element.
  */
 function __parse(&$strXMLText)
 {
     // Predefined XML entities and their literal characters. "&amp;" has to
     // stay last so that "&amp;lt;" is decoded to "&lt;" and not to "<".
     static $search = array("&gt;", "&lt;", "&apos;", "&quot;", "&amp;");
     static $replace = array(">", "<", "'", '"', "&");
     $oXMLDocument = new CDataXMLDocument();
     // strip comments
     // (plain assignment: a function result cannot be bound by reference)
     $strXMLText = CDataXML::__stripComments($strXMLText);
     // strip the !doctype
     // The DOCTYPE declaration can consist of an internal DTD in square brackets
     $cnt = 0;
     $strXMLText = preg_replace("%<\\!DOCTYPE[^\\[>]*\\[.*?\\]>%is", "", $strXMLText, -1, $cnt);
     if ($cnt == 0) {
         // no DOCTYPE with an internal DTD was removed, try the simple form
         $strXMLText = preg_replace("%<\\!DOCTYPE[^>]*>%is", "", $strXMLText);
     }
     // get document version and encoding from header
     preg_match_all("#<\\?(.*?)\\?>#i", $strXMLText, $arXMLHeader_tmp);
     foreach ($arXMLHeader_tmp[0] as $strXMLHeader_tmp) {
         preg_match_all("/([a-zA-Z:]+=\".*?\")/i", $strXMLHeader_tmp, $arXMLParam_tmp);
         foreach ($arXMLParam_tmp[0] as $strXMLParam_tmp) {
             if ($strXMLParam_tmp != '') {
                 // name="value" -> array(name, value") ; the closing quote is cut off below
                 $arXMLAttribute_tmp = explode("=\"", $strXMLParam_tmp);
                 if ($arXMLAttribute_tmp[0] == "version") {
                     $oXMLDocument->version = substr($arXMLAttribute_tmp[1], 0, -1);
                 } elseif ($arXMLAttribute_tmp[0] == "encoding") {
                     $oXMLDocument->encoding = substr($arXMLAttribute_tmp[1], 0, -1);
                 }
             }
         }
     }
     // strip header
     $strXMLText = preg_replace("#<\\?.*?\\?>#", "", $strXMLText);
     $oXMLDocument->root =& $oXMLDocument->children;
     // Copy the object handle instead of aliasing the variable: with "=&" every
     // later assignment to $currentNode would overwrite $oXMLDocument as well,
     // and a text with unclosed elements would return an inner node.
     /** @var CDataXMLNode $currentNode */
     $currentNode = $oXMLDocument;
     $tok = strtok($strXMLText, "<");
     $arTag = explode(">", $tok);
     if (count($arTag) < 2) {
         //There was whitespace before <, so make another try
         $tok = strtok("<");
         $arTag = explode(">", $tok);
         if (count($arTag) < 2) {
             //It's a broken XML
             return false;
         }
     }
     while ($tok !== false) {
         // Token layout is "tag>text": the tag (name with attributes) and the
         // text that follows it. A token without ">" has no text part.
         $tagName = $arTag[0];
         $tagContent = isset($arTag[1]) ? $arTag[1] : "";
         // check if it's an endtag </tagname>
         if (strncmp($tagName, "/", 1) === 0) {
             $tagName = substr($tagName, 1);
             // strip out namespace; nameSpace:Name
             if ($this->delete_ns) {
                 $colonPos = strpos($tagName, ":");
                 if ($colonPos > 0) {
                     $tagName = substr($tagName, $colonPos + 1);
                 }
             }
             if ($currentNode->name != $tagName) {
                 // Error parsing XML, unmatched tags $tagName
                 return false;
             }
             $currentNode = $currentNode->_parent;
             // text after the end tag is stored on the parent (entities decoded)
             if (!$this->TrimWhiteSpace || trim($tagContent) != "") {
                 $currentNode->content = str_replace($search, $replace, $tagContent);
             }
         } elseif (strncmp($tagName, "![CDATA[", 8) === 0) {
             // CDATA may contain > and < chars, so the section was possibly
             // split by both tokenizers and has to be glued back together
             $cdata = "";
             for ($i = 0, $c = count($arTag); $i < $c; $i++) {
                 $cdata .= $arTag[$i] . ">";
                 if (substr($cdata, -3) == "]]>") {
                     // the piece after the terminator is the text following the section
                     $tagContent = isset($arTag[$i + 1]) ? $arTag[$i + 1] : "";
                     break;
                 }
             }
             if (substr($cdata, -3) != "]]>") {
                 // The section contains "<": put it back in place of the ">"
                 // appended last and keep reading up to the terminator.
                 $cdata = substr($cdata, 0, -1) . "<";
                 do {
                     $tok = strtok(">");
                     // unfortunately strtok eats > followed by >
                     $cdata .= $tok . ">";
                     // until end of string or end of cdata found
                 } while ($tok !== false && substr($tok, -2) != "]]");
             }
             // cut off the leading "![CDATA[" and the trailing "]]>"
             $cdataSection = substr($cdata, 8, -3);
             // new CDATA node
             $subNode = new CDataXMLNode();
             $subNode->name = "cdata-section";
             $subNode->content = $cdataSection;
             $currentNode->children[] = $subNode;
             $currentNode->content .= $subNode->content;
             // text after the section replaces the content (entities decoded)
             if (!$this->TrimWhiteSpace || trim($tagContent) != "") {
                 $currentNode->content = str_replace($search, $replace, $tagContent);
             }
         } else {
             // normal start tag
             // The name ends at the first space or newline; a separator at
             // position 0 is ignored, 0 means "no attribute part".
             $tagNameEnd = 0;
             foreach (array(" ", "\n") as $separator) {
                 $separatorPos = strpos($tagName, $separator);
                 if ($separatorPos !== false && $separatorPos > 0
                     && ($tagNameEnd === 0 || $separatorPos < $tagNameEnd)) {
                     $tagNameEnd = $separatorPos;
                 }
             }
             if ($tagNameEnd > 0) {
                 $justName = substr($tagName, 0, $tagNameEnd);
             } else {
                 $justName = $tagName;
             }
             // strip out namespace; nameSpace:Name
             if ($this->delete_ns) {
                 $colonPos = strpos($justName, ":");
                 if ($colonPos > 0) {
                     $justName = substr($justName, $colonPos + 1);
                 }
             }
             // remove trailing / from the name if exists
             $justName = rtrim($justName, "/");
             $subNode = new CDataXMLNode();
             $subNode->_parent = $currentNode;
             $subNode->name = $justName;
             // find attributes
             if ($tagNameEnd > 0) {
                 $attributePart = substr($tagName, $tagNameEnd);
                 $attr = CDataXML::__parseAttributes($attributePart);
                 if ($attr != false) {
                     $subNode->attributes = $attr;
                 }
             }
             // text after the start tag becomes the node content (entities decoded)
             if (!$this->TrimWhiteSpace || trim($tagContent) != "") {
                 $subNode->content = str_replace($search, $replace, $tagContent);
             }
             $currentNode->children[] = $subNode;
             // descend unless the tag is self-closing (<name ... />)
             if (substr($tagName, -1) != "/") {
                 $currentNode = $subNode;
             }
         }
         //Next iteration
         $tok = strtok("<");
         $arTag = explode(">", $tok);
         //There was whitespace before < just after CDATA section, so make another try
         if (count($arTag) < 2 && strncmp($tagName, "![CDATA[", 8) === 0) {
             $currentNode->content .= $arTag[0];
             // NOTE(review): $tagContent still holds the text that followed the
             // CDATA section, so a non-empty value replaces what was appended on
             // the line above. Kept as it was - confirm that this is intended.
             if (!$this->TrimWhiteSpace || trim($tagContent) != "") {
                 $currentNode->content = str_replace($search, $replace, $tagContent);
             }
             $tok = strtok("<");
             $arTag = explode(">", $tok);
         }
     }
     return $oXMLDocument;
 }