Esempio n. 1
0
 /**
  * Parses the input code and returns the OPT XML tree.
  *
  * In mode 0 the method first looks for an XML prolog and a DOCTYPE at the
  * beginning of the code, optionally stores them in the tree and cuts them
  * off. Then the remaining code is split into CDATA sections, comments,
  * tags and plain text, which are appended to the tree one by one.
  *
  * Note that $code is taken by reference: in mode 0 it is overwritten with
  * the part of the code that follows the prolog and the DOCTYPE.
  *
  * @param String $filename The file name (for debug purposes)
  * @param String &$code The code to parse
  * @return Opt_Xml_Root
  * @throws Opt_XmlInvalidProlog_Exception If the prolog is not terminated with "?>".
  * @throws Opt_XmlInvalidDoctype_Exception If the DOCTYPE is not terminated.
  * @throws Opt_XmlInvalidCharacter_Exception If "-->" appears outside of a comment.
  * @throws Opt_XmlInvalidAttribute_Exception If the attribute list cannot be compiled.
  * @throws Opt_XmlInvalidTagStructure_Exception If a closing tag contains attributes.
  * @throws Opt_XmlInvalidOrder_Exception If a closing tag does not match the current element.
  * @throws Opt_UnclosedTag_Exception If some element is left unclosed at the end.
  * @throws Opt_XmlRootElement_Exception If more than one root element is found while
  *                                      the single root node option is enabled.
  */
 public function parse($filename, &$code)
 {
     $current = $tree = new Opt_Xml_Root();
     $codeSize = strlen($code);

     // The tag expression used in the quirks mode (mode other than 0) returns
     // the attributes and the ending slash moved by one position, so the cell
     // numbers are selected together with the expression.
     if ($this->_mode == 0) {
         // First we have to find the prolog and DTD. Then we will be able to parse tags.
         $endProlog = 0;
         $endDoctype = 0;

         // Find and parse XML prolog
         if (substr($code, 0, 5) === '<?xml') {
             $endProlog = strpos($code, '?>', 5);
             if ($endProlog === false) {
                 throw new Opt_XmlInvalidProlog_Exception('prolog ending is missing');
             }
             $values = $this->_compileProlog(substr($code, 5, $endProlog - 5));
             // Move right behind the "?>" delimiter.
             $endProlog += 2;
             if (!$this->_tpl->prologRequired) {
                 // The prolog must be displayed
                 $tree->setProlog(new Opt_Xml_Prolog($values));
             }
         }

         // Skip white spaces
         for ($i = $endProlog; $i < $codeSize; $i++) {
             $char = $code[$i];
             if ($char !== ' ' && $char !== "\t" && $char !== "\r" && $char !== "\n") {
                 break;
             }
         }

         // Try to find doctype at the new position.
         $possibleDoctype = substr($code, $i, 9);
         if ($possibleDoctype === '<!doctype' || $possibleDoctype === '<!DOCTYPE') {
             // OK, we've found it, now determine the doctype end. The counter
             // tracks nested "<...>" pairs, so that only the ">" which is not
             // paired with an inner "<" terminates the doctype.
             $bracketCounter = 0;
             $doctypeStart = $i;
             for ($i += 9; $i < $codeSize; $i++) {
                 if ($code[$i] === '<') {
                     $bracketCounter++;
                 } elseif ($code[$i] === '>') {
                     if ($bracketCounter == 0) {
                         $endDoctype = $i;
                         break;
                     }
                     $bracketCounter--;
                 }
             }
             // The doctype cannot end at position 0, so 0 means "not found".
             if ($endDoctype == 0) {
                 throw new Opt_XmlInvalidDoctype_Exception('doctype ending is missing');
             }
             if (!$this->_tpl->prologRequired) {
                 $tree->setDtd(new Opt_Xml_Dtd(substr($code, $doctypeStart, $endDoctype - $doctypeStart + 1)));
             }
             // Move right behind the closing ">".
             $endDoctype++;
         } else {
             $endDoctype = $endProlog;
         }

         // OK, now skip that part.
         $code = substr($code, $endDoctype, $codeSize);

         $tagExpression = $this->_rXmlTagExpression;
         $attributeCell = 5;
         $endingSlashCell = 6;
     } else {
         $tagExpression = $this->_rQuirksTagExpression;
         $attributeCell = 6;
         $endingSlashCell = 7;
     }

     // Split through the general groups (cdata-content)
     $groups = preg_split($this->_rCDataExpression, $code, 0, PREG_SPLIT_DELIM_CAPTURE);
     $groupCnt = count($groups);
     // 0 - outside of a CDATA section, 1 - inside of a CDATA section
     $groupState = 0;
     Opt_Xml_Cdata::$mode = $this->_mode;
     for ($k = 0; $k < $groupCnt; $k++) {
         // Process CDATA
         if ($groupState == 0 && $groups[$k] === '<![CDATA[') {
             $cdata = new Opt_Xml_Cdata('');
             $cdata->set('cdata', true);
             $groupState = 1;
             continue;
         }
         if ($groupState == 1) {
             // NOTE(review): the CDATA node is appended only when "]]>" is
             // found, so an unterminated section is never added to the tree.
             if ($groups[$k] === ']]>') {
                 $current = $this->_treeTextAppend($current, $cdata);
                 $groupState = 0;
             } else {
                 $cdata->appendData($groups[$k]);
             }
             continue;
         }

         // Outside of CDATA: split the group through the comments.
         $subgroups = preg_split($this->_rCommentExpression, $groups[$k], 0, PREG_SPLIT_DELIM_CAPTURE);
         $subgroupCnt = count($subgroups);
         // 0 - outside of a comment, 1 - inside of a comment
         $subgroupState = 0;
         for ($i = 0; $i < $subgroupCnt; $i++) {
             // Process comments
             if ($subgroupState == 0 && $subgroups[$i] === '<!--') {
                 $commentNode = new Opt_Xml_Comment();
                 $subgroupState = 1;
                 continue;
             }
             if ($subgroupState == 1) {
                 // NOTE(review): the comment node is appended only when "-->"
                 // is found, so an unterminated comment is never added to the tree.
                 if ($subgroups[$i] === '-->') {
                     $current->appendChild($commentNode);
                     $subgroupState = 0;
                 } else {
                     $commentNode->appendData($subgroups[$i]);
                 }
                 continue;
             }
             if ($subgroups[$i] === '-->') {
                 // Comment ending without the comment beginning.
                 throw new Opt_XmlInvalidCharacter_Exception('--&gt;');
             }

             // Find XML tags
             $text = $subgroups[$i];
             $textSize = strlen($text);
             preg_match_all($tagExpression, $text, $result, PREG_SET_ORDER);
             /*
              * Cells of a single $result item that are used below:
              *  0 - original content (the whole matched tag)
              *  3 - /, if this is a closing tag
              *  4 - tag name
              *  $attributeCell - attribute list (5, or 6 in quirks mode)
              *  $endingSlashCell - /, if the tag has no subcontent
              *                     (6, or 7 in quirks mode)
              * The meaning of the remaining cells depends on the tag
              * expressions, which are defined outside of this method.
              */
             $resultSize = count($result);
             $offset = 0;
             for ($j = 0; $j < $resultSize; $j++) {
                 $tag = $result[$j];

                 // Copy the remaining text to the text node
                 $id = strpos($text, $tag[0], $offset);
                 if ($id > $offset) {
                     $current = $this->_treeTextCompile($current, substr($text, $offset, $id - $offset));
                 }
                 $offset = $id + strlen($tag[0]);

                 // The ending slash cell is missing, if the group has not been matched.
                 $endingSlash = isset($tag[$endingSlashCell]) ? $tag[$endingSlashCell] : '';

                 // Process the argument list
                 $attributes = array();
                 if (!empty($tag[$attributeCell])) {
                     // Just for sure...
                     $trimmed = trim($tag[$attributeCell]);
                     // The ending slash may be captured as a part of the
                     // attribute list; detect it by the change of the length.
                     $attributeString = rtrim($trimmed, '/');
                     if (strlen($attributeString) != strlen($trimmed)) {
                         $endingSlash = '/';
                     }
                     $attributes = $this->_compileAttributes($attributeString);
                     if (!is_array($attributes)) {
                         throw new Opt_XmlInvalidAttribute_Exception($tag[0]);
                     }
                 }

                 // Recognize the tag type. The tag is either a closing one
                 // (cell 3 contains the slash) or an opening one.
                 if ($tag[3] == '/') {
                     // Closing tag - it must not have attributes and it must
                     // match the element that is currently opened.
                     if (count($attributes) > 0) {
                         throw new Opt_XmlInvalidTagStructure_Exception($tag[0]);
                     }
                     if (!$current instanceof Opt_Xml_Element) {
                         throw new Opt_XmlInvalidOrder_Exception($tag[4], 'NULL');
                     }
                     if ($current->getXmlName() != $tag[4]) {
                         throw new Opt_XmlInvalidOrder_Exception($tag[4], $current->getXmlName());
                     }
                     $current = $this->_treeJumpOut($current);
                 } else {
                     // Opening tag
                     $isSingle = ($endingSlash == '/');
                     $node = new Opt_Xml_Element($tag[4]);
                     $node->set('single', $isSingle);
                     foreach ($attributes as $name => $value) {
                         $node->addAttribute(new Opt_Xml_Attribute($name, $value));
                     }
                     $current = $this->_treeNodeAppend($current, $node, !$isSingle);
                 }
             }

             // Copy the text that follows the last tag.
             if ($textSize > $offset) {
                 $current = $this->_treeTextCompile($current, substr($text, $offset, $textSize - $offset));
             }
         }
     }

     // Testing if everything was closed.
     if ($current !== $tree) {
         // Error handling - determine the name of the unclosed tag.
         while (!$current instanceof Opt_Xml_Element) {
             $current = $current->getParent();
         }
         throw new Opt_UnclosedTag_Exception($current->getXmlName());
     }

     if ($this->_mode == 0 && $this->_tpl->singleRootNode) {
         // TODO: The current code does not check the contents of Opt_Text_Nodes and other root elements
         // that may contain invalid and valid XML syntax at the same time.
         // For now, this code is frozen, we'll think a bit about it in the future. Maybe nobody
         // will notice this :)
         $elementFound = false;
         foreach ($tree as $item) {
             if ($item instanceof Opt_Xml_Element) {
                 if ($elementFound) {
                     // Oops, there is already another root node!
                     throw new Opt_XmlRootElement_Exception($item->getXmlName());
                 }
                 $elementFound = true;
             }
         }
     }

     return $tree;
 }
Esempio n. 2
0
 /**
  * Checks that a newly created root node reports 'Opt_Xml_Root' as its type.
  */
 public function testRootGetType()
 {
     $root = new Opt_Xml_Root();
     $reportedType = $root->getType();

     $this->assertEquals('Opt_Xml_Root', $reportedType);
 }