/**
 * Parses the input code and returns the OPT XML tree.
 *
 * When $this->_mode == 0, an optional XML prolog and an optional DOCTYPE
 * are looked up at the very beginning of $code first. Both are cut off from
 * $code (which is passed by reference and therefore modified for the
 * caller) and, if $this->_tpl->prologRequired is off, attached to the
 * returned root node. In any other mode that step is skipped and the quirks
 * tag expression is used instead.
 *
 * The remaining code is then split into CDATA sections, comments and plain
 * fragments; tags found in the plain fragments are turned into element
 * nodes and the text between them is passed to _treeTextCompile().
 *
 * @param String $filename The file name (for debug purposes)
 * @param String &$code The code to parse
 * @return Opt_Xml_Root
 * @throws Opt_XmlInvalidProlog_Exception if "<?xml" is not closed with "?>"
 * @throws Opt_XmlInvalidDoctype_Exception if the doctype is not closed
 * @throws Opt_XmlInvalidCharacter_Exception on "-->" outside of a comment
 * @throws Opt_XmlInvalidAttribute_Exception if the attribute list cannot be compiled
 * @throws Opt_XmlInvalidTagStructure_Exception if a closing tag has attributes
 * @throws Opt_XmlInvalidOrder_Exception if a closing tag does not match the open element
 * @throws Opt_UnclosedTag_Exception if an element is still open at the end
 * @throws Opt_XmlRootElement_Exception if more than one root element is found
 *         (only when $this->_mode == 0 and singleRootNode is on)
 */
public function parse($filename, &$code)
{
    $current = $tree = new Opt_Xml_Root();
    $codeSize = strlen($code);

    // First we have to find the prolog and DTD. Then we will be able to parse tags.
    if ($this->_mode == 0) {
        // Find and parse XML prolog
        $endProlog = 0;
        $endDoctype = 0;
        if (substr($code, 0, 5) == '<?xml') {
            $endProlog = strpos($code, '?>', 5);
            if ($endProlog === false) {
                throw new Opt_XmlInvalidProlog_Exception('prolog ending is missing');
            }
            $values = $this->_compileProlog(substr($code, 5, $endProlog - 5));
            // Move past the "?>" delimiter.
            $endProlog += 2;
            if (!$this->_tpl->prologRequired) {
                // Attach the parsed prolog to the tree only if "prologRequired" is off.
                $tree->setProlog(new Opt_Xml_Prolog($values));
            }
        }

        // Skip white spaces (space, tab, CR, LF) between the prolog and the doctype.
        for ($i = $endProlog; $i < $codeSize; $i++) {
            if ($code[$i] != ' ' && $code[$i] != "\t" && $code[$i] != "\r" && $code[$i] != "\n") {
                break;
            }
        }

        // Try to find doctype at the new position.
        $possibleDoctype = substr($code, $i, 9);
        if ($possibleDoctype == '<!doctype' || $possibleDoctype == '<!DOCTYPE') {
            // OK, we've found it, now determine the doctype end. Nested "<...>"
            // pairs are counted so that only the matching ">" ends the doctype.
            $bracketCounter = 0;
            $doctypeStart = $i;
            for ($i += 9; $i < $codeSize; $i++) {
                if ($code[$i] == '<') {
                    $bracketCounter++;
                } elseif ($code[$i] == '>') {
                    if ($bracketCounter == 0) {
                        $endDoctype = $i;
                        break;
                    }
                    $bracketCounter--;
                }
            }
            if ($endDoctype == 0) {
                throw new Opt_XmlInvalidDoctype_Exception('doctype ending is missing');
            }
            if (!$this->_tpl->prologRequired) {
                $tree->setDtd(new Opt_Xml_Dtd(substr($code, $doctypeStart, $i - $doctypeStart + 1)));
            }
            // Move past the closing ">".
            $endDoctype++;
        } else {
            $endDoctype = $endProlog;
        }

        // OK, now skip that part. Note that this overwrites the caller's variable.
        $code = substr($code, $endDoctype, $codeSize);

        // In the quirks mode, some results from the regular expression parser are
        // moved by one position, so we must add some dynamics here.
        $attributeCell = 5;
        $endingSlashCell = 6;
        $tagExpression = $this->_rXmlTagExpression;
    } else {
        $tagExpression = $this->_rQuirksTagExpression;
        $attributeCell = 6;
        $endingSlashCell = 7;
    }

    // Split through the general groups (cdata-content)
    $groups = preg_split($this->_rCDataExpression, $code, 0, PREG_SPLIT_DELIM_CAPTURE);
    $groupCnt = count($groups);
    $groupState = 0;
    Opt_Xml_Cdata::$mode = $this->_mode;
    for ($k = 0; $k < $groupCnt; $k++) {
        // Process CDATA: state 1 means "inside of a CDATA section".
        if ($groupState == 0 && $groups[$k] == '<![CDATA[') {
            $cdata = new Opt_Xml_Cdata('');
            $cdata->set('cdata', true);
            $groupState = 1;
            continue;
        }
        if ($groupState == 1) {
            if ($groups[$k] == ']]>') {
                $current = $this->_treeTextAppend($current, $cdata);
                $groupState = 0;
            } else {
                $cdata->appendData($groups[$k]);
            }
            continue;
        }

        $subgroups = preg_split($this->_rCommentExpression, $groups[$k], 0, PREG_SPLIT_DELIM_CAPTURE);
        $subgroupCnt = count($subgroups);
        $subgroupState = 0;
        for ($i = 0; $i < $subgroupCnt; $i++) {
            $chunk = $subgroups[$i];

            // Process comments: state 1 means "inside of a comment".
            if ($subgroupState == 0 && $chunk == '<!--') {
                $commentNode = new Opt_Xml_Comment();
                $subgroupState = 1;
                continue;
            }
            if ($subgroupState == 1) {
                if ($chunk == '-->') {
                    $current->appendChild($commentNode);
                    $subgroupState = 0;
                } else {
                    $commentNode->appendData($chunk);
                }
                continue;
            } elseif ($chunk == '-->') {
                // A comment terminator without the opening delimiter.
                throw new Opt_XmlInvalidCharacter_Exception('-->');
            }

            // Find XML tags
            preg_match_all($tagExpression, $chunk, $result, PREG_SET_ORDER);
            /*
             * Fields of a single match, as they are read below:
             *  0 - original content (the whole tag)
             *  3 - /, if this is a closing tag
             *  4 - tag name
             *  $attributeCell   - attribute list (5, or 6 in the quirks mode)
             *  $endingSlashCell - /, if the tag has no subcontent (6, or 7 in the quirks mode)
             */
            $offset = 0;
            foreach ($result as $match) {
                // Copy the remaining text to the text node
                $id = strpos($chunk, $match[0], $offset);
                if ($id > $offset) {
                    $current = $this->_treeTextCompile($current, substr($chunk, $offset, $id - $offset));
                }
                $offset = $id + strlen($match[0]);

                // With PREG_SET_ORDER the trailing unmatched groups are missing.
                if (!isset($match[$endingSlashCell])) {
                    $match[$endingSlashCell] = '';
                }

                // Process the argument list
                $attributes = array();
                if (!empty($match[$attributeCell])) {
                    // Just for sure: the ending slash may have been captured together
                    // with the attributes, so strip it and remember that it was there.
                    $match[$attributeCell] = trim($match[$attributeCell]);
                    $oldLength = strlen($match[$attributeCell]);
                    $match[$attributeCell] = rtrim($match[$attributeCell], '/');
                    if (strlen($match[$attributeCell]) != $oldLength) {
                        $match[$endingSlashCell] = '/';
                    }
                    $attributes = $this->_compileAttributes($match[$attributeCell]);
                    if (!is_array($attributes)) {
                        throw new Opt_XmlInvalidAttribute_Exception($match[0]);
                    }
                }

                // Recognize the tag type
                if ($match[3] != '/') {
                    // Opening tag
                    $node = new Opt_Xml_Element($match[4]);
                    $node->set('single', $match[$endingSlashCell] == '/');
                    foreach ($attributes as $name => $value) {
                        $node->addAttribute(new Opt_Xml_Attribute($name, $value));
                    }
                    $current = $this->_treeNodeAppend($current, $node, $match[$endingSlashCell] != '/');
                } else {
                    // Closing tag: no attributes allowed, and it must match
                    // the element that is currently open.
                    if (count($attributes) > 0) {
                        throw new Opt_XmlInvalidTagStructure_Exception($match[0]);
                    }
                    if ($current instanceof Opt_Xml_Element) {
                        if ($current->getXmlName() != $match[4]) {
                            throw new Opt_XmlInvalidOrder_Exception($match[4], $current->getXmlName());
                        }
                    } else {
                        throw new Opt_XmlInvalidOrder_Exception($match[4], 'NULL');
                    }
                    $current = $this->_treeJumpOut($current);
                }
            }

            // The text located after the last tag of this fragment.
            $chunkLength = strlen($chunk);
            if ($chunkLength > $offset) {
                $current = $this->_treeTextCompile($current, substr($chunk, $offset, $chunkLength - $offset));
            }
        }
    }

    // Testing if everything was closed.
    if ($current !== $tree) {
        // Error handling - determine the name of the unclosed tag.
        // NOTE(review): assumes that an Opt_Xml_Element ancestor always exists
        // when $current is not the root - confirm against the _tree*() helpers.
        while (!$current instanceof Opt_Xml_Element) {
            $current = $current->getParent();
        }
        throw new Opt_UnclosedTag_Exception($current->getXmlName());
    }

    if ($this->_mode == 0 && $this->_tpl->singleRootNode) {
        // TODO: The current code does not check the contents of Opt_Text_Nodes and other root elements
        // that may contain invalid and valid XML syntax at the same time.
        // For now, this code is frozen, we'll think a bit about it in the future. Maybe nobody
        // will notice this :)
        $elementFound = false;
        foreach ($tree as $item) {
            if ($item instanceof Opt_Xml_Element) {
                if ($elementFound) {
                    // Oops, there is already another root node!
                    throw new Opt_XmlRootElement_Exception($item->getXmlName());
                }
                $elementFound = true;
            }
        }
    }

    return $tree;
}
/**
 * Checks that a newly constructed Opt_Xml_Root reports 'Opt_Xml_Root'
 * from getType().
 */
public function testRootGetType()
{
    $root = new Opt_Xml_Root();
    $reportedType = $root->getType();

    $this->assertEquals('Opt_Xml_Root', $reportedType);
}