/**
 * Parses the input code with XMLReader and returns the OPT XML tree.
 *
 * Elements, text, comments and CDATA sections are turned into OPT nodes;
 * every other node type reported by the reader (whitespace, processing
 * instructions, end tags, ...) is ignored. libxml problems collected while
 * reading do not abort the parsing: they are echoed as
 * "message (line)<br/>" after the whole input has been read.
 *
 * @param String $filename The file name (for debug purposes); not read by this method
 * @param String &$code The code to parse
 * @return Opt_Xml_Root
 */
public function parse($filename, &$code)
{
    // Collect libxml errors internally instead of emitting them as warnings,
    // and remember the previous setting so that it can be restored on exit.
    $previousErrorMode = libxml_use_internal_errors(true);

    $reader = new XMLReader();
    $reader->xml($code);
    $reader->setParserProperty(XMLReader::SUBST_ENTITIES, true);

    $root = $current = new Opt_Xml_Root();
    $firstElementMatched = false;
    $depth = 0;
    // The node created most recently; it becomes the parent once the reader
    // reports a deeper level.
    $optNode = null;

    // read() raises a PHP warning on malformed input even when libxml errors
    // are collected internally, hence the suppression operator.
    while (@$reader->read()) {
        // Follow the reader one level up or down the tree.
        if ($reader->depth < $depth) {
            $current = $current->getParent();
        } elseif ($reader->depth > $depth) {
            $current = $optNode;
        }

        switch ($reader->nodeType) {
            // XML elements
            case XMLReader::ELEMENT:
                $optNode = new Opt_Xml_Element($reader->name);

                // Parse element attributes, if there are any
                if ($reader->moveToFirstAttribute()) {
                    do {
                        $keepAttribute = true;
                        // The special "xmlns" prefix must be handled differently:
                        // the namespace is always registered in the root node.
                        if ($reader->prefix == 'xmlns') {
                            $ns = str_replace('xmlns:', '', $reader->name);
                            $root->addNamespace($ns, $reader->value);
                            // Let this attribute appear in the tree only if it does
                            // not represent an OPT special namespace.
                            $keepAttribute = !$this->_compiler->isNamespace($ns);
                        }
                        if ($keepAttribute) {
                            $optNode->addAttribute(new Opt_Xml_Attribute($reader->name, $reader->value));
                        }
                    } while ($reader->moveToNextAttribute());
                    // Go back to the element so that the properties read below
                    // (isEmptyElement) refer to it, not to the last attribute.
                    $reader->moveToElement();
                }

                // Set "rootNode" flag on the very first element
                if (!$firstElementMatched) {
                    $optNode->set('rootNode', true);
                    $firstElementMatched = true;
                }
                // Set "single" flag
                if ($reader->isEmptyElement) {
                    $optNode->set('single', true);
                }
                $current->appendChild($optNode);
                break;

            case XMLReader::TEXT:
                $this->_treeTextCompile($current, $reader->value);
                break;

            case XMLReader::COMMENT:
                $optNode = new Opt_Xml_Comment($reader->value);
                $current->appendChild($optNode);
                break;

            case XMLReader::CDATA:
                $cdata = new Opt_Xml_Cdata($reader->value);
                $cdata->set('cdata', true);

                if ($current instanceof Opt_Xml_Text) {
                    $current->appendChild($cdata);
                } else {
                    // Wrap the CDATA in a new text node, which becomes the current node.
                    $text = new Opt_Xml_Text();
                    $text->appendChild($cdata);
                    $current->appendChild($text);
                    $current = $text;
                }
                break;
        }
        $depth = $reader->depth;
    }

    // Fetch the collected errors BEFORE restoring the previous mode, because
    // disabling the internal error handling discards the error buffer.
    $errors = libxml_get_errors();
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);
    $reader->close();

    // Error reporting
    foreach ($errors as $error) {
        echo $error->message . ' (' . $error->line . ')<br/>';
    }

    return $root;
}
/**
 * Parses the input code and returns the OPT XML tree.
 *
 * In mode 0 the XML prolog and the DOCTYPE are detected first and cut off
 * from $code (which is passed by reference, so the caller's variable is
 * shortened as well). The rest is split into CDATA sections, comments, tags
 * and text with the regular expressions stored in the object.
 *
 * @param String $filename The file name (for debug purposes); not read by this method
 * @param String &$code The code to parse
 * @return Opt_Xml_Root
 * @throws Opt_XmlInvalidProlog_Exception If the prolog is not terminated with "?>"
 * @throws Opt_XmlInvalidDoctype_Exception If the doctype is not terminated
 * @throws Opt_XmlInvalidCharacter_Exception If "-->" appears outside of a comment
 * @throws Opt_XmlInvalidAttribute_Exception If the attribute list cannot be compiled
 * @throws Opt_XmlInvalidTagStructure_Exception If a closing tag carries attributes
 * @throws Opt_XmlInvalidOrder_Exception If a closing tag does not match the open element
 * @throws Opt_UnclosedTag_Exception If some element is still open at the end of the input
 * @throws Opt_XmlRootElement_Exception If more than one root element is found while
 *                                      the "singleRootNode" option is active in mode 0
 */
public function parse($filename, &$code)
{
    $current = $tree = new Opt_Xml_Root();
    $codeSize = strlen($code);

    // First we have to find the prolog and DTD. Then we will be able to parse tags.
    if ($this->_mode == 0) {
        // Find and parse XML prolog
        $endProlog = 0;
        $endDoctype = 0;
        if (substr($code, 0, 5) == '<?xml') {
            $endProlog = strpos($code, '?>', 5);
            if ($endProlog === false) {
                throw new Opt_XmlInvalidProlog_Exception('prolog ending is missing');
            }
            $values = $this->_compileProlog(substr($code, 5, $endProlog - 5));
            $endProlog += 2;
            if (!$this->_tpl->prologRequired) {
                // The prolog must be displayed
                $tree->setProlog(new Opt_Xml_Prolog($values));
            }
        }

        // Skip white spaces (space, tab, CR, LF) that follow the prolog.
        $i = $endProlog + strspn($code, " \t\r\n", $endProlog);

        // Try to find doctype at the new position.
        $possibleDoctype = substr($code, $i, 9);
        if ($possibleDoctype == '<!doctype' || $possibleDoctype == '<!DOCTYPE') {
            // OK, we've found it, now determine the doctype end. Nested "<...>"
            // pairs are counted so that only the outermost ">" ends the doctype.
            $bracketCounter = 0;
            $doctypeStart = $i;
            for ($i += 9; $i < $codeSize; $i++) {
                if ($code[$i] == '<') {
                    $bracketCounter++;
                } elseif ($code[$i] == '>') {
                    if ($bracketCounter == 0) {
                        $endDoctype = $i;
                        break;
                    }
                    $bracketCounter--;
                }
            }
            if ($endDoctype == 0) {
                throw new Opt_XmlInvalidDoctype_Exception('doctype ending is missing');
            }

            if (!$this->_tpl->prologRequired) {
                $tree->setDtd(new Opt_Xml_Dtd(substr($code, $doctypeStart, $endDoctype - $doctypeStart + 1)));
            }
            $endDoctype++;
        } else {
            $endDoctype = $endProlog;
        }
        // OK, now skip that part.
        $code = substr($code, $endDoctype, $codeSize);

        // In the quirks mode, some results from the regular expression parser are
        // moved by one position, so we must add some dynamics here.
        $attributeCell = 5;
        $endingSlashCell = 6;
        $tagExpression = $this->_rXmlTagExpression;
    } else {
        $tagExpression = $this->_rQuirksTagExpression;
        $attributeCell = 6;
        $endingSlashCell = 7;
    }

    // Split through the general groups (cdata-content)
    $groups = preg_split($this->_rCDataExpression, $code, 0, PREG_SPLIT_DELIM_CAPTURE);
    $groupCnt = sizeof($groups);
    $groupState = 0; // 1 while inside a CDATA section
    Opt_Xml_Cdata::$mode = $this->_mode;
    for ($k = 0; $k < $groupCnt; $k++) {
        // Process CDATA
        if ($groupState == 0 && $groups[$k] == '<![CDATA[') {
            $cdata = new Opt_Xml_Cdata('');
            $cdata->set('cdata', true);
            $groupState = 1;
            continue;
        }
        if ($groupState == 1) {
            if ($groups[$k] == ']]>') {
                $current = $this->_treeTextAppend($current, $cdata);
                $groupState = 0;
            } else {
                $cdata->appendData($groups[$k]);
            }
            continue;
        }

        $subgroups = preg_split($this->_rCommentExpression, $groups[$k], 0, PREG_SPLIT_DELIM_CAPTURE);
        $subgroupCnt = sizeof($subgroups);
        $subgroupState = 0; // 1 while inside a comment
        for ($i = 0; $i < $subgroupCnt; $i++) {
            // Process comments
            if ($subgroupState == 0 && $subgroups[$i] == '<!--') {
                $commentNode = new Opt_Xml_Comment();
                $subgroupState = 1;
                continue;
            }
            if ($subgroupState == 1) {
                if ($subgroups[$i] == '-->') {
                    $current->appendChild($commentNode);
                    $subgroupState = 0;
                } else {
                    $commentNode->appendData($subgroups[$i]);
                }
                continue;
            } elseif ($subgroups[$i] == '-->') {
                throw new Opt_XmlInvalidCharacter_Exception('-->');
            }

            // Find XML tags
            preg_match_all($tagExpression, $subgroups[$i], $result, PREG_SET_ORDER);
            /*
             * Cells of a single match, as they are used below:
             * 0 - the whole matched tag
             * 3 - "/", if this is a closing tag
             * 4 - tag name
             * $attributeCell (5, or 6 in quirks mode) - attribute list
             * $endingSlashCell (6, or 7 in quirks mode) - "/", if the tag is
             *     closed in place and has no subcontent
             */
            $offset = 0;
            foreach ($result as $match) {
                // Copy the text that precedes the tag to the text node
                $id = strpos($subgroups[$i], $match[0], $offset);
                if ($id > $offset) {
                    $current = $this->_treeTextCompile($current, substr($subgroups[$i], $offset, $id - $offset));
                }
                $offset = $id + strlen($match[0]);

                // With PREG_SET_ORDER the trailing groups that did not match are
                // missing from the result, so the cell may not exist.
                $endingSlash = isset($match[$endingSlashCell]) ? $match[$endingSlashCell] : '';

                // Process the argument list
                $attributes = array();
                if (!empty($match[$attributeCell])) {
                    // A slash left at the end of the attribute list also marks
                    // the tag as closed in place.
                    $attributeString = trim($match[$attributeCell]);
                    $withoutSlash = rtrim($attributeString, '/');
                    if (strlen($withoutSlash) != strlen($attributeString)) {
                        $endingSlash = '/';
                    }
                    $attributes = $this->_compileAttributes($withoutSlash);
                    if (!is_array($attributes)) {
                        throw new Opt_XmlInvalidAttribute_Exception($match[0]);
                    }
                }

                // Recognize the tag type
                if ($match[3] != '/') {
                    // Opening tag
                    $node = new Opt_Xml_Element($match[4]);
                    $node->set('single', $endingSlash == '/');
                    foreach ($attributes as $name => $value) {
                        $node->addAttribute(new Opt_Xml_Attribute($name, $value));
                    }
                    $current = $this->_treeNodeAppend($current, $node, $endingSlash != '/');
                } else {
                    // Closing tag: no attributes allowed, and it must match the
                    // element that is currently open.
                    if (sizeof($attributes) > 0) {
                        throw new Opt_XmlInvalidTagStructure_Exception($match[0]);
                    }
                    if (!$current instanceof Opt_Xml_Element) {
                        throw new Opt_XmlInvalidOrder_Exception($match[4], 'NULL');
                    }
                    if ($current->getXmlName() != $match[4]) {
                        throw new Opt_XmlInvalidOrder_Exception($match[4], $current->getXmlName());
                    }
                    $current = $this->_treeJumpOut($current);
                }
            }
            // Copy the text that follows the last tag
            if (strlen($subgroups[$i]) > $offset) {
                $current = $this->_treeTextCompile($current, substr($subgroups[$i], $offset, strlen($subgroups[$i]) - $offset));
            }
        }
    }

    // Testing if everything was closed.
    if ($current !== $tree) {
        // Error handling - determine the name of the unclosed tag.
        while (!$current instanceof Opt_Xml_Element) {
            $current = $current->getParent();
        }
        throw new Opt_UnclosedTag_Exception($current->getXmlName());
    }

    if ($this->_mode == 0 && $this->_tpl->singleRootNode) {
        // TODO: The current code does not check the contents of Opt_Text_Nodes and other root elements
        // that may contain invalid and valid XML syntax at the same time.
        // For now, this code is frozen, we'll think a bit about it in the future. Maybe nobody
        // will notice this :)
        $elementFound = false;
        foreach ($tree as $item) {
            if ($item instanceof Opt_Xml_Element) {
                if ($elementFound) {
                    // Oops, there is already another root node!
                    throw new Opt_XmlRootElement_Exception($item->getXmlName());
                }
                $elementFound = true;
            }
        }
    }

    return $tree;
}
/**
 * Checks that getType() called on an Opt_Xml_Cdata node yields 'Opt_Xml_Cdata'.
 */
public function testCdataGetType()
{
    $cdataNode = new Opt_Xml_Cdata('foo');
    $reportedType = $cdataNode->getType();

    $this->assertEquals('Opt_Xml_Cdata', $reportedType);
}