/**
 * Parses the content of the current APF DOM node. Extracts all tags contained in the current
 * document content. Each tag is converted into a child Document of the current tree element.
 * The tag definition place is remembered by a marker tag using the internal id of the DOM node.
 * <p/>
 * Since release 1.17 nested tag structures are supported. This means, that the APF parser
 * is able to handle symmetric structures like this:
 * <code>
 * <foo:bar>
 *    <foo:bar>
 *    </foo:bar>
 * </foo:bar>
 * </code>
 * Besides, the APF parser is able to handle asymmetric structures like
 * <code>
 * <foo:bar />
 * <foo:bar>
 *    <foo:bar>
 *       <foo:bar />
 *    </foo:bar>
 * </foo:bar>
 * </code>
 * Please note that using nested structures must be supported by the tag implementations.
 * <p/>
 * The APF parser is able to handle nested tag structures with self-containing tags (directly or
 * across multiple hierarchies) as of version 2.2. Thus, you can re-use tags across any hierarchy
 * with the same prefix and name at your convenience or defined tags with different prefix and/or
 * name as desired (e.g. using the same implementation).
 * <p/>
 * To protect against infinite loops with broken tag structures the search for the end of an
 * opening tag is limited to <em>XmlParser::$maxParserLoops</em> cycles. In case your project
 * requires a higher value, please set <em>XmlParser::$maxParserLoops</em> to an appropriate value.
 * A closing tag that is encountered without a preceding opening tag is reported as invalid
 * markup instead of being re-scanned endlessly.
 * <p/>
 * Processing is done in two phases: first, the positions, prefixes and names of all top-level
 * tags are collected; afterwards, each tag is replaced by a marker (<em>&lt;$objectId /&gt;</em>)
 * and a child node is created, initialized and notified via <em>onParseTime()</em>. Finally,
 * <em>onAfterAppend()</em> is called on every child of the current node.
 *
 * @return void
 * @throws ParserException In case of invalid markup (missing ">", closing tag without opening
 *                         tag, exceeded parser loops) or in case no tag implementation is
 *                         registered for a prefix/name combination.
 *
 * @author Christian Schäfer, Christian Achatz
 * @version
 * Version 0.1, 28.12.2006<br />
 * Version 0.2, 21.01.2007 (Bug-fix: a mixture of self- and exclusively closing tags lead to wrong parsing)<br />
 * Version 0.3, 31.01.2007 (Added context injection)<br />
 * Version 0.4, 09.04.2007 (Removed double attributes setting, added language injection)<br />
 * Version 0.5, 02.04.2008 (Bug-fix: the token is now displayed in the HTML error page)<br />
 * Version 0.6, 06.06.2009 (Improvement: content is not copied during parsing any more)<br />
 * Version 0.7, 30.12.2009 (Introduced benchmark marks for the onParseTime() event.)<br />
 * Version 0.8, 25.01.2013 (Re-writing of the tag parser to support nested tags with the same tag prefix and name)<br />
 * Version 0.9, 20.06.2014 (Re-writing entire parser for 2.2 to support nested, self-containing tags across multiple hierarchies)<br />
 */
protected function extractTagLibTags() {

    // The list of parsed tags within the current document. Each entry contains:
    // 's' = tag start position, 'e' = tag end position (exclusive),
    // 'p' = tag prefix (e.g. "foo" with tag "<foo:bar />"),
    // 'n' = tag name (e.g. "bar" with tag "<foo:bar />").
    $tags = [];

    /* @var $t BenchmarkTimer */
    $t = Singleton::getInstance(BenchmarkTimer::class);
    $benchId = '(' . get_class($this) . ') ' . $this->getObjectId() . '::onParseTime()';
    $t->start($benchId);

    // Position pointer for tag search. Introduced for performance reasons
    // to skip stuff that we already searched in.
    $offset = 0;

    while (($colon = strpos($this->content, ':', $offset)) !== false) {

        // Start tag searching goes as follows: take the (at most) 12 characters in front of
        // the colon and search for the last "<" in that sub-string. Tag prefixes must not be
        // longer than 10 characters - for security reasons. In case the tag starts right at
        // the beginning, the area to search gets smaller!
        $area = $colon >= 12 ? 12 : $colon;
        $start = strrpos(substr($this->content, $colon - $area, $area), '<');

        // no open tag found --> continue!
        if ($start === false) {
            $offset = $colon + 1;
            continue;
        }

        // translate the position within the sub-string into a position within the content
        $start = $colon - $area + $start;

        // Avoid issue with "<li>FOO:" constructs that will be recognized as tag,
        // AND
        // avoid issues with "<a href="/?:action=logout">Logout</a>" structures
        // to be recognized as a tag (see ID#266).
        $prefix = substr($this->content, $start, $colon - $start);
        if (strpos($prefix, '>') !== false || strpos($prefix, '"') !== false) {
            $offset = $colon + 1;
            continue;
        }

        // avoid issues with parsing HTML comments including colons (e.g. <!-- foo:bar -->)
        if (substr($this->content, $start, 4) === '<!--') {
            $offset = $colon + 1;
            continue;
        }

        $end = strpos($this->content, '>', $colon + 1);
        if ($end === false) {
            throw new ParserException('[Document::extractTagLibTags()] Error while parsing: "'
                    . $this->content . '". Invalid markup found. Please check your template code!', E_USER_ERROR);
        }

        // Closing tags belonging to an opening tag are consumed while searching for the
        // symmetric end tag below. Reaching a closing tag here means that it has no opening
        // counterpart. As the search offset would not advance for such a tag, it must be
        // rejected explicitly - otherwise the very same colon is found again and again.
        if (substr($this->content, $start, 2) === '</') {
            throw new ParserException('[Document::extractTagLibTags()] Error while parsing: "'
                    . $this->content . '". Closing tag without matching opening tag found at position '
                    . $start . '. Please check your template code!', E_USER_ERROR);
        }

        // ID#253: In case we are using an extended templating expression within a tag attribute
        // (e.g. "model[0]->getFoo()") the ending ">" is contained within the attribute and thus the first
        // strpos() produces wrong results. For this reason, search for the last ">" with an even number of
        // quotes in the string to fix this.
        $parserLoops = 0;
        while (substr_count(substr($this->content, $start, $end + 1 - $start), '"') % 2 !== 0) {

            $parserLoops++;
            if ($parserLoops > XmlParser::$maxParserLoops) {
                throw new ParserException('[Document::extractTagLibTags()] Error while parsing: "'
                        . $this->content . '". Maximum number of loops ("' . XmlParser::$maxParserLoops
                        . '") exceeded!', E_USER_ERROR);
            }

            $end = strpos($this->content, '>', $end + 1);
            if ($end === false) {
                throw new ParserException('[Document::extractTagLibTags()] Error while parsing: "'
                        . $this->content . '". Invalid markup found. Please check your template code!', E_USER_ERROR);
            }
        }

        // Determine whether we have a self-closing tag or not. This is important
        // within the following lines how to handle the tag.
        $selfClosing = substr($this->content, $end - 1, 1) === '/';

        $tagPrefix = substr($this->content, $start + 1, $colon - $start - 1);

        // Search for next space to gather tag name. In case there is no space at all or the
        // closing bracket comes first (tags w/o attributes such as "<foo:bar>" or "<foo:bar/>"),
        // the name ends at the bracket - corrected by minus one for "/>".
        $space = strpos($this->content, ' ', $colon);
        if ($space === false || $end < $space) {
            $space = $selfClosing ? $end - 1 : $end;
        }

        // trim() removes line breaks surrounding the name (e.g. name directly followed by a new line)
        $tagName = trim(substr($this->content, $colon + 1, $space - $colon - 1));

        if ($selfClosing) {
            $tagEnd = $end + 1;
        } else {

            // If we've got an opening/not self-closing tag, let's search for our pendent
            // closing tag. The token allows easier closing tag search.
            $token = $tagPrefix . ':' . $tagName;
            $tokenLength = strlen($token);
            $startTagLength = $tokenLength + 1; // 1 for "<"
            $endTagLength = $tokenLength + 3; // 3 for "</" plus ">"

            // For performance reasons: start right after the tag name and count the
            // current tag as already opened.
            $tokenPos = $space;
            $openingTagCount = 1;
            $closingTagCount = 0;

            // In case no symmetric closing tag is found, the end of the opening tag is used.
            $tagEndPos = $end;

            while (($tokenPos = strpos($this->content, $token, $tokenPos)) !== false) {

                $precedingChar = substr($this->content, $tokenPos - 1, 1);

                if ($precedingChar === '<') {
                    // Check for explicitly closing tag, because self-closing tags
                    // do not count searching for a symmetric tag hierarchy included
                    // in another tag structure. A missing ">" is treated as "not self-closing"
                    // instead of relying on arithmetic with strpos()'s false return value.
                    $bracket = strpos($this->content, '>', $tokenPos + $tokenLength);
                    if ($bracket === false || substr($this->content, $bracket - 1, 1) !== '/') {
                        $openingTagCount++;
                    }
                } elseif ($precedingChar === '/') {
                    // ID#98: Check for explicit closing tag expressed by "</$token>" instead of relying
                    // on the previous check. Otherwise, an occurrence of "$token" solely will lead to
                    // a match for a closing tag which in fact is just an occurrence of the token.
                    $closingTagCount++;
                }

                // A symmetric tag structure has been found. This check enables nesting
                // tag structures with the same tag prefix and name.
                if ($openingTagCount === $closingTagCount) {
                    $tagEndPos = $tokenPos - 2; // 2 for "</" in front of the token
                    break;
                }

                // Shift cursor to start search after current token position to
                // search for the last symmetric end tag.
                $tokenPos += $startTagLength;
            }

            $tagEnd = $tagEndPos + $endTagLength;
        }

        $tags[] = [
            's' => $start,
            'e' => $tagEnd,
            'p' => $tagPrefix,
            'n' => $tagName
        ];

        // set offset to end of last tag before starting with new one to skip already analyzed parts
        $offset = $tagEnd;
    }

    // Offset correction necessary, since previously gathered tag positions are manipulated
    // during building-up the tag markers!
    $offsetCorrection = 0;

    // extract content and loop into recursion ...
    foreach ($tags as $tag) {

        $tagStringLength = $tag['e'] - $tag['s'];
        $tagStart = $tag['s'] - $offsetCorrection;

        $attributes = XmlParser::getTagAttributes(
                $tag['p'],
                $tag['n'],
                substr($this->content, $tagStart, $tagStringLength)
        );

        // initialize object id, that is used to reference the object
        // within the APF DOM tree and to provide a unique key for the
        // children index.
        $objectId = XmlParser::generateUniqID();

        // class is loaded by the class loader lazily
        $class = $this->getTagLibClass($tag['p'], $tag['n']);

        if ($class === null) {
            throw new ParserException('No tag definition found for prefix "' . $tag['p'] . '" and name "' . $tag['n']
                    . '" in document with type "' . get_class($this) . '"! Template code: ' . htmlentities($this->content));
        }

        $node = new $class();
        $this->children[$objectId] = $node;

        $node->setObjectId($objectId);

        // inject context and language of the parent object
        $node->setContext($this->getContext());
        $node->setLanguage($this->getLanguage());

        // ID#230: add nodes defining the "dom-id" attribute to the index to allow easy access via getNodeById()
        if (isset($attributes['attributes'][self::ATTRIBUTE_DOM_ID])) {
            self::$documentIndex[$attributes['attributes'][self::ATTRIBUTE_DOM_ID]] =& $this->children[$objectId];
            unset($attributes['attributes'][self::ATTRIBUTE_DOM_ID]);
        }

        // add the tag's attributes
        $node->setAttributes($attributes['attributes']);

        // Replace the position of the taglib with a place holder token string: <$objectId />.
        // This needs to be done, to be able to place the content of the transformed tags
        // at transformation time correctly.
        $marker = '<' . $objectId . ' />';
        $this->content = substr_replace($this->content, $marker, $tagStart, $tagStringLength);

        // advertise the parent object
        $node->setParentObject($this);

        // add the content to the current APF DOM node
        $node->setContent($attributes['content']);

        // call onParseTime() to enable the taglib to initialize itself
        $node->onParseTime();

        // Correct parser offset to correctly place marker tags at the next parsing loop.
        // This is due to string replacement changes the initially gathered values. The
        // length of the marker is measured rather than assumed to stay correct in case
        // the format of the generated object id changes.
        $offsetCorrection += $tagStringLength - strlen($marker);
    }

    $t->stop($benchId);

    // call onAfterAppend() on each child to enable the taglib to interact with
    // other APF DOM nodes to do extended initialization.
    if (count($this->children) > 0) {

        $benchId = '(' . get_class($this) . ') ' . $this->getObjectId() . '::children[]::onAfterAppend()';
        $t->start($benchId);

        foreach ($this->children as &$child) {
            $child->onAfterAppend();
        }
        // release the reference to the last child to avoid accidental modification later on
        unset($child);

        $t->stop($benchId);
    }
}
/**
 * Ensures that XmlParser::getTagAttributes() returns attribute values unchanged, including
 * values that contain brackets and the tag closing sign ">" (e.g. "model[0]->getFoo()").
 * Each expression is checked with a self-closing tag as well as with an explicitly closed
 * tag, both spanning multiple lines.
 */
public function testTagClosingSignInAttribute() {

    $expressionAttributeName = 'expression';

    $expressions = [
        'foo',
        'foo-bar',
        'model[0]',
        'model[0]->getFoo()',
        'model->getFoo()->getBar()->getBaz()',
        'model->getFoo()->getBar()[5]',
        'self->getBar()[2]->getFoo()[3]->getBaz()'
    ];

    foreach ($expressions as $expression) {

        // attribute definition shared by both tag variants
        $attributeString = $expressionAttributeName . '="' . $expression . '"';

        $selfClosingTagString = '<dyn:expr ' . PHP_EOL
                . '   ' . $attributeString . PHP_EOL
                . '/>';

        $attributes = XmlParser::getTagAttributes('dyn', 'expr', $selfClosingTagString);

        // expected value goes first to get meaningful failure messages
        $this->assertEquals(
                $expression,
                $attributes['attributes'][$expressionAttributeName],
                'Self-closing tag with expression "' . $expression . '"'
        );

        $explicitClosingTagString = '<dyn:expr ' . PHP_EOL
                . '   ' . $attributeString . PHP_EOL
                . '>' . PHP_EOL
                . 'this is tag content ' . PHP_EOL
                . '</dyn:expr>';

        $attributes = XmlParser::getTagAttributes('dyn', 'expr', $explicitClosingTagString);

        $this->assertEquals(
                $expression,
                $attributes['attributes'][$expressionAttributeName],
                'Explicitly closing tag with expression "' . $expression . '"'
        );
    }
}