예제 #1
0
 /**
  * Parses the content of the current APF DOM node. Extracts all tags contained in the current
  * document content. Each tag is converted into a child Document of the current tree element.
  * The tag definition place is remembered by a marker tag using the internal id of the DOM node.
  * <p/>
  * Since release 1.17 nested tag structures are supported. This means, that the APF parser
  * is able to handle symmetric structures like this:
  * <code>
  * <foo:bar>
  *    <foo:bar>
  *    </foo:bar>
  * </foo:bar>
  * </code>
  * Besides, the APF parser is able to handle asymmetric structures like
  * <code>
  * <foo:bar />
  * <foo:bar>
  *    <foo:bar>
  *       <foo:bar />
  *    </foo:bar>
  * </foo:bar>
  * </code>
  * Please note that using nested structures must be supported by the tag implementations.
  * <p/>
  * The APF parser is able to handle nested tag structures with self-containing tags (directly or
  * across multiple hierarchies) as of version 2.2. Thus, you can re-use tags across any hierarchy
  * with the same prefix and name at your convenience or defined tags with different prefix and/or
  * name as desired (e.g. using the same implementation).
  * <p/>
  * To protect against infinite loops with broken tag structures the parser uses <em>XmlParser::$maxParserLoops</em>
  * to limit the parser cycles to a configurable amount of times. In case your project requires a
  * higher value, please set <em>XmlParser::$maxParserLoops</em> to an appropriate value.
  *
  * @author Christian Schäfer, Christian Achatz
  * @version
  * Version 0.1, 28.12.2006<br />
  * Version 0.2, 21.01.2007 (Bug-fix: a mixture of self- and exclusively closing tags lead to wrong parsing)<br />
  * Version 0.3, 31.01.2007 (Added context injection)<br />
  * Version 0.4, 09.04.2007 (Removed double attributes setting, added language injection)<br />
  * Version 0.5, 02.04.2008 (Bug-fix: the token is now displayed in the HTML error page)<br />
  * Version 0.6, 06.06.2009 (Improvement: content is not copied during parsing any more)<br />
  * Version 0.7, 30.12.2009 (Introduced benchmark marks for the onParseTime() event.)<br />
  * Version 0.8, 25.01.2013 (Re-writing of the tag parser to support nested tags with the same tag prefix and name)<br />
  * Version 0.9, 20.06.2014 (Re-writing entire parser for 2.2 to support nested, self-containing tags across multiple hierarchies)<br />
  */
 protected function extractTagLibTags()
 {
     /**
      * The list of parsed tags within the current document. Each entry carries the keys
      * "s" (start position), "p" (tag prefix), "n" (tag name) and "e" (end position).
      *
      * @var array
      */
     $tags = [];
     /**
      * The number of tags gathered so far (introduced also for performance reasons).
      *
      * @var int
      */
     $count = 0;
     /* @var $t BenchmarkTimer */
     $t = Singleton::getInstance(BenchmarkTimer::class);
     $benchId = '(' . get_class($this) . ') ' . $this->getObjectId() . '::onParseTime()';
     $t->start($benchId);
     /**
      * Position pointer for tag search. Introduced for performance reasons to skip stuff that we already searched in.
      *
      * @var int
      */
     $offset = 0;
     while (($colon = strpos($this->content, ':', $offset)) !== false) {
         // start tag searching goes as follows
         // - take the (at most) 12 characters right in front of the current colon
         // - search for the last "<" in that sub-string
         // tag prefixes must not be longer than 10 characters - for security reasons
         // in case the tag starts right at the beginning, the area to search gets smaller!
         $area = $colon >= 12 ? 12 : $colon;
         $start = strrpos(substr($this->content, $colon - $area, $area), '<');
         // no open tag found --> continue!
         if ($start === false) {
             $offset = $colon + 1;
             continue;
         }
         // offset correction: translate the position within the sub-string
         // ($area chars in front of the colon) into a position within the content
         $start = $colon - $area + $start;
         // Avoid issue with "<li>FOO:" constructs that will be recognized as tag,
         // AND
         // avoid issues with "<a href="/?:action=logout">Logout</a>" structures
         // to be recognized as a tag (see ID#266).
         $prefix = substr($this->content, $start, $colon - $start);
         if (strpos($prefix, '>') !== false || strpos($prefix, '"') !== false) {
             $offset = $colon + 1;
             continue;
         }
         // avoid issues with parsing HTML comments including colons (e.g. <!-- foo:bar -->)
         if (substr($this->content, $start, 4) === '<!--') {
             $offset = $colon + 1;
             continue;
         }
         // find out whether we have an opening or closing tag
         $end = strpos($this->content, '>', $colon + 1);
         if ($end === false) {
             throw new ParserException('[Document::extractTagLibTags()] Error while parsing: "' . $this->content . '". Invalid markup found. Please check your template code!', E_USER_ERROR);
         }
         // Closing tags of well-formed structures are never visited here, because the offset is
         // moved behind the closing tag as soon as the corresponding opening tag has been analyzed.
         // Reaching a closing tag at this point means that it has no matching opening tag. Without
         // this guard the offset would never be advanced and the loop would spin forever on the
         // same colon.
         if (substr($this->content, $start, 2) === '</') {
             throw new ParserException('[Document::extractTagLibTags()] Error while parsing: "' . $this->content . '". Closing tag without matching opening tag found. Please check your template code!', E_USER_ERROR);
         }
         // ID#253: In case we are using an extended templating expression within a tag attribute
         // (e.g. "model[0]->getFoo()") the ending ">" is contained within the attribute and thus the first
         // strpos() produces wrong results. For this reason, search for the last ">" with an even number of
         // quotes in the string to fix this.
         $parserLoops = 0;
         while (substr_count(substr($this->content, $start, $end + 1 - $start), '"') % 2 !== 0) {
             $parserLoops++;
             if ($parserLoops > XmlParser::$maxParserLoops) {
                 throw new ParserException('[Document::extractTagLibTags()] Error while parsing: "' . $this->content . '". Maximum number of loops ("' . XmlParser::$maxParserLoops . '") exceeded!', E_USER_ERROR);
             }
             $end = strpos($this->content, '>', $end + 1);
             if ($end === false) {
                 throw new ParserException('[Document::extractTagLibTags()] Error while parsing: "' . $this->content . '". Invalid markup found. Please check your template code!', E_USER_ERROR);
             }
         }
         // Determine whether we have a self-closing tag or not. This is important
         // within the following lines how to handle the tag.
         $selfClosing = substr($this->content, $end - 1, 1) === '/';
         // s = tag start position
         $tags[$count]['s'] = $start;
         // p = tag prefix (e.g. "foo" with tag "<foo:bar />")
         $tags[$count]['p'] = substr($this->content, $start + 1, $colon - $start - 1);
         // search for next space to gather tag name
         $space = strpos($this->content, ' ', $colon);
         // In case we encounter tag definitions w/o spaces or the closing bracket is located in
         // front of the next space (tag without attributes, e.g. "<foo:bar> </foo:bar>"), the
         // tag name ends at the closing bracket. For "<foo:bar/>" tags (no space after the tag
         // name) the position is corrected by minus one due to "/>" at the end.
         if ($space === false || $end < $space) {
             $space = $selfClosing ? $end - 1 : $end;
         }
         // n = tag name (e.g. "bar" with tag "<foo:bar />")
         // trim() removes line breaks in case the tag name is followed by a new line
         $tags[$count]['n'] = trim(substr($this->content, $colon + 1, $space - $colon - 1));
         // assemble the token to allow easier closing tag search
         $token = $tags[$count]['p'] . ':' . $tags[$count]['n'];
         if ($selfClosing) {
             // e = tag end position
             $tags[$count]['e'] = $end + 1;
             // set offset to end of last tag before starting with new one
             $offset = $end + 1;
         } else {
             // if we've got an opening/not self-closing tag, let's search for our pendent closing tag
             $tokenLength = strlen($token);
             // 1 for "<"
             $startTagLength = $tokenLength + 1;
             // 3 for "</" plus ">"
             $endTagLength = $tokenLength + 3;
             // initialize the token position cursor
             // for performance reasons: start right after the tag name to save some chars
             $tokenPos = $space;
             $tagEndPos = $end;
             // for performance reasons: start at 1 and skip current tag position to save some chars
             $openingTagCount = 1;
             $closingTagCount = 0;
             while (true) {
                 $tokenPos = strpos($this->content, $token, $tokenPos);
                 if ($tokenPos === false) {
                     break;
                 }
                 $precedingChar = substr($this->content, $tokenPos - 1, 1);
                 if ($precedingChar === '<') {
                     // Check for explicitly closing tag, because self-closing tags
                     // do not count searching for a symmetric tag hierarchy included
                     // in another tag structure.
                     $bracket = strpos($this->content, '>', $tokenPos + $tokenLength);
                     if (substr($this->content, $bracket - 1, 1) !== '/') {
                         $openingTagCount++;
                     }
                 } elseif ($precedingChar === '/') {
                     // ID#98: Check for explicit closing tag expressed by "</$token>" instead of relying
                     // on the previous check. Otherwise, an occurrence of "$token" solely will lead to
                     // a match for a closing tag which in fact is just an occurrence of the token.
                     $closingTagCount++;
                 }
                 // In case we have passed the first tag occurrence let's look for a symmetric
                 // tag structure. This check enables nesting tag structures with the same
                 // tag prefix and name.
                 if ($openingTagCount > 0 && $openingTagCount === $closingTagCount) {
                     // minus 2 moves the cursor from the token to the "<" of "</$token>"
                     $tagEndPos = $tokenPos - 2;
                     break;
                 }
                 // Shift cursor to start search after current token position to recursively
                 // search for the last symmetric end tag.
                 $tokenPos = $tokenPos + $startTagLength;
             }
             // e = tag end position
             $tags[$count]['e'] = $tagEndPos + $endTagLength;
             // set offset to end of last tag before starting with new one to skip already analyzed parts
             $offset = $tags[$count]['e'];
         }
         $count++;
     }
     // Offset correction necessary, since previously gathered tag positions are manipulated
     // during building-up the tag markers!
     $offsetCorrection = 0;
     // extract content and loop into recursion ...
     for ($i = 0; $i < $count; $i++) {
         // current position of the tag within the (already manipulated) content
         $tagStart = $tags[$i]['s'] - $offsetCorrection;
         $tagStringLength = $tags[$i]['e'] - $tags[$i]['s'];
         $attributes = XmlParser::getTagAttributes($tags[$i]['p'], $tags[$i]['n'], substr($this->content, $tagStart, $tagStringLength));
         // initialize object id, that is used to reference the object
         // within the APF DOM tree and to provide a unique key for the
         // children index.
         $objectId = XmlParser::generateUniqID();
         // class is loaded by the class loader lazily
         $class = $this->getTagLibClass($tags[$i]['p'], $tags[$i]['n']);
         if ($class === null) {
             throw new ParserException('No tag definition found for prefix "' . $tags[$i]['p'] . '" and name "' . $tags[$i]['n'] . '" in document with type "' . get_class($this) . '"! Template code: ' . htmlentities($this->content));
         }
         $this->children[$objectId] = new $class();
         $this->children[$objectId]->setObjectId($objectId);
         // inject context of the parent object
         $this->children[$objectId]->setContext($this->getContext());
         // inject language of the parent object
         $this->children[$objectId]->setLanguage($this->getLanguage());
         // ID#230: add nodes defining the "dom-id" attribute to the index to allow easy access via getNodeById()
         if (isset($attributes['attributes'][self::ATTRIBUTE_DOM_ID])) {
             self::$documentIndex[$attributes['attributes'][self::ATTRIBUTE_DOM_ID]] =& $this->children[$objectId];
             unset($attributes['attributes'][self::ATTRIBUTE_DOM_ID]);
         }
         // add the tag's attributes
         $this->children[$objectId]->setAttributes($attributes['attributes']);
         // replace the position of the taglib with a place holder
         // token string: <$objectId />.
         // this needs to be done, to be able to place the content of the
         // transformed tags at transformation time correctly
         $marker = '<' . $objectId . ' />';
         $this->content = substr_replace($this->content, $marker, $tagStart, $tagStringLength);
         // advertise the parent object
         $this->children[$objectId]->setParentObject($this);
         // add the content to the current APF DOM node
         $this->children[$objectId]->setContent($attributes['content']);
         // call onParseTime() to enable the taglib to initialize itself
         $this->children[$objectId]->onParseTime();
         // correct parser offset to correctly place marker tags at the next parsing loop
         // this is due to string replacement changes the initially gathered values
         // (the length of the marker is measured instead of assuming a fixed id length)
         $offsetCorrection += $tagStringLength - strlen($marker);
     }
     $t->stop($benchId);
     // call onAfterAppend() on each child to enable the taglib to interact with
     // other APF DOM nodes to do extended initialization.
     if (count($this->children) > 0) {
         $benchId = '(' . get_class($this) . ') ' . $this->getObjectId() . '::children[]::onAfterAppend()';
         $t->start($benchId);
         foreach ($this->children as &$child) {
             $child->onAfterAppend();
         }
         // break the reference to the last child to avoid accidental modification later on
         unset($child);
         $t->stop($benchId);
     }
 }
예제 #2
0
 /**
  * Ensures that attribute values containing the tag closing sign (">") - e.g. extended
  * templating expressions such as "model[0]->getFoo()" - are returned unchanged by
  * XmlParser::getTagAttributes(), both for self-closing and for explicitly closing tags.
  */
 public function testTagClosingSignInAttribute()
 {
     $expressionAttributeName = 'expression';
     $expressions = [
         'foo',
         'foo-bar',
         'model[0]',
         'model[0]->getFoo()',
         'model->getFoo()->getBar()->getBaz()',
         'model->getFoo()->getBar()[5]',
         'self->getBar()[2]->getFoo()[3]->getBaz()'
     ];
     foreach ($expressions as $expression) {
         // attribute definition placed on its own line, shared by both tag variants
         $attributeDefinition = PHP_EOL . '   ' . $expressionAttributeName . '="' . $expression . '"' . PHP_EOL;

         // self-closing variant: <dyn:expr ... />
         $selfClosingTagString = '<dyn:expr ' . $attributeDefinition . '/>';
         $attributes = XmlParser::getTagAttributes('dyn', 'expr', $selfClosingTagString);
         // expected value first, actual value second to get meaningful failure messages
         $this->assertEquals($expression, $attributes['attributes'][$expressionAttributeName]);

         // explicitly closing variant: <dyn:expr ...>content</dyn:expr>
         $explicitClosingTagString = '<dyn:expr ' . $attributeDefinition . '>' . PHP_EOL . 'this is tag content ' . PHP_EOL . '</dyn:expr>';
         $attributes = XmlParser::getTagAttributes('dyn', 'expr', $explicitClosingTagString);
         $this->assertEquals($expression, $attributes['attributes'][$expressionAttributeName]);
     }
 }