コード例 #1
0
ファイル: Preprocessor_DOM.php プロジェクト: mb720/mediawiki
 /**
  * Preprocess wikitext in a single left-to-right scan and return the resulting
  * document tree serialized as an XML string.
  *
  * The output is always wrapped in one <root> element. Inside it, literal text is
  * HTML-escaped and the recognised constructs are emitted as elements:
  * <ignore>, <comment>, <ext> (with <name>, <attr>, optional <inner> and <close>),
  * <h level=".." i="..">, and one element per brace rule in $this->rules (named by
  * the rule's 'names' table, containing <title> and <part> children).
  *
  * Note that the $flags parameter is reused further down as the target of
  * $stack->getFlags(), whose result is passed to extract() to refresh local
  * scanner state (presumably $findEquals, $findPipe and $inHeading -- confirm
  * against PPDStack::getFlags()).
  *
  * @param string $text Wikitext to preprocess
  * @param int $flags Bit field; only Parser::PTD_FOR_INCLUSION is tested here. When it is
  *   set, <noinclude> elements and <includeonly> tags are ignored and <onlyinclude> is
  *   honoured; otherwise <includeonly> elements and the noinclude/onlyinclude tags are ignored.
  * @return string XML beginning with <root> and ending with </root>
  */
 public function preprocessToXml($text, $flags = 0)
 {
     $forInclusion = $flags & Parser::PTD_FOR_INCLUSION;
     $xmlishElements = $this->parser->getStripList();
     $enableOnlyinclude = false;
     // $ignoredTags: the tag itself is dropped but its content is still parsed.
     // $ignoredElements: the whole element, content included, is dropped.
     if ($forInclusion) {
         $ignoredTags = array('includeonly', '/includeonly');
         $ignoredElements = array('noinclude');
         $xmlishElements[] = 'noinclude';
         // <onlyinclude> only takes effect when both an opening and a closing tag exist
         if (strpos($text, '<onlyinclude>') !== false && strpos($text, '</onlyinclude>') !== false) {
             $enableOnlyinclude = true;
         }
     } else {
         $ignoredTags = array('noinclude', '/noinclude', 'onlyinclude', '/onlyinclude');
         $ignoredElements = array('includeonly');
         $xmlishElements[] = 'includeonly';
     }
     $xmlishRegex = implode('|', array_merge($xmlishElements, $ignoredTags));
     // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
     // Group 1 captures a known element name, group 2 captures the comment opener "!--"
     $elementsRegex = "~({$xmlishRegex})(?:\\s|\\/>|>)|(!--)~iA";
     $stack = new PPDStack();
     // Characters that always interrupt a literal run
     $searchBase = "[{<\n";
     # }
     // For fast reverse searches
     $revText = strrev($text);
     $lengthText = strlen($text);
     // Input pointer, starts out pointing to a pseudo-newline before the start
     $i = 0;
     // Current accumulator
     $accum =& $stack->getAccum();
     $accum = '<root>';
     // True to find equals signs in arguments
     $findEquals = false;
     // True to take notice of pipe characters
     $findPipe = false;
     // Sequence number written into the i="" attribute of each emitted <h>
     $headingIndex = 1;
     // True if $i is inside a possible heading
     $inHeading = false;
     // True if there are no more greater-than (>) signs right of $i
     $noMoreGT = false;
     // True to ignore all input up to the next <onlyinclude>
     $findOnlyinclude = $enableOnlyinclude;
     // Do a line-start run without outputting an LF character
     $fakeLineStart = true;
     while (true) {
         // $this->memCheck();
         if ($findOnlyinclude) {
             // Ignore all input up to the next <onlyinclude>
             $startPos = strpos($text, '<onlyinclude>', $i);
             if ($startPos === false) {
                 // Ignored section runs to the end
                 $accum .= '<ignore>' . htmlspecialchars(substr($text, $i)) . '</ignore>';
                 break;
             }
             $tagEndPos = $startPos + strlen('<onlyinclude>');
             // past-the-end
             $accum .= '<ignore>' . htmlspecialchars(substr($text, $i, $tagEndPos - $i)) . '</ignore>';
             $i = $tagEndPos;
             $findOnlyinclude = false;
         }
         if ($fakeLineStart) {
             $found = 'line-start';
             $curChar = '';
         } else {
             # Find next opening brace, closing brace or pipe
             $search = $searchBase;
             if ($stack->top === false) {
                 $currentClosing = '';
             } else {
                 $currentClosing = $stack->top->close;
                 $search .= $currentClosing;
             }
             if ($findPipe) {
                 $search .= '|';
             }
             if ($findEquals) {
                 // First equals will be for the template
                 $search .= '=';
             }
             $rule = null;
             # Output literal section, advance input counter
             $literalLength = strcspn($text, $search, $i);
             if ($literalLength > 0) {
                 $accum .= htmlspecialchars(substr($text, $i, $literalLength));
                 $i += $literalLength;
             }
             if ($i >= $lengthText) {
                 if ($currentClosing == "\n") {
                     // Do a past-the-end run to finish off the heading
                     $curChar = '';
                     $found = 'line-end';
                 } else {
                     # All done
                     break;
                 }
             } else {
                 // Classify the interrupting character; order matters because the
                 // current closing character may coincide with other cases
                 $curChar = $text[$i];
                 if ($curChar == '|') {
                     $found = 'pipe';
                 } elseif ($curChar == '=') {
                     $found = 'equals';
                 } elseif ($curChar == '<') {
                     $found = 'angle';
                 } elseif ($curChar == "\n") {
                     if ($inHeading) {
                         $found = 'line-end';
                     } else {
                         $found = 'line-start';
                     }
                 } elseif ($curChar == $currentClosing) {
                     $found = 'close';
                 } elseif (isset($this->rules[$curChar])) {
                     $found = 'open';
                     $rule = $this->rules[$curChar];
                 } else {
                     # Some versions of PHP have a strcspn which stops on null characters
                     # Ignore and continue
                     ++$i;
                     continue;
                 }
             }
         }
         if ($found == 'angle') {
             $matches = false;
             // Handle </onlyinclude>
             if ($enableOnlyinclude && substr($text, $i, strlen('</onlyinclude>')) == '</onlyinclude>') {
                 // $i is left on the closing tag; the next iteration swallows it
                 // as part of the ignored run up to the next <onlyinclude>
                 $findOnlyinclude = true;
                 continue;
             }
             // Determine element name
             if (!preg_match($elementsRegex, $text, $matches, 0, $i + 1)) {
                 // Element name missing or not listed
                 $accum .= '&lt;';
                 ++$i;
                 continue;
             }
             // Handle comments
             if (isset($matches[2]) && $matches[2] == '!--') {
                 // To avoid leaving blank lines, when a sequence of
                 // space-separated comments is both preceded and followed by
                 // a newline (ignoring spaces), then
                 // trim leading and trailing spaces and the trailing newline.
                 // Find the end
                 $endPos = strpos($text, '-->', $i + 4);
                 if ($endPos === false) {
                     // Unclosed comment in input, runs to end
                     $inner = substr($text, $i);
                     $accum .= '<comment>' . htmlspecialchars($inner) . '</comment>';
                     $i = $lengthText;
                 } else {
                     // Search backwards for leading whitespace
                     $wsStart = $i ? $i - strspn($revText, " \t", $lengthText - $i) : 0;
                     // Search forwards for trailing whitespace
                     // $wsEnd will be the position of the last space (or the '>' if there's none)
                     $wsEnd = $endPos + 2 + strspn($text, " \t", $endPos + 3);
                     // Keep looking forward as long as we're finding more
                     // comments.
                     $comments = array(array($wsStart, $wsEnd));
                     while (substr($text, $wsEnd + 1, 4) == '<!--') {
                         $c = strpos($text, '-->', $wsEnd + 4);
                         if ($c === false) {
                             break;
                         }
                         $c = $c + 2 + strspn($text, " \t", $c + 3);
                         $comments[] = array($wsEnd + 1, $c);
                         $wsEnd = $c;
                     }
                     // Eat the line if possible
                     // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
                     // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
                     // it's a possible beneficial b/c break.
                     if ($wsStart > 0 && substr($text, $wsStart - 1, 1) == "\n" && substr($text, $wsEnd + 1, 1) == "\n") {
                         // Remove leading whitespace from the end of the accumulator
                         // Sanity check first though
                         $wsLength = $i - $wsStart;
                         if ($wsLength > 0 && strspn($accum, " \t", -$wsLength) === $wsLength) {
                             $accum = substr($accum, 0, -$wsLength);
                         }
                         // Dump all but the last comment to the accumulator
                         // ($startPos/$endPos are left describing the last comment,
                         // which is emitted by the shared code below)
                         foreach ($comments as $j => $com) {
                             $startPos = $com[0];
                             $endPos = $com[1] + 1;
                             if ($j == count($comments) - 1) {
                                 break;
                             }
                             $inner = substr($text, $startPos, $endPos - $startPos);
                             $accum .= '<comment>' . htmlspecialchars($inner) . '</comment>';
                         }
                         // Do a line-start run next time to look for headings after the comment
                         $fakeLineStart = true;
                     } else {
                         // No line to eat, just take the comment itself
                         $startPos = $i;
                         $endPos += 2;
                     }
                     if ($stack->top) {
                         // Record where the comment sits so the line-end handler can
                         // look for heading-closing equals signs before it
                         $part = $stack->top->getCurrentPart();
                         if (!(isset($part->commentEnd) && $part->commentEnd == $wsStart - 1)) {
                             $part->visualEnd = $wsStart;
                         }
                         // Else comments abutting, no change in visual end
                         $part->commentEnd = $endPos;
                     }
                     $i = $endPos + 1;
                     $inner = substr($text, $startPos, $endPos - $startPos + 1);
                     $accum .= '<comment>' . htmlspecialchars($inner) . '</comment>';
                 }
                 continue;
             }
             $name = $matches[1];
             $lowerName = strtolower($name);
             $attrStart = $i + strlen($name) + 1;
             // Find end of tag
             $tagEndPos = $noMoreGT ? false : strpos($text, '>', $attrStart);
             if ($tagEndPos === false) {
                 // Infinite backtrack
                 // Disable tag search to prevent worst-case O(N^2) performance
                 $noMoreGT = true;
                 $accum .= '&lt;';
                 ++$i;
                 continue;
             }
             // Handle ignored tags
             if (in_array($lowerName, $ignoredTags)) {
                 $accum .= '<ignore>' . htmlspecialchars(substr($text, $i, $tagEndPos - $i + 1)) . '</ignore>';
                 $i = $tagEndPos + 1;
                 continue;
             }
             $tagStartPos = $i;
             if ($text[$tagEndPos - 1] == '/') {
                 // Self-closing tag: no inner content and no close tag
                 $attrEnd = $tagEndPos - 1;
                 $inner = null;
                 $i = $tagEndPos + 1;
                 $close = '';
             } else {
                 $attrEnd = $tagEndPos;
                 // Find closing tag
                 if (preg_match("/<\\/" . preg_quote($name, '/') . "\\s*>/i", $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1)) {
                     $inner = substr($text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1);
                     $i = $matches[0][1] + strlen($matches[0][0]);
                     $close = '<close>' . htmlspecialchars($matches[0][0]) . '</close>';
                 } else {
                     // No end tag -- let it run out to the end of the text.
                     $inner = substr($text, $tagEndPos + 1);
                     $i = $lengthText;
                     $close = '';
                 }
             }
             // <includeonly> and <noinclude> just become <ignore> tags
             if (in_array($lowerName, $ignoredElements)) {
                 $accum .= '<ignore>' . htmlspecialchars(substr($text, $tagStartPos, $i - $tagStartPos)) . '</ignore>';
                 continue;
             }
             $accum .= '<ext>';
             if ($attrEnd <= $attrStart) {
                 $attr = '';
             } else {
                 $attr = substr($text, $attrStart, $attrEnd - $attrStart);
             }
             $accum .= '<name>' . htmlspecialchars($name) . '</name>' . '<attr>' . htmlspecialchars($attr) . '</attr>';
             if ($inner !== null) {
                 $accum .= '<inner>' . htmlspecialchars($inner) . '</inner>';
             }
             $accum .= $close . '</ext>';
         } elseif ($found == 'line-start') {
             // Is this the start of a heading?
             // Line break belongs before the heading element in any case
             if ($fakeLineStart) {
                 $fakeLineStart = false;
             } else {
                 $accum .= $curChar;
                 $i++;
             }
             // Count at most six leading equals signs
             $count = strspn($text, '=', $i, 6);
             if ($count == 1 && $findEquals) {
                 // DWIM: This looks kind of like a name/value separator.
                 // Let's let the equals handler have it and break the
                 // potential heading. This is heuristic, but AFAICT the
                 // methods for completely correct disambiguation are very
                 // complex.
             } elseif ($count > 0) {
                 $piece = array('open' => "\n", 'close' => "\n", 'parts' => array(new PPDPart(str_repeat('=', $count))), 'startPos' => $i, 'count' => $count);
                 $stack->push($piece);
                 $accum =& $stack->getAccum();
                 // Overwrites the $flags parameter; extract() refreshes local scanner state
                 $flags = $stack->getFlags();
                 extract($flags);
                 $i += $count;
             }
         } elseif ($found == 'line-end') {
             $piece = $stack->top;
             // A heading must be open, otherwise \n wouldn't have been in the search list
             // (a real expression, not a code string: string assertions are deprecated
             // since PHP 7.2 and are not evaluated as code in PHP 8)
             assert($piece->open === "\n");
             $part = $piece->getCurrentPart();
             // Search back through the input to see if it has a proper close.
             // Do this using the reversed string since the other solutions
             // (end anchor, etc.) are inefficient.
             $wsLength = strspn($revText, " \t", $lengthText - $i);
             $searchStart = $i - $wsLength;
             if (isset($part->commentEnd) && $searchStart - 1 == $part->commentEnd) {
                 // Comment found at line end
                 // Search for equals signs before the comment
                 $searchStart = $part->visualEnd;
                 $searchStart -= strspn($revText, " \t", $lengthText - $searchStart);
             }
             $count = $piece->count;
             $equalsLength = strspn($revText, '=', $lengthText - $searchStart);
             if ($equalsLength > 0) {
                 if ($searchStart - $equalsLength == $piece->startPos) {
                     // This is just a single string of equals signs on its own line
                     // Replicate the doHeadings behavior /={count}(.+)={count}/
                     // First find out how many equals signs there really are (don't stop at 6)
                     $count = $equalsLength;
                     if ($count < 3) {
                         $count = 0;
                     } else {
                         $count = min(6, intval(($count - 1) / 2));
                     }
                 } else {
                     // Heading level is the smaller of the opening and closing runs
                     $count = min($equalsLength, $count);
                 }
                 if ($count > 0) {
                     // Normal match, output <h>
                     $element = "<h level=\"{$count}\" i=\"{$headingIndex}\">{$accum}</h>";
                     $headingIndex++;
                 } else {
                     // Single equals sign on its own line, count=0
                     $element = $accum;
                 }
             } else {
                 // No match, no <h>, just pass down the inner text
                 $element = $accum;
             }
             // Unwind the stack
             $stack->pop();
             $accum =& $stack->getAccum();
             $flags = $stack->getFlags();
             extract($flags);
             // Append the result to the enclosing accumulator
             $accum .= $element;
             // Note that we do NOT increment the input pointer.
             // This is because the closing linebreak could be the opening linebreak of
             // another heading. Infinite loops are avoided because the next iteration MUST
             // hit the heading open case above, which unconditionally increments the
             // input pointer.
         } elseif ($found == 'open') {
             # count opening brace characters
             $count = strspn($text, $curChar, $i);
             # we need to add to stack only if opening brace count is enough for one of the rules
             if ($count >= $rule['min']) {
                 # Add it to the stack
                 $piece = array('open' => $curChar, 'close' => $rule['end'], 'count' => $count, 'lineStart' => $i > 0 && $text[$i - 1] == "\n");
                 $stack->push($piece);
                 $accum =& $stack->getAccum();
                 $flags = $stack->getFlags();
                 extract($flags);
             } else {
                 # Add literal brace(s)
                 $accum .= htmlspecialchars(str_repeat($curChar, $count));
             }
             $i += $count;
         } elseif ($found == 'close') {
             $piece = $stack->top;
             # lets check if there are enough characters for closing brace
             $maxCount = $piece->count;
             $count = strspn($text, $curChar, $i, $maxCount);
             # check for maximum matching characters (if there are 5 closing
             # characters, we will probably need only 3 - depending on the rules)
             $rule = $this->rules[$piece->open];
             if ($count > $rule['max']) {
                 # The specified maximum exists in the callback array, unless the caller
                 # has made an error
                 $matchingCount = $rule['max'];
             } else {
                 # Count is less than the maximum
                 # Skip any gaps in the callback array to find the true largest match
                 # Need to use array_key_exists not isset because the callback can be null
                 $matchingCount = $count;
                 while ($matchingCount > 0 && !array_key_exists($matchingCount, $rule['names'])) {
                     --$matchingCount;
                 }
             }
             if ($matchingCount <= 0) {
                 # No matching element found in callback array
                 # Output a literal closing brace and continue
                 $accum .= htmlspecialchars(str_repeat($curChar, $count));
                 $i += $count;
                 continue;
             }
             $name = $rule['names'][$matchingCount];
             if ($name === null) {
                 // No element, just literal text
                 $element = $piece->breakSyntax($matchingCount) . str_repeat($rule['end'], $matchingCount);
             } else {
                 # Create XML element
                 # Note: $parts is already XML, does not need to be encoded further
                 $parts = $piece->parts;
                 $title = $parts[0]->out;
                 unset($parts[0]);
                 # The invocation is at the start of the line if lineStart is set in
                 # the stack, and all opening brackets are used up.
                 if ($maxCount == $matchingCount && !empty($piece->lineStart)) {
                     $attr = ' lineStart="1"';
                 } else {
                     $attr = '';
                 }
                 $element = "<{$name}{$attr}>";
                 $element .= "<title>{$title}</title>";
                 // Unnamed parts are numbered from 1; named parts are split at the
                 // recorded position of their equals sign
                 $argIndex = 1;
                 foreach ($parts as $part) {
                     if (isset($part->eqpos)) {
                         $argName = substr($part->out, 0, $part->eqpos);
                         $argValue = substr($part->out, $part->eqpos + 1);
                         $element .= "<part><name>{$argName}</name>=<value>{$argValue}</value></part>";
                     } else {
                         $element .= "<part><name index=\"{$argIndex}\" /><value>{$part->out}</value></part>";
                         $argIndex++;
                     }
                 }
                 $element .= "</{$name}>";
             }
             # Advance input pointer
             $i += $matchingCount;
             # Unwind the stack
             $stack->pop();
             $accum =& $stack->getAccum();
             # Re-add the old stack element if it still has unmatched opening characters remaining
             if ($matchingCount < $piece->count) {
                 $piece->parts = array(new PPDPart());
                 $piece->count -= $matchingCount;
                 # do we still qualify for any callback with remaining count?
                 $min = $this->rules[$piece->open]['min'];
                 if ($piece->count >= $min) {
                     $stack->push($piece);
                     $accum =& $stack->getAccum();
                 } else {
                     $accum .= str_repeat($piece->open, $piece->count);
                 }
             }
             $flags = $stack->getFlags();
             extract($flags);
             # Add XML element to the enclosing accumulator
             $accum .= $element;
         } elseif ($found == 'pipe') {
             $findEquals = true;
             // shortcut for getFlags()
             $stack->addPart();
             $accum =& $stack->getAccum();
             ++$i;
         } elseif ($found == 'equals') {
             $findEquals = false;
             // shortcut for getFlags()
             // Remember where the '=' lands in the part's output so the close
             // handler can split name from value
             $stack->getCurrentPart()->eqpos = strlen($accum);
             $accum .= '=';
             ++$i;
         }
     }
     # Output any remaining unclosed brackets
     foreach ($stack->stack as $piece) {
         $stack->rootAccum .= $piece->breakSyntax();
     }
     $stack->rootAccum .= '</root>';
     $xml = $stack->rootAccum;
     return $xml;
 }
コード例 #2
0
 /**
  * Adaptor that turns a parser-function call into an extension tag invocation.
  *
  * The first argument is the tag name, the second (optional) argument is the tag's
  * inner content, and every remaining named argument becomes a tag attribute.
  * When the tag name is not in the parser's strip list the call is re-emitted as
  * an ordinary tag with escaped attributes; otherwise the work is delegated to
  * Parser::extensionSubstitution().
  *
  * @param Parser $parser
  * @param PPFrame $frame
  * @param PPNode[] $args
  * @return string
  */
 public static function tagObj($parser, $frame, $args)
 {
     if (count($args) === 0) {
         return '';
     }

     $tagName = strtolower(trim($frame->expand(array_shift($args))));
     // A missing second argument means "no content", which is distinct from empty content
     $inner = count($args) ? $frame->expand(array_shift($args)) : null;

     $attributes = [];
     foreach ($args as $node) {
         $pieces = $node->splitArg();
         // Only named arguments (those with an empty index) define attributes
         if (strval($pieces['index']) !== '') {
             continue;
         }
         $attrName = trim($frame->expand($pieces['name'], PPFrame::STRIP_COMMENTS));
         $attrValue = trim($frame->expand($pieces['value']));
         // Strip one layer of surrounding quotes, if present
         if (preg_match('/^(?:["\'](.+)["\']|""|\'\')$/s', $attrValue, $quoted)) {
             $attrValue = isset($quoted[1]) ? $quoted[1] : '';
         }
         $attributes[$attrName] = $attrValue;
     }

     $knownTags = $parser->getStripList();
     if (in_array($tagName, $knownTags)) {
         return $parser->extensionSubstitution(
             ['name' => $tagName, 'inner' => $inner, 'attributes' => $attributes, 'close' => "</{$tagName}>"],
             $frame
         );
     }

     // we can't handle this tag (at least not now), so just re-emit it as an ordinary tag
     $attrText = '';
     foreach ($attributes as $attrName => $attrValue) {
         $attrText .= ' ' . htmlspecialchars($attrName) . '="' . htmlspecialchars($attrValue) . '"';
     }
     return $inner === null
         ? "<{$tagName}{$attrText}/>"
         : "<{$tagName}{$attrText}>{$inner}</{$tagName}>";
 }
コード例 #3
0
 /**
  * Adaptor that turns a parser-function call into an extension tag invocation.
  *
  * The first argument is the tag name and the second (optional) argument is the
  * tag's inner content. A tag name missing from the parser's strip list produces
  * an error <span> built from the 'unknown_extension_tag' message; otherwise the
  * remaining named arguments are collected as attributes and the work is
  * delegated to Parser::extensionSubstitution().
  *
  * @param Parser $parser
  * @param PPFrame $frame
  * @param array $args
  * @return string
  */
 public static function tagObj($parser, $frame, $args)
 {
     if (count($args) === 0) {
         return '';
     }

     $tagName = strtolower(trim($frame->expand(array_shift($args))));
     // A missing second argument means "no content", which is distinct from empty content
     $inner = count($args) ? $frame->expand(array_shift($args)) : null;

     $knownTags = $parser->getStripList();
     if (!in_array($tagName, $knownTags)) {
         $errorText = wfMessage('unknown_extension_tag', $tagName)->inContentLanguage()->text();
         return '<span class="error">' . $errorText . '</span>';
     }

     $attributes = array();
     foreach ($args as $node) {
         $pieces = $node->splitArg();
         // Only named arguments (those with an empty index) define attributes
         if (strval($pieces['index']) !== '') {
             continue;
         }
         $attrName = trim($frame->expand($pieces['name'], PPFrame::STRIP_COMMENTS));
         $attrValue = trim($frame->expand($pieces['value']));
         // Strip one layer of surrounding quotes, if present
         if (preg_match('/^(?:["\'](.+)["\']|""|\'\')$/s', $attrValue, $quoted)) {
             $attrValue = isset($quoted[1]) ? $quoted[1] : '';
         }
         $attributes[$attrName] = $attrValue;
     }

     return $parser->extensionSubstitution(
         array('name' => $tagName, 'inner' => $inner, 'attributes' => $attributes, 'close' => "</{$tagName}>"),
         $frame
     );
 }
コード例 #4
0
 /**
  * Preprocess some wikitext and return the document tree.
  * This is the ghost of Parser::replace_variables().
  *
  * @param $text String: the text to parse
  * @param $flags Integer: bitwise combination of:
  *          Parser::PTD_FOR_INCLUSION    Handle <noinclude>/<includeonly> as if the text is being
  *                                     included. Default is to assume a direct page view.
  *
  * The generated DOM tree must depend only on the input text and the flags.
  * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899.
  *
  * Any flag added to the $flags parameter here, or any other parameter liable to cause a
  * change in the DOM tree for a given text, must be passed through the section identifier
  * in the section edit link and thus back to extractSections().
  *
  * The output of this function is currently only cached in process memory, but a persistent
  * cache may be implemented at a later date which takes further advantage of these strict
  * dependency requirements.
  *
  * @return PPNode_Hash_Tree
  */
 function preprocessToObj($text, $flags = 0)
 {
     wfProfileIn(__METHOD__);
     // Check cache.
     global $wgMemc, $wgPreprocessorCacheThreshold;
     $cacheable = $wgPreprocessorCacheThreshold !== false && strlen($text) > $wgPreprocessorCacheThreshold;
     if ($cacheable) {
         wfProfileIn(__METHOD__ . '-cacheable');
         $cacheKey = wfMemcKey('preprocess-hash', md5($text), $flags);
         $cacheValue = $wgMemc->get($cacheKey);
         if ($cacheValue) {
             $version = substr($cacheValue, 0, 8);
             if (intval($version) == self::CACHE_VERSION) {
                 $hash = unserialize(substr($cacheValue, 8));
                 // From the cache
                 wfDebugLog("Preprocessor", "Loaded preprocessor hash from memcached (key {$cacheKey})");
                 wfProfileOut(__METHOD__ . '-cacheable');
                 wfProfileOut(__METHOD__);
                 return $hash;
             }
         }
         wfProfileIn(__METHOD__ . '-cache-miss');
     }
     $rules = array('{' => array('end' => '}', 'names' => array(2 => 'template', 3 => 'tplarg'), 'min' => 2, 'max' => 3), '[' => array('end' => ']', 'names' => array(2 => null), 'min' => 2, 'max' => 2));
     $forInclusion = $flags & Parser::PTD_FOR_INCLUSION;
     $xmlishElements = $this->parser->getStripList();
     $enableOnlyinclude = false;
     if ($forInclusion) {
         $ignoredTags = array('includeonly', '/includeonly');
         $ignoredElements = array('noinclude');
         $xmlishElements[] = 'noinclude';
         if (strpos($text, '<onlyinclude>') !== false && strpos($text, '</onlyinclude>') !== false) {
             $enableOnlyinclude = true;
         }
     } else {
         $ignoredTags = array('noinclude', '/noinclude', 'onlyinclude', '/onlyinclude');
         $ignoredElements = array('includeonly');
         $xmlishElements[] = 'includeonly';
     }
     $xmlishRegex = implode('|', array_merge($xmlishElements, $ignoredTags));
     // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
     $elementsRegex = "~({$xmlishRegex})(?:\\s|\\/>|>)|(!--)~iA";
     $stack = new PPDStack_Hash();
     $searchBase = "[{<\n";
     $revText = strrev($text);
     // For fast reverse searches
     $i = 0;
     # Input pointer, starts out pointing to a pseudo-newline before the start
     $accum =& $stack->getAccum();
     # Current accumulator
     $findEquals = false;
     # True to find equals signs in arguments
     $findPipe = false;
     # True to take notice of pipe characters
     $headingIndex = 1;
     $inHeading = false;
     # True if $i is inside a possible heading
     $noMoreGT = false;
     # True if there are no more greater-than (>) signs right of $i
     $findOnlyinclude = $enableOnlyinclude;
     # True to ignore all input up to the next <onlyinclude>
     $fakeLineStart = true;
     # Do a line-start run without outputting an LF character
     while (true) {
         //$this->memCheck();
         if ($findOnlyinclude) {
             // Ignore all input up to the next <onlyinclude>
             $startPos = strpos($text, '<onlyinclude>', $i);
             if ($startPos === false) {
                 // Ignored section runs to the end
                 $accum->addNodeWithText('ignore', substr($text, $i));
                 break;
             }
             $tagEndPos = $startPos + strlen('<onlyinclude>');
             // past-the-end
             $accum->addNodeWithText('ignore', substr($text, $i, $tagEndPos - $i));
             $i = $tagEndPos;
             $findOnlyinclude = false;
         }
         if ($fakeLineStart) {
             $found = 'line-start';
             $curChar = '';
         } else {
             # Find next opening brace, closing brace or pipe
             $search = $searchBase;
             if ($stack->top === false) {
                 $currentClosing = '';
             } else {
                 $currentClosing = $stack->top->close;
                 $search .= $currentClosing;
             }
             if ($findPipe) {
                 $search .= '|';
             }
             if ($findEquals) {
                 // First equals will be for the template
                 $search .= '=';
             }
             $rule = null;
             # Output literal section, advance input counter
             $literalLength = strcspn($text, $search, $i);
             if ($literalLength > 0) {
                 $accum->addLiteral(substr($text, $i, $literalLength));
                 $i += $literalLength;
             }
             if ($i >= strlen($text)) {
                 if ($currentClosing == "\n") {
                     // Do a past-the-end run to finish off the heading
                     $curChar = '';
                     $found = 'line-end';
                 } else {
                     # All done
                     break;
                 }
             } else {
                 $curChar = $text[$i];
                 if ($curChar == '|') {
                     $found = 'pipe';
                 } elseif ($curChar == '=') {
                     $found = 'equals';
                 } elseif ($curChar == '<') {
                     $found = 'angle';
                 } elseif ($curChar == "\n") {
                     if ($inHeading) {
                         $found = 'line-end';
                     } else {
                         $found = 'line-start';
                     }
                 } elseif ($curChar == $currentClosing) {
                     $found = 'close';
                 } elseif (isset($rules[$curChar])) {
                     $found = 'open';
                     $rule = $rules[$curChar];
                 } else {
                     # Some versions of PHP have a strcspn which stops on null characters
                     # Ignore and continue
                     ++$i;
                     continue;
                 }
             }
         }
         if ($found == 'angle') {
             $matches = false;
             // Handle </onlyinclude>
             if ($enableOnlyinclude && substr($text, $i, strlen('</onlyinclude>')) == '</onlyinclude>') {
                 $findOnlyinclude = true;
                 continue;
             }
             // Determine element name
             if (!preg_match($elementsRegex, $text, $matches, 0, $i + 1)) {
                 // Element name missing or not listed
                 $accum->addLiteral('<');
                 ++$i;
                 continue;
             }
             // Handle comments
             if (isset($matches[2]) && $matches[2] == '!--') {
                 // To avoid leaving blank lines, when a comment is both preceded
                 // and followed by a newline (ignoring spaces), trim leading and
                 // trailing spaces and one of the newlines.
                 // Find the end
                 $endPos = strpos($text, '-->', $i + 4);
                 if ($endPos === false) {
                     // Unclosed comment in input, runs to end
                     $inner = substr($text, $i);
                     $accum->addNodeWithText('comment', $inner);
                     $i = strlen($text);
                 } else {
                     // Search backwards for leading whitespace
                     $wsStart = $i ? $i - strspn($revText, ' ', strlen($text) - $i) : 0;
                     // Search forwards for trailing whitespace
                     // $wsEnd will be the position of the last space (or the '>' if there's none)
                     $wsEnd = $endPos + 2 + strspn($text, ' ', $endPos + 3);
                     // Eat the line if possible
                     // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
                     // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
                     // it's a possible beneficial b/c break.
                     if ($wsStart > 0 && substr($text, $wsStart - 1, 1) == "\n" && substr($text, $wsEnd + 1, 1) == "\n") {
                         $startPos = $wsStart;
                         $endPos = $wsEnd + 1;
                         // Remove leading whitespace from the end of the accumulator
                         // Sanity check first though
                         $wsLength = $i - $wsStart;
                         if ($wsLength > 0 && $accum->lastNode instanceof PPNode_Hash_Text && substr($accum->lastNode->value, -$wsLength) === str_repeat(' ', $wsLength)) {
                             $accum->lastNode->value = substr($accum->lastNode->value, 0, -$wsLength);
                         }
                         // Do a line-start run next time to look for headings after the comment
                         $fakeLineStart = true;
                     } else {
                         // No line to eat, just take the comment itself
                         $startPos = $i;
                         $endPos += 2;
                     }
                     if ($stack->top) {
                         $part = $stack->top->getCurrentPart();
                         if (!(isset($part->commentEnd) && $part->commentEnd == $wsStart - 1)) {
                             $part->visualEnd = $wsStart;
                         }
                         // Else comments abutting, no change in visual end
                         $part->commentEnd = $endPos;
                     }
                     $i = $endPos + 1;
                     $inner = substr($text, $startPos, $endPos - $startPos + 1);
                     $accum->addNodeWithText('comment', $inner);
                 }
                 continue;
             }
             $name = $matches[1];
             $lowerName = strtolower($name);
             $attrStart = $i + strlen($name) + 1;
             // Find end of tag
             $tagEndPos = $noMoreGT ? false : strpos($text, '>', $attrStart);
             if ($tagEndPos === false) {
                 // Infinite backtrack
                 // Disable tag search to prevent worst-case O(N^2) performance
                 $noMoreGT = true;
                 $accum->addLiteral('<');
                 ++$i;
                 continue;
             }
             // Handle ignored tags
             if (in_array($lowerName, $ignoredTags)) {
                 $accum->addNodeWithText('ignore', substr($text, $i, $tagEndPos - $i + 1));
                 $i = $tagEndPos + 1;
                 continue;
             }
             $tagStartPos = $i;
             if ($text[$tagEndPos - 1] == '/') {
                 // Short end tag
                 $attrEnd = $tagEndPos - 1;
                 $inner = null;
                 $i = $tagEndPos + 1;
                 $close = null;
             } else {
                 $attrEnd = $tagEndPos;
                 // Find closing tag
                 if (preg_match("/<\\/" . preg_quote($name, '/') . "\\s*>/i", $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1)) {
                     $inner = substr($text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1);
                     $i = $matches[0][1] + strlen($matches[0][0]);
                     $close = $matches[0][0];
                 } else {
                     // No end tag -- let it run out to the end of the text.
                     $inner = substr($text, $tagEndPos + 1);
                     $i = strlen($text);
                     $close = null;
                 }
             }
             // <includeonly> and <noinclude> just become <ignore> tags
             if (in_array($lowerName, $ignoredElements)) {
                 $accum->addNodeWithText('ignore', substr($text, $tagStartPos, $i - $tagStartPos));
                 continue;
             }
             if ($attrEnd <= $attrStart) {
                 $attr = '';
             } else {
                 // Note that the attr element contains the whitespace between name and attribute,
                 // this is necessary for precise reconstruction during pre-save transform.
                 $attr = substr($text, $attrStart, $attrEnd - $attrStart);
             }
             $extNode = new PPNode_Hash_Tree('ext');
             $extNode->addChild(PPNode_Hash_Tree::newWithText('name', $name));
             $extNode->addChild(PPNode_Hash_Tree::newWithText('attr', $attr));
             if ($inner !== null) {
                 $extNode->addChild(PPNode_Hash_Tree::newWithText('inner', $inner));
             }
             if ($close !== null) {
                 $extNode->addChild(PPNode_Hash_Tree::newWithText('close', $close));
             }
             $accum->addNode($extNode);
         } elseif ($found == 'line-start') {
             // Is this the start of a heading?
             // Line break belongs before the heading element in any case
             if ($fakeLineStart) {
                 $fakeLineStart = false;
             } else {
                 $accum->addLiteral($curChar);
                 $i++;
             }
             $count = strspn($text, '=', $i, 6);
             if ($count == 1 && $findEquals) {
                 // DWIM: This looks kind of like a name/value separator
                 // Let's let the equals handler have it and break the potential heading
                 // This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex.
             } elseif ($count > 0) {
                 $piece = array('open' => "\n", 'close' => "\n", 'parts' => array(new PPDPart_Hash(str_repeat('=', $count))), 'startPos' => $i, 'count' => $count);
                 $stack->push($piece);
                 $accum =& $stack->getAccum();
                 extract($stack->getFlags());
                 $i += $count;
             }
         } elseif ($found == 'line-end') {
             $piece = $stack->top;
             // A heading must be open, otherwise \n wouldn't have been in the search list
             assert($piece->open == "\n");
             $part = $piece->getCurrentPart();
             // Search back through the input to see if it has a proper close
             // Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient
             $wsLength = strspn($revText, " \t", strlen($text) - $i);
             $searchStart = $i - $wsLength;
             if (isset($part->commentEnd) && $searchStart - 1 == $part->commentEnd) {
                 // Comment found at line end
                 // Search for equals signs before the comment
                 $searchStart = $part->visualEnd;
                 $searchStart -= strspn($revText, " \t", strlen($text) - $searchStart);
             }
             $count = $piece->count;
             $equalsLength = strspn($revText, '=', strlen($text) - $searchStart);
             if ($equalsLength > 0) {
                 if ($searchStart - $equalsLength == $piece->startPos) {
                     // This is just a single string of equals signs on its own line
                     // Replicate the doHeadings behaviour /={count}(.+)={count}/
                     // First find out how many equals signs there really are (don't stop at 6)
                     $count = $equalsLength;
                     if ($count < 3) {
                         $count = 0;
                     } else {
                         $count = min(6, intval(($count - 1) / 2));
                     }
                 } else {
                     $count = min($equalsLength, $count);
                 }
                 if ($count > 0) {
                     // Normal match, output <h>
                     $element = new PPNode_Hash_Tree('possible-h');
                     $element->addChild(new PPNode_Hash_Attr('level', $count));
                     $element->addChild(new PPNode_Hash_Attr('i', $headingIndex++));
                     $element->lastChild->nextSibling = $accum->firstNode;
                     $element->lastChild = $accum->lastNode;
                 } else {
                     // Single equals sign on its own line, count=0
                     $element = $accum;
                 }
             } else {
                 // No match, no <h>, just pass down the inner text
                 $element = $accum;
             }
             // Unwind the stack
             $stack->pop();
             $accum =& $stack->getAccum();
             extract($stack->getFlags());
             // Append the result to the enclosing accumulator
             if ($element instanceof PPNode) {
                 $accum->addNode($element);
             } else {
                 $accum->addAccum($element);
             }
             // Note that we do NOT increment the input pointer.
             // This is because the closing linebreak could be the opening linebreak of
             // another heading. Infinite loops are avoided because the next iteration MUST
             // hit the heading open case above, which unconditionally increments the
             // input pointer.
         } elseif ($found == 'open') {
             # count opening brace characters
             $count = strspn($text, $curChar, $i);
             # we need to add to stack only if opening brace count is enough for one of the rules
             if ($count >= $rule['min']) {
                 # Add it to the stack
                 $piece = array('open' => $curChar, 'close' => $rule['end'], 'count' => $count, 'lineStart' => $i > 0 && $text[$i - 1] == "\n");
                 $stack->push($piece);
                 $accum =& $stack->getAccum();
                 extract($stack->getFlags());
             } else {
                 # Add literal brace(s)
                 $accum->addLiteral(str_repeat($curChar, $count));
             }
             $i += $count;
         } elseif ($found == 'close') {
             $piece = $stack->top;
             # lets check if there are enough characters for closing brace
             $maxCount = $piece->count;
             $count = strspn($text, $curChar, $i, $maxCount);
             # check for maximum matching characters (if there are 5 closing
             # characters, we will probably need only 3 - depending on the rules)
             $rule = $rules[$piece->open];
             if ($count > $rule['max']) {
                 # The specified maximum exists in the callback array, unless the caller
                 # has made an error
                 $matchingCount = $rule['max'];
             } else {
                 # Count is less than the maximum
                 # Skip any gaps in the callback array to find the true largest match
                 # Need to use array_key_exists not isset because the callback can be null
                 $matchingCount = $count;
                 while ($matchingCount > 0 && !array_key_exists($matchingCount, $rule['names'])) {
                     --$matchingCount;
                 }
             }
             if ($matchingCount <= 0) {
                 # No matching element found in callback array
                 # Output a literal closing brace and continue
                 $accum->addLiteral(str_repeat($curChar, $count));
                 $i += $count;
                 continue;
             }
             $name = $rule['names'][$matchingCount];
             if ($name === null) {
                 // No element, just literal text
                 $element = $piece->breakSyntax($matchingCount);
                 $element->addLiteral(str_repeat($rule['end'], $matchingCount));
             } else {
                 # Create XML element
                 # Note: $parts is already XML, does not need to be encoded further
                 $parts = $piece->parts;
                 $titleAccum = $parts[0]->out;
                 unset($parts[0]);
                 $element = new PPNode_Hash_Tree($name);
                 # The invocation is at the start of the line if lineStart is set in
                 # the stack, and all opening brackets are used up.
                 if ($maxCount == $matchingCount && !empty($piece->lineStart)) {
                     $element->addChild(new PPNode_Hash_Attr('lineStart', 1));
                 }
                 $titleNode = new PPNode_Hash_Tree('title');
                 $titleNode->firstChild = $titleAccum->firstNode;
                 $titleNode->lastChild = $titleAccum->lastNode;
                 $element->addChild($titleNode);
                 $argIndex = 1;
                 foreach ($parts as $part) {
                     if (isset($part->eqpos)) {
                         // Find equals
                         $lastNode = false;
                         for ($node = $part->out->firstNode; $node; $node = $node->nextSibling) {
                             if ($node === $part->eqpos) {
                                 break;
                             }
                             $lastNode = $node;
                         }
                         if (!$node) {
                             throw new MWException(__METHOD__ . ': eqpos not found');
                         }
                         if ($node->name !== 'equals') {
                             throw new MWException(__METHOD__ . ': eqpos is not equals');
                         }
                         $equalsNode = $node;
                         // Construct name node
                         $nameNode = new PPNode_Hash_Tree('name');
                         if ($lastNode !== false) {
                             $lastNode->nextSibling = false;
                             $nameNode->firstChild = $part->out->firstNode;
                             $nameNode->lastChild = $lastNode;
                         }
                         // Construct value node
                         $valueNode = new PPNode_Hash_Tree('value');
                         if ($equalsNode->nextSibling !== false) {
                             $valueNode->firstChild = $equalsNode->nextSibling;
                             $valueNode->lastChild = $part->out->lastNode;
                         }
                         $partNode = new PPNode_Hash_Tree('part');
                         $partNode->addChild($nameNode);
                         $partNode->addChild($equalsNode->firstChild);
                         $partNode->addChild($valueNode);
                         $element->addChild($partNode);
                     } else {
                         $partNode = new PPNode_Hash_Tree('part');
                         $nameNode = new PPNode_Hash_Tree('name');
                         $nameNode->addChild(new PPNode_Hash_Attr('index', $argIndex++));
                         $valueNode = new PPNode_Hash_Tree('value');
                         $valueNode->firstChild = $part->out->firstNode;
                         $valueNode->lastChild = $part->out->lastNode;
                         $partNode->addChild($nameNode);
                         $partNode->addChild($valueNode);
                         $element->addChild($partNode);
                     }
                 }
             }
             # Advance input pointer
             $i += $matchingCount;
             # Unwind the stack
             $stack->pop();
             $accum =& $stack->getAccum();
             # Re-add the old stack element if it still has unmatched opening characters remaining
             if ($matchingCount < $piece->count) {
                 $piece->parts = array(new PPDPart_Hash());
                 $piece->count -= $matchingCount;
                 # do we still qualify for any callback with remaining count?
                 $names = $rules[$piece->open]['names'];
                 $skippedBraces = 0;
                 $enclosingAccum =& $accum;
                 while ($piece->count) {
                     if (array_key_exists($piece->count, $names)) {
                         $stack->push($piece);
                         $accum =& $stack->getAccum();
                         break;
                     }
                     --$piece->count;
                     $skippedBraces++;
                 }
                 $enclosingAccum->addLiteral(str_repeat($piece->open, $skippedBraces));
             }
             extract($stack->getFlags());
             # Add XML element to the enclosing accumulator
             if ($element instanceof PPNode) {
                 $accum->addNode($element);
             } else {
                 $accum->addAccum($element);
             }
         } elseif ($found == 'pipe') {
             $findEquals = true;
             // shortcut for getFlags()
             $stack->addPart();
             $accum =& $stack->getAccum();
             ++$i;
         } elseif ($found == 'equals') {
             $findEquals = false;
             // shortcut for getFlags()
             $accum->addNodeWithText('equals', '=');
             $stack->getCurrentPart()->eqpos = $accum->lastNode;
             ++$i;
         }
     }
     # Output any remaining unclosed brackets
     foreach ($stack->stack as $piece) {
         $stack->rootAccum->addAccum($piece->breakSyntax());
     }
     # Enable top-level headings
     for ($node = $stack->rootAccum->firstNode; $node; $node = $node->nextSibling) {
         if (isset($node->name) && $node->name === 'possible-h') {
             $node->name = 'h';
         }
     }
     $rootNode = new PPNode_Hash_Tree('root');
     $rootNode->firstChild = $stack->rootAccum->firstNode;
     $rootNode->lastChild = $stack->rootAccum->lastNode;
     // Cache
     if ($cacheable) {
         $cacheValue = sprintf("%08d", self::CACHE_VERSION) . serialize($rootNode);
         $wgMemc->set($cacheKey, $cacheValue, 86400);
         wfProfileOut(__METHOD__ . '-cache-miss');
         wfProfileOut(__METHOD__ . '-cacheable');
         wfDebugLog("Preprocessor", "Saved preprocessor Hash to memcached (key {$cacheKey})");
     }
     wfProfileOut(__METHOD__);
     return $rootNode;
 }
コード例 #5
0
 /**
  * Extend the parent's strip list with additional tag names.
  *
  * The parent's return value is cast to an array before merging, so
  * array_merge() always receives array operands.
  *
  * @return array The parent's list merged with 'noinclude', 'includeonly',
  *   'onlyinclude' and 'references'.
  */
 public function getStripList()
 {
     // Whatever the parent class reports, coerced to an array.
     $inherited = (array) parent::getStripList();
     // Tag names added on top of the inherited list.
     $additionalTags = array('noinclude', 'includeonly', 'onlyinclude', 'references');
     return array_merge($inherited, $additionalTags);
 }