/**
 * Convert wikitext to HTML.
 * Do not call this function recursively.
 *
 * The revision-related members (id, object, timestamp, user, size) are
 * saved on entry and restored before returning, so passing $revid only
 * affects this one parse.
 *
 * @param string $text Text we want to parse
 * @param Title $title
 * @param ParserOptions $options
 * @param bool $linestart Passed through to doBlockLevels()
 * @param bool $clearState Passed to startParse(); when true the strip marker
 *   prefix is also removed from the input
 * @param int|null $revid Number to pass in {{REVISIONID}}; null keeps the
 *   current revision state
 * @return ParserOutput The parser's output object, with the HTML set as text
 */
public function parse( $text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	/**
	 * First pass--just handle <nowiki> sections, pass the rest off
	 * to internalParse() which does all the real work.
	 */
	global $wgUseTidy, $wgAlwaysUseTidy, $wgShowHostnames;

	$fname = __METHOD__ . '-' . wfGetCaller();
	wfProfileIn( __METHOD__ );
	wfProfileIn( $fname );

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	# Remove the strip marker tag prefix from the input, if present.
	if ( $clearState ) {
		$text = str_replace( $this->mUniqPrefix, '', $text );
	}

	# Remember the current revision state so it can be restored on exit.
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionObject = $this->mRevisionObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		# An explicit revision id invalidates everything derived from the old one.
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
	# No more strip!
	wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
	$text = $this->internalParse( $text );
	wfRunHooks( 'ParserAfterParse', array( &$this, &$text, &$this->mStripState ) );

	$text = $this->mStripState->unstripGeneral( $text );

	# Clean up special characters, only run once, next-to-last before doBlockLevels.
	# A space before ? : ; ! % or a closing guillemet, and a space after an
	# opening guillemet, becomes a non-breaking space; the last rule turns the
	# entity back into a plain space in front of "!important".
	# The non-breaking space is spelled as the &#160; entity so that it stays
	# visible in the source and cannot silently degrade into an ordinary space.
	$fixtags = array(
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		'/(\\302\\253) /' => '\\1&#160;',
		'/&#160;(!\\s*important)/' => ' \\1',
	);
	$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	/**
	 * The input doesn't get language converted if
	 * a) It's disabled
	 * b) Content isn't converted
	 * c) It's a conversion table
	 * d) it is an interface message (which is in the user language)
	 */
	if ( !( $options->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
	) {
		if ( !$this->mOptions->getInterfaceMessage() ) {
			# The position of the convert() call should not be changed. it
			# assumes that the links are all replaced and the only thing left
			# is the <nowiki> mark.
			$text = $this->getConverterLanguage()->convert( $text );
		}
	}

	/**
	 * A converted title will be provided in the output object if title and
	 * content conversion are enabled, the article text does not contain
	 * a conversion-suppressing double-underscore tag, and no
	 * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
	 * automatic link conversion.
	 */
	if ( !( $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false )
	) {
		$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
		if ( $convruletitle ) {
			$this->mOutput->setTitleText( $convruletitle );
		} else {
			$titleText = $this->getConverterLanguage()->convertTitle( $title );
			$this->mOutput->setTitleText( $titleText );
		}
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( $wgUseTidy && $this->mOptions->getTidy() || $wgAlwaysUseTidy ) {
		$text = MWTidy::tidy( $text );
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$tidyregs = array(
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' =>
				'\\1\\3<div\\5>\\6</div>\\8\\9',
			'/<([bi])><\\/\\1>/' => '',
		);
		$text = preg_replace( array_keys( $tidyregs ), array_values( $tidyregs ), $text );
	}

	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );

	# Information on include size limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$max = $this->mOptions->getMaxIncludeSize();

		# CPU time is only recorded when getTimeSinceStart() reports a value for it.
		$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
		if ( $cpuTime !== null ) {
			$this->mOutput->setLimitReportData( 'limitreport-cputime',
				sprintf( "%.3f", $cpuTime )
			);
		}

		$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
		$this->mOutput->setLimitReportData( 'limitreport-walltime',
			sprintf( "%.3f", $wallTime )
		);

		# Each remaining entry is a (current value, configured limit) pair.
		$this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
			array( $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() )
		);
		$this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
			array( $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() )
		);
		$this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
			array( $this->mIncludeSizes['post-expand'], $max )
		);
		$this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
			array( $this->mIncludeSizes['arg'], $max )
		);
		$this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
			array( $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() )
		);
		$this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
			array( $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() )
		);
		wfRunHooks( 'ParserLimitReportPrepare', array( $this, $this->mOutput ) );

		$limitReport = "NewPP limit report\n";
		if ( $wgShowHostnames ) {
			$limitReport .= 'Parsed by ' . wfHostname() . "\n";
		}
		foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
			# A hook returning false has taken over formatting of this entry.
			if ( wfRunHooks( 'ParserLimitReportFormat',
				array( $key, &$value, &$limitReport, false, false )
			) ) {
				$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
				$valueMsg = wfMessage( array( "{$key}-value-text", "{$key}-value" ) )
					->inLanguage( 'en' )->useDatabase( false );
				if ( !$valueMsg->exists() ) {
					# No dedicated value message: print the value as-is.
					$valueMsg = new RawMessage( '$1' );
				}
				if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
					$valueMsg->params( $value );
					$limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
				}
			}
		}
		// Since we're not really outputting HTML, decode the entities and
		// then re-encode the things that need hiding inside HTML comments.
		$limitReport = htmlspecialchars_decode( $limitReport );
		wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );

		// Sanitize for comment. Note '‐' in the replacement is U+2010,
		// which looks much like the problematic '-'. The ampersand must be
		// re-encoded as an entity; replacing it with itself would leave the
		// decoded report unescaped.
		$limitReport = str_replace( array( '-', '&' ), array( '‐', '&amp;' ), $limitReport );
		$text .= "\n<!-- \n{$limitReport}-->\n";

		if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
			wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
				$this->mTitle->getPrefixedDBkey() );
		}
	}
	$this->mOutput->setText( $text );

	# Restore the revision state saved on entry.
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionObject = $oldRevisionObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	wfProfileOut( $fname );
	wfProfileOut( __METHOD__ );

	return $this->mOutput;
}
/**
 * Convert wikitext to HTML.
 * Do not call this function recursively.
 *
 * The revision-related members (id, object, timestamp, user) are saved on
 * entry and restored before returning, so passing $revid only affects this
 * one parse.
 *
 * @param string $text Text we want to parse
 * @param Title $title
 * @param ParserOptions $options
 * @param bool $linestart Passed through to doBlockLevels()
 * @param bool $clearState Passed to startParse(); when true the strip marker
 *   prefix is also removed from the input
 * @param int|null $revid Number to pass in {{REVISIONID}}; null keeps the
 *   current revision state
 * @return ParserOutput The parser's output object, with the HTML set as text
 */
public function parse( $text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	/**
	 * First pass--just handle <nowiki> sections, pass the rest off
	 * to internalParse() which does all the real work.
	 */
	global $wgUseTidy, $wgAlwaysUseTidy;

	$fname = __METHOD__ . '-' . wfGetCaller();
	wfProfileIn( __METHOD__ );
	wfProfileIn( $fname );

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	# Remove the strip marker tag prefix from the input, if present.
	if ( $clearState ) {
		$text = str_replace( $this->mUniqPrefix, '', $text );
	}

	# Remember the current revision state so it can be restored on exit.
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionObject = $this->mRevisionObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	if ( $revid !== null ) {
		# An explicit revision id invalidates everything derived from the old one.
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
	}

	wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
	# No more strip!
	wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
	$text = $this->internalParse( $text );
	wfRunHooks( 'ParserAfterParse', array( &$this, &$text, &$this->mStripState ) );

	$text = $this->mStripState->unstripGeneral( $text );

	# Clean up special characters, only run once, next-to-last before doBlockLevels.
	# A space before ? : ; ! % or a closing guillemet, and a space after an
	# opening guillemet, becomes a non-breaking space; the last rule turns the
	# entity back into a plain space in front of "!important".
	# The non-breaking space is spelled as the &#160; entity so that it stays
	# visible in the source and cannot silently degrade into an ordinary space.
	$fixtags = array(
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		'/(\\302\\253) /' => '\\1&#160;',
		'/&#160;(!\\s*important)/' => ' \\1',
	);
	$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	/**
	 * The input doesn't get language converted if
	 * a) It's disabled
	 * b) Content isn't converted
	 * c) It's a conversion table
	 * d) it is an interface message (which is in the user language)
	 */
	if ( !( $options->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
	) {
		# Run convert unconditionally in 1.18-compatible mode
		global $wgBug34832TransitionalRollback;
		if ( $wgBug34832TransitionalRollback || !$this->mOptions->getInterfaceMessage() ) {
			# The position of the convert() call should not be changed. it
			# assumes that the links are all replaced and the only thing left
			# is the <nowiki> mark.
			$text = $this->getConverterLanguage()->convert( $text );
		}
	}

	/**
	 * A converted title will be provided in the output object if title and
	 * content conversion are enabled, the article text does not contain
	 * a conversion-suppressing double-underscore tag, and no
	 * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
	 * automatic link conversion.
	 */
	if ( !( $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false )
	) {
		$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
		if ( $convruletitle ) {
			$this->mOutput->setTitleText( $convruletitle );
		} else {
			$titleText = $this->getConverterLanguage()->convertTitle( $title );
			$this->mOutput->setTitleText( $titleText );
		}
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( $wgUseTidy && $this->mOptions->getTidy() || $wgAlwaysUseTidy ) {
		$text = MWTidy::tidy( $text );
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$tidyregs = array(
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' =>
				'\\1\\3<div\\5>\\6</div>\\8\\9',
			'/<([bi])><\\/\\1>/' => '',
		);
		$text = preg_replace( array_keys( $tidyregs ), array_values( $tidyregs ), $text );
	}

	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );

	# Information on include size limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$max = $this->mOptions->getMaxIncludeSize();
		$PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$this->mOptions->getExpensiveParserFunctionLimit()}\n";
		# Each line reports a current value against its configured limit.
		$limitReport =
			"NewPP limit report\n" .
			"Preprocessor visited node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
			"Preprocessor generated node count: " .
				"{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" .
			"Post-expand include size: {$this->mIncludeSizes['post-expand']}/{$max} bytes\n" .
			"Template argument size: {$this->mIncludeSizes['arg']}/{$max} bytes\n" .
			"Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n" .
			$PFreport;
		wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );
		# The report is appended to the output inside an HTML comment.
		$text .= "\n<!-- \n{$limitReport}-->\n";

		if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
			wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
				$this->mTitle->getPrefixedDBkey() );
		}
	}
	$this->mOutput->setText( $text );

	# Restore the revision state saved on entry.
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionObject = $oldRevisionObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	wfProfileOut( $fname );
	wfProfileOut( __METHOD__ );

	return $this->mOutput;
}
/**
 * Helper function for parse() that transforms half-parsed HTML into fully
 * parsed HTML.
 *
 * Steps, in order: unstrip general markers, fix up spacing around
 * punctuation, build block-level structure, replace link holders, run
 * language conversion (unless disabled), unstrip nowiki markers, replace
 * transparent tags, normalize character references, and finally tidy the
 * markup (MWTidy when enabled, a few regex fix-ups otherwise).
 *
 * @param string $text
 * @param bool $isMain When true the ParserAfterUnstrip, ParserBeforeTidy and
 *   ParserAfterTidy hooks are run; when false they are skipped
 * @param bool $linestart Passed through to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', array( &$this, &$text ) );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels.
	# A space before ? : ; ! % or a closing guillemet, and a space after an
	# opening guillemet, becomes a non-breaking space; the last rule turns the
	# entity back into a plain space in front of "!important".
	# The non-breaking space is spelled as the &#160; entity so that it stays
	# visible in the source and cannot silently degrade into an ordinary space.
	$fixtags = array(
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		'/(\\302\\253) /' => '\\1&#160;',
		'/&#160;(!\\s*important)/' => ' \\1',
	);
	$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	/**
	 * The input doesn't get language converted if
	 * a) It's disabled
	 * b) Content isn't converted
	 * c) It's a conversion table
	 * d) it is an interface message (which is in the user language)
	 */
	if ( !( $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
	) {
		if ( !$this->mOptions->getInterfaceMessage() ) {
			# The position of the convert() call should not be changed. it
			# assumes that the links are all replaced and the only thing left
			# is the <nowiki> mark.
			$text = $this->getConverterLanguage()->convert( $text );
		}
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', array( &$this, &$text ) );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( MWTidy::isEnabled() && $this->mOptions->getTidy() ) {
		$text = MWTidy::tidy( $text );
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$tidyregs = array(
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' =>
				'\\1\\3<div\\5>\\6</div>\\8\\9',
			'/<([bi])><\\/\\1>/' => '',
		);
		$text = preg_replace( array_keys( $tidyregs ), array_values( $tidyregs ), $text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', array( &$this, &$text ) );
	}

	return $text;
}
/**
 * Parse the form definition and return it.
 *
 * The definition is taken from the page with id $form_id when one is given
 * (a cached definition is returned directly if available); otherwise the
 * wikitext in $form_def is used. SF triple-brace tags are shielded from the
 * parser with a private strip state and restored after parsing.
 *
 * @param Parser $parser Parser used to parse the definition and as context
 *   for the cache helpers
 * @param string|null $form_def Wikitext of the form definition; ignored when
 *   $form_id is given
 * @param int|null $form_id Page id of the form definition page
 * @return string The parsed form definition, or '' when there is nothing to
 *   parse (neither id nor text, or no page exists for the id)
 */
public static function getFormDefinition( Parser $parser, $form_def = null, $form_id = null ) {
	if ( $form_id !== null ) {
		$cachedDef = self::getFormDefinitionFromCache( $form_id, $parser );
		if ( $cachedDef !== null ) {
			return $cachedDef;
		}

		$form_title = Title::newFromID( $form_id );
		if ( $form_title === null ) {
			// No page with that id -> nothing to do
			return '';
		}
		$form_def = SFUtils::getPageText( $form_title );
	} elseif ( $form_def == null ) {
		// No id, no text -> nothing to do
		return '';
	}

	// Remove <noinclude> sections and <includeonly> tags from form definition
	$form_def = StringUtils::delimiterReplace( '<noinclude>', '</noinclude>', '', $form_def );
	$form_def = strtr( $form_def, array( '<includeonly>' => '', '</includeonly>' => '' ) );

	// We need to replace all SF tags in the form definition by strip items. But we can not just use
	// the Parser strip state because the Parser would during parsing replace all strip items and then
	// mangle them into HTML code. So we have to use our own. Which means we also can not just use
	// Parser::insertStripItem() (see below).
	$prefix = "UNIQ" . Parser::getRandomString();
	$stripState = new StripState( $prefix );

	// This regexp will find any SF triple braced tags (including correct handling of contained braces), i.e.
	// {{{field|foo|default={{Bar}}}}} is not a problem. When used with preg_match and friends, $matches[0] will
	// contain the whole SF tag, $matches[1] will contain the tag without the enclosing triple braces.
	$regexp = '#\\{\\{\\{((?>[^\\{\\}]+)|(\\{((?>[^\\{\\}]+)|(?-2))*\\}))*\\}\\}\\}#';

	// Replace all SF tags by strip markers. The counter is shared with the
	// callback by reference so every marker within this call gets its own index.
	$markerIndex = 0;
	$form_def = preg_replace_callback(
		$regexp,
		function ( array $matches ) use ( $stripState, $prefix, &$markerIndex ) {
			$rnd = "{$prefix}-item-{$markerIndex}-" . Parser::MARKER_SUFFIX;
			$markerIndex++;
			$stripState->addGeneral( $rnd, $matches[0] );
			return $rnd;
		},
		$form_def
	);

	$title = is_object( $parser->getTitle() ) ? $parser->getTitle() : new Title();

	// parse wiki-text
	$output = $parser->parse( $form_def, $title, $parser->getOptions() );
	$form_def = $stripState->unstripGeneral( $output->getText() );

	if ( $output->getCacheTime() == -1 ) {
		// Caching is disabled for this output. Only a definition that came
		// from a page has an article whose cache can be purged.
		if ( $form_id !== null ) {
			$form_article = Article::newFromID( $form_id );
			self::purgeCache( $form_article );
		}
		wfDebug( "Caching disabled for form definition {$form_id}\n" );
	} elseif ( $form_id !== null ) {
		self::cacheFormDefinition( $form_id, $form_def, $parser );
	}

	return $form_def;
}