/**
 * Run the given revision through the "parse" API module and collect the
 * parts of the result needed by the caller.
 *
 * @param Title $title Object providing getPrefixedDBkey() (presumably a Title)
 * @param int $newRevId Revision ID, passed to the API as "oldid"
 * @return array|bool Associative array with the keys 'content',
 *  'categorieshtml', 'basetimestamp', 'starttimestamp', 'displayTitleHtml',
 *  'modules' and 'jsconfigvars'; false when the API returned no text, or
 *  returned non-empty text for a revision that could not be loaded.
 */
protected function parseWikitext($title, $newRevId) {
	$apiParams = array(
		'action' => 'parse',
		'page' => $title->getPrefixedDBkey(),
		'oldid' => $newRevId,
		'prop' => 'text|revid|categorieshtml|displaytitle|modules|jsconfigvars'
	);
	$api = new ApiMain(
		new DerivativeRequest($this->getRequest(), $apiParams, false),
		true
	);
	$api->execute();

	// Two result accessors are supported, selected by the presence of the
	// ApiResult::META_CONTENT constant.
	if (defined('ApiResult::META_CONTENT')) {
		$result = $api->getResult()->getResultData(
			null,
			array('BC' => array(), 'Types' => array(), 'Strip' => 'all')
		);
	} else {
		$result = $api->getResultData();
	}

	$content = isset($result['parse']['text']['*'])
		? $result['parse']['text']['*']
		: false;
	$categorieshtml = isset($result['parse']['categorieshtml']['*'])
		? $result['parse']['categorieshtml']['*']
		: false;
	// Guard the lookup: a failed parse leaves 'revid' unset, and reading it
	// unguarded raises an undefined-index notice.
	$revision = isset($result['parse']['revid'])
		? Revision::newFromId($result['parse']['revid'])
		: null;
	$timestamp = $revision ? $revision->getTimestamp() : wfTimestampNow();
	$displaytitle = isset($result['parse']['displaytitle'])
		? $result['parse']['displaytitle']
		: false;
	$modules = isset($result['parse']['modules'])
		? $result['parse']['modules']
		: array();
	$jsconfigvars = isset($result['parse']['jsconfigvars'])
		? $result['parse']['jsconfigvars']
		: array();

	// No text at all is a failure; so is non-empty text without a revision.
	if ($content === false || (strlen($content) && $revision === null)) {
		return false;
	}

	if ($displaytitle !== false) {
		// Escape entities as in OutputPage::setPageTitle()
		$displaytitle = Sanitizer::normalizeCharReferences(
			Sanitizer::removeHTMLtags($displaytitle)
		);
	}

	return array(
		'content' => $content,
		'categorieshtml' => $categorieshtml,
		'basetimestamp' => $timestamp,
		'starttimestamp' => wfTimestampNow(),
		'displayTitleHtml' => $displaytitle,
		'modules' => $modules,
		'jsconfigvars' => $jsconfigvars
	);
}
/**
 * Override the title of the page when viewed, provided we've been given a
 * title which will normalise to the canonical title
 *
 * @param Parser $parser Parent parser
 * @param string $text Desired title text
 * @param string $uarg Optional flag, matched against the
 *  'displaytitle_noerror' and 'displaytitle_noreplace' magic words
 * @return string Empty string on success, otherwise an error <span>
 */
public static function displaytitle($parser, $text = '', $uarg = '') {
	global $wgRestrictDisplayTitle;

	static $magicWords = null;
	if (null === $magicWords) {
		$magicWords = new MagicWordArray(['displaytitle_noerror', 'displaytitle_noreplace']);
	}
	$arg = $magicWords->matchStartToEnd($uarg);

	// parse a limited subset of wiki markup (just the single quote items)
	$text = $parser->doQuotes($text);

	// remove stripped text (e.g. the UNIQ-QINU stuff) that was generated by
	// tag extensions/whatever
	$text = $parser->killMarkers($text);

	// list of disallowed tags for DISPLAYTITLE
	// these will be escaped even though they are allowed in normal wiki text
	$forbiddenTags = [
		'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'blockquote', 'ol', 'ul',
		'li', 'hr', 'table', 'tr', 'th', 'td', 'dl', 'dd', 'caption', 'p',
		'ruby', 'rb', 'rt', 'rtc', 'rp', 'br'
	];

	// disallow some styles that could be used to bypass $wgRestrictDisplayTitle
	$htmlTagsCallback = null;
	if ($wgRestrictDisplayTitle) {
		$htmlTagsCallback = function (&$params) {
			$attribs = Sanitizer::decodeTagAttributes($params);
			if (isset($attribs['style'])) {
				// checkCss() is called again later anyway, but the regex
				// below is only safe on already-checked CSS; calling it
				// twice doesn't hurt
				$css = Sanitizer::checkCss($attribs['style']);
				$attribs['style'] = preg_match('/(display|user-select|visibility)\\s*:/i', $css)
					? '/* attempt to bypass $wgRestrictDisplayTitle */'
					: $css;
			}
			$params = Sanitizer::safeEncodeTagAttributes($attribs);
		};
	}

	// only requested titles that normalize to the actual title are allowed through
	// if $wgRestrictDisplayTitle is true (it is by default)
	// mimic the escaping process that occurs in OutputPage::setPageTitle
	$text = Sanitizer::normalizeCharReferences(
		Sanitizer::removeHTMLtags($text, $htmlTagsCallback, [], [], $forbiddenTags)
	);
	$title = Title::newFromText(Sanitizer::stripAllTags($text));

	$accepted = !$wgRestrictDisplayTitle
		|| ($title instanceof Title && !$title->hasFragment() && $title->equals($parser->mTitle));

	if (!$accepted) {
		// The requested title does not normalise to this page: refuse it.
		$parser->addTrackingCategory('restricted-displaytitle-ignored');
		$converter = $parser->getConverterLanguage()->getConverter();
		$message = wfMessage(
			'restricted-displaytitle',
			$converter->markNoConversion(wfEscapeWikiText($text))
		);
		return '<span class="error">' . $message->inContentLanguage()->text() . '</span>';
	}

	$previous = $parser->mOutput->getProperty('displaytitle');
	if ($previous === false || $arg !== 'displaytitle_noreplace') {
		$parser->mOutput->setDisplayTitle($text);
	}

	// Nothing to report unless a different title was already set and no
	// flag argument was given.
	if ($previous === false || $previous === $text || $arg) {
		return '';
	}

	$converter = $parser->getConverterLanguage()->getConverter();
	$message = wfMessage(
		'duplicate-displaytitle',
		$converter->markNoConversion(wfEscapeWikiText($previous)),
		$converter->markNoConversion(wfEscapeWikiText($text))
	);
	return '<span class="error">' . $message->inContentLanguage()->text() . '</span>';
}
/**
 * Convert wikitext to HTML
 * Do not call this function recursively.
 *
 * @param string $text Text we want to parse
 * @param Title $title
 * @param ParserOptions $options
 * @param bool $linestart
 * @param bool $clearState
 * @param int|null $revid Number to pass in {{REVISIONID}}
 * @return ParserOutput
 */
public function parse($text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null) {
	/**
	 * First pass--just handle <nowiki> sections, pass the rest off
	 * to internalParse() which does all the real work.
	 */
	global $wgUseTidy, $wgAlwaysUseTidy, $wgShowHostnames;
	$fname = __METHOD__ . '-' . wfGetCaller();
	wfProfileIn(__METHOD__);
	wfProfileIn($fname);

	$this->startParse($title, $options, self::OT_HTML, $clearState);

	$this->mInputSize = strlen($text);
	if ($this->mOptions->getEnableLimitReport()) {
		$this->mOutput->resetParseStartTime();
	}

	# Remove the strip marker tag prefix from the input, if present.
	if ($clearState) {
		$text = str_replace($this->mUniqPrefix, '', $text);
	}

	# Remember the revision state so it can be restored before returning.
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionObject = $this->mRevisionObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ($revid !== null) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	wfRunHooks('ParserBeforeStrip', array(&$this, &$text, &$this->mStripState));
	# No more strip!
	wfRunHooks('ParserAfterStrip', array(&$this, &$text, &$this->mStripState));
	$text = $this->internalParse($text);
	wfRunHooks('ParserAfterParse', array(&$this, &$text, &$this->mStripState));

	$text = $this->mStripState->unstripGeneral($text);

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	# The inserted separator is written as the numeric entity &#160;
	# (non-breaking space): with a plain space on both sides these rules
	# would replace a space with a space and do nothing.
	$fixtags = array(
		# a space before ? : ; ! % or \302\273 (UTF-8 bytes of U+00BB),
		# only if there is something before the space
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		# a space after \302\253 (UTF-8 bytes of U+00AB)
		'/(\\302\\253) /' => '\\1&#160;',
		# undo the first rule in front of "!important"
		'/&#160;(!\\s*important)/' => ' \\1'
	);
	$text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);

	$text = $this->doBlockLevels($text, $linestart);

	$this->replaceLinkHolders($text);

	/**
	 * The input doesn't get language converted if
	 * a) It's disabled
	 * b) Content isn't converted
	 * c) It's a conversion table
	 * d) it is an interface message (which is in the user language)
	 */
	if (!($options->getDisableContentConversion()
		|| isset($this->mDoubleUnderscores['nocontentconvert']))
	) {
		if (!$this->mOptions->getInterfaceMessage()) {
			# The position of the convert() call should not be changed. it
			# assumes that the links are all replaced and the only thing left
			# is the <nowiki> mark.
			$text = $this->getConverterLanguage()->convert($text);
		}
	}

	/**
	 * A converted title will be provided in the output object if title and
	 * content conversion are enabled, the article text does not contain
	 * a conversion-suppressing double-underscore tag, and no
	 * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
	 * automatic link conversion.
	 */
	if (!($options->getDisableTitleConversion()
		|| isset($this->mDoubleUnderscores['nocontentconvert'])
		|| isset($this->mDoubleUnderscores['notitleconvert'])
		|| $this->mOutput->getDisplayTitle() !== false)
	) {
		$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
		if ($convruletitle) {
			$this->mOutput->setTitleText($convruletitle);
		} else {
			$titleText = $this->getConverterLanguage()->convertTitle($title);
			$this->mOutput->setTitleText($titleText);
		}
	}

	$text = $this->mStripState->unstripNoWiki($text);

	wfRunHooks('ParserBeforeTidy', array(&$this, &$text));

	$text = $this->replaceTransparentTags($text);
	$text = $this->mStripState->unstripGeneral($text);

	$text = Sanitizer::normalizeCharReferences($text);

	if (($wgUseTidy && $this->mOptions->getTidy()) || $wgAlwaysUseTidy) {
		$text = MWTidy::tidy($text);
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$tidyregs = array(
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>',
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3<div\\5>\\6</div>\\8\\9',
			'/<([bi])><\\/\\1>/' => ''
		);
		$text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
	}

	if ($this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit()) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	wfRunHooks('ParserAfterTidy', array(&$this, &$text));

	# Information on include size limits, for the benefit of users who try to skirt them
	if ($this->mOptions->getEnableLimitReport()) {
		$max = $this->mOptions->getMaxIncludeSize();

		$cpuTime = $this->mOutput->getTimeSinceStart('cpu');
		if ($cpuTime !== null) {
			$this->mOutput->setLimitReportData('limitreport-cputime', sprintf("%.3f", $cpuTime));
		}

		$wallTime = $this->mOutput->getTimeSinceStart('wall');
		$this->mOutput->setLimitReportData('limitreport-walltime', sprintf("%.3f", $wallTime));

		$this->mOutput->setLimitReportData(
			'limitreport-ppvisitednodes',
			array($this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount())
		);
		$this->mOutput->setLimitReportData(
			'limitreport-ppgeneratednodes',
			array($this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount())
		);
		$this->mOutput->setLimitReportData(
			'limitreport-postexpandincludesize',
			array($this->mIncludeSizes['post-expand'], $max)
		);
		$this->mOutput->setLimitReportData(
			'limitreport-templateargumentsize',
			array($this->mIncludeSizes['arg'], $max)
		);
		$this->mOutput->setLimitReportData(
			'limitreport-expansiondepth',
			array($this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth())
		);
		$this->mOutput->setLimitReportData(
			'limitreport-expensivefunctioncount',
			array($this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit())
		);
		wfRunHooks('ParserLimitReportPrepare', array($this, $this->mOutput));

		$limitReport = "NewPP limit report\n";
		if ($wgShowHostnames) {
			$limitReport .= 'Parsed by ' . wfHostname() . "\n";
		}
		foreach ($this->mOutput->getLimitReportData() as $key => $value) {
			if (wfRunHooks('ParserLimitReportFormat', array($key, &$value, &$limitReport, false, false))) {
				$keyMsg = wfMessage($key)->inLanguage('en')->useDatabase(false);
				$valueMsg = wfMessage(array("{$key}-value-text", "{$key}-value"))
					->inLanguage('en')->useDatabase(false);
				if (!$valueMsg->exists()) {
					$valueMsg = new RawMessage('$1');
				}
				if (!$keyMsg->isDisabled() && !$valueMsg->isDisabled()) {
					$valueMsg->params($value);
					$limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
				}
			}
		}
		// Since we're not really outputting HTML, decode the entities and
		// then re-encode the things that need hiding inside HTML comments.
		$limitReport = htmlspecialchars_decode($limitReport);
		wfRunHooks('ParserLimitReport', array($this, &$limitReport));

		// Sanitize for comment. Note '‐' in the replacement is U+2010,
		// which looks much like the problematic '-'. '&' has to become
		// '&amp;'; mapping it to itself would leave it un-encoded.
		$limitReport = str_replace(array('-', '&'), array('‐', '&amp;'), $limitReport);
		$text .= "\n<!-- \n{$limitReport}-->\n";

		if ($this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10) {
			wfDebugLog(
				'generated-pp-node-count',
				$this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey()
			);
		}
	}
	$this->mOutput->setText($text);

	# Restore the revision state saved at the top.
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionObject = $oldRevisionObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	wfProfileOut($fname);
	wfProfileOut(__METHOD__);

	return $this->mOutput;
}
/**
 * Helper function for parse() that transforms half-parsed HTML into fully
 * parsed HTML.
 *
 * @param string $text
 * @param bool $isMain When true, the ParserAfterUnstrip, ParserBeforeTidy
 *  and ParserAfterTidy hooks are run
 * @param bool $linestart Passed through to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed($text, $isMain = true, $linestart = true) {
	$text = $this->mStripState->unstripGeneral($text);

	if ($isMain) {
		Hooks::run('ParserAfterUnstrip', array(&$this, &$text));
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	# The inserted separator is written as the numeric entity &#160;
	# (non-breaking space): with a plain space on both sides these rules
	# would replace a space with a space and do nothing.
	$fixtags = array(
		# a space before ? : ; ! % or \302\273 (UTF-8 bytes of U+00BB),
		# only if there is something before the space
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		# a space after \302\253 (UTF-8 bytes of U+00AB)
		'/(\\302\\253) /' => '\\1&#160;',
		# undo the first rule in front of "!important"
		'/&#160;(!\\s*important)/' => ' \\1'
	);
	$text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);

	$text = $this->doBlockLevels($text, $linestart);

	$this->replaceLinkHolders($text);

	/**
	 * The input doesn't get language converted if
	 * a) It's disabled
	 * b) Content isn't converted
	 * c) It's a conversion table
	 * d) it is an interface message (which is in the user language)
	 */
	if (!($this->mOptions->getDisableContentConversion()
		|| isset($this->mDoubleUnderscores['nocontentconvert']))
	) {
		if (!$this->mOptions->getInterfaceMessage()) {
			# The position of the convert() call should not be changed. it
			# assumes that the links are all replaced and the only thing left
			# is the <nowiki> mark.
			$text = $this->getConverterLanguage()->convert($text);
		}
	}

	$text = $this->mStripState->unstripNoWiki($text);

	if ($isMain) {
		Hooks::run('ParserBeforeTidy', array(&$this, &$text));
	}

	$text = $this->replaceTransparentTags($text);
	$text = $this->mStripState->unstripGeneral($text);

	$text = Sanitizer::normalizeCharReferences($text);

	if (MWTidy::isEnabled() && $this->mOptions->getTidy()) {
		$text = MWTidy::tidy($text);
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$tidyregs = array(
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>',
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3<div\\5>\\6</div>\\8\\9',
			'/<([bi])><\\/\\1>/' => ''
		);
		$text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
	}

	if ($isMain) {
		Hooks::run('ParserAfterTidy', array(&$this, &$text));
	}

	return $text;
}
/**
 * Convert wikitext to HTML
 * Do not call this function recursively.
 *
 * @param string $text Text we want to parse
 * @param Title $title A title object
 * @param ParserOptions $options
 * @param bool $linestart
 * @param bool $clearState
 * @param int|null $revid Number to pass in {{REVISIONID}}
 * @return ParserOutput
 */
public function parse($text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null) {
	/**
	 * First pass--just handle <nowiki> sections, pass the rest off
	 * to internalParse() which does all the real work.
	 */
	global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang, $wgExpensiveParserFunctionLimit;
	$fname = __METHOD__ . '-' . wfGetCaller();
	wfProfileIn(__METHOD__);
	wfProfileIn($fname);

	if ($clearState) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->setTitle($title);

	# Remember the revision state so it can be restored before returning.
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	if ($revid !== null) {
		$this->mRevisionId = $revid;
		$this->mRevisionTimestamp = null;
	}
	$this->setOutputType(self::OT_HTML);

	wfRunHooks('ParserBeforeStrip', array(&$this, &$text, &$this->mStripState));
	# No more strip!
	wfRunHooks('ParserAfterStrip', array(&$this, &$text, &$this->mStripState));
	$text = $this->internalParse($text);
	$text = $this->mStripState->unstripGeneral($text);

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	# The inserted separator is written as the numeric entity &#160;
	# (non-breaking space): with a plain space on both sides these rules
	# would replace a space with a space and do nothing. The first pattern
	# has a single capture group (the lookahead does not capture), so the
	# replacement uses \1 only.
	$fixtags = array(
		# a space before ? : ; ! % or \302\273 (UTF-8 bytes of U+00BB),
		# only if there is something before the space
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		# a space after \302\253 (UTF-8 bytes of U+00AB)
		'/(\\302\\253) /' => '\\1&#160;',
		# undo the first rule in front of "!important"
		'/&#160;(!\\s*important)/' => ' \\1'
	);
	$text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);

	$text = $this->doBlockLevels($text, $linestart);

	$this->replaceLinkHolders($text);

	# the position of the parserConvert() call should not be changed. it
	# assumes that the links are all replaced and the only thing left
	# is the <nowiki> mark.
	# Side-effects: this calls $this->mOutput->setTitleText()
	$text = $wgContLang->parserConvert($text, $this);

	$text = $this->mStripState->unstripNoWiki($text);

	wfRunHooks('ParserBeforeTidy', array(&$this, &$text));

	//!JF Move to its own function
	# Pull out the elements registered in mTransparentTagHooks and register
	# each hook's output (or the raw tag when no hook matches) in the
	# general strip state under the element's marker.
	$uniq_prefix = $this->mUniqPrefix;
	$matches = array();
	$elements = array_keys($this->mTransparentTagHooks);
	$text = self::extractTagsAndParams($elements, $text, $matches, $uniq_prefix);

	foreach ($matches as $marker => $data) {
		list($element, $content, $params, $tag) = $data;
		$tagName = strtolower($element);
		if (isset($this->mTransparentTagHooks[$tagName])) {
			$output = call_user_func_array(
				$this->mTransparentTagHooks[$tagName],
				array($content, $params, $this)
			);
		} else {
			$output = $tag;
		}
		$this->mStripState->general->setPair($marker, $output);
	}
	$text = $this->mStripState->unstripGeneral($text);

	$text = Sanitizer::normalizeCharReferences($text);

	if (($wgUseTidy && $this->mOptions->mTidy) || $wgAlwaysUseTidy) {
		$text = MWTidy::tidy($text);
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$tidyregs = array(
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>',
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3<div\\5>\\6</div>\\8\\9',
			'/<([bi])><\\/\\1>/' => ''
		);
		$text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
	}

	if ($this->mExpensiveFunctionCount > $wgExpensiveParserFunctionLimit) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$wgExpensiveParserFunctionLimit
		);
	}

	wfRunHooks('ParserAfterTidy', array(&$this, &$text));

	# Information on include size limits, for the benefit of users who try to skirt them
	if ($this->mOptions->getEnableLimitReport()) {
		$max = $this->mOptions->getMaxIncludeSize();
		$PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$wgExpensiveParserFunctionLimit}\n";
		$limitReport =
			"NewPP limit report\n" .
			"Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->mMaxPPNodeCount}\n" .
			"Post-expand include size: {$this->mIncludeSizes['post-expand']}/{$max} bytes\n" .
			"Template argument size: {$this->mIncludeSizes['arg']}/{$max} bytes\n" .
			$PFreport;
		wfRunHooks('ParserLimitReport', array($this, &$limitReport));
		$text .= "\n<!-- \n{$limitReport}-->\n";
	}
	$this->mOutput->setText($text);

	# Restore the revision state saved at the top.
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	wfProfileOut($fname);
	wfProfileOut(__METHOD__);

	return $this->mOutput;
}
/**
 * Convert wikitext to HTML
 * Do not call this function recursively.
 *
 * @param string $text Text we want to parse
 * @param Title $title Title object
 * @param ParserOptions $options
 * @param bool $linestart
 * @param bool $clearState
 * @param int|null $revid Number to pass in {{REVISIONID}}
 * @return ParserOutput
 */
public function parse($text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null) {
	/**
	 * First pass--just handle <nowiki> sections, pass the rest off
	 * to internalParse() which does all the real work.
	 */
	global $wgUseTidy, $wgAlwaysUseTidy;
	$fname = __METHOD__ . '-' . wfGetCaller();
	wfProfileIn(__METHOD__);
	wfProfileIn($fname);

	$this->startParse($title, $options, self::OT_HTML, $clearState);

	# Remove the strip marker tag prefix from the input, if present.
	if ($clearState) {
		$text = str_replace($this->mUniqPrefix, '', $text);
	}

	# Remember the revision state so it can be restored before returning.
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionObject = $this->mRevisionObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	if ($revid !== null) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
	}

	wfRunHooks('ParserBeforeStrip', array(&$this, &$text, &$this->mStripState));
	# No more strip!
	wfRunHooks('ParserAfterStrip', array(&$this, &$text, &$this->mStripState));
	$text = $this->internalParse($text);
	wfRunHooks('ParserAfterParse', array(&$this, &$text, &$this->mStripState));

	$text = $this->mStripState->unstripGeneral($text);

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	# The inserted separator is written as the numeric entity &#160;
	# (non-breaking space): with a plain space on both sides these rules
	# would replace a space with a space and do nothing.
	$fixtags = array(
		# a space before ? : ; ! % or \302\273 (UTF-8 bytes of U+00BB),
		# only if there is something before the space
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		# a space after \302\253 (UTF-8 bytes of U+00AB)
		'/(\\302\\253) /' => '\\1&#160;',
		# undo the first rule in front of "!important"
		'/&#160;(!\\s*important)/' => ' \\1'
	);
	$text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);

	$text = $this->doBlockLevels($text, $linestart);

	$this->replaceLinkHolders($text);

	/**
	 * The input doesn't get language converted if
	 * a) It's disabled
	 * b) Content isn't converted
	 * c) It's a conversion table
	 * d) it is an interface message (which is in the user language)
	 */
	if (!($options->getDisableContentConversion()
		|| isset($this->mDoubleUnderscores['nocontentconvert']))
	) {
		# Run convert unconditionally in 1.18-compatible mode
		global $wgBug34832TransitionalRollback;
		if ($wgBug34832TransitionalRollback || !$this->mOptions->getInterfaceMessage()) {
			# The position of the convert() call should not be changed. it
			# assumes that the links are all replaced and the only thing left
			# is the <nowiki> mark.
			$text = $this->getConverterLanguage()->convert($text);
		}
	}

	/**
	 * A converted title will be provided in the output object if title and
	 * content conversion are enabled, the article text does not contain
	 * a conversion-suppressing double-underscore tag, and no
	 * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
	 * automatic link conversion.
	 */
	if (!($options->getDisableTitleConversion()
		|| isset($this->mDoubleUnderscores['nocontentconvert'])
		|| isset($this->mDoubleUnderscores['notitleconvert'])
		|| $this->mOutput->getDisplayTitle() !== false)
	) {
		$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
		if ($convruletitle) {
			$this->mOutput->setTitleText($convruletitle);
		} else {
			$titleText = $this->getConverterLanguage()->convertTitle($title);
			$this->mOutput->setTitleText($titleText);
		}
	}

	$text = $this->mStripState->unstripNoWiki($text);

	wfRunHooks('ParserBeforeTidy', array(&$this, &$text));

	$text = $this->replaceTransparentTags($text);
	$text = $this->mStripState->unstripGeneral($text);

	$text = Sanitizer::normalizeCharReferences($text);

	if (($wgUseTidy && $this->mOptions->getTidy()) || $wgAlwaysUseTidy) {
		$text = MWTidy::tidy($text);
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$tidyregs = array(
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>',
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3<div\\5>\\6</div>\\8\\9',
			'/<([bi])><\\/\\1>/' => ''
		);
		$text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
	}

	if ($this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit()) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	wfRunHooks('ParserAfterTidy', array(&$this, &$text));

	# Information on include size limits, for the benefit of users who try to skirt them
	if ($this->mOptions->getEnableLimitReport()) {
		$max = $this->mOptions->getMaxIncludeSize();
		$PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$this->mOptions->getExpensiveParserFunctionLimit()}\n";
		$limitReport =
			"NewPP limit report\n" .
			"Preprocessor visited node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
			"Preprocessor generated node count: " .
				"{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" .
			"Post-expand include size: {$this->mIncludeSizes['post-expand']}/{$max} bytes\n" .
			"Template argument size: {$this->mIncludeSizes['arg']}/{$max} bytes\n" .
			"Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n" .
			$PFreport;
		wfRunHooks('ParserLimitReport', array($this, &$limitReport));
		$text .= "\n<!-- \n{$limitReport}-->\n";

		if ($this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10) {
			wfDebugLog(
				'generated-pp-node-count',
				$this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey()
			);
		}
	}
	$this->mOutput->setText($text);

	# Restore the revision state saved at the top.
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionObject = $oldRevisionObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	wfProfileOut($fname);
	wfProfileOut(__METHOD__);

	return $this->mOutput;
}
/**
 * Normalize whitespace and character references in an XML source-
 * encoded text for an attribute value.
 *
 * See http://www.w3.org/TR/REC-xml/#AVNormalize for background,
 * but note that we're not returning the value, but are returning
 * XML source fragments that will be slapped into output.
 *
 * @param string $text
 * @return string
 */
private static function normalizeAttributeValue($text) {
	// Double quotes must be emitted as &quot;. Replacing '"' with itself
	// does nothing and leaves them unescaped in the returned fragment.
	return str_replace(
		'"',
		'&quot;',
		self::normalizeWhitespace(Sanitizer::normalizeCharReferences($text))
	);
}
/**
 * Convert wikitext to HTML
 * Do not call this function recursively.
 *
 * @param string $text Text we want to parse
 * @param Title $title A title object
 * @param ParserOptions $options
 * @param bool $linestart
 * @param bool $clearState
 * @param int|null $revid Number to pass in {{REVISIONID}}
 * @return ParserOutput
 */
public function parse($text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null) {
	/**
	 * First pass--just handle <nowiki> sections, pass the rest off
	 * to internalParse() which does all the real work.
	 */
	global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang, $wgDisableLangConversion, $wgDisableTitleConversion;
	global $wgExpensiveParserFunctionLimit;
	$fname = __METHOD__ . '-' . wfGetCaller();
	wfProfileIn(__METHOD__);
	wfProfileIn($fname);

	if ($clearState) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->setTitle($title); // Page title has to be set for the pre-processor

	# Remember the revision state so it can be restored before returning.
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	if ($revid !== null) {
		$this->mRevisionId = $revid;
		$this->mRevisionTimestamp = null;
	}
	$this->setOutputType(self::OT_HTML);

	wfRunHooks('ParserBeforeStrip', array(&$this, &$text, &$this->mStripState));
	# No more strip!
	wfRunHooks('ParserAfterStrip', array(&$this, &$text, &$this->mStripState));
	$text = $this->internalParse($text);
	$text = $this->mStripState->unstripGeneral($text);

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	# The inserted separator is written as the numeric entity &#160;
	# (non-breaking space): with a plain space on both sides these rules
	# would replace a space with a space and do nothing. The first pattern
	# has a single capture group (the lookahead does not capture), so the
	# replacement uses \1 only.
	$fixtags = array(
		# a space before ? : ; ! % or \302\273 (UTF-8 bytes of U+00BB),
		# only if there is something before the space
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		# a space after \302\253 (UTF-8 bytes of U+00AB)
		'/(\\302\\253) /' => '\\1&#160;',
		# undo the first rule in front of "!important"
		'/&#160;(!\\s*important)/' => ' \\1'
	);
	$text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);

	$text = $this->doBlockLevels($text, $linestart);

	$this->replaceLinkHolders($text);

	/**
	 * The page doesn't get language converted if
	 * a) It's disabled
	 * b) Content isn't converted
	 * c) It's a conversion table
	 */
	if (!($wgDisableLangConversion
		|| isset($this->mDoubleUnderscores['nocontentconvert'])
		|| $this->mTitle->isConversionTable())
	) {
		# The position of the convert() call should not be changed. it
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$text = $wgContLang->convert($text);
	}

	/**
	 * A page gets its title converted except when:
	 * a) Language conversion is globally disabled
	 * b) Title conversion is globally disabled
	 * c) A "nocontentconvert" magic word has been set
	 * d) A "notitleconvert" magic word has been set
	 * e) A display title has already been set on the output
	 *
	 * Note that if a user tries to set a title in a conversion
	 * rule but content conversion was not done, then the parser
	 * won't pick it up. This is probably expected behavior.
	 */
	if (!($wgDisableLangConversion
		|| $wgDisableTitleConversion
		|| isset($this->mDoubleUnderscores['nocontentconvert'])
		|| isset($this->mDoubleUnderscores['notitleconvert'])
		|| $this->mOutput->getDisplayTitle() !== false)
	) {
		$convruletitle = $wgContLang->getConvRuleTitle();
		if ($convruletitle) {
			$this->mOutput->setTitleText($convruletitle);
		} else {
			$titleText = $wgContLang->convertTitle($title);
			$this->mOutput->setTitleText($titleText);
		}
	}

	$text = $this->mStripState->unstripNoWiki($text);

	wfRunHooks('ParserBeforeTidy', array(&$this, &$text));

	//!JF Move to its own function
	# Pull out the elements registered in mTransparentTagHooks and register
	# each hook's output (or the raw tag when no hook matches) in the
	# general strip state under the element's marker.
	$uniq_prefix = $this->mUniqPrefix;
	$matches = array();
	$elements = array_keys($this->mTransparentTagHooks);
	$text = self::extractTagsAndParams($elements, $text, $matches, $uniq_prefix);

	foreach ($matches as $marker => $data) {
		list($element, $content, $params, $tag) = $data;
		$tagName = strtolower($element);
		if (isset($this->mTransparentTagHooks[$tagName])) {
			$output = call_user_func_array(
				$this->mTransparentTagHooks[$tagName],
				array($content, $params, $this)
			);
		} else {
			$output = $tag;
		}
		$this->mStripState->general->setPair($marker, $output);
	}
	$text = $this->mStripState->unstripGeneral($text);

	$text = Sanitizer::normalizeCharReferences($text);

	if (($wgUseTidy && $this->mOptions->mTidy) || $wgAlwaysUseTidy) {
		$text = MWTidy::tidy($text);
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$tidyregs = array(
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>',
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3<div\\5>\\6</div>\\8\\9',
			'/<([bi])><\\/\\1>/' => ''
		);
		$text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
	}

	if ($this->mExpensiveFunctionCount > $wgExpensiveParserFunctionLimit) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$wgExpensiveParserFunctionLimit
		);
	}

	wfRunHooks('ParserAfterTidy', array(&$this, &$text));

	# Information on include size limits, for the benefit of users who try to skirt them
	if ($this->mOptions->getEnableLimitReport()) {
		$max = $this->mOptions->getMaxIncludeSize();
		$PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$wgExpensiveParserFunctionLimit}\n";
		$limitReport =
			"NewPP limit report\n" .
			"Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->mMaxPPNodeCount}\n" .
			"Post-expand include size: {$this->mIncludeSizes['post-expand']}/{$max} bytes\n" .
			"Template argument size: {$this->mIncludeSizes['arg']}/{$max} bytes\n" .
			$PFreport;
		wfRunHooks('ParserLimitReport', array($this, &$limitReport));
		$text .= "\n<!-- \n{$limitReport}-->\n";
	}
	$this->mOutput->setText($text);

	# Restore the revision state saved at the top.
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	wfProfileOut($fname);
	wfProfileOut(__METHOD__);

	return $this->mOutput;
}
/**
 * Render an image map description to HTML.
 *
 * $input is processed line by line. Blank lines and lines starting with "#"
 * are skipped. The first remaining line names the image, optionally followed
 * by "|" and image options. Every later line is either a "desc <type>" line
 * choosing the position of the description icon, or a shape line made of a
 * shape keyword (default, rect, circle, poly), its coordinates, and a
 * trailing [[internal]] or [external] link.
 *
 * @param string $input Text to interpret, one directive per line
 * @param array $params Not used by this function
 * @param Parser $parser Parser used to render the image and to record links
 * @return string The generated HTML with newlines removed, or whatever
 *   self::error() returns when the input is rejected
 */
static function render($input, $params, $parser) {
    global $wgScriptPath, $wgUrlProtocols, $wgNoFollowLinks;
    wfLoadExtensionMessages('ImageMap');

    $lines = explode("\n", $input);
    $first = true;
    $lineNum = 0;
    $mapHTML = '';
    $links = array();
    $extLinks = array();

    # Define canonical desc types to allow i18n of 'imagemap_desc_types'
    $descTypesCanonical = 'top-right, bottom-right, bottom-left, top-left, none';
    $descType = self::BOTTOM_RIGHT;
    $defaultLinkAttribs = false;
    $realmap = true;

    foreach ($lines as $line) {
        ++$lineNum;
        $externLink = false;

        $line = trim($line);
        if ($line == '' || $line[0] == '#') {
            continue;
        }

        if ($first) {
            $first = false;

            # The first line should have an image specification on it
            # Extract it and render the HTML
            $bits = explode('|', $line, 2);
            if (count($bits) == 1) {
                $image = $bits[0];
                $options = '';
            } else {
                list($image, $options) = $bits;
            }
            $imageTitle = Title::newFromText($image);
            if (!$imageTitle || $imageTitle->getNamespace() != NS_IMAGE) {
                return self::error('imagemap_no_image');
            }
            if (wfIsBadImage($imageTitle->getDBkey(), $parser->mTitle)) {
                return self::error('imagemap_bad_image');
            }

            // Parse the options so we can use links and the like in the caption
            $parsedOptions = $parser->recursiveTagParse($options);
            $imageHTML = $parser->makeImage($imageTitle, $parsedOptions);
            $parser->replaceLinkHolders($imageHTML);
            $imageHTML = $parser->mStripState->unstripBoth($imageHTML);
            $imageHTML = Sanitizer::normalizeCharReferences($imageHTML);
            $parser->mOutput->addImage($imageTitle->getDBkey());

            // The rendered image is edited as a DOM tree from here on
            $domDoc = new DOMDocument();
            wfSuppressWarnings();
            $ok = $domDoc->loadXML($imageHTML);
            wfRestoreWarnings();
            if (!$ok) {
                return self::error('imagemap_invalid_image');
            }
            $xpath = new DOMXPath($domDoc);
            $imgs = $xpath->query('//img');
            if (!$imgs->length) {
                return self::error('imagemap_invalid_image');
            }
            $imageNode = $imgs->item(0);
            $thumbWidth = $imageNode->getAttribute('width');
            $thumbHeight = $imageNode->getAttribute('height');

            $imageObj = wfFindFile($imageTitle);
            if (!$imageObj || !$imageObj->exists()) {
                return self::error('imagemap_invalid_image');
            }

            # Add the linear dimensions to avoid inaccuracy in the scale
            # factor when one is much larger than the other
            # (sx+sy)/(x+y) = s
            $denominator = $imageObj->getWidth() + $imageObj->getHeight();
            $numerator = $thumbWidth + $thumbHeight;
            if ($denominator <= 0 || $numerator <= 0) {
                return self::error('imagemap_invalid_image');
            }
            $scale = $numerator / $denominator;
            continue;
        }

        # Handle desc spec
        $cmd = strtok($line, " \t");
        if ($cmd == 'desc') {
            $typesText = wfMsgForContent('imagemap_desc_types');
            if ($descTypesCanonical != $typesText) {
                // i18n desc types exists
                $typesText = $descTypesCanonical . ', ' . $typesText;
            }
            $types = array_map('trim', explode(',', $typesText));
            $type = trim(strtok(''));
            $descType = array_search($type, $types);
            if ($descType > 4) {
                // A localized descType is used. Subtract 5 to reach the canonical desc type.
                $descType = $descType - 5;
            }
            if ($descType === false || $descType < 0) {
                // <0? In theory never, but paranoia...
                return self::error('imagemap_invalid_desc', $typesText);
            }
            continue;
        }

        # Find the link
        // Reset per line: a bracketed URL that passes the protocol check but
        // matches neither external-link pattern must be rejected below rather
        // than silently reusing the target parsed from an earlier line.
        $title = false;
        $alt = '';
        $link = trim(strstr($line, '['));
        if (preg_match('/^ \\[\\[ ([^|]*+) \\| ([^\\]]*+) \\]\\] \\w* $ /x', $link, $m)) {
            $title = Title::newFromText($m[1]);
            $alt = trim($m[2]);
        } elseif (preg_match('/^ \\[\\[ ([^\\]]*+) \\]\\] \\w* $ /x', $link, $m)) {
            $title = Title::newFromText($m[1]);
            if (is_null($title)) {
                return self::error('imagemap_invalid_title', $lineNum);
            }
            $alt = $title->getFullText();
        } elseif (in_array(substr($link, 1, strpos($link, '//') + 1), $wgUrlProtocols) || in_array(substr($link, 1, strpos($link, ':')), $wgUrlProtocols)) {
            if (preg_match('/^ \\[ ([^\\s]*+) \\s ([^\\]]*+) \\] \\w* $ /x', $link, $m)) {
                $title = $m[1];
                $alt = trim($m[2]);
                $externLink = true;
            } elseif (preg_match('/^ \\[ ([^\\]]*+) \\] \\w* $ /x', $link, $m)) {
                $title = $alt = trim($m[1]);
                $externLink = true;
            }
        } else {
            return self::error('imagemap_no_link', $lineNum);
        }
        if (!$title) {
            return self::error('imagemap_invalid_title', $lineNum);
        }

        // Everything before the link is the shape keyword plus coordinates
        $shapeSpec = substr($line, 0, -strlen($link));

        # Tokenize shape spec
        // self::tokenizeCoords() is called without the string, so it
        // presumably continues the strtok() scan started here — do not call
        // strtok() on another string in between.
        $shape = strtok($shapeSpec, " \t");
        switch ($shape) {
            case 'default':
                $coords = array();
                break;
            case 'rect':
                $coords = self::tokenizeCoords(4, $lineNum);
                if (!is_array($coords)) {
                    return $coords;
                }
                break;
            case 'circle':
                $coords = self::tokenizeCoords(3, $lineNum);
                if (!is_array($coords)) {
                    return $coords;
                }
                break;
            case 'poly':
                $coords = array();
                $coord = strtok(" \t");
                while ($coord !== false) {
                    $coords[] = $coord;
                    $coord = strtok(" \t");
                }
                if (!count($coords)) {
                    return self::error('imagemap_missing_coord', $lineNum);
                }
                if (count($coords) % 2 !== 0) {
                    return self::error('imagemap_poly_odd', $lineNum);
                }
                break;
            default:
                return self::error('imagemap_unrecognised_shape', $lineNum);
        }

        # Scale the coords using the size of the source image
        foreach ($coords as $i => $c) {
            $coords[$i] = intval(round($c * $scale));
        }

        # Construct the area tag
        $attribs = array();
        if ($externLink) {
            $attribs['href'] = $title;
            $attribs['class'] = 'plainlinks';
            if ($wgNoFollowLinks) {
                $attribs['rel'] = 'nofollow';
            }
        } else {
            if ($title->getFragment() != '' && $title->getPrefixedDBkey() == '') {
                # XXX: kluge to handle [[#Fragment]] links, should really fix getLocalURL()
                # in Title.php to return an empty string in this case
                $attribs['href'] = $title->getFragmentForURL();
            } else {
                $attribs['href'] = $title->escapeLocalURL() . $title->getFragmentForURL();
            }
        }
        if ($shape != 'default') {
            $attribs['shape'] = $shape;
        }
        if ($coords) {
            $attribs['coords'] = implode(',', $coords);
        }
        if ($alt != '') {
            if ($shape != 'default') {
                $attribs['alt'] = $alt;
            }
            $attribs['title'] = $alt;
        }
        if ($shape == 'default') {
            // The default link wraps the image itself instead of becoming an <area>
            $defaultLinkAttribs = $attribs;
        } else {
            $mapHTML .= Xml::element('area', $attribs) . "\n";
        }
        if ($externLink) {
            $extLinks[] = $title;
        } else {
            $links[] = $title;
        }
    }

    if ($first) {
        return self::error('imagemap_no_image');
    }

    if ($mapHTML == '') {
        if (!$defaultLinkAttribs) {
            return self::error('imagemap_no_areas');
        }
        // no areas defined, default only. It's not a real imagemap, so we do not need some tags
        $realmap = false;
    }

    if ($realmap) {
        # Construct the map
        # Add random number to avoid breaking cached HTML fragments that are
        # later joined together on the one page (bug 16471)
        $mapName = "ImageMap_" . ++self::$id . '_' . mt_rand(0, 0x7fffffff);
        $mapHTML = "<map name=\"{$mapName}\">\n{$mapHTML}</map>\n";

        # Alter the image tag
        $imageNode->setAttribute('usemap', "#{$mapName}");
    }

    # Add a surrounding div, remove the default link to the description page
    $anchor = $imageNode->parentNode;
    $parent = $anchor->parentNode;
    $div = $parent->insertBefore(new DOMElement('div'), $anchor);
    if ($defaultLinkAttribs) {
        $defaultAnchor = $div->appendChild(new DOMElement('a'));
        foreach ($defaultLinkAttribs as $name => $value) {
            $defaultAnchor->setAttribute($name, $value);
        }
        $imageParent = $defaultAnchor;
    } else {
        $imageParent = $div;
    }

    # Add the map HTML to the div
    # We used to add it before the div, but that made tidy unhappy
    if ($mapHTML != '') {
        $mapDoc = new DOMDocument();
        $mapDoc->loadXML($mapHTML);
        $mapNode = $domDoc->importNode($mapDoc->documentElement, true);
        $div->appendChild($mapNode);
    }

    $imageParent->appendChild($imageNode->cloneNode(true));
    $parent->removeChild($anchor);

    # Determine whether a "magnify" link is present
    $xpath = new DOMXPath($domDoc);
    $magnify = $xpath->query('//div[@class="magnify"]');
    if (!$magnify->length && $descType != self::NONE) {
        # Add image description link
        if ($descType == self::TOP_LEFT || $descType == self::BOTTOM_LEFT) {
            $marginLeft = 0;
        } else {
            $marginLeft = $thumbWidth - 20;
        }
        if ($descType == self::TOP_LEFT || $descType == self::TOP_RIGHT) {
            $marginTop = -$thumbHeight;
            // 1px hack for IE, to stop it poking out the top
            $marginTop += 1;
        } else {
            $marginTop = -20;
        }
        $div->setAttribute('style', "height: {$thumbHeight}px; width: {$thumbWidth}px; ");
        $descWrapper = $div->appendChild(new DOMElement('div'));
        $descWrapper->setAttribute('style', "margin-left: {$marginLeft}px; " . "margin-top: {$marginTop}px; " . "text-align: left;");

        $descAnchor = $descWrapper->appendChild(new DOMElement('a'));
        $descAnchor->setAttribute('href', $imageTitle->escapeLocalURL());
        $descAnchor->setAttribute('title', wfMsgForContent('imagemap_description'));
        $descImg = $descAnchor->appendChild(new DOMElement('img'));
        $descImg->setAttribute('alt', wfMsgForContent('imagemap_description'));
        $descImg->setAttribute('src', "{$wgScriptPath}/extensions/ImageMap/desc-20.png");
        $descImg->setAttribute('style', 'border: none;');
    }

    # Output the result
    # We use saveXML() not saveHTML() because then we get XHTML-compliant output.
    # The disadvantage is that we have to strip out the DTD
    $output = preg_replace('/<\\?xml[^?]*\\?>/', '', $domDoc->saveXML());

    # Register links
    foreach ($links as $title) {
        if ($title->isExternal() || $title->getNamespace() == NS_SPECIAL) {
            // Don't register special or interwiki links...
        } elseif ($title->getNamespace() == NS_MEDIA) {
            // Regular Media: links are recorded as image usages
            $parser->mOutput->addImage($title->getDBkey());
        } else {
            // Plain ol' link
            $parser->mOutput->addLink($title);
        }
    }
    foreach ($extLinks as $title) {
        $parser->mOutput->addExternalLink($title);
    }

    # Armour output against broken parser
    $output = str_replace("\n", '', $output);
    return $output;
}
/**
 * Set the title shown when the page is viewed.
 *
 * When $wgRestrictDisplayTitle is on, the request is honoured only if the
 * cleaned-up text parses to a title without a fragment that equals the
 * title of the page being parsed; otherwise it is silently ignored.
 *
 * @param $parser Parser: parent parser
 * @param string $text desired title text
 * @return String always the empty string
 */
static function displaytitle($parser, $text = '') {
    global $wgRestrictDisplayTitle;

    // Interpret only a small subset of wiki markup: the single-quote items
    $text = $parser->doQuotes($text);

    // Remove strip markers (e.g. the UNIQ-QINU stuff) left by tag extensions/whatever
    $markerStart = preg_quote($parser->uniqPrefix(), '/');
    $markerEnd = preg_quote(Parser::MARKER_SUFFIX, '/');
    $text = preg_replace('/' . $markerStart . '.*?' . $markerEnd . '/', '', $text);

    // Tags that are fine in ordinary wikitext but get escaped in a display title
    $bad = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'blockquote', 'ol', 'ul', 'li', 'hr', 'table', 'tr', 'th', 'td', 'dl', 'dd', 'caption', 'p', 'ruby', 'rb', 'rt', 'rp', 'br');

    // In restricted mode, neutralise styles that could be used to bypass
    // $wgRestrictDisplayTitle
    $attribFilter = null;
    if ($wgRestrictDisplayTitle) {
        $attribFilter = function (&$params) {
            $attribs = Sanitizer::decodeTagAttributes($params);
            if (isset($attribs['style'])) {
                // checkCss() runs again later, but the regex below is only
                // safe on its output; running it twice doesn't hurt
                $attribs['style'] = Sanitizer::checkCss($attribs['style']);
                if (preg_match('/(display|user-select|visibility)\\s*:/i', $attribs['style'])) {
                    $attribs['style'] = '/* attempt to bypass $wgRestrictDisplayTitle */';
                }
            }
            $params = Sanitizer::safeEncodeTagAttributes($attribs);
        };
    }

    // Mimic the escaping process that occurs in OutputPage::setPageTitle
    $withoutBadTags = Sanitizer::removeHTMLtags($text, $attribFilter, array(), array(), $bad);
    $text = Sanitizer::normalizeCharReferences($withoutBadTags);
    $title = Title::newFromText(Sanitizer::stripAllTags($text));

    // Unrestricted: anything goes. Restricted (the default): the text must
    // normalise to the actual title.
    $accepted = !$wgRestrictDisplayTitle
        || ($title instanceof Title && !$title->hasFragment() && $title->equals($parser->mTitle));
    if ($accepted) {
        $parser->mOutput->setDisplayTitle($text);
    }

    return '';
}
/**
 * Set the title shown when the page is viewed.
 *
 * When $wgRestrictDisplayTitle is on, the request is honoured only if the
 * cleaned-up text parses to a title with an empty fragment that equals the
 * title of the page being parsed; otherwise it is silently ignored.
 *
 * @param $parser Parser: parent parser
 * @param string $text desired title text
 * @return String always the empty string
 */
static function displaytitle($parser, $text = '') {
    global $wgRestrictDisplayTitle;

    # Interpret only a small subset of wiki markup: the single-quote items
    $text = $parser->doQuotes($text);

    # Remove strip markers (e.g. the UNIQ-QINU stuff) left by tag extensions/whatever
    $markerPattern = '/' . preg_quote($parser->uniqPrefix(), '/') . '.*?' . preg_quote(Parser::MARKER_SUFFIX, '/') . '/';
    $text = preg_replace($markerPattern, '', $text);

    # Tags that are fine in ordinary wikitext but get escaped in a display title
    $bad = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'blockquote', 'ol', 'ul', 'li', 'hr', 'table', 'tr', 'th', 'td', 'dl', 'dd', 'caption', 'p', 'ruby', 'rb', 'rt', 'rp', 'br');

    # Mimic the escaping process that occurs in OutputPage::setPageTitle
    $withoutBadTags = Sanitizer::removeHTMLtags($text, null, array(), array(), $bad);
    $text = Sanitizer::normalizeCharReferences($withoutBadTags);
    $title = Title::newFromText(Sanitizer::stripAllTags($text));

    # Unrestricted: anything goes. Restricted (the default): the text must
    # normalise to the actual title.
    $accepted = !$wgRestrictDisplayTitle
        || ($title instanceof Title && $title->getFragment() == '' && $title->equals($parser->mTitle));
    if ($accepted) {
        $parser->mOutput->setDisplayTitle($text);
    }

    return '';
}
/**
 * Normalize whitespace and character references in an XML source-
 * encoded text for an attribute value.
 *
 * See http://www.w3.org/TR/REC-xml/#AVNormalize for background,
 * but note that we're not returning the value, but are returning
 * XML source fragments that will be slapped into output.
 *
 * Processing order: character references are normalized first, then each
 * CR LF pair or single space/CR/LF/tab becomes one space, and finally every
 * literal double quote is written as &quot; so that the fragment cannot
 * terminate the double-quoted attribute it is placed in.
 *
 * @param string $text
 * @return string
 * @access private
 */
function normalizeAttributeValue($text) {
    $normalized = Sanitizer::normalizeCharReferences($text);
    $singleSpaced = preg_replace('/\\r\\n|[\\x20\\x0d\\x0a\\x09]/', ' ', $normalized);
    // Replacing '"' with '"' would be a no-op; the entity is what makes the
    // result safe inside a quoted attribute.
    return str_replace('"', '&quot;', $singleSpaced);
}
/**
 * Convert wikitext to HTML
 * Do not call this function recursively.
 *
 * First pass--just handle <nowiki> sections, pass the rest off
 * to internalParse() which does all the real work. The remaining steps
 * (punctuation clean-up, block levels, link holders, language conversion,
 * tidy) run in a fixed order that must not be rearranged.
 *
 * @private
 * @param string $text Text we want to parse
 * @param Title &$title A title object
 * @param array $options
 * @param boolean $linestart
 * @param boolean $clearState
 * @param int $revid number to pass in {{REVISIONID}}
 * @return ParserOutput a ParserOutput
 */
function parse($text, &$title, $options, $linestart = true, $clearState = true, $revid = null) {
    global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang;

    // Profile per caller so nested profiles can be told apart
    $fname = 'Parser::parse-' . wfGetCaller();
    wfProfileIn($fname);

    if ($clearState) {
        $this->clearState();
    }

    $this->mOptions = $options;
    $this->mTitle =& $title;

    // The revision id is restored before returning
    $oldRevisionId = $this->mRevisionId;
    if ($revid !== null) {
        $this->mRevisionId = $revid;
    }
    $this->setOutputType(OT_HTML);

    // Passing $this->mStripState to strip() directly sometimes segfaults
    // in PHP5, hence the detour through a local reference.
    $x =& $this->mStripState;

    wfRunHooks('ParserBeforeStrip', array(&$this, &$text, &$x));
    $text = $this->strip($text, $x);
    wfRunHooks('ParserAfterStrip', array(&$this, &$text, &$x));

    $text = $this->internalParse($text);

    $text = $this->unstrip($text, $this->mStripState);

    # Clean up special characters, only run once, next-to-last before doBlockLevels
    # A space before ? : ; ! or a closing guillemet, and a space after an
    # opening guillemet, becomes a non-breaking space. The replacement has to
    # be the entity: rewriting a space to a plain space would change nothing.
    $fixtags = array(
        '/(.) (?=\\?|:|;|!|\\302\\273)/' => '\\1&nbsp;',
        '/(\\302\\253) /' => '\\1&nbsp;',
    );
    $text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);

    # only once and last
    $text = $this->doBlockLevels($text, $linestart);

    $this->replaceLinkHolders($text);

    # the position of the parserConvert() call should not be changed. it
    # assumes that the links are all replaced and the only thing left
    # is the <nowiki> mark.
    # Side-effects: this calls $this->mOutput->setTitleText()
    $text = $wgContLang->parserConvert($text, $this);

    $text = $this->unstripNoWiki($text, $this->mStripState);

    wfRunHooks('ParserBeforeTidy', array(&$this, &$text));

    $text = Sanitizer::normalizeCharReferences($text);

    if ($wgUseTidy and $this->mOptions->mTidy or $wgAlwaysUseTidy) {
        $text = Parser::tidy($text);
    } else {
        # attempt to sanitize at least some nesting problems
        # (bug #2702 and quite a few others)
        $tidyregs = array(
            '/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
            '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>',
            '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3<div\\5>\\6</div>\\8\\9',
            '/<([bi])><\\/\\1>/' => '',
        );
        $text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
    }

    wfRunHooks('ParserAfterTidy', array(&$this, &$text));

    # Information on include size limits, for the benefit of users who try to skirt them
    if (max($this->mIncludeSizes) > 1000) {
        $max = $this->mOptions->getMaxIncludeSize();
        $text .= "<!-- \n" . "Pre-expand include size: {$this->mIncludeSizes['pre-expand']} bytes\n" . "Post-expand include size: {$this->mIncludeSizes['post-expand']} bytes\n" . "Template argument size: {$this->mIncludeSizes['arg']} bytes\n" . "Maximum: {$max} bytes\n" . "-->\n";
    }

    $this->mOutput->setText($text);
    $this->mRevisionId = $oldRevisionId;
    wfProfileOut($fname);

    return $this->mOutput;
}
/**
 * Returns message in the requested format
 *
 * Any arguments after $options are substituted into the message, either
 * before the transformations below (default) or after them (replaceafter).
 *
 * @param string $key Key of the message
 * @param array $options Processing rules (a single string is also accepted
 *  and treated as a one-element array):
 *  <i>parse</i>: parses wikitext to html
 *  <i>parseinline</i>: parses wikitext to html and removes the surrounding p's added by parser or tidy
 *  <i>escape</i>: filters message through htmlspecialchars
 *  <i>escapenoentities</i>: same, but allows entity references like &nbsp; through
 *  <i>replaceafter</i>: parameters are substituted after parsing or escaping
 *  <i>parsemag</i>: transform the message using magic phrases
 *  <i>content</i>: fetch message for content language instead of interface
 * Behavior for conflicting options (e.g., parse+parseinline) is undefined.
 * @return string The processed message
 */
function wfMsgExt($key, $options) {
    global $wgOut;

    // Everything after $key and $options is a message parameter
    $args = array_slice(func_get_args(), 2);

    if (!is_array($options)) {
        $options = array($options);
    }

    $forContent = in_array('content', $options);
    $replaceAfter = in_array('replaceafter', $options);

    $string = wfMsgGetKey($key, true, $forContent, false);

    if (!$replaceAfter) {
        $string = wfMsgReplaceArgs($string, $args);
    }

    // At most one of parse / parseinline / parsemag is applied, in that priority
    if (in_array('parse', $options)) {
        $string = $wgOut->parse($string, true, !$forContent);
    } elseif (in_array('parseinline', $options)) {
        $string = $wgOut->parse($string, true, !$forContent);
        $m = array();
        // Unwrap the single enclosing paragraph, if that is all there is
        if (preg_match('/^<p>(.*)\\n?<\\/p>\\n?$/sU', $string, $m)) {
            $string = $m[1];
        }
    } elseif (in_array('parsemag', $options)) {
        global $wgMessageCache;
        if (isset($wgMessageCache)) {
            $string = $wgMessageCache->transform($string, !$forContent);
        }
    }

    if (in_array('escape', $options)) {
        $string = htmlspecialchars($string);
    } elseif (in_array('escapenoentities', $options)) {
        $string = htmlspecialchars($string);
        // Undo the escaping of ampersands so entity references survive;
        // normalizeCharReferences() then tidies up whatever the restored
        // ampersands introduce.
        $string = str_replace('&amp;', '&', $string);
        $string = Sanitizer::normalizeCharReferences($string);
    }

    if ($replaceAfter) {
        $string = wfMsgReplaceArgs($string, $args);
    }

    return $string;
}
/**
 * Append a description of the given attributes to a change text.
 *
 * The output reads: with "a" attribute as "1", with "b" attribute as "2"
 * and with "c" attribute as "3". The first attribute gets the " with"
 * lead-in, middle ones ", with", and the last one (whenever there is more
 * than one attribute) " and with". Nothing is added for an empty array.
 *
 * NOTE(review): names and values are concatenated into HTML with only
 * character-reference normalisation applied — confirm that callers pass
 * values that are already safe for HTML output.
 *
 * @param ChangeText $txt Receives the generated HTML via addHtml()
 * @param array $attributes Attribute name => attribute value
 */
protected function addAttributes(ChangeText $txt, array $attributes) {
    $total = count($attributes);
    if ($total < 1) {
        return;
    }

    $keys = array_keys($attributes);
    $lastIndex = $total - 1;

    $txt->addHtml(Sanitizer::normalizeCharReferences(' with "' . $keys[0] . '" attribute as "' . $attributes[$keys[0]] . '"'));

    // Attributes strictly between the first and the last
    for ($i = 1; $i < $lastIndex; $i++) {
        $key = $keys[$i];
        $attr = $attributes[$key];
        $txt->addHtml(Sanitizer::normalizeCharReferences(', with "' . $key . '" attribute as "' . $attr . '"'));
    }

    // The last attribute, whenever it is not also the first. The guard must
    // be "> 0": with "> 1" the second of exactly two attributes was dropped.
    if ($lastIndex > 0) {
        $txt->addHtml(Sanitizer::normalizeCharReferences(' and with "' . $keys[$lastIndex] . '" attribute as "' . $attributes[$keys[$lastIndex]] . '"'));
    }
}
/**
 * Set the "page title", i.e. the contents of \<h1\>, stored as a valid HTML fragment.
 * Good tags like \<sup\> are allowed in the \<h1\>, bad tags like \<script\> are not.
 * \<title\> is set automatically to the same content with all tags removed:
 * bad tags that were escaped in \<h1\> stay escaped in \<title\>, and good
 * tags like \<i\> are dropped entirely.
 *
 * @param $name string
 */
public function setPageTitle($name) {
    # Disallowed markup is escaped: "<script>foo&bar</script>" becomes
    # "&lt;script&gt;foo&amp;bar&lt;/script&gt;", while "<i>foobar</i>" is left alone
    $allowedTagsOnly = Sanitizer::removeHTMLtags($name);
    $headingHtml = Sanitizer::normalizeCharReferences($allowedTagsOnly);
    $this->mPagetitle = $headingHtml;

    # For <title> the remaining tags go too: "<i>foo&amp;bar</i>" becomes "foo&bar"
    $plainTitle = Sanitizer::stripAllTags($headingHtml);
    $this->setHTMLTitle(wfMsg('pagetitle', $plainTitle));
}
/**
 * Set the "page title", i.e. the contents of \<h1\>, stored as a valid HTML
 * fragment. Good tags like \<sup\> are allowed in the \<h1\>, bad tags like
 * \<script\> are not. \<title\> is set automatically to the same content
 * with all tags removed: bad tags that were escaped in \<h1\> stay escaped
 * in \<title\>, and good tags like \<i\> are dropped entirely.
 *
 * @param string|Message $name A Message is rendered with text() in this
 *   object's context before being processed
 */
public function setPageTitle($name) {
    if ($name instanceof Message) {
        $contextualised = $name->setContext($this->getContext());
        $name = $contextualised->text();
    }

    # Disallowed markup is escaped: "<script>foo&bar</script>" becomes
    # "&lt;script&gt;foo&amp;bar&lt;/script&gt;", while "<i>foobar</i>" is left alone
    $allowedTagsOnly = Sanitizer::removeHTMLtags($name);
    $headingHtml = Sanitizer::normalizeCharReferences($allowedTagsOnly);
    $this->mPagetitle = $headingHtml;

    # For <title> the remaining tags go too: "<i>foo&amp;bar</i>" becomes "foo&bar"
    $titleMsg = $this->msg('pagetitle');
    $titleMsg = $titleMsg->rawParams(Sanitizer::stripAllTags($headingHtml));
    $this->setHTMLTitle($titleMsg->inContentLanguage());
}
/**
 * Render a page through an internal action=parse API request.
 *
 * @param Title $title Page to parse
 * @return array|bool False when the API result has no text, or has
 *   non-empty text but no loadable revision. Otherwise an array with keys
 *   content, categorieshtml, basetimestamp, starttimestamp and
 *   displayTitleHtml; categorieshtml and displayTitleHtml are false when
 *   the API result does not include them.
 */
protected function parseWikitext($title) {
    $apiParams = array(
        'action' => 'parse',
        'page' => $title->getPrefixedDBkey(),
        'prop' => 'text|revid|categorieshtml|displaytitle',
    );
    $api = new ApiMain(new DerivativeRequest($this->getRequest(), $apiParams, false), true);
    $api->execute();
    $result = $api->getResultData();
    $parse = isset($result['parse']) ? $result['parse'] : array();

    $content = isset($parse['text']['*']) ? $parse['text']['*'] : false;
    if ($content === false) {
        // Nothing was rendered; skip the revision lookup altogether
        return false;
    }

    // The revid is guarded like every other key, so a result without one
    // does not raise an undefined-index notice.
    // NOTE(review): this assumes Revision::newFromId() returns null for a
    // missing id, which is what the unguarded call relied on — confirm.
    $revision = isset($parse['revid']) ? Revision::newFromId($parse['revid']) : null;
    if (strlen($content) && $revision === null) {
        return false;
    }

    $categorieshtml = isset($parse['categorieshtml']['*']) ? $parse['categorieshtml']['*'] : false;
    $timestamp = $revision ? $revision->getTimestamp() : wfTimestampNow();

    $displaytitle = isset($parse['displaytitle']) ? $parse['displaytitle'] : false;
    if ($displaytitle !== false) {
        // Escape entities as in OutputPage::setPageTitle()
        $displaytitle = Sanitizer::normalizeCharReferences(Sanitizer::removeHTMLtags($displaytitle));
    }

    return array(
        'content' => $content,
        'categorieshtml' => $categorieshtml,
        'basetimestamp' => $timestamp,
        'starttimestamp' => wfTimestampNow(),
        'displayTitleHtml' => $displaytitle,
    );
}