/**
 * Run a snippet through MWTidy::tidy() and compare the cleaned-up result
 * with the expected markup.
 *
 * Before comparing, every <p> and </p> tag is removed, the result is
 * trimmed, and carriage returns are dropped.
 *
 * @dataProvider provideTestWrapping
 * @param mixed $expected Value the normalized Tidy output must equal
 * @param string $text Markup handed to MWTidy::tidy()
 * @param string $msg Optional assertion failure message
 */
public function testTidyWrapping($expected, $text, $msg = '') {
    $tidied = MWTidy::tidy($text);

    // We don't care about where Tidy wants to stick its <p>s
    $withoutParagraphs = preg_replace('#</?p>#', '', $tidied);
    $normalized = trim($withoutParagraphs);

    // Windows, we love you!
    $normalized = str_replace("\r", '', $normalized);

    $this->assertEquals($expected, $normalized, $msg);
}
/**
 * Show the special page: the input form and, if wikitext was submitted,
 * its preprocessed expansion.
 *
 * Reads the request fields 'contexttitle', 'input', 'generate_xml',
 * 'removecomments' and 'removenowiki'. When input is present it is
 * preprocessed with $wgParser; the result (and optionally the XML parse
 * tree) is added to $wgOut, followed by an HTML preview.
 *
 * @param string|null $subpage Not used by this method
 */
function execute($subpage) {
    global $wgRequest, $wgParser, $wgOut, $wgUseTidy, $wgAlwaysUseTidy;

    $this->setHeaders();
    $this->isNewParser = is_callable(array($wgParser, 'preprocessToDom'));

    // Resolve the context title, falling back to this special page's own title.
    $titleStr = $wgRequest->getText('contexttitle');
    $title = Title::newFromText($titleStr);
    $selfTitle = $this->getTitle();
    if (!$title) {
        $title = $selfTitle;
    }

    $input = $wgRequest->getText('input');

    // XML output needs preprocessToDom(), so it is only honoured on a new parser.
    if ($this->isNewParser) {
        $this->generateXML = $wgRequest->getBool('generate_xml');
    } else {
        $this->generateXML = false;
    }

    // "Remove comments" defaults to on for the empty form and off once input was submitted.
    $hasInput = strlen($input) > 0;
    $this->removeComments = $wgRequest->getBool('removecomments', !$hasInput);
    $this->removeNowiki = $wgRequest->getBool('removenowiki', false);

    $output = false;
    if ($hasInput) {
        $options = new ParserOptions();
        $options->setRemoveComments($this->removeComments);
        $options->setTidy(true);
        $options->setMaxIncludeSize(self::MAX_INCLUDE_SIZE);

        if ($this->generateXML) {
            $wgParser->startExternalParse($title, $options, OT_PREPROCESS);
            $dom = $wgParser->preprocessToDom($input);
            // Use saveXML() when the tree offers it, otherwise fall back to __toString().
            $xml = is_callable(array($dom, 'saveXML')) ? $dom->saveXML() : $dom->__toString();
        }

        $output = $wgParser->preprocess($input, $title, $options);
    }

    $wgOut->addWikiText(wfMsg('expand_templates_intro'));
    $wgOut->addHTML($this->makeForm($titleStr, $input));

    if ($output === false) {
        // Nothing was submitted: only the intro and the form are shown.
        return;
    }

    if ($this->generateXML) {
        $wgOut->addHTML($this->makeOutput($xml, 'expand_templates_xml_output'));
    }

    $resultHtml = $this->makeOutput($output);
    if ($this->removeNowiki) {
        $nowikiPatterns = array('_<nowiki>_', '_</nowiki>_', '_<nowiki */>_');
        $resultHtml = preg_replace($nowikiPatterns, '', $resultHtml);
    }
    if (($wgUseTidy && $options->getTidy()) || $wgAlwaysUseTidy) {
        $resultHtml = MWTidy::tidy($resultHtml);
    }
    $wgOut->addHTML($resultHtml);

    $this->showHtmlPreview($title, $output, $wgOut);
}
/**
 * Show the special page.
 *
 * Reads 'wpContextTitle', 'wpInput', 'wpGenerateXml', 'wpGenerateRawHtml',
 * 'wpRemoveComments' and 'wpRemoveNowiki' from the request. Submitted
 * wikitext is preprocessed with $wgParser and the expansion, the optional
 * XML parse tree, the optional raw HTML and an HTML preview are appended
 * to the page output after the intro message and the form.
 *
 * @param string|null $subpage Not used by this method
 */
function execute($subpage) {
    global $wgParser, $wgUseTidy, $wgAlwaysUseTidy;

    $this->setHeaders();

    $request = $this->getRequest();

    // Context title, falling back to this special page's own title.
    $titleStr = $request->getText('wpContextTitle');
    $title = Title::newFromText($titleStr) ?: $this->getPageTitle();

    $input = $request->getText('wpInput');
    $this->generateXML = $request->getBool('wpGenerateXml');
    $this->generateRawHtml = $request->getBool('wpGenerateRawHtml');

    // "Remove comments" defaults to on for the empty form and off once input was submitted.
    $hasInput = strlen($input) > 0;
    $this->removeComments = $request->getBool('wpRemoveComments', !$hasInput);
    $this->removeNowiki = $request->getBool('wpRemoveNowiki', false);

    $output = false;
    if ($hasInput) {
        $options = ParserOptions::newFromContext($this->getContext());
        $options->setRemoveComments($this->removeComments);
        $options->setTidy(true);
        $options->setMaxIncludeSize(self::MAX_INCLUDE_SIZE);

        if ($this->generateXML) {
            $wgParser->startExternalParse($title, $options, OT_PREPROCESS);
            $dom = $wgParser->preprocessToDom($input);
            // Use saveXML() when the tree has it, otherwise fall back to __toString().
            $xml = method_exists($dom, 'saveXML') ? $dom->saveXML() : $dom->__toString();
        }

        $output = $wgParser->preprocess($input, $title, $options);
    }

    $out = $this->getOutput();
    $out->addWikiMsg('expand_templates_intro');
    $out->addHTML($this->makeForm($titleStr, $input));

    if ($output === false) {
        // Nothing was submitted: only the intro and the form are shown.
        return;
    }

    // The XML tree is only shown when the expansion itself is non-empty.
    if ($this->generateXML && strlen($output) > 0) {
        $out->addHTML($this->makeOutput($xml, 'expand_templates_xml_output'));
    }

    $resultHtml = $this->makeOutput($output);
    if ($this->removeNowiki) {
        $nowikiPatterns = array('_<nowiki>_', '_</nowiki>_', '_<nowiki */>_');
        $resultHtml = preg_replace($nowikiPatterns, '', $resultHtml);
    }
    if (($wgUseTidy && $options->getTidy()) || $wgAlwaysUseTidy) {
        $resultHtml = MWTidy::tidy($resultHtml);
    }
    $out->addHTML($resultHtml);

    $rawhtml = $this->generateHtml($title, $output);
    if ($this->generateRawHtml && strlen($rawhtml) > 0) {
        $out->addHTML($this->makeOutput($rawhtml, 'expand_templates_html_output'));
    }

    $this->showHtmlPreview($title, $rawhtml, $out);
}
/**
 * Convert wikitext to HTML
 * Do not call this function recursively.
 *
 * The method temporarily overrides the parser's revision fields when
 * $revid is given and restores them before returning. When the limit
 * report is enabled, an HTML comment with the collected limit data is
 * appended to the output text.
 *
 * @param string $text text we want to parse
 * @param $title Title object
 * @param $options ParserOptions
 * @param $linestart boolean
 * @param $clearState boolean
 * @param int $revid number to pass in {{REVISIONID}}
 * @return ParserOutput a ParserOutput
 */
public function parse($text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null) {
    /**
     * First pass--just handle <nowiki> sections, pass the rest off
     * to internalParse() which does all the real work.
     */
    global $wgUseTidy, $wgAlwaysUseTidy, $wgShowHostnames;

    $fname = __METHOD__ . '-' . wfGetCaller();
    wfProfileIn(__METHOD__);
    wfProfileIn($fname);

    $this->startParse($title, $options, self::OT_HTML, $clearState);

    $this->mInputSize = strlen($text);
    if ($this->mOptions->getEnableLimitReport()) {
        $this->mOutput->resetParseStartTime();
    }

    # Remove the strip marker tag prefix from the input, if present.
    if ($clearState) {
        $text = str_replace($this->mUniqPrefix, '', $text);
    }

    # Remember the current revision state so it can be restored at the end.
    $oldRevisionId = $this->mRevisionId;
    $oldRevisionObject = $this->mRevisionObject;
    $oldRevisionTimestamp = $this->mRevisionTimestamp;
    $oldRevisionUser = $this->mRevisionUser;
    $oldRevisionSize = $this->mRevisionSize;
    if ($revid !== null) {
        $this->mRevisionId = $revid;
        $this->mRevisionObject = null;
        $this->mRevisionTimestamp = null;
        $this->mRevisionUser = null;
        $this->mRevisionSize = null;
    }

    wfRunHooks('ParserBeforeStrip', array(&$this, &$text, &$this->mStripState));
    # No more strip!
    wfRunHooks('ParserAfterStrip', array(&$this, &$text, &$this->mStripState));
    $text = $this->internalParse($text);
    wfRunHooks('ParserAfterParse', array(&$this, &$text, &$this->mStripState));

    $text = $this->mStripState->unstripGeneral($text);

    # Clean up special characters, only run once, next-to-last before doBlockLevels
    # The non-breaking space is written as the numeric entity &#160; so that
    # it is explicit in the source instead of an invisible literal character.
    $fixtags = array(
        # a space before ? : ; ! % or a right guillemet becomes non-breaking,
        # but only if there is something before the space
        '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
        # a space after a left guillemet becomes non-breaking
        '/(\\302\\253) /' => '\\1&#160;',
        # undo the first rule in front of "!important"
        '/&#160;(!\\s*important)/' => ' \\1',
    );
    $text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);

    $text = $this->doBlockLevels($text, $linestart);

    $this->replaceLinkHolders($text);

    /**
     * The input doesn't get language converted if
     * a) It's disabled
     * b) Content isn't converted
     * c) It's a conversion table
     * d) it is an interface message (which is in the user language)
     */
    if (!($options->getDisableContentConversion() || isset($this->mDoubleUnderscores['nocontentconvert']))) {
        if (!$this->mOptions->getInterfaceMessage()) {
            # The position of the convert() call should not be changed. it
            # assumes that the links are all replaced and the only thing left
            # is the <nowiki> mark.
            $text = $this->getConverterLanguage()->convert($text);
        }
    }

    /**
     * A converted title will be provided in the output object if title and
     * content conversion are enabled, the article text does not contain
     * a conversion-suppressing double-underscore tag, and no
     * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
     * automatic link conversion.
     */
    if (!($options->getDisableTitleConversion()
        || isset($this->mDoubleUnderscores['nocontentconvert'])
        || isset($this->mDoubleUnderscores['notitleconvert'])
        || $this->mOutput->getDisplayTitle() !== false)
    ) {
        $convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
        if ($convruletitle) {
            $this->mOutput->setTitleText($convruletitle);
        } else {
            $titleText = $this->getConverterLanguage()->convertTitle($title);
            $this->mOutput->setTitleText($titleText);
        }
    }

    $text = $this->mStripState->unstripNoWiki($text);

    wfRunHooks('ParserBeforeTidy', array(&$this, &$text));

    $text = $this->replaceTransparentTags($text);
    $text = $this->mStripState->unstripGeneral($text);

    $text = Sanitizer::normalizeCharReferences($text);

    if ($wgUseTidy && $this->mOptions->getTidy() || $wgAlwaysUseTidy) {
        $text = MWTidy::tidy($text);
    } else {
        # attempt to sanitize at least some nesting problems
        # (bug #2702 and quite a few others)
        $tidyregs = array(
            '/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
            '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>',
            '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3<div\\5>\\6</div>\\8\\9',
            '/<([bi])><\\/\\1>/' => '',
        );
        $text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
    }

    if ($this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit()) {
        $this->limitationWarn(
            'expensive-parserfunction',
            $this->mExpensiveFunctionCount,
            $this->mOptions->getExpensiveParserFunctionLimit()
        );
    }

    wfRunHooks('ParserAfterTidy', array(&$this, &$text));

    # Information on include size limits, for the benefit of users who try to skirt them
    if ($this->mOptions->getEnableLimitReport()) {
        $max = $this->mOptions->getMaxIncludeSize();

        $cpuTime = $this->mOutput->getTimeSinceStart('cpu');
        if ($cpuTime !== null) {
            $this->mOutput->setLimitReportData('limitreport-cputime', sprintf("%.3f", $cpuTime));
        }

        $wallTime = $this->mOutput->getTimeSinceStart('wall');
        $this->mOutput->setLimitReportData('limitreport-walltime', sprintf("%.3f", $wallTime));

        $this->mOutput->setLimitReportData(
            'limitreport-ppvisitednodes',
            array($this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount())
        );
        $this->mOutput->setLimitReportData(
            'limitreport-ppgeneratednodes',
            array($this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount())
        );
        $this->mOutput->setLimitReportData(
            'limitreport-postexpandincludesize',
            array($this->mIncludeSizes['post-expand'], $max)
        );
        $this->mOutput->setLimitReportData(
            'limitreport-templateargumentsize',
            array($this->mIncludeSizes['arg'], $max)
        );
        $this->mOutput->setLimitReportData(
            'limitreport-expansiondepth',
            array($this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth())
        );
        $this->mOutput->setLimitReportData(
            'limitreport-expensivefunctioncount',
            array($this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit())
        );
        wfRunHooks('ParserLimitReportPrepare', array($this, $this->mOutput));

        $limitReport = "NewPP limit report\n";
        if ($wgShowHostnames) {
            $limitReport .= 'Parsed by ' . wfHostname() . "\n";
        }
        foreach ($this->mOutput->getLimitReportData() as $key => $value) {
            if (wfRunHooks('ParserLimitReportFormat', array($key, &$value, &$limitReport, false, false))) {
                $keyMsg = wfMessage($key)->inLanguage('en')->useDatabase(false);
                $valueMsg = wfMessage(array("{$key}-value-text", "{$key}-value"))
                    ->inLanguage('en')->useDatabase(false);
                if (!$valueMsg->exists()) {
                    $valueMsg = new RawMessage('$1');
                }
                if (!$keyMsg->isDisabled() && !$valueMsg->isDisabled()) {
                    $valueMsg->params($value);
                    $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
                }
            }
        }
        // Since we're not really outputting HTML, decode the entities and
        // then re-encode the things that need hiding inside HTML comments.
        $limitReport = htmlspecialchars_decode($limitReport);
        wfRunHooks('ParserLimitReport', array($this, &$limitReport));

        // Sanitize for comment. The hyphen replacement is U+2010 (written as
        // its UTF-8 bytes), which looks much like the problematic '-'.
        // Ampersands are re-encoded as &amp; after the decode above.
        $limitReport = str_replace(array('-', '&'), array("\xE2\x80\x90", '&amp;'), $limitReport);
        $text .= "\n<!-- \n{$limitReport}-->\n";

        if ($this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10) {
            wfDebugLog(
                'generated-pp-node-count',
                $this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey()
            );
        }
    }

    $this->mOutput->setText($text);

    # Restore the revision state saved at the top.
    $this->mRevisionId = $oldRevisionId;
    $this->mRevisionObject = $oldRevisionObject;
    $this->mRevisionTimestamp = $oldRevisionTimestamp;
    $this->mRevisionUser = $oldRevisionUser;
    $this->mRevisionSize = $oldRevisionSize;
    $this->mInputSize = false;

    wfProfileOut($fname);
    wfProfileOut(__METHOD__);

    return $this->mOutput;
}
/**
 * Helper function for parse() that transforms half-parsed HTML into fully
 * parsed HTML.
 *
 * The ParserAfterUnstrip, ParserBeforeTidy and ParserAfterTidy hooks are
 * only run when $isMain is true.
 *
 * @param string $text
 * @param bool $isMain
 * @param bool $linestart
 * @return string
 */
private function internalParseHalfParsed($text, $isMain = true, $linestart = true) {
    $text = $this->mStripState->unstripGeneral($text);

    if ($isMain) {
        Hooks::run('ParserAfterUnstrip', array(&$this, &$text));
    }

    # Clean up special characters, only run once, next-to-last before doBlockLevels
    # The non-breaking space is written as the numeric entity &#160; so that
    # it is explicit in the source instead of an invisible literal character.
    $fixtags = array(
        # a space before ? : ; ! % or a right guillemet becomes non-breaking,
        # but only if there is something before the space
        '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
        # a space after a left guillemet becomes non-breaking
        '/(\\302\\253) /' => '\\1&#160;',
        # undo the first rule in front of "!important"
        '/&#160;(!\\s*important)/' => ' \\1',
    );
    $text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);

    $text = $this->doBlockLevels($text, $linestart);

    $this->replaceLinkHolders($text);

    /**
     * The input doesn't get language converted if
     * a) It's disabled
     * b) Content isn't converted
     * c) It's a conversion table
     * d) it is an interface message (which is in the user language)
     */
    if (!($this->mOptions->getDisableContentConversion()
        || isset($this->mDoubleUnderscores['nocontentconvert']))
    ) {
        if (!$this->mOptions->getInterfaceMessage()) {
            # The position of the convert() call should not be changed. it
            # assumes that the links are all replaced and the only thing left
            # is the <nowiki> mark.
            $text = $this->getConverterLanguage()->convert($text);
        }
    }

    $text = $this->mStripState->unstripNoWiki($text);

    if ($isMain) {
        Hooks::run('ParserBeforeTidy', array(&$this, &$text));
    }

    $text = $this->replaceTransparentTags($text);
    $text = $this->mStripState->unstripGeneral($text);

    $text = Sanitizer::normalizeCharReferences($text);

    if (MWTidy::isEnabled() && $this->mOptions->getTidy()) {
        $text = MWTidy::tidy($text);
    } else {
        # attempt to sanitize at least some nesting problems
        # (bug #2702 and quite a few others)
        $tidyregs = array(
            '/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
            '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>',
            '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3<div\\5>\\6</div>\\8\\9',
            '/<([bi])><\\/\\1>/' => '',
        );
        $text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
    }

    if ($isMain) {
        Hooks::run('ParserAfterTidy', array(&$this, &$text));
    }

    return $text;
}
/**
 * Interface with html tidy.
 *
 * Deprecated shim: reports its own use through wfDeprecated() and then
 * hands the text to MWTidy::tidy(), returning whatever that returns.
 *
 * @deprecated Use MWTidy::tidy()
 * @param string $text Text passed through to MWTidy::tidy()
 * @return mixed Result of MWTidy::tidy()
 */
public static function tidy($text) {
    wfDeprecated(__METHOD__);

    $tidied = MWTidy::tidy($text);

    return $tidied;
}
/**
 * Convert wikitext to HTML
 * Do not call this function recursively.
 *
 * The method temporarily overrides the parser's revision fields when
 * $revid is given and restores them before returning. When the limit
 * report is enabled, an HTML comment with the preprocessor limits is
 * appended to the output text.
 *
 * @param $text String: text we want to parse
 * @param $title Title object
 * @param $options ParserOptions
 * @param $linestart boolean
 * @param $clearState boolean
 * @param $revid Int: number to pass in {{REVISIONID}}
 * @return ParserOutput a ParserOutput
 */
public function parse($text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null) {
    /**
     * First pass--just handle <nowiki> sections, pass the rest off
     * to internalParse() which does all the real work.
     */
    global $wgUseTidy, $wgAlwaysUseTidy;

    $fname = __METHOD__ . '-' . wfGetCaller();
    wfProfileIn(__METHOD__);
    wfProfileIn($fname);

    $this->startParse($title, $options, self::OT_HTML, $clearState);

    # Remove the strip marker tag prefix from the input, if present.
    if ($clearState) {
        $text = str_replace($this->mUniqPrefix, '', $text);
    }

    # Remember the current revision state so it can be restored at the end.
    $oldRevisionId = $this->mRevisionId;
    $oldRevisionObject = $this->mRevisionObject;
    $oldRevisionTimestamp = $this->mRevisionTimestamp;
    $oldRevisionUser = $this->mRevisionUser;
    if ($revid !== null) {
        $this->mRevisionId = $revid;
        $this->mRevisionObject = null;
        $this->mRevisionTimestamp = null;
        $this->mRevisionUser = null;
    }

    wfRunHooks('ParserBeforeStrip', array(&$this, &$text, &$this->mStripState));
    # No more strip!
    wfRunHooks('ParserAfterStrip', array(&$this, &$text, &$this->mStripState));
    $text = $this->internalParse($text);
    wfRunHooks('ParserAfterParse', array(&$this, &$text, &$this->mStripState));

    $text = $this->mStripState->unstripGeneral($text);

    # Clean up special characters, only run once, next-to-last before doBlockLevels
    # The non-breaking space is written as the numeric entity &#160; so that
    # it is explicit in the source instead of an invisible literal character.
    $fixtags = array(
        # a space before ? : ; ! % or a right guillemet becomes non-breaking,
        # but only if there is something before the space
        '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
        # a space after a left guillemet becomes non-breaking
        '/(\\302\\253) /' => '\\1&#160;',
        # undo the first rule in front of "!important"
        '/&#160;(!\\s*important)/' => ' \\1',
    );
    $text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);

    $text = $this->doBlockLevels($text, $linestart);

    $this->replaceLinkHolders($text);

    /**
     * The input doesn't get language converted if
     * a) It's disabled
     * b) Content isn't converted
     * c) It's a conversion table
     * d) it is an interface message (which is in the user language)
     */
    if (!($options->getDisableContentConversion() || isset($this->mDoubleUnderscores['nocontentconvert']))) {
        # Run convert unconditionally in 1.18-compatible mode
        global $wgBug34832TransitionalRollback;
        if ($wgBug34832TransitionalRollback || !$this->mOptions->getInterfaceMessage()) {
            # The position of the convert() call should not be changed. it
            # assumes that the links are all replaced and the only thing left
            # is the <nowiki> mark.
            $text = $this->getConverterLanguage()->convert($text);
        }
    }

    /**
     * A converted title will be provided in the output object if title and
     * content conversion are enabled, the article text does not contain
     * a conversion-suppressing double-underscore tag, and no
     * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
     * automatic link conversion.
     */
    if (!($options->getDisableTitleConversion()
        || isset($this->mDoubleUnderscores['nocontentconvert'])
        || isset($this->mDoubleUnderscores['notitleconvert'])
        || $this->mOutput->getDisplayTitle() !== false)
    ) {
        $convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
        if ($convruletitle) {
            $this->mOutput->setTitleText($convruletitle);
        } else {
            $titleText = $this->getConverterLanguage()->convertTitle($title);
            $this->mOutput->setTitleText($titleText);
        }
    }

    $text = $this->mStripState->unstripNoWiki($text);

    wfRunHooks('ParserBeforeTidy', array(&$this, &$text));

    $text = $this->replaceTransparentTags($text);
    $text = $this->mStripState->unstripGeneral($text);

    $text = Sanitizer::normalizeCharReferences($text);

    if ($wgUseTidy && $this->mOptions->getTidy() || $wgAlwaysUseTidy) {
        $text = MWTidy::tidy($text);
    } else {
        # attempt to sanitize at least some nesting problems
        # (bug #2702 and quite a few others)
        $tidyregs = array(
            '/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
            '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>',
            '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3<div\\5>\\6</div>\\8\\9',
            '/<([bi])><\\/\\1>/' => '',
        );
        $text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
    }

    if ($this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit()) {
        $this->limitationWarn(
            'expensive-parserfunction',
            $this->mExpensiveFunctionCount,
            $this->mOptions->getExpensiveParserFunctionLimit()
        );
    }

    wfRunHooks('ParserAfterTidy', array(&$this, &$text));

    # Information on include size limits, for the benefit of users who try to skirt them
    if ($this->mOptions->getEnableLimitReport()) {
        $max = $this->mOptions->getMaxIncludeSize();
        $PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$this->mOptions->getExpensiveParserFunctionLimit()}\n";
        $limitReport = "NewPP limit report\n" .
            "Preprocessor visited node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
            "Preprocessor generated node count: " .
            "{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" .
            "Post-expand include size: {$this->mIncludeSizes['post-expand']}/{$max} bytes\n" .
            "Template argument size: {$this->mIncludeSizes['arg']}/{$max} bytes\n" .
            "Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n" .
            $PFreport;
        wfRunHooks('ParserLimitReport', array($this, &$limitReport));
        $text .= "\n<!-- \n{$limitReport}-->\n";

        if ($this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10) {
            wfDebugLog(
                'generated-pp-node-count',
                $this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey()
            );
        }
    }

    $this->mOutput->setText($text);

    # Restore the revision state saved at the top.
    $this->mRevisionId = $oldRevisionId;
    $this->mRevisionObject = $oldRevisionObject;
    $this->mRevisionTimestamp = $oldRevisionTimestamp;
    $this->mRevisionUser = $oldRevisionUser;

    wfProfileOut($fname);
    wfProfileOut(__METHOD__);

    return $this->mOutput;
}
/**
 * Pass text through MWTidy::tidy() when the $wgUseTidy global is true;
 * otherwise hand the text back untouched.
 *
 * @param $text String: the text to tidy
 * @return String
 */
protected function tidy($text) {
    global $wgUseTidy;

    if (!$wgUseTidy) {
        // Tidy is switched off: nothing to do.
        return $text;
    }

    return MWTidy::tidy($text);
}
/**
 * Separate the summary of a wikilog article from its contents.
 *
 * The content is always the parser output text with HTML comments
 * removed. The summary is taken from the wikilog data attached to the
 * parser output when that is present and non-empty; otherwise it is
 * whatever precedes the first heading (h1-h6) in the content, or NULL
 * when the content has no heading to split on.
 *
 * @param $parserOutput ParserOutput object.
 * @return Two-element array with summary and content. Summary may be
 *   NULL if nonexistent.
 */
public static function splitSummaryContent( $parserOutput ) {
    global $wgUseTidy;

    $content = Sanitizer::removeHTMLcomments( $parserOutput->getText() );

    # Parser output contains wikilog output and summary, use it.
    $hasStoredSummary = isset( $parserOutput->mExtWikilog )
        && $parserOutput->mExtWikilog->mSummary;
    if ( $hasStoredSummary ) {
        $storedSummary = Sanitizer::removeHTMLcomments( $parserOutput->mExtWikilog->mSummary );
        return array( $storedSummary, $content );
    }

    # Try to extract summary from the content text.
    $parts = preg_split( '/<(h[1-6]).*?>.*?<\\/\\1>/i', $content, 2 );
    if ( count( $parts ) <= 1 ) {
        # Short article with a single section, use no summary and
        # leave to the caller to decide what to do.
        return array( null, $content );
    }

    # Long article with multiple sections, use only the first one.
    $leadSection = $parts[0];

    # It is possible for the regex to split on a heading that is
    # not a child of the root element (e.g. <div><h2>...</h2>
    # </div> leaving an open <div> tag). In order to handle such
    # cases, we pass the summary through tidy if it is available.
    if ( $wgUseTidy ) {
        $leadSection = MWTidy::tidy( $leadSection );
    }

    return array( $leadSection, $content );
}
/**
 * Render the compatibility table, or handle a purge request.
 *
 * Request parameters read: 'topic', 'feature', 'format', 'action' and
 * 'foresi'. With action=purge the memcached entries are removed, a PURGE
 * request is sent to $wgCompatablesJsonFileUrl and, when ESI is enabled and
 * the user has the 'purgecompatables' right, an in-band ESI invalidation is
 * emitted. Otherwise the table is served from memcached or generated,
 * optionally tidied, cached and added to the output.
 *
 * @param string|null $par Not used by this method
 * @throws PermissionsError On a purge request when ESI is disabled or the
 *   user lacks the 'purgecompatables' right
 */
public function execute($par) {
    global $wgCompatablesUseESI, $wgUseTidy, $wgAlwaysUseTidy, $wgCompatablesJsonFileUrl;

    $this->setHeaders();

    $request = $this->getRequest();
    $out = $this->getOutput();

    $args = array(
        'topic' => $request->getVal('topic'),
        'feature' => $request->getVal('feature'),
        'format' => $request->getVal('format'),
    );
    $args['cacheKey'] = wfMemcKey('compatables', $args['format'], $args['topic'], $args['feature']);

    // Handle purge requests from admins...
    // @TODO: Varnish, which only supports a few bits of ESI, can not handle this
    // (https://www.varnish-cache.org/docs/3.0/tutorial/esi.html)
    // (https://www.varnish-cache.org/trac/wiki/Future_ESI)
    if ($request->getVal('action') === 'purge') {
        // NOTE(review): the cache removal and the PURGE request below run
        // before the permission check further down, so they are performed
        // even for users who then receive a PermissionsError. Confirm that
        // this is intended.

        // See https://github.com/webplatform/mediawiki/issues/16 #TODO
        Compatables::memcacheRemove(wfMemcKey('webplatformdocs', 'compatables', 'data', 'full'));
        Compatables::memcacheRemove($args['cacheKey']);

        try {
            $req = MWHttpRequest::factory($wgCompatablesJsonFileUrl, array('method' => 'PURGE'));
            $req->execute();
            wfDebugLog('CompaTables', 'cURL PURGE done to "' . $wgCompatablesJsonFileUrl . '"');
        } catch (Exception $e) {
            // Best effort: a failed upstream purge is logged and otherwise ignored.
            wfDebugLog(
                'CompaTables',
                'Had problems with cURL PURGE to "' . $wgCompatablesJsonFileUrl . '", message ' . $e->getMessage()
            );
        }

        if ($wgCompatablesUseESI && $this->getUser()->isAllowed('purgecompatables')) {
            // Get the ESI URL prefix to purge
            $urlPrefix = SpecialPage::getTitleFor('Compatables')->getFullUrl();
            $urlPrefix = wfExpandUrl($urlPrefix, PROTO_INTERNAL);

            // Include as an in-band ESI invalidation request
            $out->addHtml(
                "\n<esi:invalidate>\n" .
                "<?xml version=\"1.0\"?>\n" .
                "<!DOCTYPE INVALIDATION SYSTEM \"internal:///WCSinvalidation.dtd\">\n" .
                "<INVALIDATION VERSION=\"WCS-1.1\">\n" .
                "<OBJECT>\n" .
                Xml::element('ADVANCEDSELECTOR', array('URIPREFIX' => $urlPrefix)) .
                "\n<ACTION REMOVALTTL=\"0\"/>\n" .
                "</OBJECT>\n" .
                "</INVALIDATION>\n" .
                "</esi:invalidate>\n"
            );
            $out->addWikiMsg('compatables-purged');
            return;
        } else {
            throw new PermissionsError('purgecompatables');
        }
    }

    // 1 hour server-side cache max before revalidate
    $out->setSquidMaxage(Compatables::MAX_AGE);

    // Try to handle IMS GET requests from CDN efficiently
    // $data['timestamp'] has been added, to not have to do
    // 2 HTTP queries
    $data = Compatables::getData();
    if ($out->checkLastModified($data['timestamp'])) {
        return; // nothing to send (cache hit)
    }

    $cached = Compatables::memcacheRead($args['cacheKey'], $data['hash']);
    if ($cached !== false) {
        $table = $cached['output'];
    } else {
        $generated = Compatables::generateCompaTable($data, $args);
        // No parser (and therefore no ParserOptions) exists in this method,
        // so the tidy decision rests on the two global switches alone.
        if ($wgUseTidy || $wgAlwaysUseTidy) {
            $generated['output'] = MWTidy::tidy($generated['output']);
        }
        Compatables::memcacheSave($args['cacheKey'], $generated);
        $table = $generated['output'];
    }

    $out->addHtml('<h1>Compatibility data</h1>');
    $out->addHtml($table);

    // For ESI includes only the article body is sent.
    if ($request->getBool('foresi')) {
        $out->setArticleBodyOnly(true);
    }
}
/**
 * Collect the mobile-view data for the requested page.
 *
 * Follows a redirect when $this->followRedirects is set, looks the result
 * up in $wgMemc, and on a miss builds it from the file page or the parser
 * output: HTML is optionally run through MobileFormatter, split into
 * per-section chunks, and combined with revision, user, language-link and
 * page-property information. Results whose HTML is at least
 * $wgMFMinCachedPageSize bytes long are stored back in $wgMemc.
 *
 * @param Title $title
 * @param boolean $noImages
 * @return array Empty when a redirect leads to a title in a negative namespace
 */
private function getData(Title $title, $noImages) {
    global $wgMemc, $wgUseTidy, $wgMFTidyMobileViewSections, $wgMFMinCachedPageSize, $wgMFSpecialCaseMainPage;

    $page = $this->makeWikiPage($title);
    if ($this->followRedirects && $page->isRedirect()) {
        $target = $page->getRedirectTarget();
        if ($target) {
            $title = $target;
            $this->getResult()->addValue(
                null,
                $this->getModuleName(),
                array('redirected' => $title->getPrefixedText())
            );
            if ($title->getNamespace() < 0) {
                // Redirect ends in a negative namespace: report it as not viewable.
                $this->getResult()->addValue(
                    null,
                    $this->getModuleName(),
                    array('viewable' => 'no')
                );
                return array();
            }
            $page = $this->makeWikiPage($title);
        }
    }

    // Build the cache key; file pages and wiki pages are keyed differently.
    $latest = $page->getLatest();
    if (!$this->file) {
        if (!$latest) {
            // https://bugzilla.wikimedia.org/show_bug.cgi?id=53378
            // Title::exists() above doesn't seem to always catch recently deleted pages
            $this->dieUsageMsg(array('notanarticle', $title->getPrefixedText()));
        }
        $parserOptions = $this->makeParserOptions($page);
        $parserCacheKey = ParserCache::singleton()->getKey($page, $parserOptions);
        $key = wfMemcKey(
            'mf',
            'mobileview',
            self::CACHE_VERSION,
            $noImages,
            $latest,
            $this->noTransform,
            $parserCacheKey
        );
    } else {
        $key = wfMemcKey(
            'mf',
            'mobileview',
            self::CACHE_VERSION,
            $noImages,
            $latest,
            $this->noTransform,
            $this->file->getSha1(),
            $this->variant
        );
        $cacheExpiry = 3600;
    }

    $cached = $wgMemc->get($key);
    if ($cached) {
        wfIncrStats('mobile.view.cache-hit');
        return $cached;
    }
    wfIncrStats('mobile.view.cache-miss');

    // Fetch the page HTML.
    if ($this->file) {
        $html = $this->getFilePage($title);
    } else {
        $parserOutput = $this->getParserOutput($page, $parserOptions);
        $html = $parserOutput->getText();
        $cacheExpiry = $parserOutput->getCacheExpiry();
    }

    if (!$this->noTransform) {
        $formatter = new MobileFormatter(MobileFormatter::wrapHTML($html), $title);
        $formatter->setRemoveMedia($noImages);
        $formatter->filterContent();
        $formatter->setIsMainPage($this->mainPage && $wgMFSpecialCaseMainPage);
        $html = $formatter->getText();
    }

    if ($this->mainPage || $this->file) {
        // Main page and file pages are delivered as a single block of text.
        $data = array(
            'sections' => array(),
            'text' => array($html),
            'refsections' => array(),
        );
    } else {
        $data = array();
        $data['sections'] = $parserOutput->getSections();
        $numSections = count($data['sections']);
        for ($idx = 0; $idx < $numSections; $idx++) {
            $line = $data['sections'][$idx]['line'];
            $data['sections'][$idx]['line'] = $title->getPageLanguage()->convert($line);
        }

        // Split in front of every heading; the "<h" consumed by the split is re-added below.
        $chunks = preg_split('/<h(?=[1-6]\\b)/i', $html);
        if (count($chunks) != count($data['sections']) + 1) {
            wfDebugLog(
                'mobile',
                __METHOD__ . "(): mismatching number of " .
                "sections from parser and split on page {$title->getPrefixedText()}, oldid={$latest}"
            );
            // We can't be sure about anything here, return all page HTML as one big section
            $chunks = array($html);
            $data['sections'] = array();
        }

        $data['text'] = array();
        $data['refsections'] = array();
        $tidyChunks = $wgUseTidy && $wgMFTidyMobileViewSections && count($chunks) > 1;
        $position = 0;
        foreach ($chunks as $chunk) {
            if ($position > 0) {
                $chunk = "<h{$chunk}";
            }
            if ($tidyChunks) {
                $chunk = MWTidy::tidy($chunk);
            }
            if (preg_match('/<ol\\b[^>]*?class="references"/', $chunk)) {
                $data['refsections'][$position] = true;
            }
            $data['text'][] = $chunk;
            $position++;
        }

        if ($this->usePageImages) {
            $image = $this->getPageImage($title);
            if ($image) {
                $data['image'] = $image->getTitle()->getText();
            }
        }
    }

    $data['lastmodified'] = wfTimestamp(TS_ISO_8601, $page->getTimestamp());

    // Page id
    $data['id'] = $page->getId();

    $user = User::newFromId($page->getUser());
    $data['lastmodifiedby'] = $user->isAnon()
        ? null
        : array(
            'name' => $page->getUserText(),
            'gender' => $user->getOption('gender'),
        );

    $data['revision'] = $title->getLatestRevID();

    if (isset($parserOutput)) {
        $languages = $parserOutput->getLanguageLinks();
        $data['languagecount'] = count($languages);
        $data['displaytitle'] = $parserOutput->getDisplayTitle();
        // @fixme: Does no work for some extension properties that get added in LinksUpdate
        $data['pageprops'] = $parserOutput->getProperties();
    } else {
        $data['languagecount'] = 0;
        $data['displaytitle'] = $title->getPrefixedText();
        $data['pageprops'] = array();
    }

    if ($title->getPageLanguage()->hasVariants()) {
        $data['hasvariants'] = true;
    }

    // Don't store small pages to decrease cache size requirements
    if (strlen($html) >= $wgMFMinCachedPageSize) {
        // store for the same time as original parser output
        $wgMemc->set($key, $data, $cacheExpiry);
    }

    return $data;
}
/**
 * Truncate a string of HTML to a maximum number of displayed characters
 * while keeping the markup balanced.
 *
 * Markup (tags and their attributes) does not count towards the limit and
 * each character entity counts as a single displayed character. Any tags
 * still open at the truncation point are closed in the returned string.
 *
 * @param string $text HTML to truncate; it is passed through MWTidy::tidy()
 *   before being scanned
 * @param int $length Maximum number of displayed characters. If it is zero
 *   or negative only the ellipsis is returned; if strlen( $text ) does not
 *   exceed it, $text is returned untouched.
 * @param string $ellipsis String appended when truncation happens. The
 *   default '...' is replaced by the localized 'ellipsis' message.
 * @return string The (possibly truncated) HTML, or '' if nothing is displayed
 */
function truncateHtml($text, $length, $ellipsis = '...') {
    # Use the localized ellipsis character
    if ($ellipsis == '...') {
        $ellipsis = wfMsgExt('ellipsis', array('escapenoentities', 'language' => $this));
    }
    # Check if there is no need to truncate
    if ($length <= 0) {
        return $ellipsis; // no text shown, nothing to format
    } elseif (strlen($text) <= $length) {
        return $text; // string short enough even *with* HTML
    }

    $text = MWTidy::tidy($text); // fix tags
    $displayLen = 0; // innerHTML length so far
    $testingEllipsis = false; // checking if ellipses will make string longer/equal?
    $tagType = 0; // 0-open, 1-close
    $bracketState = 0; // 1-tag start, 2-tag name, 0-neither
    $entityState = 0; // 0-not entity, 1-entity
    $tag = $ret = '';
    $openTags = array(); // open tag stack
    $textLen = strlen($text);

    for ($pos = 0; $pos < $textLen; ++$pos) {
        $ch = $text[$pos];
        $lastCh = $pos ? $text[$pos - 1] : '';
        $ret .= $ch; // add to result string
        if ($ch == '<') {
            $this->truncate_endBracket($tag, $tagType, $lastCh, $openTags); // for bad HTML
            $entityState = 0; // for bad HTML
            $bracketState = 1; // tag started (checking for a slash next)
        } elseif ($ch == '>') {
            $this->truncate_endBracket($tag, $tagType, $lastCh, $openTags);
            $entityState = 0; // for bad HTML
            $bracketState = 0; // out of brackets
        } elseif ($bracketState == 1) {
            if ($ch == '/') {
                $tagType = 1; // close tag (e.g. "</span>")
            } else {
                $tagType = 0; // open tag (e.g. "<span>")
                $tag .= $ch;
            }
            $bracketState = 2; // building tag name
        } elseif ($bracketState == 2) {
            if ($ch != ' ') {
                $tag .= $ch;
            } else {
                // Name found (e.g. "<a href=..."), add on tag attributes...
                // NOTE(review): truncate_skip() is assumed to append the skipped
                // characters to $ret and return how many it consumed - confirm.
                $pos += $this->truncate_skip($ret, $text, "<>", $pos + 1);
            }
        } elseif ($bracketState == 0) {
            if ($entityState) {
                if ($ch == ';') {
                    $entityState = 0;
                    $displayLen++; // entity is one displayed char
                }
            } else {
                if ($ch == '&') {
                    $entityState = 1; // entity found, (e.g. "&nbsp;")
                } else {
                    $displayLen++; // this char is displayed
                    // Add on the other display text after this...
                    $skipped = $this->truncate_skip($ret, $text, "<>&", $pos + 1, $length - $displayLen);
                    $displayLen += $skipped;
                    $pos += $skipped;
                }
            }
        }

        # Consider truncation once the display length has reached the maximum.
        # Double-check that we're not in the middle of a bracket/entity...
        if ($displayLen >= $length && $bracketState == 0 && $entityState == 0) {
            if (!$testingEllipsis) {
                $testingEllipsis = true;
                # Save where we are; we will truncate here unless
                # the ellipsis actually makes the string longer.
                $pOpenTags = $openTags; // save state
                $pRet = $ret; // save state
            } elseif ($displayLen > $length + strlen($ellipsis)) {
                # Ellipsis won't make string longer/equal, the truncation point was OK.
                $openTags = $pOpenTags; // reload state
                $ret = $this->removeBadCharLast($pRet); // reload state, multi-byte char fix
                $ret .= $ellipsis; // add ellipsis
                break;
            }
        }
    }

    if ($displayLen == 0) {
        return ''; // no text shown, nothing to format
    }

    // Close the last tag if left unclosed by bad HTML. The argument order
    // must match the calls inside the loop: tag, tag type, last character,
    // open tag stack.
    $this->truncate_endBracket($tag, $tagType, $text[$textLen - 1], $openTags);
    while (count($openTags) > 0) {
        $ret .= '</' . array_pop($openTags) . '>'; // close open tags
    }

    return $ret;
}
/**
 * Produce the HTML for a compatibility table.
 *
 * The table is taken from memcached when memcacheRead() returns an entry
 * for the computed cache key and the hash of the loaded data; otherwise it
 * is generated, optionally run through Tidy, and stored. When ESI is
 * enabled the table is wrapped by applyEsiTags() with the internal URL of
 * the 'Compatables' special page.
 *
 * @param string $input Content between the tags; currently ignored
 * @param array $args Tag attributes; 'topic', 'feature' and 'format' are used
 * @param Parser $parser Parser rendering the page
 * @return string Table HTML, or an HTML comment if no data could be loaded
 */
public static function renderCompaTables($input, array $args, Parser $parser) {
    global $wgCompatablesUseESI, $wgUseTidy, $wgAlwaysUseTidy;

    // Make sure the attributes we depend on always exist.
    foreach (array('topic', 'feature', 'format') as $attr) {
        if (!isset($args[$attr])) {
            $args[$attr] = '';
        }
    }
    $args['cacheKey'] = wfMemcKey('compatables', $args['format'], $args['topic'], $args['feature']);

    $data = self::getData();
    if ($data === null) {
        wfDebugLog('CompaTables', 'Could not generate table, data is either empty or had problems.');

        return '<!-- Compatables: Could not generate table, data might be empty or had problems with caching -->';
    }

    $html = '';

    $hit = self::memcacheRead($args['cacheKey'], $data['hash']);
    if ($hit === false) {
        // Nothing usable in the cache: build the table and store it.
        $fresh = self::generateCompaTable($data, $args);
        if (($wgUseTidy && $parser->getOptions()->getTidy()) || $wgAlwaysUseTidy) {
            $fresh['output'] = MWTidy::tidy($fresh['output']);
        }
        self::memcacheSave($args['cacheKey'], $fresh);
        $table = $fresh['output'];
    } else {
        $table = $hit['output'];
    }

    // The content of <compatibility>input would be here</compatibility>
    // ($input) is deliberately ignored because it is of no use for now.
    // Disabled code kept for reference:
    //if ( $input != '' ) {
    //    $html .= '<p>' . $input . '</p>';
    //}

    if ($wgCompatablesUseESI !== true) {
        $html .= $table;
        // Without ESI, ask for a six hour cache expiry on the parser
        // output (worse cache hit rate).
        $parser->getOutput()->updateCacheExpiry(6 * 3600);
    } else {
        $query = array(
            'topic' => $args['topic'],
            'feature' => $args['feature'],
            'format' => $args['format'],
            'foresi' => 1,
        );
        // @TODO: this breaks in ESI level if $url ends up http for https views
        $esiUrl = SpecialPage::getTitleFor('Compatables')->getFullUrl($query);
        $html .= self::applyEsiTags($table, wfExpandUrl($esiUrl, PROTO_INTERNAL));
    }

    return $html;
}
/**
 * Run a single parser test case and compare the produced output with the
 * expected result.
 *
 * @group medium
 * @dataProvider parserTestProvider
 * @param string $desc Test description; also used as the assertion message
 * @param string $input Text handed to the parser
 * @param string $result Expected output
 * @param array $opts Test options, normalised through parseOptions()
 * @param array $config Configuration handed to setupGlobals()
 */
public function testParserTest($desc, $input, $result, $opts, $config) {
    // Tests filtered out by the user-supplied regex pass trivially.
    if ($this->regex != '' && !preg_match('/' . $this->regex . '/', $desc)) {
        // XXX: dummy assertion so output is not flooded with
        // "test made no assertions"; marking the test as skipped
        // ('Filtered out by the user') is intentionally not done.
        $this->assertTrue(true);

        return;
    }

    if (!$this->isWikitextNS(NS_MAIN)) {
        // Parser tests frequently assume that the main namespace contains wikitext.
        // @todo When setting up pages, force the content model. Only skip if
        // $wgtContentModelUseDB is false.
        $this->markTestSkipped("Main namespace does not support wikitext,skipping parser test: {$desc}");
    }

    wfDebug("Running parser test: {$desc}\n");

    $opts = $this->parseOptions($opts);
    $context = $this->setupGlobals($opts, $config);
    $user = $context->getUser();
    $options = ParserOptions::newFromContext($context);

    $titleText = isset($opts['title']) ? $opts['title'] : 'Parser test';
    $local = isset($opts['local']);
    if (isset($opts['preprocessor'])) {
        $preprocessor = $opts['preprocessor'];
    } else {
        $preprocessor = null;
    }
    $parser = $this->getParser($preprocessor);
    $title = Title::newFromText($titleText);

    // Tests requiring math need an executable texvc; skip them otherwise.
    if (isset($opts['math']) || isset($opts['texvc'])) {
        global $wgTexvc;

        if (!isset($wgTexvc)) {
            $this->markTestSkipped("SKIPPED: \$wgTexvc is not set");
        } elseif (!is_executable($wgTexvc)) {
            $this->markTestSkipped("SKIPPED: texvc binary does not exist or is not executable.\nCurrent configuration is:\n\$wgTexvc = '{$wgTexvc}'");
        }
    }

    if (isset($opts['djvu']) && !$this->djVuSupport->isEnabled()) {
        $this->markTestSkipped("SKIPPED: djvu binaries do not exist or are not executable.\n");
    }

    // Pick the parser entry point requested by the test options.
    if (isset($opts['pst'])) {
        $actual = $parser->preSaveTransform($input, $title, $user, $options);
    } elseif (isset($opts['msg'])) {
        $actual = $parser->transformMsg($input, $options, $title);
    } elseif (isset($opts['section'])) {
        $sectionId = $opts['section'];
        $actual = $parser->getSection($input, $sectionId);
    } elseif (isset($opts['replace'])) {
        $sectionId = $opts['replace'][0];
        $replacement = $opts['replace'][1];
        $actual = $parser->replaceSection($input, $sectionId, $replacement);
    } elseif (isset($opts['comment'])) {
        $actual = Linker::formatComment($input, $title, $local);
    } elseif (isset($opts['preload'])) {
        $actual = $parser->getPreloadText($input, $title, $options);
    } else {
        // Default: a full parse, followed by optional post-processing.
        $parserOutput = $parser->parse($input, $title, $options, true, true, 1337);
        $parserOutput->setTOCEnabled(!isset($opts['notoc']));
        $actual = $parserOutput->getText();

        if (isset($opts['tidy'])) {
            if ($this->tidySupport->isEnabled()) {
                $actual = MWTidy::tidy($actual);
                // Drop trailing whitespace from the tidied output.
                $actual = preg_replace('/\\s+$/', '', $actual);
            } else {
                $this->markTestSkipped("SKIPPED: tidy extension is not installed.\n");
            }
        }

        if (isset($opts['showtitle'])) {
            // Prefer the title text set on the parser output, if any.
            $shownTitle = $title;
            if ($parserOutput->getTitleText()) {
                $shownTitle = $parserOutput->getTitleText();
            }
            $actual = "{$shownTitle}\n{$actual}";
        }

        if (isset($opts['ill'])) {
            $actual = implode(' ', $parserOutput->getLanguageLinks());
        } elseif (isset($opts['cat'])) {
            $page = $context->getOutput();
            $page->addCategoryLinks($parserOutput->getCategories());
            $categoryLinks = $page->getCategoryLinks();

            $actual = isset($categoryLinks['normal'])
                ? implode(' ', $categoryLinks['normal'])
                : '';
        }

        $parser->mPreprocessor = null;
    }

    $this->teardownGlobals();

    $this->assertEquals($result, $actual, $desc);
}