/**
  * Run a snippet through MWTidy and compare it with the expected markup,
  * ignoring paragraph wrappers and carriage returns.
  *
  * @dataProvider provideTestWrapping
  * @param $expected Markup the tidied text must equal
  * @param $text Markup handed to MWTidy::tidy()
  * @param $msg Optional assertion failure message
  */
 public function testTidyWrapping($expected, $text, $msg = '')
 {
     $tidied = MWTidy::tidy($text);
     // Where Tidy decides to put its <p> tags is irrelevant to this test
     $unwrapped = trim(preg_replace('#</?p>#', '', $tidied));
     // Strip carriage returns so Windows line endings compare equal
     $normalised = str_replace("\r", '', $unwrapped);
     $this->assertEquals($expected, $normalised, $msg);
 }
 /**
  * Show the special page: read the request, preprocess the submitted
  * wikitext and print the form followed by the expansion results.
  *
  * @param $subpage Not used by this method
  */
 function execute($subpage)
 {
     global $wgRequest, $wgParser, $wgOut, $wgUseTidy, $wgAlwaysUseTidy;
     $this->setHeaders();
     # XML generation is only offered when the parser has preprocessToDom()
     $this->isNewParser = is_callable(array($wgParser, 'preprocessToDom'));
     $titleStr = $wgRequest->getText('contexttitle');
     $contextTitle = Title::newFromText($titleStr);
     $selfTitle = $this->getTitle();
     # Fall back to this page's own title when no usable context title was given
     $title = $contextTitle ? $contextTitle : $selfTitle;
     $input = $wgRequest->getText('input');
     if ($this->isNewParser) {
         $this->generateXML = $wgRequest->getBool('generate_xml');
     } else {
         $this->generateXML = false;
     }
     $hasInput = strlen($input) > 0;
     # "Remove comments" defaults to on for the empty form and off once input was submitted
     $this->removeComments = $wgRequest->getBool('removecomments', !$hasInput);
     $this->removeNowiki = $wgRequest->getBool('removenowiki', false);
     $output = false;
     if ($hasInput) {
         $options = new ParserOptions();
         $options->setRemoveComments($this->removeComments);
         $options->setTidy(true);
         $options->setMaxIncludeSize(self::MAX_INCLUDE_SIZE);
         if ($this->generateXML) {
             $wgParser->startExternalParse($title, $options, OT_PREPROCESS);
             $dom = $wgParser->preprocessToDom($input);
             # Use saveXML() when the DOM object offers it, otherwise its string form
             $xml = is_callable(array($dom, 'saveXML')) ? $dom->saveXML() : $dom->__toString();
         }
         $output = $wgParser->preprocess($input, $title, $options);
     }
     $wgOut->addWikiText(wfMsg('expand_templates_intro'));
     $wgOut->addHTML($this->makeForm($titleStr, $input));
     if ($output === false) {
         # Nothing was submitted, so only the form is shown
         return;
     }
     if ($this->generateXML) {
         $wgOut->addHTML($this->makeOutput($xml, 'expand_templates_xml_output'));
     }
     $tmp = $this->makeOutput($output);
     if ($this->removeNowiki) {
         $nowikiTags = array('_&lt;nowiki&gt;_', '_&lt;/nowiki&gt;_', '_&lt;nowiki */&gt;_');
         $tmp = preg_replace($nowikiTags, '', $tmp);
     }
     $shouldTidy = ($wgUseTidy && $options->getTidy()) || $wgAlwaysUseTidy;
     if ($shouldTidy) {
         $tmp = MWTidy::tidy($tmp);
     }
     $wgOut->addHTML($tmp);
     $this->showHtmlPreview($title, $output, $wgOut);
 }
 /**
  * Show the special page: read the request, preprocess the submitted
  * wikitext and print the form followed by the expansion results
  * (optional XML tree, expanded wikitext, optional raw HTML, HTML preview).
  *
  * @param $subpage Not used by this method
  */
 function execute($subpage)
 {
     global $wgParser, $wgUseTidy, $wgAlwaysUseTidy;
     $this->setHeaders();
     $request = $this->getRequest();
     $titleStr = $request->getText('wpContextTitle');
     $title = Title::newFromText($titleStr);
     if (!$title) {
         # No usable context title was supplied; use this page's own title
         $title = $this->getPageTitle();
     }
     $input = $request->getText('wpInput');
     $this->generateXML = $request->getBool('wpGenerateXml');
     $this->generateRawHtml = $request->getBool('wpGenerateRawHtml');
     $hasInput = strlen($input) > 0;
     # "Remove comments" defaults to on for the empty form and off once input was submitted
     $this->removeComments = $request->getBool('wpRemoveComments', !$hasInput);
     $this->removeNowiki = $request->getBool('wpRemoveNowiki', false);
     $output = false;
     if ($hasInput) {
         $options = ParserOptions::newFromContext($this->getContext());
         $options->setRemoveComments($this->removeComments);
         $options->setTidy(true);
         $options->setMaxIncludeSize(self::MAX_INCLUDE_SIZE);
         if ($this->generateXML) {
             $wgParser->startExternalParse($title, $options, OT_PREPROCESS);
             $dom = $wgParser->preprocessToDom($input);
             # Use saveXML() when the DOM object has it, otherwise its string form
             $xml = method_exists($dom, 'saveXML') ? $dom->saveXML() : $dom->__toString();
         }
         $output = $wgParser->preprocess($input, $title, $options);
     }
     $out = $this->getOutput();
     $out->addWikiMsg('expand_templates_intro');
     $out->addHTML($this->makeForm($titleStr, $input));
     if ($output === false) {
         # Nothing was submitted, so only the form is shown
         return;
     }
     if ($this->generateXML && strlen($output) > 0) {
         $out->addHTML($this->makeOutput($xml, 'expand_templates_xml_output'));
     }
     $tmp = $this->makeOutput($output);
     if ($this->removeNowiki) {
         $nowikiTags = array('_&lt;nowiki&gt;_', '_&lt;/nowiki&gt;_', '_&lt;nowiki */&gt;_');
         $tmp = preg_replace($nowikiTags, '', $tmp);
     }
     $shouldTidy = ($wgUseTidy && $options->getTidy()) || $wgAlwaysUseTidy;
     if ($shouldTidy) {
         $tmp = MWTidy::tidy($tmp);
     }
     $out->addHTML($tmp);
     $rawhtml = $this->generateHtml($title, $output);
     $showRawHtml = $this->generateRawHtml && strlen($rawhtml) > 0;
     if ($showRawHtml) {
         $out->addHTML($this->makeOutput($rawhtml, 'expand_templates_html_output'));
     }
     $this->showHtmlPreview($title, $rawhtml, $out);
 }
Exemple #4
0
 /**
  * Convert wikitext to HTML and store the result in the parser's output object.
  * Do not call this function recursively.
  *
  * The method saves the current revision-related members, optionally
  * overrides them from $revid, runs the parse pipeline (hooks, internal
  * parse, block levels, link holders, language conversion, tidy), optionally
  * appends a limit report as an HTML comment, and restores the saved
  * revision members before returning.
  *
  * @param string $text Text we want to parse
  * @param Title $title Title object
  * @param ParserOptions $options Parser options
  * @param bool $linestart Passed through to doBlockLevels()
  * @param bool $clearState Passed to startParse(); when true the strip marker
  *   prefix is also removed from the input
  * @param int $revid Number to pass in {{REVISIONID}}
  * @return ParserOutput The parser's output object ($this->mOutput)
  */
 public function parse($text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null)
 {
     /**
      * First pass--just handle <nowiki> sections, pass the rest off
      * to internalParse() which does all the real work.
      */
     global $wgUseTidy, $wgAlwaysUseTidy, $wgShowHostnames;
     # Profile under the method name and under a caller-specific section name
     $fname = __METHOD__ . '-' . wfGetCaller();
     wfProfileIn(__METHOD__);
     wfProfileIn($fname);
     $this->startParse($title, $options, self::OT_HTML, $clearState);
     $this->mInputSize = strlen($text);
     if ($this->mOptions->getEnableLimitReport()) {
         $this->mOutput->resetParseStartTime();
     }
     # Remove the strip marker tag prefix from the input, if present.
     if ($clearState) {
         $text = str_replace($this->mUniqPrefix, '', $text);
     }
     # Remember the revision-related members; they are restored before returning
     $oldRevisionId = $this->mRevisionId;
     $oldRevisionObject = $this->mRevisionObject;
     $oldRevisionTimestamp = $this->mRevisionTimestamp;
     $oldRevisionUser = $this->mRevisionUser;
     $oldRevisionSize = $this->mRevisionSize;
     # An explicit $revid replaces the revision id and clears the other revision members
     if ($revid !== null) {
         $this->mRevisionId = $revid;
         $this->mRevisionObject = null;
         $this->mRevisionTimestamp = null;
         $this->mRevisionUser = null;
         $this->mRevisionSize = null;
     }
     wfRunHooks('ParserBeforeStrip', array(&$this, &$text, &$this->mStripState));
     # No more strip!
     wfRunHooks('ParserAfterStrip', array(&$this, &$text, &$this->mStripState));
     $text = $this->internalParse($text);
     wfRunHooks('ParserAfterParse', array(&$this, &$text, &$this->mStripState));
     $text = $this->mStripState->unstripGeneral($text);
     # Clean up special characters, only run once, next-to-last before doBlockLevels
     $fixtags = array('/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;', '/(\\302\\253) /' => '\\1&#160;', '/&#160;(!\\s*important)/' => ' \\1');
     $text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);
     $text = $this->doBlockLevels($text, $linestart);
     $this->replaceLinkHolders($text);
     /**
      * The input doesn't get language converted if
      * a) It's disabled
      * b) Content isn't converted
      * c) It's a conversion table
      * d) it is an interface message (which is in the user language)
      */
     if (!($options->getDisableContentConversion() || isset($this->mDoubleUnderscores['nocontentconvert']))) {
         if (!$this->mOptions->getInterfaceMessage()) {
             # The position of the convert() call should not be changed. it
             # assumes that the links are all replaced and the only thing left
             # is the <nowiki> mark.
             $text = $this->getConverterLanguage()->convert($text);
         }
     }
     /**
      * A converted title will be provided in the output object if title and
      * content conversion are enabled, the article text does not contain
      * a conversion-suppressing double-underscore tag, and no
      * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
      * automatic link conversion.
      */
     if (!($options->getDisableTitleConversion() || isset($this->mDoubleUnderscores['nocontentconvert']) || isset($this->mDoubleUnderscores['notitleconvert']) || $this->mOutput->getDisplayTitle() !== false)) {
         # A title from a conversion rule wins over the converted page title
         $convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
         if ($convruletitle) {
             $this->mOutput->setTitleText($convruletitle);
         } else {
             $titleText = $this->getConverterLanguage()->convertTitle($title);
             $this->mOutput->setTitleText($titleText);
         }
     }
     $text = $this->mStripState->unstripNoWiki($text);
     wfRunHooks('ParserBeforeTidy', array(&$this, &$text));
     $text = $this->replaceTransparentTags($text);
     $text = $this->mStripState->unstripGeneral($text);
     $text = Sanitizer::normalizeCharReferences($text);
     # Tidy runs when it is enabled globally and requested by the options,
     # or unconditionally when $wgAlwaysUseTidy is set
     if ($wgUseTidy && $this->mOptions->getTidy() || $wgAlwaysUseTidy) {
         $text = MWTidy::tidy($text);
     } else {
         # attempt to sanitize at least some nesting problems
         # (bug #2702 and quite a few others)
         $tidyregs = array('/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9', '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>', '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9', '/<([bi])><\\/\\1>/' => '');
         $text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
     }
     if ($this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit()) {
         $this->limitationWarn('expensive-parserfunction', $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit());
     }
     wfRunHooks('ParserAfterTidy', array(&$this, &$text));
     # Information on include size limits, for the benefit of users who try to skirt them
     if ($this->mOptions->getEnableLimitReport()) {
         $max = $this->mOptions->getMaxIncludeSize();
         # The CPU time entry is only recorded when a value is available
         $cpuTime = $this->mOutput->getTimeSinceStart('cpu');
         if ($cpuTime !== null) {
             $this->mOutput->setLimitReportData('limitreport-cputime', sprintf("%.3f", $cpuTime));
         }
         $wallTime = $this->mOutput->getTimeSinceStart('wall');
         $this->mOutput->setLimitReportData('limitreport-walltime', sprintf("%.3f", $wallTime));
         # Each of the following entries is stored as a (current value, limit) pair
         $this->mOutput->setLimitReportData('limitreport-ppvisitednodes', array($this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount()));
         $this->mOutput->setLimitReportData('limitreport-ppgeneratednodes', array($this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount()));
         $this->mOutput->setLimitReportData('limitreport-postexpandincludesize', array($this->mIncludeSizes['post-expand'], $max));
         $this->mOutput->setLimitReportData('limitreport-templateargumentsize', array($this->mIncludeSizes['arg'], $max));
         $this->mOutput->setLimitReportData('limitreport-expansiondepth', array($this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth()));
         $this->mOutput->setLimitReportData('limitreport-expensivefunctioncount', array($this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit()));
         wfRunHooks('ParserLimitReportPrepare', array($this, $this->mOutput));
         $limitReport = "NewPP limit report\n";
         if ($wgShowHostnames) {
             $limitReport .= 'Parsed by ' . wfHostname() . "\n";
         }
         foreach ($this->mOutput->getLimitReportData() as $key => $value) {
             # The default "key: value" line is only added when the hook call returns a truthy value
             if (wfRunHooks('ParserLimitReportFormat', array($key, &$value, &$limitReport, false, false))) {
                 $keyMsg = wfMessage($key)->inLanguage('en')->useDatabase(false);
                 $valueMsg = wfMessage(array("{$key}-value-text", "{$key}-value"))->inLanguage('en')->useDatabase(false);
                 # Without a value message, the raw value itself is printed
                 if (!$valueMsg->exists()) {
                     $valueMsg = new RawMessage('$1');
                 }
                 if (!$keyMsg->isDisabled() && !$valueMsg->isDisabled()) {
                     $valueMsg->params($value);
                     $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
                 }
             }
         }
         // Since we're not really outputting HTML, decode the entities and
         // then re-encode the things that need hiding inside HTML comments.
         $limitReport = htmlspecialchars_decode($limitReport);
         wfRunHooks('ParserLimitReport', array($this, &$limitReport));
         // Sanitize for comment. Note '‐' in the replacement is U+2010,
         // which looks much like the problematic '-'.
         $limitReport = str_replace(array('-', '&'), array('‐', '&amp;'), $limitReport);
         $text .= "\n<!-- \n{$limitReport}-->\n";
         # Log pages whose generated node count exceeds a tenth of the configured maximum
         if ($this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10) {
             wfDebugLog('generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey());
         }
     }
     $this->mOutput->setText($text);
     # Put back the revision-related members saved at the start
     $this->mRevisionId = $oldRevisionId;
     $this->mRevisionObject = $oldRevisionObject;
     $this->mRevisionTimestamp = $oldRevisionTimestamp;
     $this->mRevisionUser = $oldRevisionUser;
     $this->mRevisionSize = $oldRevisionSize;
     $this->mInputSize = false;
     wfProfileOut($fname);
     wfProfileOut(__METHOD__);
     return $this->mOutput;
 }
Exemple #5
0
 /**
  * Helper for parse(): take text that is already half-parsed HTML and
  * finish turning it into fully parsed HTML.
  *
  * The ParserAfterUnstrip, ParserBeforeTidy and ParserAfterTidy hooks are
  * only run when $isMain is true.
  *
  * @param string $text
  * @param bool $isMain
  * @param bool $linestart
  * @return string
  */
 private function internalParseHalfParsed($text, $isMain = true, $linestart = true)
 {
     $text = $this->mStripState->unstripGeneral($text);
     if ($isMain) {
         Hooks::run('ParserAfterUnstrip', array(&$this, &$text));
     }
     # Special character clean-up; runs exactly once, right before doBlockLevels
     $spacingFixes = array(
         '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
         '/(\\302\\253) /' => '\\1&#160;',
         '/&#160;(!\\s*important)/' => ' \\1',
     );
     $text = preg_replace(array_keys($spacingFixes), array_values($spacingFixes), $text);
     $text = $this->doBlockLevels($text, $linestart);
     $this->replaceLinkHolders($text);
     /**
      * Language conversion is skipped when
      * a) it is disabled
      * b) content isn't converted
      * c) it is a conversion table
      * d) it is an interface message (which is in the user language)
      */
     $conversionSuppressed = $this->mOptions->getDisableContentConversion()
         || isset($this->mDoubleUnderscores['nocontentconvert']);
     if (!$conversionSuppressed && !$this->mOptions->getInterfaceMessage()) {
         # Do not move this convert() call: it relies on every link having
         # been replaced already, with only the <nowiki> markers left.
         $text = $this->getConverterLanguage()->convert($text);
     }
     $text = $this->mStripState->unstripNoWiki($text);
     if ($isMain) {
         Hooks::run('ParserBeforeTidy', array(&$this, &$text));
     }
     $text = $this->replaceTransparentTags($text);
     $text = $this->mStripState->unstripGeneral($text);
     $text = Sanitizer::normalizeCharReferences($text);
     $useTidy = MWTidy::isEnabled() && $this->mOptions->getTidy();
     if ($useTidy) {
         $text = MWTidy::tidy($text);
     } else {
         # Without Tidy, try to repair at least some nesting problems
         # (bug #2702 and quite a few others)
         $nestingFixes = array(
             '/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
             '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>',
             '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
             '/<([bi])><\\/\\1>/' => '',
         );
         $text = preg_replace(array_keys($nestingFixes), array_values($nestingFixes), $text);
     }
     if ($isMain) {
         Hooks::run('ParserAfterTidy', array(&$this, &$text));
     }
     return $text;
 }
Exemple #6
0
 /**
  * Interface with html tidy: emits a deprecation notice for this method,
  * then hands the text to MWTidy::tidy() and returns its result.
  *
  * @deprecated Use MWTidy::tidy()
  * @param $text Text passed on to MWTidy::tidy()
  * @return Whatever MWTidy::tidy() returns for $text
  */
 public static function tidy($text)
 {
     wfDeprecated(__METHOD__);
     $tidied = MWTidy::tidy($text);
     return $tidied;
 }
Exemple #7
0
 /**
  * Convert wikitext to HTML and store the result in the parser's output object.
  * Do not call this function recursively.
  *
  * The method saves the current revision-related members, optionally
  * overrides them from $revid, runs the parse pipeline (hooks, internal
  * parse, block levels, link holders, language conversion, tidy), optionally
  * appends a limit report as an HTML comment, and restores the saved
  * revision members before returning.
  *
  * @param $text String: text we want to parse
  * @param $title Title object
  * @param $options ParserOptions
  * @param $linestart boolean: passed through to doBlockLevels()
  * @param $clearState boolean: passed to startParse(); when true the strip
  *   marker prefix is also removed from the input
  * @param $revid Int: number to pass in {{REVISIONID}}
  * @return ParserOutput The parser's output object ($this->mOutput)
  */
 public function parse($text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null)
 {
     /**
      * First pass--just handle <nowiki> sections, pass the rest off
      * to internalParse() which does all the real work.
      */
     global $wgUseTidy, $wgAlwaysUseTidy;
     # Profile under the method name and under a caller-specific section name
     $fname = __METHOD__ . '-' . wfGetCaller();
     wfProfileIn(__METHOD__);
     wfProfileIn($fname);
     $this->startParse($title, $options, self::OT_HTML, $clearState);
     # Remove the strip marker tag prefix from the input, if present.
     if ($clearState) {
         $text = str_replace($this->mUniqPrefix, '', $text);
     }
     # Remember the revision-related members; they are restored before returning
     $oldRevisionId = $this->mRevisionId;
     $oldRevisionObject = $this->mRevisionObject;
     $oldRevisionTimestamp = $this->mRevisionTimestamp;
     $oldRevisionUser = $this->mRevisionUser;
     # An explicit $revid replaces the revision id and clears the other revision members
     if ($revid !== null) {
         $this->mRevisionId = $revid;
         $this->mRevisionObject = null;
         $this->mRevisionTimestamp = null;
         $this->mRevisionUser = null;
     }
     wfRunHooks('ParserBeforeStrip', array(&$this, &$text, &$this->mStripState));
     # No more strip!
     wfRunHooks('ParserAfterStrip', array(&$this, &$text, &$this->mStripState));
     $text = $this->internalParse($text);
     wfRunHooks('ParserAfterParse', array(&$this, &$text, &$this->mStripState));
     $text = $this->mStripState->unstripGeneral($text);
     # Clean up special characters, only run once, next-to-last before doBlockLevels
     $fixtags = array('/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;', '/(\\302\\253) /' => '\\1&#160;', '/&#160;(!\\s*important)/' => ' \\1');
     $text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);
     $text = $this->doBlockLevels($text, $linestart);
     $this->replaceLinkHolders($text);
     /**
      * The input doesn't get language converted if
      * a) It's disabled
      * b) Content isn't converted
      * c) It's a conversion table
      * d) it is an interface message (which is in the user language)
      */
     if (!($options->getDisableContentConversion() || isset($this->mDoubleUnderscores['nocontentconvert']))) {
         # Run convert unconditionally in 1.18-compatible mode
         global $wgBug34832TransitionalRollback;
         if ($wgBug34832TransitionalRollback || !$this->mOptions->getInterfaceMessage()) {
             # The position of the convert() call should not be changed. it
             # assumes that the links are all replaced and the only thing left
             # is the <nowiki> mark.
             $text = $this->getConverterLanguage()->convert($text);
         }
     }
     /**
      * A converted title will be provided in the output object if title and
      * content conversion are enabled, the article text does not contain
      * a conversion-suppressing double-underscore tag, and no
      * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
      * automatic link conversion.
      */
     if (!($options->getDisableTitleConversion() || isset($this->mDoubleUnderscores['nocontentconvert']) || isset($this->mDoubleUnderscores['notitleconvert']) || $this->mOutput->getDisplayTitle() !== false)) {
         # A title from a conversion rule wins over the converted page title
         $convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
         if ($convruletitle) {
             $this->mOutput->setTitleText($convruletitle);
         } else {
             $titleText = $this->getConverterLanguage()->convertTitle($title);
             $this->mOutput->setTitleText($titleText);
         }
     }
     $text = $this->mStripState->unstripNoWiki($text);
     wfRunHooks('ParserBeforeTidy', array(&$this, &$text));
     $text = $this->replaceTransparentTags($text);
     $text = $this->mStripState->unstripGeneral($text);
     $text = Sanitizer::normalizeCharReferences($text);
     # Tidy runs when it is enabled globally and requested by the options,
     # or unconditionally when $wgAlwaysUseTidy is set
     if ($wgUseTidy && $this->mOptions->getTidy() || $wgAlwaysUseTidy) {
         $text = MWTidy::tidy($text);
     } else {
         # attempt to sanitize at least some nesting problems
         # (bug #2702 and quite a few others)
         $tidyregs = array('/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9', '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>', '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9', '/<([bi])><\\/\\1>/' => '');
         $text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
     }
     if ($this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit()) {
         $this->limitationWarn('expensive-parserfunction', $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit());
     }
     wfRunHooks('ParserAfterTidy', array(&$this, &$text));
     # Information on include size limits, for the benefit of users who try to skirt them
     if ($this->mOptions->getEnableLimitReport()) {
         $max = $this->mOptions->getMaxIncludeSize();
         # The report is plain text: one "label: current/limit" line per counter
         $PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$this->mOptions->getExpensiveParserFunctionLimit()}\n";
         $limitReport = "NewPP limit report\n" . "Preprocessor visited node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" . "Preprocessor generated node count: " . "{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" . "Post-expand include size: {$this->mIncludeSizes['post-expand']}/{$max} bytes\n" . "Template argument size: {$this->mIncludeSizes['arg']}/{$max} bytes\n" . "Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n" . $PFreport;
         wfRunHooks('ParserLimitReport', array($this, &$limitReport));
         # The report is appended to the output inside an HTML comment
         $text .= "\n<!-- \n{$limitReport}-->\n";
         # Log pages whose generated node count exceeds a tenth of the configured maximum
         if ($this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10) {
             wfDebugLog('generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey());
         }
     }
     $this->mOutput->setText($text);
     # Put back the revision-related members saved at the start
     $this->mRevisionId = $oldRevisionId;
     $this->mRevisionObject = $oldRevisionObject;
     $this->mRevisionTimestamp = $oldRevisionTimestamp;
     $this->mRevisionUser = $oldRevisionUser;
     wfProfileOut($fname);
     wfProfileOut(__METHOD__);
     return $this->mOutput;
 }
 /**
  * Pass text through MWTidy::tidy() when the $wgUseTidy global is true;
  * otherwise hand it back untouched.
  *
  * @param $text String: the text to tidy
  * @return String
  */
 protected function tidy($text)
 {
     global $wgUseTidy;
     return $wgUseTidy ? MWTidy::tidy($text) : $text;
 }
	/**
	 * Separate the summary of a wikilog article from its contents.
	 * A summary carried by the parser output is preferred; failing that,
	 * the text before the first heading of the content (section zero) is
	 * used when such a heading exists.
	 *
	 * @param $parserOutput ParserOutput object.
	 * @return Two-element array with summary and content. Summary may be
	 *   NULL if nonexistent.
	 */
	public static function splitSummaryContent( $parserOutput ) {
		global $wgUseTidy;

		$content = Sanitizer::removeHTMLcomments( $parserOutput->getText() );

		$hasWikilogSummary = isset( $parserOutput->mExtWikilog )
			&& $parserOutput->mExtWikilog->mSummary;
		if ( $hasWikilogSummary ) {
			# The parser output carries wikilog data with a summary; use it.
			$ownSummary = Sanitizer::removeHTMLcomments( $parserOutput->mExtWikilog->mSummary );
			return array( $ownSummary, $content );
		}

		# Otherwise look for a summary in the content text itself.
		$pieces = preg_split( '/<(h[1-6]).*?>.*?<\\/\\1>/i', $content, 2 );
		if ( count( $pieces ) <= 1 ) {
			# Single-section article: report no summary and let the caller
			# decide what to do.
			return array( null, $content );
		}

		# Multi-section article: only the part before the first heading counts.
		$leadSection = $pieces[0];
		# The split may land on a heading that is not a child of the root
		# element (e.g. <div><h2>...</h2></div>), leaving an unclosed <div>.
		# Run the fragment through tidy, when available, to cope with that.
		if ( $wgUseTidy ) {
			$leadSection = MWTidy::tidy( $leadSection );
		}

		return array( $leadSection, $content );
	}
 /**
  * Show the compatibility table special page.
  *
  * With action=purge the cached data is invalidated (memcache entries,
  * an HTTP PURGE of the JSON source and, when ESI is enabled, an in-band
  * ESI invalidation); this requires the 'purgecompatables' right.
  * Otherwise the table for the requested topic/feature/format is read
  * from cache or generated, optionally tidied, and added to the output.
  *
  * @param $par Not used by this method
  * @throws PermissionsError When a purge is requested without the
  *   'purgecompatables' right
  */
 public function execute($par)
 {
     global $wgCompatablesUseESI, $wgUseTidy, $wgAlwaysUseTidy, $wgCompatablesJsonFileUrl;
     $this->setHeaders();
     $request = $this->getRequest();
     $out = $this->getOutput();
     $args = array(
         'topic' => $request->getVal('topic'),
         'feature' => $request->getVal('feature'),
         'format' => $request->getVal('format'),
     );
     $args['cacheKey'] = wfMemcKey('compatables', $args['format'], $args['topic'], $args['feature']);
     // Handle purge requests from admins...
     // @TODO: Varnish, which only supports a few bits of ESI, can not handle this
     // (https://www.varnish-cache.org/docs/3.0/tutorial/esi.html)
     // (https://www.varnish-cache.org/trac/wiki/Future_ESI)
     if ($request->getVal('action') === 'purge') {
         // Check the right before touching any cache, so that unprivileged
         // requests cannot invalidate the cached data.
         if (!$this->getUser()->isAllowed('purgecompatables')) {
             throw new PermissionsError('purgecompatables');
         }
         // See https://github.com/webplatform/mediawiki/issues/16 #TODO
         Compatables::memcacheRemove(wfMemcKey('webplatformdocs', 'compatables', 'data', 'full'));
         Compatables::memcacheRemove($args['cacheKey']);
         // The HTTP PURGE is best-effort: failures are logged, never fatal
         try {
             $req = MWHttpRequest::factory($wgCompatablesJsonFileUrl, array('method' => 'PURGE'));
             $status = $req->execute();
             if ($status->isOK()) {
                 wfDebugLog('CompaTables', 'cURL PURGE done to "' . $wgCompatablesJsonFileUrl . '"');
             } else {
                 wfDebugLog('CompaTables', 'cURL PURGE to "' . $wgCompatablesJsonFileUrl . '" did not succeed');
             }
         } catch (Exception $e) {
             wfDebugLog('CompaTables', 'Had problems with cURL PURGE to "' . $wgCompatablesJsonFileUrl . '", message ' . $e->getMessage());
         }
         if ($wgCompatablesUseESI) {
             // Get the ESI URL prefix to purge
             $urlPrefix = SpecialPage::getTitleFor('Compatables')->getFullUrl();
             $urlPrefix = wfExpandUrl($urlPrefix, PROTO_INTERNAL);
             // Include as an in-band ESI invalidation request
             $out->addHtml("\n<esi:invalidate>\n" . "<?xml version=\"1.0\"?>\n" . "<!DOCTYPE INVALIDATION SYSTEM \"internal:///WCSinvalidation.dtd\">\n" . "<INVALIDATION VERSION=\"WCS-1.1\">\n" . "<OBJECT>\n" . Xml::element('ADVANCEDSELECTOR', array('URIPREFIX' => $urlPrefix)) . "\n<ACTION REMOVALTTL=\"0\"/>\n" . "</OBJECT>\n" . "</INVALIDATION>\n" . "</esi:invalidate>\n");
         }
         $out->addWikiMsg('compatables-purged');
         return;
     }
     // 1 hour server-side cache max before revalidate
     $out->setSquidMaxage(Compatables::MAX_AGE);
     // Try to handle IMS GET requests from CDN efficiently
     //   $data['timestamp'] has been added, to not have to do
     //   2 HTTP queries
     $data = Compatables::getData();
     if ($out->checkLastModified($data['timestamp'])) {
         // nothing to send (cache hit)
         return;
     }
     $cached = Compatables::memcacheRead($args['cacheKey'], $data['hash']);
     if ($cached !== false) {
         $table = $cached['output'];
     } else {
         $generated = Compatables::generateCompaTable($data, $args);
         // No Parser instance exists in this special page, so the decision
         // to tidy is based on the site configuration alone.
         if ($wgUseTidy || $wgAlwaysUseTidy) {
             $generated['output'] = MWTidy::tidy($generated['output']);
         }
         Compatables::memcacheSave($args['cacheKey'], $generated);
         $table = $generated['output'];
     }
     $out->addHtml('<h1>Compatibility data</h1>');
     $out->addHtml($table);
     if ($request->getBool('foresi')) {
         // ESI includes only want the page body
         $out->setArticleBodyOnly(true);
     }
 }
 /**
  * Get data of requested article.
  * @param Title $title
  * @param boolean $noImages
  * @return array
  */
 private function getData(Title $title, $noImages)
 {
     global $wgMemc, $wgUseTidy, $wgMFTidyMobileViewSections, $wgMFMinCachedPageSize, $wgMFSpecialCaseMainPage;
     $wp = $this->makeWikiPage($title);
     if ($this->followRedirects && $wp->isRedirect()) {
         $newTitle = $wp->getRedirectTarget();
         if ($newTitle) {
             $title = $newTitle;
             $this->getResult()->addValue(null, $this->getModuleName(), array('redirected' => $title->getPrefixedText()));
             if ($title->getNamespace() < 0) {
                 $this->getResult()->addValue(null, $this->getModuleName(), array('viewable' => 'no'));
                 return array();
             }
             $wp = $this->makeWikiPage($title);
         }
     }
     $latest = $wp->getLatest();
     if ($this->file) {
         $key = wfMemcKey('mf', 'mobileview', self::CACHE_VERSION, $noImages, $latest, $this->noTransform, $this->file->getSha1(), $this->variant);
         $cacheExpiry = 3600;
     } else {
         if (!$latest) {
             // https://bugzilla.wikimedia.org/show_bug.cgi?id=53378
             // Title::exists() above doesn't seem to always catch recently deleted pages
             $this->dieUsageMsg(array('notanarticle', $title->getPrefixedText()));
         }
         $parserOptions = $this->makeParserOptions($wp);
         $parserCacheKey = ParserCache::singleton()->getKey($wp, $parserOptions);
         $key = wfMemcKey('mf', 'mobileview', self::CACHE_VERSION, $noImages, $latest, $this->noTransform, $parserCacheKey);
     }
     $data = $wgMemc->get($key);
     if ($data) {
         wfIncrStats('mobile.view.cache-hit');
         return $data;
     }
     wfIncrStats('mobile.view.cache-miss');
     if ($this->file) {
         $html = $this->getFilePage($title);
     } else {
         $parserOutput = $this->getParserOutput($wp, $parserOptions);
         $html = $parserOutput->getText();
         $cacheExpiry = $parserOutput->getCacheExpiry();
     }
     if (!$this->noTransform) {
         $mf = new MobileFormatter(MobileFormatter::wrapHTML($html), $title);
         $mf->setRemoveMedia($noImages);
         $mf->filterContent();
         $mf->setIsMainPage($this->mainPage && $wgMFSpecialCaseMainPage);
         $html = $mf->getText();
     }
     if ($this->mainPage || $this->file) {
         $data = array('sections' => array(), 'text' => array($html), 'refsections' => array());
     } else {
         $data = array();
         $data['sections'] = $parserOutput->getSections();
         $sectionCount = count($data['sections']);
         for ($i = 0; $i < $sectionCount; $i++) {
             $data['sections'][$i]['line'] = $title->getPageLanguage()->convert($data['sections'][$i]['line']);
         }
         $chunks = preg_split('/<h(?=[1-6]\\b)/i', $html);
         if (count($chunks) != count($data['sections']) + 1) {
             wfDebugLog('mobile', __METHOD__ . "(): mismatching number of " . "sections from parser and split on page {$title->getPrefixedText()}, oldid={$latest}");
             // We can't be sure about anything here, return all page HTML as one big section
             $chunks = array($html);
             $data['sections'] = array();
         }
         $data['text'] = array();
         $data['refsections'] = array();
         foreach ($chunks as $chunk) {
             if (count($data['text'])) {
                 $chunk = "<h{$chunk}";
             }
             if ($wgUseTidy && $wgMFTidyMobileViewSections && count($chunks) > 1) {
                 $chunk = MWTidy::tidy($chunk);
             }
             if (preg_match('/<ol\\b[^>]*?class="references"/', $chunk)) {
                 $data['refsections'][count($data['text'])] = true;
             }
             $data['text'][] = $chunk;
         }
         if ($this->usePageImages) {
             $image = $this->getPageImage($title);
             if ($image) {
                 $data['image'] = $image->getTitle()->getText();
             }
         }
     }
     $data['lastmodified'] = wfTimestamp(TS_ISO_8601, $wp->getTimestamp());
     // Page id
     $data['id'] = $wp->getId();
     $user = User::newFromId($wp->getUser());
     if (!$user->isAnon()) {
         $data['lastmodifiedby'] = array('name' => $wp->getUserText(), 'gender' => $user->getOption('gender'));
     } else {
         $data['lastmodifiedby'] = null;
     }
     $data['revision'] = $title->getLatestRevID();
     if (isset($parserOutput)) {
         $languages = $parserOutput->getLanguageLinks();
         $data['languagecount'] = count($languages);
         $data['displaytitle'] = $parserOutput->getDisplayTitle();
         // @fixme: Does no work for some extension properties that get added in LinksUpdate
         $data['pageprops'] = $parserOutput->getProperties();
     } else {
         $data['languagecount'] = 0;
         $data['displaytitle'] = $title->getPrefixedText();
         $data['pageprops'] = array();
     }
     if ($title->getPageLanguage()->hasVariants()) {
         $data['hasvariants'] = true;
     }
     // Don't store small pages to decrease cache size requirements
     if (strlen($html) >= $wgMFMinCachedPageSize) {
         // store for the same time as original parser output
         $wgMemc->set($key, $data, $cacheExpiry);
     }
     return $data;
 }
Exemple #12
0
 /**
  * Truncate a string of HTML so that roughly $length characters of
  * displayed text remain, appending an ellipsis and closing every tag
  * that is still open at the cut point.
  *
  * The input is passed through MWTidy::tidy() first so that tags are
  * balanced. Markup inside <...> does not count towards the displayed
  * length and an entity (&...;) counts as one character. Truncation only
  * happens when the displayed text exceeds $length by more than the length
  * of the ellipsis; otherwise the whole (tidied) text is returned.
  *
  * @param string $text HTML to truncate
  * @param int $length Maximum displayed length; <= 0 returns only the ellipsis
  * @param string $ellipsis String to append; the default '...' is replaced
  *   by the localized 'ellipsis' message
  * @return string
  */
 function truncateHtml($text, $length, $ellipsis = '...')
 {
     # Use the localized ellipsis character
     if ($ellipsis == '...') {
         $ellipsis = wfMsgExt('ellipsis', array('escapenoentities', 'language' => $this));
     }
     # Check if there is no need to truncate
     if ($length <= 0) {
         // no text shown, nothing to format
         return $ellipsis;
     } elseif (strlen($text) <= $length) {
         // string short enough even *with* HTML
         return $text;
     }
     // fix tags
     $text = MWTidy::tidy($text);
     // innerHTML length so far
     $displayLen = 0;
     // checking if ellipses will make string longer/equal?
     $testingEllipsis = false;
     // 0-open, 1-close
     $tagType = 0;
     // 1-tag start, 2-tag name, 0-neither
     $bracketState = 0;
     // 0-not entity, 1-entity
     $entityState = 0;
     $tag = $ret = '';
     // open tag stack
     $openTags = array();
     $textLen = strlen($text);
     for ($pos = 0; $pos < $textLen; ++$pos) {
         $ch = $text[$pos];
         $lastCh = $pos ? $text[$pos - 1] : '';
         // add to result string
         $ret .= $ch;
         if ($ch == '<') {
             // for bad HTML: flush any tag that was never terminated
             $this->truncate_endBracket($tag, $tagType, $lastCh, $openTags);
             // for bad HTML
             $entityState = 0;
             // tag started (checking for slash)
             $bracketState = 1;
         } elseif ($ch == '>') {
             $this->truncate_endBracket($tag, $tagType, $lastCh, $openTags);
             // for bad HTML
             $entityState = 0;
             // out of brackets
             $bracketState = 0;
         } elseif ($bracketState == 1) {
             if ($ch == '/') {
                 // close tag (e.g. "</span>")
                 $tagType = 1;
             } else {
                 // open tag (e.g. "<span>")
                 $tagType = 0;
                 $tag .= $ch;
             }
             // building tag name
             $bracketState = 2;
         } elseif ($bracketState == 2) {
             if ($ch != ' ') {
                 $tag .= $ch;
             } else {
                 // Name found (e.g. "<a href=..."), add on tag attributes...
                 $pos += $this->truncate_skip($ret, $text, "<>", $pos + 1);
             }
         } elseif ($bracketState == 0) {
             if ($entityState) {
                 if ($ch == ';') {
                     $entityState = 0;
                     // entity is one displayed char
                     $displayLen++;
                 }
             } else {
                 if ($ch == '&') {
                     // entity found, (e.g. "&#160;")
                     $entityState = 1;
                 } else {
                     // this char is displayed
                     $displayLen++;
                     // Add on the other display text after this...
                     $skipped = $this->truncate_skip($ret, $text, "<>&", $pos + 1, $length - $displayLen);
                     $displayLen += $skipped;
                     $pos += $skipped;
                 }
             }
         }
         # Consider truncation once the display length has reached the maximum.
         # Double-check that we're not in the middle of a bracket/entity...
         if ($displayLen >= $length && $bracketState == 0 && $entityState == 0) {
             if (!$testingEllipsis) {
                 $testingEllipsis = true;
                 # Save where we are; we will truncate here unless
                 # the ellipsis actually makes the string longer.
                 // save state
                 $pOpenTags = $openTags;
                 $pRet = $ret;
             } elseif ($displayLen > $length + strlen($ellipsis)) {
                 # Ellipsis won't make string longer/equal, the truncation point was OK.
                 // reload state
                 $openTags = $pOpenTags;
                 // reload state, multi-byte char fix
                 $ret = $this->removeBadCharLast($pRet);
                 // add ellipsis
                 $ret .= $ellipsis;
                 break;
             }
         }
     }
     if ($displayLen == 0) {
         // no text shown, nothing to format
         return '';
     }
     // Close the last tag if left unclosed by bad HTML. The argument order
     // (tag, tag type, last character, open-tag stack) must match the calls
     // made inside the loop above.
     $this->truncate_endBracket($tag, $tagType, $text[$textLen - 1], $openTags);
     while (count($openTags) > 0) {
         // close open tags
         $ret .= '</' . array_pop($openTags) . '>';
     }
     return $ret;
 }
 /**
  * Build the HTML for a CompaTable parser tag.
  *
  * Looks the table up in memcached (keyed on format, topic and feature),
  * regenerating and optionally tidying it on a miss, and then either
  * returns it directly or wraps it in ESI tags.
  *
  * @param string $input Tag content; currently ignored
  * @param array  $args Tag attributes ('topic', 'feature', 'format')
  * @param Parser $parser
  * @return string HTML, or an HTML comment when no data is available
  */
 public static function renderCompaTables($input, array $args, Parser $parser)
 {
     global $wgCompatablesUseESI, $wgUseTidy, $wgAlwaysUseTidy;
     // Default every recognised attribute to an empty string.
     foreach (array('topic', 'feature', 'format') as $attribute) {
         $args[$attribute] = isset($args[$attribute]) ? $args[$attribute] : '';
     }
     $args['cacheKey'] = wfMemcKey('compatables', $args['format'], $args['topic'], $args['feature']);

     $data = self::getData();
     if ($data === null) {
         wfDebugLog('CompaTables', 'Could not generate table, data is either empty or had problems.');
         return '<!-- Compatables: Could not generate table, data might be empty or had problems with caching -->';
     }

     $cached = self::memcacheRead($args['cacheKey'], $data['hash']);
     if ($cached === false) {
         // Cache miss: build the table, tidy it when configured, and store it.
         $generated = self::generateCompaTable($data, $args);
         $shouldTidy = ($wgUseTidy && $parser->getOptions()->getTidy()) || $wgAlwaysUseTidy;
         if ($shouldTidy) {
             $generated['output'] = MWTidy::tidy($generated['output']);
         }
         self::memcacheSave($args['cacheKey'], $generated);
         $table = $generated['output'];
     } else {
         $table = $cached['output'];
     }

     // The tag body ($input, i.e. <compatibility>...</compatibility>) is
     // deliberately not rendered because it is useless for now.

     if ($wgCompatablesUseESI !== true) {
         // Without ESI the table is baked into the page, so shorten the
         // parser cache lifetime (worse cache hit rate).
         $parser->getOutput()->updateCacheExpiry(6 * 3600);
         return (string) $table;
     }

     $urlArgs = array(
         'topic' => $args['topic'],
         'feature' => $args['feature'],
         'format' => $args['format'],
         'foresi' => 1,
     );
     // @TODO: this breaks in ESI level if $url ends up http for https views
     $esiUrl = SpecialPage::getTitleFor('Compatables')->getFullUrl($urlArgs);
     return (string) self::applyEsiTags($table, wfExpandUrl($esiUrl, PROTO_INTERNAL));
 }
 /**
  * Run one parser test case: set up globals from the test options, feed
  * the input through the parser operation the options select, and compare
  * the produced text with the expected result.
  *
  * @group medium
  * @dataProvider parserTestProvider
  * @param string $desc
  * @param string $input
  * @param string $result
  * @param array $opts
  * @param array $config
  */
 public function testParserTest($desc, $input, $result, $opts, $config)
 {
     $filter = $this->regex;
     if ($filter != '' && !preg_match("/{$filter}/", $desc)) {
         // Filtered out by the user. Assert something trivial instead of
         // skipping so the output is not flooded with
         // "test made no assertions".
         $this->assertTrue(true);
         return;
     }
     if (!$this->isWikitextNS(NS_MAIN)) {
         // parser tests frequently assume that the main namespace contains wikitext.
         // @todo When setting up pages, force the content model. Only skip if
         //        $wgtContentModelUseDB is false.
         $this->markTestSkipped("Main namespace does not support wikitext," . "skipping parser test: {$desc}");
     }
     wfDebug("Running parser test: {$desc}\n");

     $flags = $this->parseOptions($opts);
     $context = $this->setupGlobals($flags, $config);
     $user = $context->getUser();
     $parserOptions = ParserOptions::newFromContext($context);
     $titleText = isset($flags['title']) ? $flags['title'] : 'Parser test';
     $local = isset($flags['local']);
     $parser = $this->getParser(isset($flags['preprocessor']) ? $flags['preprocessor'] : null);
     $title = Title::newFromText($titleText);

     // Tests that need math are skipped unless texvc is configured and executable.
     if (isset($flags['math']) || isset($flags['texvc'])) {
         global $wgTexvc;
         if (!isset($wgTexvc)) {
             $this->markTestSkipped("SKIPPED: \$wgTexvc is not set");
         } elseif (!is_executable($wgTexvc)) {
             $this->markTestSkipped("SKIPPED: texvc binary does not exist" . " or is not executable.\n" . "Current configuration is:\n\$wgTexvc = '{$wgTexvc}'");
         }
     }
     if (isset($flags['djvu']) && !$this->djVuSupport->isEnabled()) {
         $this->markTestSkipped("SKIPPED: djvu binaries do not exist or are not executable.\n");
     }

     // Pick the parser operation requested by the test options.
     if (isset($flags['pst'])) {
         $out = $parser->preSaveTransform($input, $title, $user, $parserOptions);
     } elseif (isset($flags['msg'])) {
         $out = $parser->transformMsg($input, $parserOptions, $title);
     } elseif (isset($flags['section'])) {
         $out = $parser->getSection($input, $flags['section']);
     } elseif (isset($flags['replace'])) {
         $out = $parser->replaceSection($input, $flags['replace'][0], $flags['replace'][1]);
     } elseif (isset($flags['comment'])) {
         $out = Linker::formatComment($input, $title, $local);
     } elseif (isset($flags['preload'])) {
         $out = $parser->getPreloadText($input, $title, $parserOptions);
     } else {
         // Default: a full parse, followed by optional post-processing.
         $output = $parser->parse($input, $title, $parserOptions, true, true, 1337);
         $output->setTOCEnabled(!isset($flags['notoc']));
         $out = $output->getText();
         if (isset($flags['tidy'])) {
             if ($this->tidySupport->isEnabled()) {
                 // Tidy the HTML, then drop trailing whitespace.
                 $out = preg_replace('/\\s+$/', '', MWTidy::tidy($out));
             } else {
                 $this->markTestSkipped("SKIPPED: tidy extension is not installed.\n");
             }
         }
         if (isset($flags['showtitle'])) {
             // Prefix the output with the title text from the parser output when it has one.
             if ($output->getTitleText()) {
                 $title = $output->getTitleText();
             }
             $out = "{$title}\n{$out}";
         }
         if (isset($flags['ill'])) {
             $out = implode(' ', $output->getLanguageLinks());
         } elseif (isset($flags['cat'])) {
             $outputPage = $context->getOutput();
             $outputPage->addCategoryLinks($output->getCategories());
             $categoryLinks = $outputPage->getCategoryLinks();
             $out = isset($categoryLinks['normal']) ? implode(' ', $categoryLinks['normal']) : '';
         }
         $parser->mPreprocessor = null;
     }

     $this->teardownGlobals();
     $this->assertEquals($result, $out, $desc);
 }