Пример #1
0
 /**
  * Verify that MWTidy::tidy() yields the expected markup once paragraph
  * tags, surrounding whitespace and carriage returns have been removed.
  *
  * @dataProvider provideTestWrapping
  */
 public function testTidyWrapping($expected, $text, $msg = '')
 {
     $tidied = MWTidy::tidy($text);
     // Where Tidy chooses to put its <p> tags is irrelevant for this test
     $withoutParagraphs = preg_replace('#</?p>#', '', $tidied);
     $trimmed = trim($withoutParagraphs);
     // Drop carriage returns so Windows line endings compare equal
     $normalized = str_replace("\r", '', $trimmed);
     $this->assertEquals($expected, $normalized, $msg);
 }
Пример #2
0
 /**
  * Show the page: read the request, expand the submitted wikitext with the
  * preprocessor, then print the intro, the form and (if there was input)
  * the expansion results.
  *
  * @param $subpage Not referenced by this method
  */
 function execute($subpage)
 {
     global $wgRequest, $wgParser, $wgOut;
     $this->setHeaders();
     // XML output is only offered when the parser exposes preprocessToDom()
     $this->isNewParser = is_callable(array($wgParser, 'preprocessToDom'));
     $contextTitleText = $wgRequest->getText('contexttitle');
     $contextTitle = Title::newFromText($contextTitleText);
     $ownTitle = $this->getTitle();
     if (!$contextTitle) {
         // No usable context title was given: fall back to this page's own title
         $contextTitle = $ownTitle;
     }
     $wikitext = $wgRequest->getText('input');
     if ($this->isNewParser) {
         $this->generateXML = $wgRequest->getBool('generate_xml');
     } else {
         $this->generateXML = false;
     }
     if (!strlen($wikitext)) {
         // Nothing to expand: only pick up the checkbox states
         $this->removeComments = $wgRequest->getBool('removecomments', true);
         $this->removeNowiki = $wgRequest->getBool('removenowiki', false);
         $expanded = false;
     } else {
         $this->removeComments = $wgRequest->getBool('removecomments', false);
         $this->removeNowiki = $wgRequest->getBool('removenowiki', false);
         $parserOptions = new ParserOptions();
         $parserOptions->setRemoveComments($this->removeComments);
         $parserOptions->setTidy(true);
         $parserOptions->setMaxIncludeSize(self::MAX_INCLUDE_SIZE);
         if ($this->generateXML) {
             $wgParser->startExternalParse($contextTitle, $parserOptions, OT_PREPROCESS);
             $domTree = $wgParser->preprocessToDom($wikitext);
             // Use saveXML() when the tree offers it, otherwise its string form
             $xml = is_callable(array($domTree, 'saveXML')) ? $domTree->saveXML() : $domTree->__toString();
         }
         $expanded = $wgParser->preprocess($wikitext, $contextTitle, $parserOptions);
     }
     $wgOut->addWikiText(wfMsg('expand_templates_intro'));
     $wgOut->addHTML($this->makeForm($contextTitleText, $wikitext));
     if ($expanded === false) {
         return;
     }
     global $wgUseTidy, $wgAlwaysUseTidy;
     if ($this->generateXML) {
         $wgOut->addHTML($this->makeOutput($xml, 'expand_templates_xml_output'));
     }
     $resultHtml = $this->makeOutput($expanded);
     if ($this->removeNowiki) {
         // Strip the (already HTML-escaped) nowiki tags from the result box
         $nowikiPatterns = array('_&lt;nowiki&gt;_', '_&lt;/nowiki&gt;_', '_&lt;nowiki */&gt;_');
         $resultHtml = preg_replace($nowikiPatterns, '', $resultHtml);
     }
     if (($wgUseTidy && $parserOptions->getTidy()) || $wgAlwaysUseTidy) {
         $resultHtml = MWTidy::tidy($resultHtml);
     }
     $wgOut->addHTML($resultHtml);
     $this->showHtmlPreview($contextTitle, $expanded, $wgOut);
 }
Пример #3
0
 /**
  * Asserts that the given string is a valid HTML document.
  *
  * @since 1.23
  *
  * @note Will mark the test as skipped if the "tidy" module is not installed.
  * @note This ignores $wgUseTidy, so we can check for valid HTML even (and especially)
  *        when automatic tidying is disabled.
  *
  * @param string $html A complete HTML document
  */
 protected function assertValidHtmlDocument($html)
 {
     // Validation is only attempted with the tidy PHP extension. With
     // wgTidyInternal disabled MWTidy would fall back to the command line tool,
     // and then a failed validation could not be told apart from the tool simply
     // not being installed - which would make every HTML assertion fail on
     // systems without tidy.
     $canValidate = $GLOBALS['wgTidyInternal'] && MWTidy::isEnabled();
     if (!$canValidate) {
         $this->markTestSkipped('Tidy extension not installed');
     }
     $report = '';
     MWTidy::checkErrors($html, $report);
     $reportLines = preg_split('/[\\r\\n]+/', $report);
     // Discard Tidy warnings that are of no use here, e.g. complaints about
     // missing attributes that have been deprecated since HTML4, and blank lines.
     $ignorable = '/^(.*Warning: (trimming empty|.* lacks ".*?" attribute).*|\\s*)$/m';
     $relevant = preg_grep($ignorable, $reportLines, PREG_GREP_INVERT);
     $this->assertEmpty($relevant, implode("\n", $relevant));
 }
Пример #4
0
/**
 * Replace the output with an error if the HTML is not valid
 *
 * When MWTidy::checkErrors() reports the document as valid it is returned
 * untouched. Otherwise a standalone error page is returned containing the
 * list of reported errors, the raw error text and a numbered listing of the
 * document in which every line named by an error is highlighted.
 *
 * @param $s string The HTML document to validate
 *
 * @return string Either $s itself or the HTML of the error page
 */
function wfHtmlValidationHandler($s)
{
    $errors = '';
    if (MWTidy::checkErrors($s, $errors)) {
        return $s;
    }
    header('Cache-Control: no-cache');
    $out = <<<EOT
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" dir="ltr">
<head>
<title>HTML validation error</title>
<style>
.highlight { background-color: #ffc }
li { white-space: pre }
</style>
</head>
<body>
<h1>HTML validation error</h1>
<ul>
EOT;
    // Collect the line numbers mentioned by the error report and link each
    // error to the matching entry of the source listing below.
    $badLines = array();
    foreach (explode("\n", $errors) as $error) {
        if (preg_match('/^line (\\d+)/', $error, $m)) {
            $lineNum = intval($m[1]);
            $badLines[$lineNum] = true;
            $out .= "<li><a href=\"#line-{$lineNum}\">" . htmlspecialchars($error) . "</a></li>\n";
        }
    }
    $out .= '</ul>';
    $out .= '<pre>' . htmlspecialchars($errors) . '</pre>';
    $out .= "<ol>\n";
    // Split with explode() rather than strtok(): strtok() silently skips empty
    // tokens, so blank lines would vanish from the listing and every following
    // line number would no longer match the numbers used in the error report.
    $lines = explode("\n", $s);
    if (end($lines) === '') {
        // A trailing newline (or an empty document) does not start another line
        array_pop($lines);
    }
    foreach ($lines as $index => $line) {
        $i = $index + 1;
        if (isset($badLines[$i])) {
            $out .= "<li class=\"highlight\" id=\"line-{$i}\">";
        } else {
            $out .= '<li>';
        }
        $out .= htmlspecialchars($line) . "</li>\n";
    }
    $out .= '</ol></body></html>';
    return $out;
}
Пример #5
0
 /**
  * Set up the global variables for a consistent environment for each test.
  * Ideally this should replace the global configuration entirely.
  *
  * The previous value of every overwritten global is stored in
  * $this->savedGlobals. Several singletons and caches are reset afterwards
  * so that they pick up the new globals.
  *
  * @param array $opts Per-test options; read through self::getOptionValue() and isset()
  * @param string $config Newline separated "name=value" lines; each value is
  *   evaluated as a PHP expression and stored in the settings under that name
  * @return RequestContext The context whose output, user and request were
  *   installed as $wgOut, $wgUser and $wgRequest
  */
 protected function setupGlobals($opts = array(), $config = '')
 {
     global $wgFileBackends;
     # Find out values for some special options.
     $lang = self::getOptionValue('language', $opts, 'en');
     $variant = self::getOptionValue('variant', $opts, false);
     $maxtoclevel = self::getOptionValue('wgMaxTocLevel', $opts, 999);
     $linkHolderBatchSize = self::getOptionValue('wgLinkHolderBatchSize', $opts, 1000);
     $uploadDir = $this->getUploadDir();
     if ($this->getCliArg('use-filebackend')) {
         # A configured backend was requested on the command line; it is
         # built once and then reused through self::$backendToUse.
         if (self::$backendToUse) {
             $backend = self::$backendToUse;
         } else {
             $name = $this->getCliArg('use-filebackend');
             $useConfig = array();
             # If several entries share the name, the last one wins.
             # NOTE(review): when no entry matches, $useConfig has no 'class'
             # key and the instantiation below cannot work - confirm callers
             # always pass a configured backend name.
             foreach ($wgFileBackends as $conf) {
                 if ($conf['name'] == $name) {
                     $useConfig = $conf;
                 }
             }
             $useConfig['name'] = 'local-backend';
             // swap name
             unset($useConfig['lockManager']);
             unset($useConfig['fileJournal']);
             $class = $useConfig['class'];
             self::$backendToUse = new $class($useConfig);
             $backend = self::$backendToUse;
         }
     } else {
         # Replace with a mock. We do not care about generating real
         # files on the filesystem, just need to expose the file
         # informations.
         $backend = new MockFileBackend(array('name' => 'local-backend', 'wikiId' => wfWikiId()));
     }
     # Globals to install for the test; keys are global variable names.
     $settings = array('wgLocalFileRepo' => array('class' => 'LocalRepo', 'name' => 'local', 'url' => 'http://example.com/images', 'hashLevels' => 2, 'transformVia404' => false, 'backend' => $backend), 'wgEnableUploads' => self::getOptionValue('wgEnableUploads', $opts, true), 'wgLanguageCode' => $lang, 'wgDBprefix' => $this->db->getType() != 'oracle' ? 'unittest_' : 'ut_', 'wgRawHtml' => self::getOptionValue('wgRawHtml', $opts, false), 'wgNamespacesWithSubpages' => array(NS_MAIN => isset($opts['subpage'])), 'wgAllowExternalImages' => self::getOptionValue('wgAllowExternalImages', $opts, true), 'wgThumbLimits' => array(self::getOptionValue('thumbsize', $opts, 180)), 'wgMaxTocLevel' => $maxtoclevel, 'wgUseTeX' => isset($opts['math']) || isset($opts['texvc']), 'wgMathDirectory' => $uploadDir . '/math', 'wgDefaultLanguageVariant' => $variant, 'wgLinkHolderBatchSize' => $linkHolderBatchSize, 'wgUseTidy' => isset($opts['tidy']));
     if ($config) {
         $configLines = explode("\n", $config);
         foreach ($configLines as $line) {
             list($var, $value) = explode('=', $line, 2);
             $settings[$var] = eval("return {$value};");
             // NOTE(review): the value is executed as PHP code via eval(), so
             // $config must only ever come from trusted test definitions. A line
             // without '=' leaves $value unset - confirm the input is well formed.
         }
     }
     $this->savedGlobals = array();
     /** @since 1.20 */
     Hooks::run('ParserTestGlobals', array(&$settings));
     $langObj = Language::factory($lang);
     $settings['wgContLang'] = $langObj;
     $settings['wgLang'] = $langObj;
     $context = new RequestContext();
     $settings['wgOut'] = $context->getOutput();
     $settings['wgUser'] = $context->getUser();
     $settings['wgRequest'] = $context->getRequest();
     // We (re)set $wgThumbLimits to a single-element array above.
     $context->getUser()->setOption('thumbsize', 0);
     # Install the settings; a global that already exists is remembered first.
     foreach ($settings as $var => $val) {
         if (array_key_exists($var, $GLOBALS)) {
             $this->savedGlobals[$var] = $GLOBALS[$var];
         }
         $GLOBALS[$var] = $val;
     }
     MWTidy::destroySingleton();
     MagicWord::clearCache();
     # The entries saved into RepoGroup cache with previous globals will be wrong.
     RepoGroup::destroySingleton();
     FileBackendGroup::destroySingleton();
     # Create dummy files in storage
     $this->setupUploads();
     # Publish the articles after we have the final language set
     $this->publishTestArticles();
     MessageCache::destroyInstance();
     return $context;
 }
Пример #6
0
 /**
  * Render CompaTable HTML code
  *
  * Loads the compatibility data, takes the table from memcache or generates
  * (and stores) it, and either wraps it in ESI tags or returns it directly.
  * The tag content passed as $input is currently ignored.
  *
  * @param string $input
  * @param array  $args   Tag attributes; 'topic', 'feature' and 'format' are read
  * @param Parser $parser
  *
  * @return string HTML of the table, or an HTML comment when no data is available
  */
 public static function renderCompaTables($input, array $args, Parser $parser)
 {
     global $wgCompatablesUseESI, $wgUseTidy, $wgAlwaysUseTidy;
     // Missing (or null) attributes are treated as empty strings
     foreach (array('topic', 'feature', 'format') as $attribute) {
         if (!isset($args[$attribute])) {
             $args[$attribute] = '';
         }
     }
     $args['cacheKey'] = wfMemcKey('compatables', $args['format'], $args['topic'], $args['feature']);
     $data = self::getData();
     if ($data === null) {
         wfDebugLog('CompaTables', 'Could not generate table, data is either empty or had problems.');
         return '<!-- Compatables: Could not generate table, data might be empty or had problems with caching -->';
     }
     $html = '';
     $cached = self::memcacheRead($args['cacheKey'], $data['hash']);
     if ($cached === false) {
         // Cache miss: generate the table, tidy it if requested, and store it
         $fresh = self::generateCompaTable($data, $args);
         $tidyWanted = ($wgUseTidy && $parser->getOptions()->getTidy()) || $wgAlwaysUseTidy;
         if ($tidyWanted) {
             $fresh['output'] = MWTidy::tidy($fresh['output']);
         }
         self::memcacheSave($args['cacheKey'], $fresh);
         $table = $fresh['output'];
     } else {
         $table = $cached['output'];
     }
     if ($wgCompatablesUseESI === true) {
         $query = array('topic' => $args['topic'], 'feature' => $args['feature'], 'format' => $args['format'], 'foresi' => 1);
         // @TODO: this breaks in ESI level if $url ends up http for https views
         $pageUrl = SpecialPage::getTitleFor('Compatables')->getFullUrl($query);
         $html .= self::applyEsiTags($table, wfExpandUrl($pageUrl, PROTO_INTERNAL));
     } else {
         $html .= $table;
         // Shorter expiry for the embedding page, at the price of a worse cache hit rate
         $parser->getOutput()->updateCacheExpiry(6 * 3600);
     }
     return $html;
 }
Пример #7
0
/**
 * Replace the output with an error if the HTML is not valid
 *
 * When MWTidy::checkErrors() reports the document as valid it is returned
 * untouched. Otherwise a standalone error page is returned containing the
 * list of reported errors, the raw error text and a numbered listing of the
 * document in which every line named by an error is highlighted.
 *
 * @param $s string The HTML document to validate
 *
 * @return string Either $s itself or the HTML of the error page
 */
function wfHtmlValidationHandler($s)
{
    $errors = '';
    if (MWTidy::checkErrors($s, $errors)) {
        return $s;
    }
    header('Cache-Control: no-cache');
    $out = Html::element('h1', null, 'HTML validation error');
    $out .= Html::openElement('ul');
    // Collect the line numbers mentioned by the error report and link each
    // error to the matching entry of the source listing below.
    $badLines = array();
    foreach (explode("\n", $errors) as $error) {
        if (preg_match('/^line (\\d+)/', $error, $m)) {
            $lineNum = intval($m[1]);
            $badLines[$lineNum] = true;
            $out .= Html::rawElement('li', null, Html::element('a', array('href' => "#line-{$lineNum}"), $error)) . "\n";
        }
    }
    $out .= Html::closeElement('ul');
    $out .= Html::element('pre', null, $errors);
    $out .= Html::openElement('ol') . "\n";
    // Split with explode() rather than strtok(): strtok() silently skips empty
    // tokens, so blank lines would vanish from the listing and every following
    // line number would no longer match the numbers used in the error report.
    $lines = explode("\n", $s);
    if (end($lines) === '') {
        // A trailing newline (or an empty document) does not start another line
        array_pop($lines);
    }
    foreach ($lines as $index => $line) {
        $i = $index + 1;
        $attrs = array();
        if (isset($badLines[$i])) {
            $attrs['class'] = 'highlight';
            $attrs['id'] = "line-{$i}";
        }
        $out .= Html::element('li', $attrs, $line) . "\n";
    }
    $out .= Html::closeElement('ol');
    $style = <<<CSS
.highlight { background-color: #ffc }
li { white-space: pre }
CSS;
    // Wrap the collected body in a complete document
    $out = Html::htmlHeader(array('lang' => 'en', 'dir' => 'ltr')) . Html::rawElement('head', null, Html::element('title', null, 'HTML validation error') . Html::inlineStyle($style)) . Html::rawElement('body', null, $out) . Html::closeElement('html');
    return $out;
}
Пример #8
0
 /**
  * Convert wikitext to HTML
  * Do not call this function recursively.
  *
  * The revision related members of the parser are saved on entry and put
  * back before returning, so a $revid passed here only applies to this call.
  *
  * @param string $text text we want to parse
  * @param $title Title object
  * @param $options ParserOptions
  * @param $linestart boolean passed on to doBlockLevels()
  * @param $clearState boolean passed on to startParse(); when true the strip
  *   marker prefix is also removed from the input
  * @param int $revid number to pass in {{REVISIONID}}
  * @return ParserOutput a ParserOutput
  */
 public function parse($text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null)
 {
     /**
      * First pass--just handle <nowiki> sections, pass the rest off
      * to internalParse() which does all the real work.
      */
     global $wgUseTidy, $wgAlwaysUseTidy, $wgShowHostnames;
     // Profile both under the method name and under a caller specific name
     $fname = __METHOD__ . '-' . wfGetCaller();
     wfProfileIn(__METHOD__);
     wfProfileIn($fname);
     $this->startParse($title, $options, self::OT_HTML, $clearState);
     $this->mInputSize = strlen($text);
     if ($this->mOptions->getEnableLimitReport()) {
         $this->mOutput->resetParseStartTime();
     }
     # Remove the strip marker tag prefix from the input, if present.
     if ($clearState) {
         $text = str_replace($this->mUniqPrefix, '', $text);
     }
     // Remember the current revision state; it is restored at the end
     $oldRevisionId = $this->mRevisionId;
     $oldRevisionObject = $this->mRevisionObject;
     $oldRevisionTimestamp = $this->mRevisionTimestamp;
     $oldRevisionUser = $this->mRevisionUser;
     $oldRevisionSize = $this->mRevisionSize;
     if ($revid !== null) {
         // An explicit revision id was given: clear the data tied to the old one
         $this->mRevisionId = $revid;
         $this->mRevisionObject = null;
         $this->mRevisionTimestamp = null;
         $this->mRevisionUser = null;
         $this->mRevisionSize = null;
     }
     wfRunHooks('ParserBeforeStrip', array(&$this, &$text, &$this->mStripState));
     # No more strip!
     wfRunHooks('ParserAfterStrip', array(&$this, &$text, &$this->mStripState));
     $text = $this->internalParse($text);
     wfRunHooks('ParserAfterParse', array(&$this, &$text, &$this->mStripState));
     $text = $this->mStripState->unstripGeneral($text);
     # Clean up special characters, only run once, next-to-last before doBlockLevels
     $fixtags = array('/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;', '/(\\302\\253) /' => '\\1&#160;', '/&#160;(!\\s*important)/' => ' \\1');
     $text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);
     $text = $this->doBlockLevels($text, $linestart);
     $this->replaceLinkHolders($text);
     /**
      * The input doesn't get language converted if
      * a) It's disabled
      * b) Content isn't converted
      * c) It's a conversion table
      * d) it is an interface message (which is in the user language)
      */
     if (!($options->getDisableContentConversion() || isset($this->mDoubleUnderscores['nocontentconvert']))) {
         if (!$this->mOptions->getInterfaceMessage()) {
             # The position of the convert() call should not be changed. it
             # assumes that the links are all replaced and the only thing left
             # is the <nowiki> mark.
             $text = $this->getConverterLanguage()->convert($text);
         }
     }
     /**
      * A converted title will be provided in the output object if title and
      * content conversion are enabled, the article text does not contain
      * a conversion-suppressing double-underscore tag, and no
      * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
      * automatic link conversion.
      */
     if (!($options->getDisableTitleConversion() || isset($this->mDoubleUnderscores['nocontentconvert']) || isset($this->mDoubleUnderscores['notitleconvert']) || $this->mOutput->getDisplayTitle() !== false)) {
         // A title rule from the converter wins over converting the title itself
         $convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
         if ($convruletitle) {
             $this->mOutput->setTitleText($convruletitle);
         } else {
             $titleText = $this->getConverterLanguage()->convertTitle($title);
             $this->mOutput->setTitleText($titleText);
         }
     }
     $text = $this->mStripState->unstripNoWiki($text);
     wfRunHooks('ParserBeforeTidy', array(&$this, &$text));
     $text = $this->replaceTransparentTags($text);
     $text = $this->mStripState->unstripGeneral($text);
     $text = Sanitizer::normalizeCharReferences($text);
     // Tidy runs when it is enabled globally and requested by the options,
     // or unconditionally when $wgAlwaysUseTidy is set
     if ($wgUseTidy && $this->mOptions->getTidy() || $wgAlwaysUseTidy) {
         $text = MWTidy::tidy($text);
     } else {
         # attempt to sanitize at least some nesting problems
         # (bug #2702 and quite a few others)
         $tidyregs = array('/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9', '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>', '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9', '/<([bi])><\\/\\1>/' => '');
         $text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
     }
     if ($this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit()) {
         $this->limitationWarn('expensive-parserfunction', $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit());
     }
     wfRunHooks('ParserAfterTidy', array(&$this, &$text));
     # Information on include size limits, for the benefit of users who try to skirt them
     if ($this->mOptions->getEnableLimitReport()) {
         $max = $this->mOptions->getMaxIncludeSize();
         // CPU time is only reported when a value is available
         $cpuTime = $this->mOutput->getTimeSinceStart('cpu');
         if ($cpuTime !== null) {
             $this->mOutput->setLimitReportData('limitreport-cputime', sprintf("%.3f", $cpuTime));
         }
         $wallTime = $this->mOutput->getTimeSinceStart('wall');
         $this->mOutput->setLimitReportData('limitreport-walltime', sprintf("%.3f", $wallTime));
         // The remaining entries are array( current value, configured limit ) pairs
         $this->mOutput->setLimitReportData('limitreport-ppvisitednodes', array($this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount()));
         $this->mOutput->setLimitReportData('limitreport-ppgeneratednodes', array($this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount()));
         $this->mOutput->setLimitReportData('limitreport-postexpandincludesize', array($this->mIncludeSizes['post-expand'], $max));
         $this->mOutput->setLimitReportData('limitreport-templateargumentsize', array($this->mIncludeSizes['arg'], $max));
         $this->mOutput->setLimitReportData('limitreport-expansiondepth', array($this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth()));
         $this->mOutput->setLimitReportData('limitreport-expensivefunctioncount', array($this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit()));
         wfRunHooks('ParserLimitReportPrepare', array($this, $this->mOutput));
         $limitReport = "NewPP limit report\n";
         if ($wgShowHostnames) {
             $limitReport .= 'Parsed by ' . wfHostname() . "\n";
         }
         foreach ($this->mOutput->getLimitReportData() as $key => $value) {
             // A hook may format the entry itself; the default formatting below
             // only runs when the hook call returns a true value
             if (wfRunHooks('ParserLimitReportFormat', array($key, &$value, &$limitReport, false, false))) {
                 $keyMsg = wfMessage($key)->inLanguage('en')->useDatabase(false);
                 $valueMsg = wfMessage(array("{$key}-value-text", "{$key}-value"))->inLanguage('en')->useDatabase(false);
                 if (!$valueMsg->exists()) {
                     // No dedicated value message: print the raw first parameter
                     $valueMsg = new RawMessage('$1');
                 }
                 if (!$keyMsg->isDisabled() && !$valueMsg->isDisabled()) {
                     $valueMsg->params($value);
                     $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
                 }
             }
         }
         // Since we're not really outputting HTML, decode the entities and
         // then re-encode the things that need hiding inside HTML comments.
         $limitReport = htmlspecialchars_decode($limitReport);
         wfRunHooks('ParserLimitReport', array($this, &$limitReport));
         // Sanitize for comment. Note '‐' in the replacement is U+2010,
         // which looks much like the problematic '-'.
         $limitReport = str_replace(array('-', '&'), array('‐', '&amp;'), $limitReport);
         $text .= "\n<!-- \n{$limitReport}-->\n";
         // Log pages that generated more than a tenth of the allowed node count
         if ($this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10) {
             wfDebugLog('generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey());
         }
     }
     $this->mOutput->setText($text);
     // Put back the revision state saved at the start of the call
     $this->mRevisionId = $oldRevisionId;
     $this->mRevisionObject = $oldRevisionObject;
     $this->mRevisionTimestamp = $oldRevisionTimestamp;
     $this->mRevisionUser = $oldRevisionUser;
     $this->mRevisionSize = $oldRevisionSize;
     $this->mInputSize = false;
     wfProfileOut($fname);
     wfProfileOut(__METHOD__);
     return $this->mOutput;
 }
Пример #9
0
 /**
  * Interface with html tidy
  *
  * Logs a deprecation notice for this method and hands the text over to
  * MWTidy::tidy().
  *
  * @deprecated Use MWTidy::tidy()
  * @param $text The markup to pass to MWTidy::tidy()
  * @return The value returned by MWTidy::tidy()
  */
 public static function tidy($text)
 {
     wfDeprecated(__METHOD__);
     $tidied = MWTidy::tidy($text);
     return $tidied;
 }
Пример #10
0
 /**
  * Serve the compatibility table as a page of its own, or handle a purge
  * request (action=purge).
  *
  * Purge: requires the 'purgecompatables' right, which is checked before
  * anything is removed. The memcache entries are dropped and a PURGE request
  * is sent to $wgCompatablesJsonFileUrl; when ESI is in use an in-band ESI
  * invalidation is added to the output.
  *
  * View: the table is read from memcache or generated (and stored), then
  * added to the output. With the 'foresi' request parameter only the article
  * body is sent.
  *
  * @param $par Not referenced by this method
  *
  * @throws PermissionsError On a purge request by a user lacking the
  *   'purgecompatables' right, or when ESI is not enabled
  */
 public function execute($par)
 {
     global $wgCompatablesUseESI, $wgUseTidy, $wgAlwaysUseTidy, $wgCompatablesJsonFileUrl;
     $this->setHeaders();
     $args = array();
     $args['topic'] = $this->getRequest()->getVal('topic');
     $args['feature'] = $this->getRequest()->getVal('feature');
     $args['format'] = $this->getRequest()->getVal('format');
     $args['cacheKey'] = wfMemcKey('compatables', $args['format'], $args['topic'], $args['feature']);
     // Handle purge requests from admins...
     // @TODO: Varnish, which only supports a few bits of ESI, can not handle this
     // (https://www.varnish-cache.org/docs/3.0/tutorial/esi.html)
     // (https://www.varnish-cache.org/trac/wiki/Future_ESI)
     if ($this->getRequest()->getVal('action') === 'purge') {
         // Check the right first: previously the caches were already emptied
         // before an unprivileged request was rejected.
         if (!$this->getUser()->isAllowed('purgecompatables')) {
             throw new PermissionsError('purgecompatables');
         }
         // See https://github.com/webplatform/mediawiki/issues/16 #TODO
         Compatables::memcacheRemove(wfMemcKey('webplatformdocs', 'compatables', 'data', 'full'));
         Compatables::memcacheRemove($args['cacheKey']);
         try {
             $req = MWHttpRequest::factory($wgCompatablesJsonFileUrl, array('method' => 'PURGE'));
             $req->execute();
             wfDebugLog('CompaTables', 'cURL PURGE done to "' . $wgCompatablesJsonFileUrl);
         } catch (Exception $e) {
             // Best effort only: a failed upstream purge is logged and ignored
             wfDebugLog('CompaTables', 'Had problems with cURL PURGE to "' . $wgCompatablesJsonFileUrl . '", message ' . $e->getMessage());
         }
         if (!$wgCompatablesUseESI) {
             // Kept as before: without ESI the request ends in a permission error
             throw new PermissionsError('purgecompatables');
         }
         // Get the ESI URL prefix to purge
         $urlPrefix = SpecialPage::getTitleFor('Compatables')->getFullUrl();
         $urlPrefix = wfExpandUrl($urlPrefix, PROTO_INTERNAL);
         // Include as an in-band ESI invalidation request
         $this->getOutput()->addHtml("\n<esi:invalidate>\n" . "<?xml version=\"1.0\"?>\n" . "<!DOCTYPE INVALIDATION SYSTEM \"internal:///WCSinvalidation.dtd\">\n" . "<INVALIDATION VERSION=\"WCS-1.1\">\n" . "<OBJECT>\n" . Xml::element('ADVANCEDSELECTOR', array('URIPREFIX' => $urlPrefix)) . "\n<ACTION REMOVALTTL=\"0\"/>\n" . "</OBJECT>\n" . "</INVALIDATION>\n" . "</esi:invalidate>\n");
         $this->getOutput()->addWikiMsg('compatables-purged');
         return;
     }
     $data = Compatables::getData();
     if ($data === null) {
         // NOTE(review): assumes getData() returns null when the data could not
         // be loaded - confirm against Compatables::getData(). Without this
         // guard the array accesses below would operate on null.
         wfDebugLog('CompaTables', 'Could not generate table, data is either empty or had problems.');
         $this->getOutput()->addHtml('<!-- Compatables: Could not generate table, data might be empty or had problems with caching -->');
         return;
     }
     // 1 hour server-side cache max before revalidate
     $this->getOutput()->setSquidMaxage(Compatables::MAX_AGE);
     // Try to handle IMS GET requests from CDN efficiently
     //   $data['timestamp'] has been added, to not have to do
     //   2 HTTP queries
     if ($this->getOutput()->checkLastModified($data['timestamp'])) {
         return;
         // nothing to send (cache hit)
     }
     $cached = Compatables::memcacheRead($args['cacheKey'], $data['hash']);
     if ($cached !== false) {
         $table = $cached['output'];
     } else {
         $generated = Compatables::generateCompaTable($data, $args);
         // There is no parser (and so no parser options) in this scope; the
         // previous reference to an undefined $parser failed on every cache
         // miss. Only the site-wide tidy switches are consulted here.
         if ($wgUseTidy || $wgAlwaysUseTidy) {
             $generated['output'] = MWTidy::tidy($generated['output']);
         }
         Compatables::memcacheSave($args['cacheKey'], $generated);
         $table = $generated['output'];
     }
     $this->getOutput()->addHtml('<h1>Compatibility data</h1>');
     $this->getOutput()->addHtml($table);
     if ($this->getRequest()->getBool('foresi')) {
         // Requested as an ESI fragment: send the body without the skin
         $this->getOutput()->setArticleBodyOnly(true);
     }
 }
 /**
  * Get data of requested article.
  *
  * The result is looked up in $wgMemc first. On a miss the page (or file
  * page) HTML is fetched, optionally run through MobileFormatter, split into
  * per-section chunks and complemented with page metadata. The result is
  * stored in the cache unless the HTML is shorter than $wgMFMinCachedPageSize.
  *
  * @param Title $title
  * @param boolean $noImages Passed to MobileFormatter::setRemoveMedia() and
  *   included in the cache key
  * @return array Keys set below include 'sections', 'text', 'refsections',
  *   'lastmodified', 'id', 'lastmodifiedby', 'revision', 'languagecount',
  *   'displaytitle' and 'pageprops'; an empty array is returned when a
  *   redirect leads into a negative namespace
  */
 private function getData(Title $title, $noImages)
 {
     global $wgMemc, $wgUseTidy, $wgMFTidyMobileViewSections, $wgMFMinCachedPageSize, $wgMFSpecialCaseMainPage;
     $wp = $this->makeWikiPage($title);
     if ($this->followRedirects && $wp->isRedirect()) {
         $newTitle = $wp->getRedirectTarget();
         if ($newTitle) {
             // Continue with the redirect target and report it in the result
             $title = $newTitle;
             $this->getResult()->addValue(null, $this->getModuleName(), array('redirected' => $title->getPrefixedText()));
             if ($title->getNamespace() < 0) {
                 // Target lies in a negative namespace: flag it as not viewable
                 $this->getResult()->addValue(null, $this->getModuleName(), array('viewable' => 'no'));
                 return array();
             }
             $wp = $this->makeWikiPage($title);
         }
     }
     $latest = $wp->getLatest();
     if ($this->file) {
         // File pages: the key includes the file hash and a fixed expiry is used
         $key = wfMemcKey('mf', 'mobileview', self::CACHE_VERSION, $noImages, $latest, $this->noTransform, $this->file->getSha1(), $this->variant);
         $cacheExpiry = 3600;
     } else {
         if (!$latest) {
             // https://bugzilla.wikimedia.org/show_bug.cgi?id=53378
             // Title::exists() above doesn't seem to always catch recently deleted pages
             $this->dieUsageMsg(array('notanarticle', $title->getPrefixedText()));
         }
         $parserOptions = $this->makeParserOptions($wp);
         $parserCacheKey = ParserCache::singleton()->getKey($wp, $parserOptions);
         $key = wfMemcKey('mf', 'mobileview', self::CACHE_VERSION, $noImages, $latest, $this->noTransform, $parserCacheKey);
     }
     $data = $wgMemc->get($key);
     if ($data) {
         wfIncrStats('mobile.view.cache-hit');
         return $data;
     }
     wfIncrStats('mobile.view.cache-miss');
     if ($this->file) {
         $html = $this->getFilePage($title);
     } else {
         $parserOutput = $this->getParserOutput($wp, $parserOptions);
         $html = $parserOutput->getText();
         // Regular pages are cached for as long as their parser output
         $cacheExpiry = $parserOutput->getCacheExpiry();
     }
     if (!$this->noTransform) {
         $mf = new MobileFormatter(MobileFormatter::wrapHTML($html), $title);
         $mf->setRemoveMedia($noImages);
         $mf->filterContent();
         $mf->setIsMainPage($this->mainPage && $wgMFSpecialCaseMainPage);
         $html = $mf->getText();
     }
     if ($this->mainPage || $this->file) {
         // Main page and file pages are delivered as a single chunk without sections
         $data = array('sections' => array(), 'text' => array($html), 'refsections' => array());
     } else {
         $data = array();
         $data['sections'] = $parserOutput->getSections();
         $sectionCount = count($data['sections']);
         for ($i = 0; $i < $sectionCount; $i++) {
             $data['sections'][$i]['line'] = $title->getPageLanguage()->convert($data['sections'][$i]['line']);
         }
         // Split in front of every heading tag; the leading "<h" is consumed by
         // the split and prepended again in the loop below
         $chunks = preg_split('/<h(?=[1-6]\\b)/i', $html);
         if (count($chunks) != count($data['sections']) + 1) {
             wfDebugLog('mobile', __METHOD__ . "(): mismatching number of " . "sections from parser and split on page {$title->getPrefixedText()}, oldid={$latest}");
             // We can't be sure about anything here, return all page HTML as one big section
             $chunks = array($html);
             $data['sections'] = array();
         }
         $data['text'] = array();
         $data['refsections'] = array();
         foreach ($chunks as $chunk) {
             if (count($data['text'])) {
                 // Every chunk but the first lost its "<h" to the split
                 $chunk = "<h{$chunk}";
             }
             if ($wgUseTidy && $wgMFTidyMobileViewSections && count($chunks) > 1) {
                 $chunk = MWTidy::tidy($chunk);
             }
             if (preg_match('/<ol\\b[^>]*?class="references"/', $chunk)) {
                 // Mark the index of chunks that contain a references list
                 $data['refsections'][count($data['text'])] = true;
             }
             $data['text'][] = $chunk;
         }
         if ($this->usePageImages) {
             $image = $this->getPageImage($title);
             if ($image) {
                 $data['image'] = $image->getTitle()->getText();
             }
         }
     }
     $data['lastmodified'] = wfTimestamp(TS_ISO_8601, $wp->getTimestamp());
     // Page id
     $data['id'] = $wp->getId();
     $user = User::newFromId($wp->getUser());
     if (!$user->isAnon()) {
         $data['lastmodifiedby'] = array('name' => $wp->getUserText(), 'gender' => $user->getOption('gender'));
     } else {
         $data['lastmodifiedby'] = null;
     }
     $data['revision'] = $title->getLatestRevID();
     // $parserOutput is only set on the non-file path above
     if (isset($parserOutput)) {
         $languages = $parserOutput->getLanguageLinks();
         $data['languagecount'] = count($languages);
         $data['displaytitle'] = $parserOutput->getDisplayTitle();
         // @fixme: Does no work for some extension properties that get added in LinksUpdate
         $data['pageprops'] = $parserOutput->getProperties();
     } else {
         $data['languagecount'] = 0;
         $data['displaytitle'] = $title->getPrefixedText();
         $data['pageprops'] = array();
     }
     if ($title->getPageLanguage()->hasVariants()) {
         $data['hasvariants'] = true;
     }
     // Don't store small pages to decrease cache size requirements
     if (strlen($html) >= $wgMFMinCachedPageSize) {
         // store for the same time as original parser output
         $wgMemc->set($key, $data, $cacheExpiry);
     }
     return $data;
 }
Пример #12
0
 /**
  * Destroy the current singleton instance
  *
  * Resets the static $instance property of this class to null.
  */
 public static function destroySingleton()
 {
     self::$instance = null;
 }
Пример #13
0
 /**
  * Sanitizer::removeHTMLtags() must produce the expected markup for every
  * data set while the MWTidy instance is set to false.
  *
  * @dataProvider dataRemoveHTMLtags
  * @covers Sanitizer::removeHTMLtags
  */
 public function testRemoveHTMLtags($input, $output, $msg = null)
 {
     // Set the tidy instance to false before sanitizing
     MWTidy::setInstance(false);
     $sanitized = Sanitizer::removeHTMLtags($input);
     $this->assertEquals($output, $sanitized, $msg);
 }
Пример #14
0
 /**
  * Truncate a string of HTML to a maximum number of displayed characters,
  * appending an ellipsis and closing any tags that are still open at the cut.
  *
  * Characters inside <...> do not count towards the length, and a character
  * entity (e.g. "&#160;") counts as one displayed character. Text that needs
  * truncation is first passed through MWTidy::tidy().
  *
  * @param string $text HTML to truncate
  * @param int $length Maximum number of displayed characters; when <= 0 only
  *   the ellipsis is returned, and when the byte length of $text already fits
  *   the text is returned untouched
  * @param string $ellipsis Text to append on truncation; the default '...'
  *   is replaced by the localized 'ellipsis' message
  * @return string
  */
 function truncateHtml($text, $length, $ellipsis = '...')
 {
     # Use the localized ellipsis character
     if ($ellipsis == '...') {
         $ellipsis = wfMsgExt('ellipsis', array('escapenoentities', 'language' => $this));
     }
     # Check if there is no need to truncate
     if ($length <= 0) {
         // no text shown, nothing to format
         return $ellipsis;
     } elseif (strlen($text) <= $length) {
         // string short enough even *with* HTML
         return $text;
     }
     // fix tags
     $text = MWTidy::tidy($text);
     // innerHTML length so far
     $displayLen = 0;
     // checking if ellipses will make string longer/equal?
     $testingEllipsis = false;
     // 0-open, 1-close
     $tagType = 0;
     // 1-tag start, 2-tag name, 0-neither
     $bracketState = 0;
     // 0-not entity, 1-entity
     $entityState = 0;
     $tag = $ret = '';
     // open tag stack
     $openTags = array();
     $textLen = strlen($text);
     for ($pos = 0; $pos < $textLen; ++$pos) {
         $ch = $text[$pos];
         $lastCh = $pos ? $text[$pos - 1] : '';
         // add to result string
         $ret .= $ch;
         if ($ch == '<') {
             // a '<' while a tag may still be pending: finish it first (for bad HTML)
             $this->truncate_endBracket($tag, $tagType, $lastCh, $openTags);
             // for bad HTML
             $entityState = 0;
             // tag started (checking for backslash)
             $bracketState = 1;
         } elseif ($ch == '>') {
             $this->truncate_endBracket($tag, $tagType, $lastCh, $openTags);
             // for bad HTML
             $entityState = 0;
             // out of brackets
             $bracketState = 0;
         } elseif ($bracketState == 1) {
             if ($ch == '/') {
                 // close tag (e.g. "</span>")
                 $tagType = 1;
             } else {
                 // open tag (e.g. "<span>")
                 $tagType = 0;
                 $tag .= $ch;
             }
             // building tag name
             $bracketState = 2;
         } elseif ($bracketState == 2) {
             if ($ch != ' ') {
                 $tag .= $ch;
             } else {
                 // Name found (e.g. "<a href=..."), add on tag attributes...
                 // NOTE(review): truncate_skip() is assumed to append the skipped
                 // characters to $ret and return how many it consumed - confirm.
                 $pos += $this->truncate_skip($ret, $text, "<>", $pos + 1);
             }
         } elseif ($bracketState == 0) {
             if ($entityState) {
                 if ($ch == ';') {
                     $entityState = 0;
                     // entity is one displayed char
                     $displayLen++;
                 }
             } else {
                 if ($ch == '&') {
                     // entity found, (e.g. "&#160;")
                     $entityState = 1;
                 } else {
                     // this char is displayed
                     $displayLen++;
                     // Add on the other display text after this...
                     $skipped = $this->truncate_skip($ret, $text, "<>&", $pos + 1, $length - $displayLen);
                     $displayLen += $skipped;
                     $pos += $skipped;
                 }
             }
         }
         # Consider truncation once the display length has reached the maximum.
         # Double-check that we're not in the middle of a bracket/entity...
         if ($displayLen >= $length && $bracketState == 0 && $entityState == 0) {
             if (!$testingEllipsis) {
                 $testingEllipsis = true;
                 # Save where we are; we will truncate here unless
                 # the ellipsis actually makes the string longer.
                 $pOpenTags = $openTags;
                 $pRet = $ret;
             } elseif ($displayLen > $length + strlen($ellipsis)) {
                 # Ellipsis won't make string longer/equal, the truncation point was OK.
                 // reload state
                 $openTags = $pOpenTags;
                 // reload state, multi-byte char fix
                 $ret = $this->removeBadCharLast($pRet);
                 // add ellipsis
                 $ret .= $ellipsis;
                 break;
             }
         }
     }
     if ($displayLen == 0) {
         // no text shown, nothing to format
         return '';
     }
     // Close the last tag if left unclosed by bad HTML.
     // Argument order matches the in-loop calls: tag type first, then the last
     // character seen (the two were previously passed the other way round here).
     $this->truncate_endBracket($tag, $tagType, $text[$textLen - 1], $openTags);
     while (count($openTags) > 0) {
         // close open tags
         $ret .= '</' . array_pop($openTags) . '>';
     }
     return $ret;
 }
Пример #15
0
 /**
  * Cleans up HTML, removes dangerous tags and attributes, and
  * removes HTML comments
  *
  * The text is split on '<'. Each piece is matched against
  * self::ELEMENT_BITS_REGEX and is either re-emitted as a cleaned tag or
  * escaped (a leading '&lt;' is emitted and every '>' becomes '&gt;').
  * When MWTidy is not enabled, open elements are additionally tracked on a
  * stack so that badly nested tags are escaped and leftovers are closed.
  *
  * @param string $text
  * @param callable $processCallback Callback to do any variable or parameter
  *   replacements in HTML attribute values
  * @param array|bool $args Arguments for the processing callback
  * @param array $extratags For any extra tags to include
  * @param array $removetags For any tags (default or extra) to exclude
  * @return string
  */
 public static function removeHTMLtags($text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array())
 {
     # Imports the tag lookup tables as local variables. The names used below
     # ($htmlelements, $htmlpairs, $htmlsingle, $htmlsingleonly, $htmlsingleallowed,
     # $htmlnest, $tabletags, $htmllist, $listtags) presumably all come from here;
     # verify against getRecognizedTagData().
     extract(self::getRecognizedTagData($extratags, $removetags));
     # Remove HTML comments
     $text = Sanitizer::removeHTMLcomments($text);
     $bits = explode('<', $text);
     # Everything before the first '<' is plain text; only '>' needs escaping
     $text = str_replace('>', '&gt;', array_shift($bits));
     if (!MWTidy::isEnabled()) {
         # $tagstack: currently open elements
         # $tablestack: tag stacks saved while inside a nested <table>
         $tagstack = $tablestack = array();
         foreach ($bits as $x) {
             $regs = array();
             # $slash: Does the current element start with a '/'?
             # $t: Current element name
             # $params: String between element name and >
             # $brace: Ending '>' or '/>'
             # $rest: Everything until the next element of $bits
             if (preg_match(self::ELEMENT_BITS_REGEX, $x, $regs)) {
                 list(, $slash, $t, $params, $brace, $rest) = $regs;
             } else {
                 $slash = $t = $params = $brace = $rest = null;
             }
             $badtag = false;
             # Note: the condition also lower-cases $t as a side effect
             if (isset($htmlelements[$t = strtolower($t)])) {
                 # Check our stack
                 if ($slash && isset($htmlsingleonly[$t])) {
                     # A closing tag for an element listed in $htmlsingleonly
                     $badtag = true;
                 } elseif ($slash) {
                     # Closing a tag... is it the one we just opened?
                     MediaWiki\suppressWarnings();
                     $ot = array_pop($tagstack);
                     MediaWiki\restoreWarnings();
                     if ($ot != $t) {
                         if (isset($htmlsingleallowed[$ot])) {
                             # Pop all elements with an optional close tag
                             # and see if we find a match below them
                             $optstack = array();
                             array_push($optstack, $ot);
                             MediaWiki\suppressWarnings();
                             $ot = array_pop($tagstack);
                             MediaWiki\restoreWarnings();
                             while ($ot != $t && isset($htmlsingleallowed[$ot])) {
                                 array_push($optstack, $ot);
                                 MediaWiki\suppressWarnings();
                                 $ot = array_pop($tagstack);
                                 MediaWiki\restoreWarnings();
                             }
                             if ($t != $ot) {
                                 # No match. Push the optional elements back again
                                 $badtag = true;
                                 MediaWiki\suppressWarnings();
                                 $ot = array_pop($optstack);
                                 MediaWiki\restoreWarnings();
                                 while ($ot) {
                                     array_push($tagstack, $ot);
                                     MediaWiki\suppressWarnings();
                                     $ot = array_pop($optstack);
                                     MediaWiki\restoreWarnings();
                                 }
                             }
                         } else {
                             # Not the tag we expected: put the popped element back
                             MediaWiki\suppressWarnings();
                             array_push($tagstack, $ot);
                             MediaWiki\restoreWarnings();
                             # <li> can be nested in <ul> or <ol>, skip those cases:
                             if (!isset($htmllist[$ot]) || !isset($listtags[$t])) {
                                 $badtag = true;
                             }
                         }
                     } else {
                         if ($t == 'table') {
                             # Leaving a table: restore the stack saved when it was opened
                             $tagstack = array_pop($tablestack);
                         }
                     }
                     # Closing tags never carry attributes
                     $newparams = '';
                 } else {
                     # Keep track for later
                     if (isset($tabletags[$t]) && !in_array('table', $tagstack)) {
                         # Table-only element outside of any open <table>
                         $badtag = true;
                     } elseif (in_array($t, $tagstack) && !isset($htmlnest[$t])) {
                         # Element is already open and is not listed in $htmlnest
                         $badtag = true;
                         # Is it a self closed htmlpair ? (bug 5487)
                     } elseif ($brace == '/>' && isset($htmlpairs[$t])) {
                         $badtag = true;
                     } elseif (isset($htmlsingleonly[$t])) {
                         # Hack to force empty tag for unclosable elements
                         $brace = '/>';
                     } elseif (isset($htmlsingle[$t])) {
                         # Hack to not close $htmlsingle tags
                         $brace = null;
                         # Still need to push this optionally-closed tag to
                         # the tag stack so that we can match end tags
                         # instead of marking them as bad.
                         array_push($tagstack, $t);
                     } elseif (isset($tabletags[$t]) && in_array($t, $tagstack)) {
                         // New table tag but forgot to close the previous one
                         $text .= "</{$t}>";
                     } else {
                         if ($t == 'table') {
                             # Entering a table: save the current stack and start a fresh one
                             array_push($tablestack, $tagstack);
                             $tagstack = array();
                         }
                         array_push($tagstack, $t);
                     }
                     # Replace any variables or template parameters with
                     # plaintext results.
                     if (is_callable($processCallback)) {
                         call_user_func_array($processCallback, array(&$params, $args));
                     }
                     if (!Sanitizer::validateTag($params, $t)) {
                         $badtag = true;
                     }
                     # Strip non-approved attributes from the tag
                     $newparams = Sanitizer::fixTagAttributes($params, $t);
                 }
                 if (!$badtag) {
                     # Accepted tag: emit it rebuilt from its sanitized parts
                     $rest = str_replace('>', '&gt;', $rest);
                     $close = $brace == '/>' && !$slash ? ' /' : '';
                     $text .= "<{$slash}{$t}{$newparams}{$close}>{$rest}";
                     continue;
                 }
             }
             # Unknown or rejected tag: output the whole piece escaped
             $text .= '&lt;' . str_replace('>', '&gt;', $x);
         }
         # Close off any remaining tags
         while (is_array($tagstack) && ($t = array_pop($tagstack))) {
             $text .= "</{$t}>\n";
             if ($t == 'table') {
                 $tagstack = array_pop($tablestack);
             }
         }
     } else {
         # this might be possible using tidy itself
         # No nesting checks in this branch: tags are only validated and
         # have their attributes cleaned.
         foreach ($bits as $x) {
             if (preg_match(self::ELEMENT_BITS_REGEX, $x, $regs)) {
                 list(, $slash, $t, $params, $brace, $rest) = $regs;
                 $badtag = false;
                 if (isset($htmlelements[$t = strtolower($t)])) {
                     if (is_callable($processCallback)) {
                         call_user_func_array($processCallback, array(&$params, $args));
                     }
                     if (!Sanitizer::validateTag($params, $t)) {
                         $badtag = true;
                     }
                     $newparams = Sanitizer::fixTagAttributes($params, $t);
                     if (!$badtag) {
                         $rest = str_replace('>', '&gt;', $rest);
                         $text .= "<{$slash}{$t}{$newparams}{$brace}{$rest}";
                         continue;
                     }
                 }
             }
             # Unknown or rejected tag: output the whole piece escaped
             $text .= '&lt;' . str_replace('>', '&gt;', $x);
         }
     }
     return $text;
 }
Пример #16
0
 /**
  * Convert wikitext to HTML
  * Do not call this function recursively.
  *
  * The rendered text is stored in $this->mOutput via setText() and that
  * object is returned. The revision-related members of the parser are
  * saved on entry and restored before returning.
  *
  * @param $text String: text we want to parse
  * @param $title Title object
  * @param $options ParserOptions
  * @param $linestart boolean: passed through to doBlockLevels()
  * @param $clearState boolean: passed through to startParse(); when true the
  *   strip marker prefix is also removed from the input
  * @param $revid Int: number to pass in {{REVISIONID}}
  * @return ParserOutput a ParserOutput
  */
 public function parse($text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null)
 {
     /**
      * First pass--just handle <nowiki> sections, pass the rest off
      * to internalParse() which does all the real work.
      */
     global $wgUseTidy, $wgAlwaysUseTidy;
     # Two profiling sections: one per method, one per method+caller
     $fname = __METHOD__ . '-' . wfGetCaller();
     wfProfileIn(__METHOD__);
     wfProfileIn($fname);
     $this->startParse($title, $options, self::OT_HTML, $clearState);
     # Remove the strip marker tag prefix from the input, if present.
     if ($clearState) {
         $text = str_replace($this->mUniqPrefix, '', $text);
     }
     # Remember the current revision state so it can be restored at the end
     $oldRevisionId = $this->mRevisionId;
     $oldRevisionObject = $this->mRevisionObject;
     $oldRevisionTimestamp = $this->mRevisionTimestamp;
     $oldRevisionUser = $this->mRevisionUser;
     if ($revid !== null) {
         # An explicit revision ID replaces the ID and clears the other revision members
         $this->mRevisionId = $revid;
         $this->mRevisionObject = null;
         $this->mRevisionTimestamp = null;
         $this->mRevisionUser = null;
     }
     wfRunHooks('ParserBeforeStrip', array(&$this, &$text, &$this->mStripState));
     # No more strip!
     wfRunHooks('ParserAfterStrip', array(&$this, &$text, &$this->mStripState));
     $text = $this->internalParse($text);
     wfRunHooks('ParserAfterParse', array(&$this, &$text, &$this->mStripState));
     $text = $this->mStripState->unstripGeneral($text);
     # Clean up special characters, only run once, next-to-last before doBlockLevels
     $fixtags = array('/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;', '/(\\302\\253) /' => '\\1&#160;', '/&#160;(!\\s*important)/' => ' \\1');
     $text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);
     $text = $this->doBlockLevels($text, $linestart);
     $this->replaceLinkHolders($text);
     /**
      * The input doesn't get language converted if
      * a) It's disabled
      * b) Content isn't converted
      * c) It's a conversion table
      * d) it is an interface message (which is in the user language)
      */
     if (!($options->getDisableContentConversion() || isset($this->mDoubleUnderscores['nocontentconvert']))) {
         # Run convert unconditionally in 1.18-compatible mode
         global $wgBug34832TransitionalRollback;
         if ($wgBug34832TransitionalRollback || !$this->mOptions->getInterfaceMessage()) {
             # The position of the convert() call should not be changed. it
             # assumes that the links are all replaced and the only thing left
             # is the <nowiki> mark.
             $text = $this->getConverterLanguage()->convert($text);
         }
     }
     /**
      * A converted title will be provided in the output object if title and
      * content conversion are enabled, the article text does not contain
      * a conversion-suppressing double-underscore tag, and no
      * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
      * automatic link conversion.
      */
     if (!($options->getDisableTitleConversion() || isset($this->mDoubleUnderscores['nocontentconvert']) || isset($this->mDoubleUnderscores['notitleconvert']) || $this->mOutput->getDisplayTitle() !== false)) {
         # Prefer the converter's rule title; otherwise convert the page title itself
         $convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
         if ($convruletitle) {
             $this->mOutput->setTitleText($convruletitle);
         } else {
             $titleText = $this->getConverterLanguage()->convertTitle($title);
             $this->mOutput->setTitleText($titleText);
         }
     }
     $text = $this->mStripState->unstripNoWiki($text);
     wfRunHooks('ParserBeforeTidy', array(&$this, &$text));
     $text = $this->replaceTransparentTags($text);
     $text = $this->mStripState->unstripGeneral($text);
     $text = Sanitizer::normalizeCharReferences($text);
     # && binds tighter than ||: tidy runs when ($wgUseTidy and the options
     # request it) or when $wgAlwaysUseTidy is set
     if ($wgUseTidy && $this->mOptions->getTidy() || $wgAlwaysUseTidy) {
         $text = MWTidy::tidy($text);
     } else {
         # attempt to sanitize at least some nesting problems
         # (bug #2702 and quite a few others)
         $tidyregs = array('/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9', '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>', '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9', '/<([bi])><\\/\\1>/' => '');
         $text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
     }
     if ($this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit()) {
         $this->limitationWarn('expensive-parserfunction', $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit());
     }
     wfRunHooks('ParserAfterTidy', array(&$this, &$text));
     # Information on include size limits, for the benefit of users who try to skirt them
     if ($this->mOptions->getEnableLimitReport()) {
         $max = $this->mOptions->getMaxIncludeSize();
         $PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$this->mOptions->getExpensiveParserFunctionLimit()}\n";
         $limitReport = "NewPP limit report\n" . "Preprocessor visited node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" . "Preprocessor generated node count: " . "{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" . "Post-expand include size: {$this->mIncludeSizes['post-expand']}/{$max} bytes\n" . "Template argument size: {$this->mIncludeSizes['arg']}/{$max} bytes\n" . "Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n" . $PFreport;
         wfRunHooks('ParserLimitReport', array($this, &$limitReport));
         # The report is appended to the output as an HTML comment
         $text .= "\n<!-- \n{$limitReport}-->\n";
         # Log pages whose generated node count exceeds a tenth of the limit
         if ($this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10) {
             wfDebugLog('generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey());
         }
     }
     $this->mOutput->setText($text);
     # Put back the revision state saved at the top of this method
     $this->mRevisionId = $oldRevisionId;
     $this->mRevisionObject = $oldRevisionObject;
     $this->mRevisionTimestamp = $oldRevisionTimestamp;
     $this->mRevisionUser = $oldRevisionUser;
     wfProfileOut($fname);
     wfProfileOut(__METHOD__);
     return $this->mOutput;
 }
Пример #17
0
 /**
  * Do any required setup which is dependent on test options.
  *
  * Builds a map of global settings from the test's options and config,
  * installs the tidy driver, content language, user and request context,
  * and collects the matching teardown callbacks along the way.
  *
  * @see staticSetup() for more information about setup/teardown
  *
  * @param array $test Test info supplied by TestFileReader
  * @param callable|null $nextTeardown
  * @return ScopedCallback
  */
 public function perTestSetup($test, $nextTeardown = null)
 {
     $teardown = [];
     $this->checkSetupDone('setupDatabase', 'setDatabase');
     $teardown[] = $this->markSetupDone('perTestSetup');
     $opts = $this->parseOptions($test['options']);
     $config = $test['config'];
     // Find out values for some special options.
     $langCode = self::getOptionValue('language', $opts, 'en');
     $variant = self::getOptionValue('variant', $opts, false);
     $maxtoclevel = self::getOptionValue('wgMaxTocLevel', $opts, 999);
     $linkHolderBatchSize = self::getOptionValue('wgLinkHolderBatchSize', $opts, 1000);
     // Settings to apply for this test, keyed by global variable name
     $setup = ['wgEnableUploads' => self::getOptionValue('wgEnableUploads', $opts, true), 'wgLanguageCode' => $langCode, 'wgRawHtml' => self::getOptionValue('wgRawHtml', $opts, false), 'wgNamespacesWithSubpages' => [0 => isset($opts['subpage'])], 'wgMaxTocLevel' => $maxtoclevel, 'wgAllowExternalImages' => self::getOptionValue('wgAllowExternalImages', $opts, true), 'wgThumbLimits' => [self::getOptionValue('thumbsize', $opts, 180)], 'wgDefaultLanguageVariant' => $variant, 'wgLinkHolderBatchSize' => $linkHolderBatchSize, 'wgEnableMagicLinks' => self::getOptionValue('wgEnableMagicLinks', $opts, []) + ['ISBN' => true, 'PMID' => true, 'RFC' => true]];
     if ($config) {
         // Each config line has the form "name=<PHP expression>"
         $configLines = explode("\n", $config);
         foreach ($configLines as $line) {
             // NOTE(review): a line without '=' leaves $value unassigned here.
             list($var, $value) = explode('=', $line, 2);
             // WARNING: the value is executed as PHP code via eval(). This is
             // only acceptable if test files are trusted input - confirm that
             // nothing untrusted can reach $test['config'].
             $setup[$var] = eval("return {$value};");
         }
     }
     /** @since 1.20 */
     Hooks::run('ParserTestGlobals', [&$setup]);
     // Create tidy driver
     if (isset($opts['tidy'])) {
         // Cache a driver instance
         if ($this->tidyDriver === null) {
             $this->tidyDriver = MWTidy::factory($this->tidySupport->getConfig());
         }
         $tidy = $this->tidyDriver;
     } else {
         $tidy = false;
     }
     MWTidy::setInstance($tidy);
     $teardown[] = function () {
         MWTidy::destroySingleton();
     };
     // Set content language. This invalidates the magic word cache and title services
     $lang = Language::factory($langCode);
     $setup['wgContLang'] = $lang;
     // The same closure is registered both as a setup step and as a teardown step
     $reset = function () {
         MagicWord::clearCache();
         $this->resetTitleServices();
     };
     $setup[] = $reset;
     $teardown[] = $reset;
     // Make a user object with the same language
     $user = new User();
     $user->setOption('language', $langCode);
     $setup['wgLang'] = $lang;
     // We (re)set $wgThumbLimits to a single-element array above.
     $user->setOption('thumbsize', 0);
     $setup['wgUser'] = $user;
     // And put both user and language into the context
     $context = RequestContext::getMain();
     $context->setUser($user);
     $context->setLanguage($lang);
     $teardown[] = function () use($context) {
         // Reset context to the restored globals
         $context->setUser($GLOBALS['wgUser']);
         $context->setLanguage($GLOBALS['wgContLang']);
     };
     // Apply the collected settings; the returned value is kept as a teardown step
     $teardown[] = $this->executeSetupSnippets($setup);
     return $this->createTeardownObject($teardown, $nextTeardown);
 }
Пример #18
0
 /**
  * Finish the job for parse(): take half-parsed HTML and turn it into
  * fully parsed HTML.
  *
  * The Parser* hooks in here only fire when $isMain is true.
  *
  * @param string $text
  * @param bool $isMain
  * @param bool $linestart
  * @return string
  */
 private function internalParseHalfParsed($text, $isMain = true, $linestart = true)
 {
     $text = $this->mStripState->unstripGeneral($text);
     if ($isMain) {
         Hooks::run('ParserAfterUnstrip', array(&$this, &$text));
     }

     # Clean up special characters, only run once, next-to-last before doBlockLevels
     $spacingPatterns = array(
         '/(.) (?=\\?|:|;|!|%|\\302\\273)/',
         '/(\\302\\253) /',
         '/&#160;(!\\s*important)/',
     );
     $spacingReplacements = array(
         '\\1&#160;',
         '\\1&#160;',
         ' \\1',
     );
     $text = preg_replace($spacingPatterns, $spacingReplacements, $text);

     $text = $this->doBlockLevels($text, $linestart);
     $this->replaceLinkHolders($text);

     /**
      * Language conversion is skipped when
      * a) It's disabled
      * b) Content isn't converted
      * c) It's a conversion table
      * d) it is an interface message (which is in the user language)
      */
     $skipConversion = $this->mOptions->getDisableContentConversion()
         || isset($this->mDoubleUnderscores['nocontentconvert'])
         || $this->mOptions->getInterfaceMessage();
     if (!$skipConversion) {
         # The position of the convert() call should not be changed. it
         # assumes that the links are all replaced and the only thing left
         # is the <nowiki> mark.
         $text = $this->getConverterLanguage()->convert($text);
     }

     $text = $this->mStripState->unstripNoWiki($text);
     if ($isMain) {
         Hooks::run('ParserBeforeTidy', array(&$this, &$text));
     }

     $text = $this->replaceTransparentTags($text);
     $text = $this->mStripState->unstripGeneral($text);
     $text = Sanitizer::normalizeCharReferences($text);

     $useTidy = MWTidy::isEnabled() && $this->mOptions->getTidy();
     if (!$useTidy) {
         # attempt to sanitize at least some nesting problems
         # (bug #2702 and quite a few others)
         $nestingPatterns = array(
             '/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/',
             '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/',
             '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/',
             '/<([bi])><\\/\\1>/',
         );
         $nestingReplacements = array(
             '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
             '\\1\\2</a>\\3</a>\\1\\4</a>',
             '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
             '',
         );
         $text = preg_replace($nestingPatterns, $nestingReplacements, $text);
     } else {
         $text = MWTidy::tidy($text);
     }

     if ($isMain) {
         Hooks::run('ParserAfterTidy', array(&$this, &$text));
     }
     return $text;
 }
Пример #19
0
	/**
	 * Separate the summary of a wikilog article from its contents.
	 * A summary carried by the parser output wins; failing that, the text
	 * in front of the first heading (section zero) is used as the summary.
	 *
	 * @param $parserOutput ParserOutput object.
	 * @return Two-element array with summary and content. Summary may be
	 *   NULL if nonexistent.
	 */
	public static function splitSummaryContent( $parserOutput ) {
		global $wgUseTidy;

		$content = Sanitizer::removeHTMLcomments( $parserOutput->getText() );

		# Parser output contains wikilog output and summary, use it.
		if ( isset( $parserOutput->mExtWikilog ) && $parserOutput->mExtWikilog->mSummary ) {
			$summary = Sanitizer::removeHTMLcomments( $parserOutput->mExtWikilog->mSummary );
			return array( $summary, $content );
		}

		# Otherwise try to cut the summary out of the content text.
		$parts = preg_split( '/<(h[1-6]).*?>.*?<\\/\\1>/i', $content, 2 );
		if ( count( $parts ) <= 1 ) {
			# Short article with a single section: report no summary and
			# let the caller decide what to do.
			return array( null, $content );
		}

		# Long article with multiple sections, keep only the first one.
		# The regex may split on a heading that is not a child of the root
		# element (e.g. <div><h2>...</h2></div>, leaving an open <div> tag),
		# so the summary goes through tidy when it is available.
		$summary = $wgUseTidy ? MWTidy::tidy( $parts[0] ) : $parts[0];

		return array( $summary, $content );
	}
 /**
  * Show the special page
  *
  * Reads the submitted wikitext and options from the request, runs the
  * preprocessor over it, then prints the intro message, the form and, if
  * there was input, the requested result views.
  *
  * @param mixed $subpage Not read by this method
  */
 function execute($subpage)
 {
     global $wgParser, $wgUseTidy, $wgAlwaysUseTidy;
     $this->setHeaders();
     $request = $this->getRequest();

     $titleStr = $request->getText('wpContextTitle');
     $title = Title::newFromText($titleStr);
     if (!$title) {
         $title = $this->getPageTitle();
     }

     $input = $request->getText('wpInput');
     $this->generateXML = $request->getBool('wpGenerateXml');
     $this->generateRawHtml = $request->getBool('wpGenerateRawHtml');

     $hasInput = strlen($input) > 0;
     // "Remove comments" defaults to on only while nothing has been submitted
     $this->removeComments = $request->getBool('wpRemoveComments', !$hasInput);
     $this->removeNowiki = $request->getBool('wpRemoveNowiki', false);

     $output = false;
     if ($hasInput) {
         $options = ParserOptions::newFromContext($this->getContext());
         $options->setRemoveComments($this->removeComments);
         $options->setTidy(true);
         $options->setMaxIncludeSize(self::MAX_INCLUDE_SIZE);
         if ($this->generateXML) {
             $wgParser->startExternalParse($title, $options, OT_PREPROCESS);
             $dom = $wgParser->preprocessToDom($input);
             $xml = method_exists($dom, 'saveXML') ? $dom->saveXML() : $dom->__toString();
         }
         $output = $wgParser->preprocess($input, $title, $options);
     }

     $out = $this->getOutput();
     $out->addWikiMsg('expand_templates_intro');
     $out->addHTML($this->makeForm($titleStr, $input));
     if ($output === false) {
         // Nothing was submitted, so there is no result to show
         return;
     }

     if ($this->generateXML && strlen($output) > 0) {
         $out->addHTML($this->makeOutput($xml, 'expand_templates_xml_output'));
     }

     $rendered = $this->makeOutput($output);
     if ($this->removeNowiki) {
         $nowikiPatterns = array('_&lt;nowiki&gt;_', '_&lt;/nowiki&gt;_', '_&lt;nowiki */&gt;_');
         $rendered = preg_replace($nowikiPatterns, '', $rendered);
     }
     if (($wgUseTidy && $options->getTidy()) || $wgAlwaysUseTidy) {
         $rendered = MWTidy::tidy($rendered);
     }
     $out->addHTML($rendered);

     $rawhtml = $this->generateHtml($title, $output);
     if ($this->generateRawHtml && strlen($rawhtml) > 0) {
         $out->addHTML($this->makeOutput($rawhtml, 'expand_templates_html_output'));
     }
     $this->showHtmlPreview($title, $rawhtml, $out);
 }
Пример #21
0
 /**
  * Pass text through MWTidy::tidy() when the $wgUseTidy global is true;
  * otherwise hand it back unchanged.
  *
  * @param $text String: the text to tidy
  * @return String
  */
 protected function tidy($text)
 {
     global $wgUseTidy;
     if (!$wgUseTidy) {
         // Tidy is switched off: nothing to do
         return $text;
     }
     return MWTidy::tidy($text);
 }
Пример #22
0
 /**
  * Destroy the MWTidy singleton, then run the parent class's tearDown().
  */
 protected function tearDown()
 {
     MWTidy::destroySingleton();
     parent::tearDown();
 }
 /**
  * Run a single parser test case and compare the produced output with the
  * expected result.
  *
  * Depending on the parsed options the input is sent through one of several
  * entry points (pre-save transform, message transform, section get/replace,
  * comment formatting, preload text, or a full parse). The globals installed
  * by setupGlobals() are torn down again even when the test is skipped or an
  * exception is thrown part-way through.
  *
  * @group medium
  * @dataProvider parserTestProvider
  * @param string $desc Test description; also matched against $this->regex
  * @param string $input Text handed to the parser
  * @param string $result Expected output
  * @param array $opts Test options; passed through parseOptions() before use
  * @param array $config Passed to setupGlobals() together with the options
  */
 public function testParserTest($desc, $input, $result, $opts, $config)
 {
     if ($this->regex != '' && !preg_match('/' . $this->regex . '/', $desc)) {
         $this->assertTrue(true);
         // XXX: don't flood output with "test made no assertions"
         //$this->markTestSkipped( 'Filtered out by the user' );
         return;
     }
     if (!$this->isWikitextNS(NS_MAIN)) {
         // parser tests frequently assume that the main namespace contains wikitext.
         // @todo When setting up pages, force the content model. Only skip if
         //        $wgContentModelUseDB is false.
         $this->markTestSkipped("Main namespace does not support wikitext, skipping parser test: {$desc}");
     }
     wfDebug("Running parser test: {$desc}\n");
     $opts = $this->parseOptions($opts);
     $context = $this->setupGlobals($opts, $config);
     try {
         $user = $context->getUser();
         $options = ParserOptions::newFromContext($context);
         if (isset($opts['title'])) {
             $titleText = $opts['title'];
         } else {
             $titleText = 'Parser test';
         }
         $local = isset($opts['local']);
         $preprocessor = isset($opts['preprocessor']) ? $opts['preprocessor'] : null;
         $parser = $this->getParser($preprocessor);
         $title = Title::newFromText($titleText);
         # Parser test requiring math. Make sure texvc is executable
         # or just skip such tests.
         if (isset($opts['math']) || isset($opts['texvc'])) {
             global $wgTexvc;
             if (!isset($wgTexvc)) {
                 $this->markTestSkipped("SKIPPED: \$wgTexvc is not set");
             } elseif (!is_executable($wgTexvc)) {
                 $this->markTestSkipped("SKIPPED: texvc binary does not exist" . " or is not executable.\n" . "Current configuration is:\n\$wgTexvc = '{$wgTexvc}'");
             }
         }
         if (isset($opts['djvu'])) {
             if (!$this->djVuSupport->isEnabled()) {
                 $this->markTestSkipped("SKIPPED: djvu binaries do not exist or are not executable.\n");
             }
         }
         if (isset($opts['pst'])) {
             $out = $parser->preSaveTransform($input, $title, $user, $options);
         } elseif (isset($opts['msg'])) {
             $out = $parser->transformMsg($input, $options, $title);
         } elseif (isset($opts['section'])) {
             $section = $opts['section'];
             $out = $parser->getSection($input, $section);
         } elseif (isset($opts['replace'])) {
             $section = $opts['replace'][0];
             $replace = $opts['replace'][1];
             $out = $parser->replaceSection($input, $section, $replace);
         } elseif (isset($opts['comment'])) {
             $out = Linker::formatComment($input, $title, $local);
         } elseif (isset($opts['preload'])) {
             $out = $parser->getPreloadText($input, $title, $options);
         } else {
             // Full parse; the remaining options post-process the ParserOutput.
             $output = $parser->parse($input, $title, $options, true, true, 1337);
             $output->setTOCEnabled(!isset($opts['notoc']));
             $out = $output->getText();
             if (isset($opts['tidy'])) {
                 if (!$this->tidySupport->isEnabled()) {
                     $this->markTestSkipped("SKIPPED: tidy extension is not installed.\n");
                 } else {
                     $out = MWTidy::tidy($out);
                     // Strip trailing whitespace from the tidied output.
                     $out = preg_replace('/\\s+$/', '', $out);
                 }
             }
             if (isset($opts['showtitle'])) {
                 // Prefer the title text recorded on the output, if any.
                 if ($output->getTitleText()) {
                     $title = $output->getTitleText();
                 }
                 $out = "{$title}\n{$out}";
             }
             // 'ill' and 'cat' replace the rendered text entirely.
             if (isset($opts['ill'])) {
                 $out = implode(' ', $output->getLanguageLinks());
             } elseif (isset($opts['cat'])) {
                 $outputPage = $context->getOutput();
                 $outputPage->addCategoryLinks($output->getCategories());
                 $cats = $outputPage->getCategoryLinks();
                 if (isset($cats['normal'])) {
                     $out = implode(' ', $cats['normal']);
                 } else {
                     $out = '';
                 }
             }
             $parser->mPreprocessor = null;
         }
     } catch (\Exception $e) {
         // markTestSkipped() aborts the test by throwing, and the parser may
         // throw as well; undo setupGlobals() before propagating so the
         // globals do not leak into subsequent tests.
         $this->teardownGlobals();
         throw $e;
     }
     $this->teardownGlobals();
     $this->assertEquals($result, $out, $desc);
 }