/**
 * Run a snippet through MWTidy::tidy() and compare the result with the
 * expected markup, ignoring paragraph tags, surrounding whitespace and
 * carriage returns.
 *
 * @dataProvider provideTestWrapping
 */
public function testTidyWrapping($expected, $text, $msg = '') {
    $tidied = MWTidy::tidy($text);

    // Where Tidy decides to stick its <p>s is irrelevant for this test.
    $withoutParagraphs = trim(preg_replace('#</?p>#', '', $tidied));

    // Drop carriage returns so Windows line endings compare equal.
    $actual = str_replace("\r", '', $withoutParagraphs);

    $this->assertEquals($expected, $actual, $msg);
}
/**
 * Show the template-expansion form and, when input was submitted, the
 * preprocessed result (optionally preceded by the XML parse tree).
 *
 * @param string $subpage Subpage part of the request; not used here.
 */
function execute($subpage) {
    global $wgRequest, $wgParser, $wgOut, $wgUseTidy, $wgAlwaysUseTidy;

    $this->setHeaders();
    $this->isNewParser = is_callable(array($wgParser, 'preprocessToDom'));

    // Resolve the context title, falling back to this special page's title.
    $titleStr = $wgRequest->getText('contexttitle');
    $title = Title::newFromText($titleStr);
    $selfTitle = $this->getTitle();
    if (!$title) {
        $title = $selfTitle;
    }

    $input = $wgRequest->getText('input');

    // XML generation is only offered when the parser can build a DOM.
    if ($this->isNewParser) {
        $this->generateXML = $wgRequest->getBool('generate_xml');
    } else {
        $this->generateXML = false;
    }

    $hasInput = strlen($input) > 0;

    // "Remove comments" defaults to on for the empty form and off once
    // something has been submitted.
    $this->removeComments = $wgRequest->getBool('removecomments', !$hasInput);
    $this->removeNowiki = $wgRequest->getBool('removenowiki', false);

    $output = false;
    if ($hasInput) {
        $options = new ParserOptions();
        $options->setRemoveComments($this->removeComments);
        $options->setTidy(true);
        $options->setMaxIncludeSize(self::MAX_INCLUDE_SIZE);

        if ($this->generateXML) {
            $wgParser->startExternalParse($title, $options, OT_PREPROCESS);
            $dom = $wgParser->preprocessToDom($input);
            $xml = is_callable(array($dom, 'saveXML'))
                ? $dom->saveXML()
                : $dom->__toString();
        }

        $output = $wgParser->preprocess($input, $title, $options);
    }

    $wgOut->addWikiText(wfMsg('expand_templates_intro'));
    $wgOut->addHTML($this->makeForm($titleStr, $input));

    if ($output === false) {
        // Nothing was submitted: only the form is shown.
        return;
    }

    if ($this->generateXML) {
        $wgOut->addHTML($this->makeOutput($xml, 'expand_templates_xml_output'));
    }

    $rendered = $this->makeOutput($output);
    if ($this->removeNowiki) {
        $nowikiPatterns = array('_<nowiki>_', '_</nowiki>_', '_<nowiki */>_');
        $rendered = preg_replace($nowikiPatterns, '', $rendered);
    }
    if ($wgUseTidy && $options->getTidy() || $wgAlwaysUseTidy) {
        $rendered = MWTidy::tidy($rendered);
    }

    $wgOut->addHTML($rendered);
    $this->showHtmlPreview($title, $output, $wgOut);
}
/**
 * Asserts that the given string is a valid HTML document.
 *
 * @since 1.23
 *
 * @note The test is marked as skipped unless $wgTidyInternal is set and
 *   MWTidy reports itself as enabled.
 * @note $wgUseTidy is not consulted, so HTML can be validated even (and
 *   especially) when automatic tidying is disabled.
 *
 * @param string $html A complete HTML document
 */
protected function assertValidHtmlDocument($html) {
    // Validation is only attempted with the internal tidy PHP extension.
    // With $wgTidyInternal off, MWTidy would fall back to the command line
    // tool, and then a failure could not be told apart from the tool simply
    // being absent -- every HTML assertion would fail on such a system.
    $canValidate = $GLOBALS['wgTidyInternal'] && MWTidy::isEnabled();
    if (!$canValidate) {
        $this->markTestSkipped('Tidy extension not installed');
    }

    $tidyReport = '';
    MWTidy::checkErrors($html, $tidyReport);
    $reportLines = preg_split('/[\\r\\n]+/', $tidyReport);

    // Drop blank lines and the warnings that are of no use here: Tidy often
    // complains about missing attributes that have been deprecated since
    // HTML4, and about trimming empty elements.
    $ignorable = '/^(.*Warning: (trimming empty|.* lacks ".*?" attribute).*|\\s*)$/m';
    $relevant = preg_grep($ignorable, $reportLines, PREG_GREP_INVERT);

    $this->assertEmpty($relevant, implode("\n", $relevant));
}
/**
 * Replace the output with an error page if the HTML is not valid.
 *
 * When MWTidy::checkErrors() accepts the document it is returned untouched.
 * Otherwise a standalone XHTML page is built that lists the reported errors,
 * the raw error text, and the source with the offending lines highlighted
 * and anchored as "line-N".
 *
 * @param string $s The HTML about to be sent
 *
 * @return string Either $s unchanged, or the error report page
 */
function wfHtmlValidationHandler($s) {
    $errors = '';
    if (MWTidy::checkErrors($s, $errors)) {
        return $s;
    }

    header('Cache-Control: no-cache');

    $out = <<<EOT
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" dir="ltr">
<head>
<title>HTML validation error</title>
<style>
.highlight { background-color: #ffc }
li { white-space: pre }
</style>
</head>
<body>
<h1>HTML validation error</h1>
<ul>
EOT;

    // Collect the line numbers named in the report and link each message
    // to the matching source line below.
    $badLines = array();
    foreach (explode("\n", $errors) as $error) {
        if (preg_match('/^line (\\d+)/', $error, $m)) {
            $lineNum = intval($m[1]);
            $badLines[$lineNum] = true;
            $out .= "<li><a href=\"#line-{$lineNum}\">" . htmlspecialchars($error) . "</a></li>\n";
        }
    }
    $out .= '</ul>';
    $out .= '<pre>' . htmlspecialchars($errors) . '</pre>';
    $out .= "<ol>\n";

    // explode() is used rather than strtok(): strtok() silently skips empty
    // tokens, so blank source lines would be dropped and every following
    // line would be numbered (and highlighted) too low.
    foreach (explode("\n", $s) as $index => $line) {
        $i = $index + 1;
        if (isset($badLines[$i])) {
            $out .= "<li class=\"highlight\" id=\"line-{$i}\">";
        } else {
            $out .= '<li>';
        }
        $out .= htmlspecialchars($line) . "</li>\n";
    }

    $out .= '</ol></body></html>';
    return $out;
}
/**
 * Set up the global variables for a consistent environment for each test.
 * Ideally this should replace the global configuration entirely.
 *
 * Previous values of every overridden global are remembered in
 * $this->savedGlobals.
 *
 * @param array $opts Per-test options (language, variant, thumbsize, ...)
 * @param string $config Newline-separated "name=PHP expression" lines that
 *   override or add settings
 * @return RequestContext
 * @throws Exception If --use-filebackend names a backend that is not
 *   present in $wgFileBackends
 */
protected function setupGlobals($opts = array(), $config = '') {
    global $wgFileBackends;

    # Find out values for some special options.
    $lang = self::getOptionValue('language', $opts, 'en');
    $variant = self::getOptionValue('variant', $opts, false);
    $maxtoclevel = self::getOptionValue('wgMaxTocLevel', $opts, 999);
    $linkHolderBatchSize = self::getOptionValue('wgLinkHolderBatchSize', $opts, 1000);

    $uploadDir = $this->getUploadDir();
    if ($this->getCliArg('use-filebackend')) {
        if (self::$backendToUse) {
            $backend = self::$backendToUse;
        } else {
            $name = $this->getCliArg('use-filebackend');
            $useConfig = array();
            foreach ($wgFileBackends as $conf) {
                if ($conf['name'] == $name) {
                    $useConfig = $conf;
                }
            }
            // Fail with a clear message instead of a fatal "new null"
            // further down when the requested backend is not configured.
            if (!isset($useConfig['class'])) {
                throw new Exception("Unknown or incomplete file backend '{$name}' requested via use-filebackend");
            }
            $useConfig['name'] = 'local-backend'; // swap name
            unset($useConfig['lockManager']);
            unset($useConfig['fileJournal']);
            $class = $useConfig['class'];
            self::$backendToUse = new $class($useConfig);
            $backend = self::$backendToUse;
        }
    } else {
        # Replace with a mock. We do not care about generating real
        # files on the filesystem, just need to expose the file
        # informations.
        $backend = new MockFileBackend(array(
            'name' => 'local-backend',
            'wikiId' => wfWikiId(),
        ));
    }

    $settings = array(
        'wgLocalFileRepo' => array(
            'class' => 'LocalRepo',
            'name' => 'local',
            'url' => 'http://example.com/images',
            'hashLevels' => 2,
            'transformVia404' => false,
            'backend' => $backend,
        ),
        'wgEnableUploads' => self::getOptionValue('wgEnableUploads', $opts, true),
        'wgLanguageCode' => $lang,
        'wgDBprefix' => $this->db->getType() != 'oracle' ? 'unittest_' : 'ut_',
        'wgRawHtml' => self::getOptionValue('wgRawHtml', $opts, false),
        'wgNamespacesWithSubpages' => array(NS_MAIN => isset($opts['subpage'])),
        'wgAllowExternalImages' => self::getOptionValue('wgAllowExternalImages', $opts, true),
        'wgThumbLimits' => array(self::getOptionValue('thumbsize', $opts, 180)),
        'wgMaxTocLevel' => $maxtoclevel,
        'wgUseTeX' => isset($opts['math']) || isset($opts['texvc']),
        'wgMathDirectory' => $uploadDir . '/math',
        'wgDefaultLanguageVariant' => $variant,
        'wgLinkHolderBatchSize' => $linkHolderBatchSize,
        'wgUseTidy' => isset($opts['tidy']),
    );

    if ($config) {
        foreach (explode("\n", $config) as $line) {
            // Blank lines (e.g. a trailing newline) and lines without an
            // assignment used to end up as $settings[''] = null and were
            // then written to $GLOBALS['']; skip them instead.
            if (strpos($line, '=') === false) {
                continue;
            }
            list($var, $value) = explode('=', $line, 2);
            $var = trim($var);
            if ($var === '') {
                continue;
            }
            // NOTE(review): eval() of text taken from the test definition.
            // Acceptable only as long as $config comes from trusted test
            // files; never feed it externally supplied data.
            $settings[$var] = eval("return {$value};");
        }
    }

    $this->savedGlobals = array();

    /** @since 1.20 */
    Hooks::run('ParserTestGlobals', array(&$settings));

    $langObj = Language::factory($lang);
    $settings['wgContLang'] = $langObj;
    $settings['wgLang'] = $langObj;

    $context = new RequestContext();
    $settings['wgOut'] = $context->getOutput();
    $settings['wgUser'] = $context->getUser();
    $settings['wgRequest'] = $context->getRequest();

    // We (re)set $wgThumbLimits to a single-element array above.
    $context->getUser()->setOption('thumbsize', 0);

    // Install the settings, remembering whatever they replace.
    foreach ($settings as $var => $val) {
        if (array_key_exists($var, $GLOBALS)) {
            $this->savedGlobals[$var] = $GLOBALS[$var];
        }
        $GLOBALS[$var] = $val;
    }

    MWTidy::destroySingleton();
    MagicWord::clearCache();

    # The entries saved into RepoGroup cache with previous globals will be wrong.
    RepoGroup::destroySingleton();
    FileBackendGroup::destroySingleton();

    # Create dummy files in storage
    $this->setupUploads();

    # Publish the articles after we have the final language set
    $this->publishTestArticles();

    MessageCache::destroyInstance();

    return $context;
}
/**
 * Render CompaTable HTML code.
 *
 * Looks the table up in memcache (keyed on format, topic and feature),
 * generates and stores it when missing, and either wraps it in ESI tags
 * or returns it directly.
 *
 * @param string $input Tag content; currently ignored
 * @param array $args Tag attributes; 'topic', 'feature' and 'format' are read
 * @param Parser $parser
 * @return string HTML, or an HTML comment when no data is available
 */
public static function renderCompaTables($input, array $args, Parser $parser) {
    global $wgCompatablesUseESI, $wgUseTidy, $wgAlwaysUseTidy;

    // Missing (or null) attributes are treated as empty strings.
    foreach (array('topic', 'feature', 'format') as $attribute) {
        if (!isset($args[$attribute])) {
            $args[$attribute] = '';
        }
    }
    $args['cacheKey'] = wfMemcKey('compatables', $args['format'], $args['topic'], $args['feature']);

    $data = self::getData();
    if ($data === null) {
        wfDebugLog('CompaTables', 'Could not generate table, data is either empty or had problems.');
        return '<!-- Compatables: Could not generate table, data might be empty or had problems with caching -->';
    }

    $cached = self::memcacheRead($args['cacheKey'], $data['hash']);
    if ($cached === false) {
        $generated = self::generateCompaTable($data, $args);
        if ($wgUseTidy && $parser->getOptions()->getTidy() || $wgAlwaysUseTidy) {
            $generated['output'] = MWTidy::tidy($generated['output']);
        }
        self::memcacheSave($args['cacheKey'], $generated);
        $table = $generated['output'];
    } else {
        $table = $cached['output'];
    }

    // The text between <compatibility> and </compatibility> ($input) is
    // deliberately not rendered: it has no use for now.

    $out = '';
    if ($wgCompatablesUseESI === true) {
        $urlArgs = array(
            'topic' => $args['topic'],
            'feature' => $args['feature'],
            'format' => $args['format'],
            'foresi' => 1,
        );
        // @TODO: this breaks in ESI level if $url ends up http for https views
        $esiTitle = SpecialPage::getTitleFor('Compatables');
        $urlHelper = $esiTitle->getFullUrl($urlArgs);
        $out .= self::applyEsiTags($table, wfExpandUrl($urlHelper, PROTO_INTERNAL));
    } else {
        $out .= $table;
        // Without ESI the parser cache entry is limited to six hours
        // (worse cache hit rate).
        $parser->getOutput()->updateCacheExpiry(6 * 3600);
    }

    return $out;
}
/**
 * Replace the output with an error page if the HTML is not valid.
 *
 * When MWTidy::checkErrors() accepts the document it is returned untouched.
 * Otherwise a standalone page is built with the Html helper that lists the
 * reported errors, the raw error text, and the source with the offending
 * lines highlighted and anchored as "line-N".
 *
 * @param string $s The HTML about to be sent
 *
 * @return string Either $s unchanged, or the error report page
 */
function wfHtmlValidationHandler($s) {
    $errors = '';
    if (MWTidy::checkErrors($s, $errors)) {
        return $s;
    }

    header('Cache-Control: no-cache');

    $out = Html::element('h1', null, 'HTML validation error');
    $out .= Html::openElement('ul');

    // Collect the line numbers named in the report and link each message
    // to the matching source line below.
    $badLines = array();
    foreach (explode("\n", $errors) as $error) {
        if (preg_match('/^line (\\d+)/', $error, $m)) {
            $lineNum = intval($m[1]);
            $badLines[$lineNum] = true;
            $out .= Html::rawElement(
                'li',
                null,
                Html::element('a', array('href' => "#line-{$lineNum}"), $error)
            ) . "\n";
        }
    }

    $out .= Html::closeElement('ul');
    $out .= Html::element('pre', null, $errors);
    $out .= Html::openElement('ol') . "\n";

    // explode() is used rather than strtok(): strtok() silently skips empty
    // tokens, so blank source lines would be dropped and every following
    // line would be numbered (and highlighted) too low.
    foreach (explode("\n", $s) as $index => $line) {
        $i = $index + 1;
        $attrs = array();
        if (isset($badLines[$i])) {
            $attrs['class'] = 'highlight';
            $attrs['id'] = "line-{$i}";
        }
        $out .= Html::element('li', $attrs, $line) . "\n";
    }

    $out .= Html::closeElement('ol');

    $style = <<<CSS
.highlight { background-color: #ffc }
li { white-space: pre }
CSS;

    // Wrap the report body in a complete document.
    $out = Html::htmlHeader(array('lang' => 'en', 'dir' => 'ltr')) .
        Html::rawElement(
            'head',
            null,
            Html::element('title', null, 'HTML validation error') .
            Html::inlineStyle($style)
        ) .
        Html::rawElement('body', null, $out) .
        Html::closeElement('html');

    return $out;
}
/**
 * Convert wikitext to HTML
 * Do not call this function recursively.
 *
 * The revision-related members are overridden for the duration of the
 * parse when $revid is given and restored before returning.
 *
 * @param string $text text we want to parse
 * @param $title Title object
 * @param $options ParserOptions
 * @param $linestart boolean
 * @param $clearState boolean
 * @param int $revid number to pass in {{REVISIONID}}
 * @return ParserOutput a ParserOutput
 */
public function parse($text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null) {
    /**
     * First pass--just handle <nowiki> sections, pass the rest off
     * to internalParse() which does all the real work.
     */
    global $wgUseTidy, $wgAlwaysUseTidy, $wgShowHostnames;

    $fname = __METHOD__ . '-' . wfGetCaller();
    wfProfileIn(__METHOD__);
    wfProfileIn($fname);

    $this->startParse($title, $options, self::OT_HTML, $clearState);

    $this->mInputSize = strlen($text);
    if ($this->mOptions->getEnableLimitReport()) {
        $this->mOutput->resetParseStartTime();
    }

    # Remove the strip marker tag prefix from the input, if present.
    if ($clearState) {
        $text = str_replace($this->mUniqPrefix, '', $text);
    }

    # Remember the revision state so it can be restored at the end.
    $oldRevisionId = $this->mRevisionId;
    $oldRevisionObject = $this->mRevisionObject;
    $oldRevisionTimestamp = $this->mRevisionTimestamp;
    $oldRevisionUser = $this->mRevisionUser;
    $oldRevisionSize = $this->mRevisionSize;
    if ($revid !== null) {
        $this->mRevisionId = $revid;
        $this->mRevisionObject = null;
        $this->mRevisionTimestamp = null;
        $this->mRevisionUser = null;
        $this->mRevisionSize = null;
    }

    wfRunHooks('ParserBeforeStrip', array(&$this, &$text, &$this->mStripState));
    # No more strip!
    wfRunHooks('ParserAfterStrip', array(&$this, &$text, &$this->mStripState));
    $text = $this->internalParse($text);
    wfRunHooks('ParserAfterParse', array(&$this, &$text, &$this->mStripState));

    $text = $this->mStripState->unstripGeneral($text);

    # Clean up special characters, only run once, next-to-last before doBlockLevels.
    # The non-breaking space is written as the explicit entity &#160; so that
    # it cannot be confused with (or silently turned into) an ordinary space.
    $fixtags = array(
        # A space before ? : ; ! % or a right guillemet becomes non-breaking,
        # but only if there is something before the space.
        '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
        # The space after a left guillemet becomes non-breaking.
        '/(\\302\\253) /' => '\\1&#160;',
        # Undo the above in front of the CSS keyword "!important".
        '/&#160;(!\\s*important)/' => ' \\1',
    );
    $text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);

    $text = $this->doBlockLevels($text, $linestart);

    $this->replaceLinkHolders($text);

    /**
     * The input doesn't get language converted if
     * a) It's disabled
     * b) Content isn't converted
     * c) It's a conversion table
     * d) it is an interface message (which is in the user language)
     */
    if (!($options->getDisableContentConversion()
        || isset($this->mDoubleUnderscores['nocontentconvert']))
    ) {
        if (!$this->mOptions->getInterfaceMessage()) {
            # The position of the convert() call should not be changed. it
            # assumes that the links are all replaced and the only thing left
            # is the <nowiki> mark.
            $text = $this->getConverterLanguage()->convert($text);
        }
    }

    /**
     * A converted title will be provided in the output object if title and
     * content conversion are enabled, the article text does not contain
     * a conversion-suppressing double-underscore tag, and no
     * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
     * automatic link conversion.
     */
    if (!($options->getDisableTitleConversion()
        || isset($this->mDoubleUnderscores['nocontentconvert'])
        || isset($this->mDoubleUnderscores['notitleconvert'])
        || $this->mOutput->getDisplayTitle() !== false)
    ) {
        $convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
        if ($convruletitle) {
            $this->mOutput->setTitleText($convruletitle);
        } else {
            $titleText = $this->getConverterLanguage()->convertTitle($title);
            $this->mOutput->setTitleText($titleText);
        }
    }

    $text = $this->mStripState->unstripNoWiki($text);

    wfRunHooks('ParserBeforeTidy', array(&$this, &$text));

    $text = $this->replaceTransparentTags($text);
    $text = $this->mStripState->unstripGeneral($text);

    $text = Sanitizer::normalizeCharReferences($text);

    if ($wgUseTidy && $this->mOptions->getTidy() || $wgAlwaysUseTidy) {
        $text = MWTidy::tidy($text);
    } else {
        # attempt to sanitize at least some nesting problems
        # (bug #2702 and quite a few others)
        $tidyregs = array(
            '/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
            '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>',
            '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3<div\\5>\\6</div>\\8\\9',
            '/<([bi])><\\/\\1>/' => '',
        );

        $text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
    }

    if ($this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit()) {
        $this->limitationWarn(
            'expensive-parserfunction',
            $this->mExpensiveFunctionCount,
            $this->mOptions->getExpensiveParserFunctionLimit()
        );
    }

    wfRunHooks('ParserAfterTidy', array(&$this, &$text));

    # Information on include size limits, for the benefit of users who try to skirt them
    if ($this->mOptions->getEnableLimitReport()) {
        $max = $this->mOptions->getMaxIncludeSize();

        $cpuTime = $this->mOutput->getTimeSinceStart('cpu');
        if ($cpuTime !== null) {
            $this->mOutput->setLimitReportData('limitreport-cputime', sprintf("%.3f", $cpuTime));
        }

        $wallTime = $this->mOutput->getTimeSinceStart('wall');
        $this->mOutput->setLimitReportData('limitreport-walltime', sprintf("%.3f", $wallTime));

        $this->mOutput->setLimitReportData(
            'limitreport-ppvisitednodes',
            array($this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount())
        );
        $this->mOutput->setLimitReportData(
            'limitreport-ppgeneratednodes',
            array($this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount())
        );
        $this->mOutput->setLimitReportData(
            'limitreport-postexpandincludesize',
            array($this->mIncludeSizes['post-expand'], $max)
        );
        $this->mOutput->setLimitReportData(
            'limitreport-templateargumentsize',
            array($this->mIncludeSizes['arg'], $max)
        );
        $this->mOutput->setLimitReportData(
            'limitreport-expansiondepth',
            array($this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth())
        );
        $this->mOutput->setLimitReportData(
            'limitreport-expensivefunctioncount',
            array($this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit())
        );
        wfRunHooks('ParserLimitReportPrepare', array($this, $this->mOutput));

        $limitReport = "NewPP limit report\n";
        if ($wgShowHostnames) {
            $limitReport .= 'Parsed by ' . wfHostname() . "\n";
        }
        foreach ($this->mOutput->getLimitReportData() as $key => $value) {
            if (wfRunHooks('ParserLimitReportFormat', array($key, &$value, &$limitReport, false, false))) {
                $keyMsg = wfMessage($key)->inLanguage('en')->useDatabase(false);
                $valueMsg = wfMessage(array("{$key}-value-text", "{$key}-value"))
                    ->inLanguage('en')->useDatabase(false);
                if (!$valueMsg->exists()) {
                    $valueMsg = new RawMessage('$1');
                }
                if (!$keyMsg->isDisabled() && !$valueMsg->isDisabled()) {
                    $valueMsg->params($value);
                    $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
                }
            }
        }
        // Since we're not really outputting HTML, decode the entities and
        // then re-encode the things that need hiding inside HTML comments.
        $limitReport = htmlspecialchars_decode($limitReport);
        wfRunHooks('ParserLimitReport', array($this, &$limitReport));

        // Sanitize for comment. Note '‐' in the replacement is U+2010,
        // which looks much like the problematic '-'. Ampersands are
        // re-encoded as &amp; (replacing '&' with '&' would do nothing).
        $limitReport = str_replace(array('-', '&'), array('‐', '&amp;'), $limitReport);
        $text .= "\n<!-- \n{$limitReport}-->\n";

        if ($this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10) {
            wfDebugLog(
                'generated-pp-node-count',
                $this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey()
            );
        }
    }

    $this->mOutput->setText($text);

    # Restore the revision state saved at the top.
    $this->mRevisionId = $oldRevisionId;
    $this->mRevisionObject = $oldRevisionObject;
    $this->mRevisionTimestamp = $oldRevisionTimestamp;
    $this->mRevisionUser = $oldRevisionUser;
    $this->mRevisionSize = $oldRevisionSize;
    $this->mInputSize = false;

    wfProfileOut($fname);
    wfProfileOut(__METHOD__);

    return $this->mOutput;
}
/**
 * Deprecated pass-through to html tidy: records the deprecated call and
 * hands the text to MWTidy::tidy().
 *
 * @deprecated Use MWTidy::tidy()
 * @param string $text Text to hand to MWTidy::tidy()
 * @return mixed Whatever MWTidy::tidy() returns
 */
public static function tidy($text) {
    wfDeprecated(__METHOD__);

    $tidied = MWTidy::tidy($text);

    return $tidied;
}
/**
 * Serve the compatibility table for the requested topic/feature/format,
 * or handle a cache purge request (action=purge).
 *
 * @param string|null $par Subpage part of the request; not used here.
 * @throws PermissionsError On a purge request from a user without the
 *   'purgecompatables' right, or when ESI is disabled
 */
public function execute($par) {
    global $wgCompatablesUseESI, $wgUseTidy, $wgAlwaysUseTidy, $wgCompatablesJsonFileUrl;

    $this->setHeaders();

    $args['topic'] = $this->getRequest()->getVal('topic');
    $args['feature'] = $this->getRequest()->getVal('feature');
    $args['format'] = $this->getRequest()->getVal('format');
    $args['cacheKey'] = wfMemcKey('compatables', $args['format'], $args['topic'], $args['feature']);

    // Handle purge requests from admins...
    // @TODO: Varnish, which only supports a few bits of ESI, can not handle this
    // (https://www.varnish-cache.org/docs/3.0/tutorial/esi.html)
    // (https://www.varnish-cache.org/trac/wiki/Future_ESI)
    if ($this->getRequest()->getVal('action') === 'purge') {
        // Check the right before touching any cache, so that users without
        // it cannot trigger the memcache removals and the PURGE request.
        if (!$this->getUser()->isAllowed('purgecompatables')) {
            throw new PermissionsError('purgecompatables');
        }

        // See https://github.com/webplatform/mediawiki/issues/16 #TODO
        Compatables::memcacheRemove(wfMemcKey('webplatformdocs', 'compatables', 'data', 'full'));
        Compatables::memcacheRemove($args['cacheKey']);

        try {
            $req = MWHttpRequest::factory($wgCompatablesJsonFileUrl, array('method' => 'PURGE'));
            $req->execute();
            wfDebugLog('CompaTables', 'cURL PURGE done to "' . $wgCompatablesJsonFileUrl . '"');
        } catch (Exception $e) {
            // Best effort: a failed upstream purge is only logged.
            wfDebugLog(
                'CompaTables',
                'Had problems with cURL PURGE to "' . $wgCompatablesJsonFileUrl . '", message ' . $e->getMessage()
            );
        }

        if ($wgCompatablesUseESI) {
            // Get the ESI URL prefix to purge
            $urlPrefix = SpecialPage::getTitleFor('Compatables')->getFullUrl();
            $urlPrefix = wfExpandUrl($urlPrefix, PROTO_INTERNAL);

            // Include as an in-band ESI invalidation request
            $this->getOutput()->addHtml(
                "\n<esi:invalidate>\n" .
                "<?xml version=\"1.0\"?>\n" .
                "<!DOCTYPE INVALIDATION SYSTEM \"internal:///WCSinvalidation.dtd\">\n" .
                "<INVALIDATION VERSION=\"WCS-1.1\">\n" .
                "<OBJECT>\n" .
                Xml::element('ADVANCEDSELECTOR', array('URIPREFIX' => $urlPrefix)) .
                "\n<ACTION REMOVALTTL=\"0\"/>\n" .
                "</OBJECT>\n" .
                "</INVALIDATION>\n" .
                "</esi:invalidate>\n"
            );
            $this->getOutput()->addWikiMsg('compatables-purged');
            return;
        }

        // NOTE(review): kept from the original -- with ESI disabled even an
        // authorised purge ends in a PermissionsError. Confirm this is intended.
        throw new PermissionsError('purgecompatables');
    }

    // 1 hour server-side cache max before revalidate
    $this->getOutput()->setSquidMaxage(Compatables::MAX_AGE);

    // Try to handle IMS GET requests from CDN efficiently
    // $data['timestamp'] has been added, to not have to do
    // 2 HTTP queries
    $data = Compatables::getData();
    if ($data === null) {
        // NOTE(review): assumes getData() signals "no data" with null, as
        // the null check in renderCompaTables suggests -- confirm.
        wfDebugLog('CompaTables', 'Could not generate table, data is either empty or had problems.');
        $this->getOutput()->addHtml(
            '<!-- Compatables: Could not generate table, data might be empty or had problems with caching -->'
        );
        return;
    }
    if ($this->getOutput()->checkLastModified($data['timestamp'])) {
        return; // nothing to send (cache hit)
    }

    $cached = Compatables::memcacheRead($args['cacheKey'], $data['hash']);
    if ($cached !== false) {
        $table = $cached['output'];
    } else {
        $generated = Compatables::generateCompaTable($data, $args);
        // There is no parser (and hence no ParserOptions) on a special
        // page; the previous reference to an undefined $parser was a fatal
        // error whenever $wgUseTidy was enabled.
        if ($wgUseTidy || $wgAlwaysUseTidy) {
            $generated['output'] = MWTidy::tidy($generated['output']);
        }
        Compatables::memcacheSave($args['cacheKey'], $generated);
        $table = $generated['output'];
    }

    $this->getOutput()->addHtml('<h1>Compatibility data</h1>');
    $this->getOutput()->addHtml($table);

    // Requests made on behalf of an ESI include get the bare body only.
    if ($this->getRequest()->getBool('foresi')) {
        $this->getOutput()->setArticleBodyOnly(true);
    }
}
/**
 * Get data of requested article.
 *
 * The result is looked up in $wgMemc first; on a miss it is built from the
 * file page or the parser output, optionally run through MobileFormatter,
 * split into sections and stored again when the HTML is large enough.
 *
 * @param Title $title
 * @param boolean $noImages
 * @return array Empty when a followed redirect points into a negative
 *   namespace
 */
private function getData(Title $title, $noImages) {
    global $wgMemc, $wgUseTidy, $wgMFTidyMobileViewSections, $wgMFMinCachedPageSize,
        $wgMFSpecialCaseMainPage;

    $page = $this->makeWikiPage($title);
    if ($this->followRedirects && $page->isRedirect()) {
        $redirectTarget = $page->getRedirectTarget();
        if ($redirectTarget) {
            $title = $redirectTarget;
            $this->getResult()->addValue(
                null,
                $this->getModuleName(),
                array('redirected' => $title->getPrefixedText())
            );
            if ($title->getNamespace() < 0) {
                $this->getResult()->addValue(
                    null,
                    $this->getModuleName(),
                    array('viewable' => 'no')
                );
                return array();
            }
            $page = $this->makeWikiPage($title);
        }
    }

    $latest = $page->getLatest();
    if ($this->file) {
        $cacheKey = wfMemcKey(
            'mf', 'mobileview', self::CACHE_VERSION, $noImages, $latest,
            $this->noTransform, $this->file->getSha1(), $this->variant
        );
        $cacheExpiry = 3600;
    } else {
        if (!$latest) {
            // https://bugzilla.wikimedia.org/show_bug.cgi?id=53378
            // An earlier existence check doesn't seem to always catch
            // recently deleted pages.
            $this->dieUsageMsg(array('notanarticle', $title->getPrefixedText()));
        }
        $parserOptions = $this->makeParserOptions($page);
        $parserCacheKey = ParserCache::singleton()->getKey($page, $parserOptions);
        $cacheKey = wfMemcKey(
            'mf', 'mobileview', self::CACHE_VERSION, $noImages, $latest,
            $this->noTransform, $parserCacheKey
        );
    }

    $data = $wgMemc->get($cacheKey);
    if ($data) {
        wfIncrStats('mobile.view.cache-hit');
        return $data;
    }
    wfIncrStats('mobile.view.cache-miss');

    if ($this->file) {
        $html = $this->getFilePage($title);
    } else {
        $parserOutput = $this->getParserOutput($page, $parserOptions);
        $html = $parserOutput->getText();
        $cacheExpiry = $parserOutput->getCacheExpiry();
    }

    if (!$this->noTransform) {
        $formatter = new MobileFormatter(MobileFormatter::wrapHTML($html), $title);
        $formatter->setRemoveMedia($noImages);
        $formatter->filterContent();
        $formatter->setIsMainPage($this->mainPage && $wgMFSpecialCaseMainPage);
        $html = $formatter->getText();
    }

    if ($this->mainPage || $this->file) {
        // Main page and file pages are delivered as one unsectioned text.
        $data = array(
            'sections' => array(),
            'text' => array($html),
            'refsections' => array(),
        );
    } else {
        $data = array();
        $data['sections'] = $parserOutput->getSections();
        $sectionCount = count($data['sections']);
        for ($i = 0; $i < $sectionCount; $i++) {
            $data['sections'][$i]['line'] =
                $title->getPageLanguage()->convert($data['sections'][$i]['line']);
        }

        // Split in front of every heading; the "<h" that the split eats is
        // put back on each chunk but the first.
        $chunks = preg_split('/<h(?=[1-6]\\b)/i', $html);
        if (count($chunks) != count($data['sections']) + 1) {
            wfDebugLog('mobile', __METHOD__ . "(): mismatching number of " .
                "sections from parser and split on page {$title->getPrefixedText()}, oldid={$latest}");
            // We can't be sure about anything here, return all page HTML as one big section
            $chunks = array($html);
            $data['sections'] = array();
        }

        $tidySections = $wgUseTidy && $wgMFTidyMobileViewSections && count($chunks) > 1;

        $data['text'] = array();
        $data['refsections'] = array();
        foreach ($chunks as $chunk) {
            $position = count($data['text']);
            if ($position) {
                $chunk = "<h{$chunk}";
            }
            if ($tidySections) {
                $chunk = MWTidy::tidy($chunk);
            }
            if (preg_match('/<ol\\b[^>]*?class="references"/', $chunk)) {
                $data['refsections'][$position] = true;
            }
            $data['text'][] = $chunk;
        }

        if ($this->usePageImages) {
            $image = $this->getPageImage($title);
            if ($image) {
                $data['image'] = $image->getTitle()->getText();
            }
        }
    }

    $data['lastmodified'] = wfTimestamp(TS_ISO_8601, $page->getTimestamp());

    // Page id
    $data['id'] = $page->getId();

    $lastEditor = User::newFromId($page->getUser());
    if ($lastEditor->isAnon()) {
        $data['lastmodifiedby'] = null;
    } else {
        $data['lastmodifiedby'] = array(
            'name' => $page->getUserText(),
            'gender' => $lastEditor->getOption('gender'),
        );
    }

    $data['revision'] = $title->getLatestRevID();

    if (isset($parserOutput)) {
        $languages = $parserOutput->getLanguageLinks();
        $data['languagecount'] = count($languages);
        $data['displaytitle'] = $parserOutput->getDisplayTitle();
        // @fixme: Does not work for some extension properties that get added in LinksUpdate
        $data['pageprops'] = $parserOutput->getProperties();
    } else {
        $data['languagecount'] = 0;
        $data['displaytitle'] = $title->getPrefixedText();
        $data['pageprops'] = array();
    }

    if ($title->getPageLanguage()->hasVariants()) {
        $data['hasvariants'] = true;
    }

    // Don't store small pages to decrease cache size requirements
    if (strlen($html) >= $wgMFMinCachedPageSize) {
        // store for the same time as original parser output
        $wgMemc->set($cacheKey, $data, $cacheExpiry);
    }

    return $data;
}
/**
 * Destroy the current singleton instance.
 *
 * Resets the static $instance member to null; nothing else is touched.
 */
public static function destroySingleton() {
    self::$instance = null;
}
/**
 * Check Sanitizer::removeHTMLtags() against the provided input/output pairs.
 *
 * @dataProvider dataRemoveHTMLtags
 * @covers Sanitizer::removeHTMLtags
 */
public function testRemoveHTMLtags($input, $output, $msg = null) {
    // NOTE(review): presumably this disables Tidy for the test -- confirm
    // against MWTidy::setInstance().
    MWTidy::setInstance(false);

    $sanitized = Sanitizer::removeHTMLtags($input);

    $this->assertEquals($output, $sanitized, $msg);
}
/**
 * Truncate a string of HTML so that at most $length characters of displayed
 * text remain, appending an ellipsis and closing any tags left open.
 *
 * The input is first passed through MWTidy::tidy(), then scanned byte by
 * byte with a small state machine that tracks whether the cursor is inside
 * a tag, a tag name or a character entity. Entities count as one displayed
 * character. Truncation only happens when the remaining text is longer than
 * the ellipsis itself, so the result is never longer than the input.
 *
 * @param string $text HTML to truncate
 * @param int $length Maximum number of displayed characters (bytes of text
 *   outside tags; an entity counts as one)
 * @param string $ellipsis Text appended on truncation; the literal '...'
 *   is replaced by the localized 'ellipsis' message
 * @return string The ellipsis alone if $length <= 0, the untouched input if
 *   it is already short enough, '' if nothing is displayable, otherwise the
 *   (possibly truncated) HTML with open tags closed
 */
function truncateHtml($text, $length, $ellipsis = '...') {
    # Use the localized ellipsis character
    if ($ellipsis == '...') {
        $ellipsis = wfMsgExt('ellipsis', array('escapenoentities', 'language' => $this));
    }
    # Check if there is no need to truncate
    if ($length <= 0) {
        return $ellipsis; // no text shown, nothing to format
    } elseif (strlen($text) <= $length) {
        return $text; // string short enough even *with* HTML
    }

    $text = MWTidy::tidy($text); // fix tags
    $displayLen = 0; // innerHTML length so far
    $testingEllipsis = false; // checking if ellipses will make string longer/equal?
    $tagType = 0; // 0-open, 1-close
    $bracketState = 0; // 1-tag start, 2-tag name, 0-neither
    $entityState = 0; // 0-not entity, 1-entity
    $tag = $ret = '';
    $openTags = array(); // open tag stack
    $textLen = strlen($text);

    for ($pos = 0; $pos < $textLen; ++$pos) {
        $ch = $text[$pos];
        $lastCh = $pos ? $text[$pos - 1] : '';
        $ret .= $ch; // add to result string

        if ($ch == '<') {
            $this->truncate_endBracket($tag, $tagType, $lastCh, $openTags); // for bad HTML
            $entityState = 0; // for bad HTML
            $bracketState = 1; // tag started (checking for a slash next)
        } elseif ($ch == '>') {
            $this->truncate_endBracket($tag, $tagType, $lastCh, $openTags);
            $entityState = 0; // for bad HTML
            $bracketState = 0; // out of brackets
        } elseif ($bracketState == 1) {
            if ($ch == '/') {
                $tagType = 1; // close tag (e.g. "</span>")
            } else {
                $tagType = 0; // open tag (e.g. "<span>")
                $tag .= $ch;
            }
            $bracketState = 2; // building tag name
        } elseif ($bracketState == 2) {
            if ($ch != ' ') {
                $tag .= $ch;
            } else {
                // Name found (e.g. "<a href=..."), add on tag attributes...
                $pos += $this->truncate_skip($ret, $text, "<>", $pos + 1);
            }
        } elseif ($bracketState == 0) {
            if ($entityState) {
                if ($ch == ';') {
                    $entityState = 0;
                    $displayLen++; // entity is one displayed char
                }
            } else {
                if ($ch == '&') {
                    $entityState = 1; // entity found, (e.g. "&#160;")
                } else {
                    $displayLen++; // this char is displayed
                    // Add on the other display text after this...
                    $skipped = $this->truncate_skip($ret, $text, "<>&", $pos + 1, $length - $displayLen);
                    $displayLen += $skipped;
                    $pos += $skipped;
                }
            }
        }

        # Consider truncation once the display length has reached the maximum.
        # Double-check that we're not in the middle of a bracket/entity...
        if ($displayLen >= $length && $bracketState == 0 && $entityState == 0) {
            if (!$testingEllipsis) {
                $testingEllipsis = true;
                # Save where we are; we will truncate here unless
                # the ellipsis actually makes the string longer.
                $pOpenTags = $openTags; // save state
                $pRet = $ret; // save state
            } elseif ($displayLen > $length + strlen($ellipsis)) {
                # Ellipsis won't make string longer/equal, the truncation point was OK.
                $openTags = $pOpenTags; // reload state
                $ret = $this->removeBadCharLast($pRet); // reload state, multi-byte char fix
                $ret .= $ellipsis; // add ellipsis
                break;
            }
        }
    }

    if ($displayLen == 0) {
        return ''; // no text shown, nothing to format
    }

    // Close the last tag if left unclosed by bad HTML.
    // Argument order (tag type before last character) now matches the two
    // calls inside the loop; previously these two arguments were swapped.
    $this->truncate_endBracket($tag, $tagType, $text[$textLen - 1], $openTags);
    while (count($openTags) > 0) {
        $ret .= '</' . array_pop($openTags) . '>'; // close open tags
    }

    return $ret;
}
/**
 * Cleans up HTML, removes dangerous tags and attributes, and
 * removes HTML comments.
 *
 * The text is split on '<'. Each piece that parses as a recognized element
 * (per self::ELEMENT_BITS_REGEX and the tag tables from
 * getRecognizedTagData()) is re-emitted with validated, filtered attributes.
 * Every other piece is emitted as literal text, with its leading '<' and
 * any '>' escaped to '&lt;' / '&gt;' so that it cannot act as markup.
 *
 * When MWTidy is not enabled, a tag stack is additionally maintained to
 * reject mismatched closing tags, disallowed nesting and table elements
 * outside a table, and to close tags still open at the end. When MWTidy is
 * enabled only per-tag validation is done here.
 *
 * @param string $text
 * @param callable $processCallback Callback to do any variable or parameter
 *   replacements in HTML attribute values
 * @param array|bool $args Arguments for the processing callback
 * @param array $extratags For any extra tags to include
 * @param array $removetags For any tags (default or extra) to exclude
 * @return string
 */
public static function removeHTMLtags($text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array()) {
    // Brings the tag lookup tables used below into local scope
    // ($htmlelements, $htmlsingleonly, $htmlsingleallowed, $htmlsingle,
    // $htmlpairs, $htmlnest, $htmllist, $listtags, $tabletags).
    extract(self::getRecognizedTagData($extratags, $removetags));

    # Remove HTML comments
    $text = Sanitizer::removeHTMLcomments($text);
    $bits = explode('<', $text);
    // Text before the first '<' can contain no tag; escape stray '>'.
    $text = str_replace('>', '&gt;', array_shift($bits));

    if (!MWTidy::isEnabled()) {
        $tagstack = $tablestack = array();
        foreach ($bits as $x) {
            $regs = array();
            # $slash: Does the current element start with a '/'?
            # $t: Current element name
            # $params: String between element name and >
            # $brace: Ending '>' or '/>'
            # $rest: Everything until the next element of $bits
            if (preg_match(self::ELEMENT_BITS_REGEX, $x, $regs)) {
                list(, $slash, $t, $params, $brace, $rest) = $regs;
            } else {
                $slash = $t = $params = $brace = $rest = null;
            }

            $badtag = false;
            if (isset($htmlelements[$t = strtolower($t)])) {
                # Check our stack
                if ($slash && isset($htmlsingleonly[$t])) {
                    $badtag = true;
                } elseif ($slash) {
                    # Closing a tag... is it the one we just opened?
                    MediaWiki\suppressWarnings();
                    $ot = array_pop($tagstack);
                    MediaWiki\restoreWarnings();

                    if ($ot != $t) {
                        if (isset($htmlsingleallowed[$ot])) {
                            # Pop all elements with an optional close tag
                            # and see if we find a match below them
                            $optstack = array();
                            array_push($optstack, $ot);
                            MediaWiki\suppressWarnings();
                            $ot = array_pop($tagstack);
                            MediaWiki\restoreWarnings();
                            while ($ot != $t && isset($htmlsingleallowed[$ot])) {
                                array_push($optstack, $ot);
                                MediaWiki\suppressWarnings();
                                $ot = array_pop($tagstack);
                                MediaWiki\restoreWarnings();
                            }
                            if ($t != $ot) {
                                # No match. Push the optional elements back again
                                $badtag = true;
                                MediaWiki\suppressWarnings();
                                $ot = array_pop($optstack);
                                MediaWiki\restoreWarnings();
                                while ($ot) {
                                    array_push($tagstack, $ot);
                                    MediaWiki\suppressWarnings();
                                    $ot = array_pop($optstack);
                                    MediaWiki\restoreWarnings();
                                }
                            }
                        } else {
                            MediaWiki\suppressWarnings();
                            array_push($tagstack, $ot);
                            MediaWiki\restoreWarnings();

                            # <li> can be nested in <ul> or <ol>, skip those cases:
                            if (!isset($htmllist[$ot]) || !isset($listtags[$t])) {
                                $badtag = true;
                            }
                        }
                    } else {
                        if ($t == 'table') {
                            $tagstack = array_pop($tablestack);
                        }
                    }
                    $newparams = '';
                } else {
                    # Keep track for later
                    if (isset($tabletags[$t]) && !in_array('table', $tagstack)) {
                        $badtag = true;
                    } elseif (in_array($t, $tagstack) && !isset($htmlnest[$t])) {
                        $badtag = true;
                    # Is it a self closed htmlpair ? (bug 5487)
                    } elseif ($brace == '/>' && isset($htmlpairs[$t])) {
                        $badtag = true;
                    } elseif (isset($htmlsingleonly[$t])) {
                        # Hack to force empty tag for unclosable elements
                        $brace = '/>';
                    } elseif (isset($htmlsingle[$t])) {
                        # Hack to not close $htmlsingle tags
                        $brace = null;
                        # Still need to push this optionally-closed tag to
                        # the tag stack so that we can match end tags
                        # instead of marking them as bad.
                        array_push($tagstack, $t);
                    } elseif (isset($tabletags[$t]) && in_array($t, $tagstack)) {
                        // New table tag but forgot to close the previous one
                        $text .= "</{$t}>";
                    } else {
                        if ($t == 'table') {
                            array_push($tablestack, $tagstack);
                            $tagstack = array();
                        }
                        array_push($tagstack, $t);
                    }

                    # Replace any variables or template parameters with
                    # plaintext results.
                    if (is_callable($processCallback)) {
                        call_user_func_array($processCallback, array(&$params, $args));
                    }

                    if (!Sanitizer::validateTag($params, $t)) {
                        $badtag = true;
                    }

                    # Strip non-approved attributes from the tag
                    $newparams = Sanitizer::fixTagAttributes($params, $t);
                }
                if (!$badtag) {
                    $rest = str_replace('>', '&gt;', $rest);
                    $close = $brace == '/>' && !$slash ? ' /' : '';
                    $text .= "<{$slash}{$t}{$newparams}{$close}>{$rest}";
                    continue;
                }
            }
            // Unrecognized or rejected tag: output it as escaped literal text.
            $text .= '&lt;' . str_replace('>', '&gt;', $x);
        }
        # Close off any remaining tags
        while (is_array($tagstack) && ($t = array_pop($tagstack))) {
            $text .= "</{$t}>\n";
            if ($t == 'table') {
                $tagstack = array_pop($tablestack);
            }
        }
    } else {
        # this might be possible using tidy itself
        foreach ($bits as $x) {
            if (preg_match(self::ELEMENT_BITS_REGEX, $x, $regs)) {
                list(, $slash, $t, $params, $brace, $rest) = $regs;

                $badtag = false;
                if (isset($htmlelements[$t = strtolower($t)])) {
                    if (is_callable($processCallback)) {
                        call_user_func_array($processCallback, array(&$params, $args));
                    }

                    if (!Sanitizer::validateTag($params, $t)) {
                        $badtag = true;
                    }

                    $newparams = Sanitizer::fixTagAttributes($params, $t);
                    if (!$badtag) {
                        $rest = str_replace('>', '&gt;', $rest);
                        $text .= "<{$slash}{$t}{$newparams}{$brace}{$rest}";
                        continue;
                    }
                }
            }
            // Unrecognized or rejected tag: output it as escaped literal text.
            $text .= '&lt;' . str_replace('>', '&gt;', $x);
        }
    }

    return $text;
}
/**
 * Convert wikitext to HTML.
 * Do not call this function recursively.
 *
 * Runs the parse pipeline in a fixed order: internalParse(), unstrip,
 * special-character clean-up, block levels, link holders, language
 * conversion, tidy (or the regex fallback) and the optional limit report.
 * The result is stored in $this->mOutput. The revision-related members are
 * temporarily overridden when $revid is given and restored before returning.
 *
 * @param $text String: text we want to parse
 * @param $title Title object
 * @param $options ParserOptions
 * @param $linestart boolean
 * @param $clearState boolean
 * @param $revid Int: number to pass in {{REVISIONID}}
 * @return ParserOutput a ParserOutput
 */
public function parse($text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null) {
    /**
     * First pass--just handle <nowiki> sections, pass the rest off
     * to internalParse() which does all the real work.
     */
    global $wgUseTidy, $wgAlwaysUseTidy;
    $fname = __METHOD__ . '-' . wfGetCaller();
    wfProfileIn(__METHOD__);
    wfProfileIn($fname);

    $this->startParse($title, $options, self::OT_HTML, $clearState);

    # Remove the strip marker tag prefix from the input, if present.
    if ($clearState) {
        $text = str_replace($this->mUniqPrefix, '', $text);
    }

    // Remember the revision context so it can be restored on exit.
    $oldRevisionId = $this->mRevisionId;
    $oldRevisionObject = $this->mRevisionObject;
    $oldRevisionTimestamp = $this->mRevisionTimestamp;
    $oldRevisionUser = $this->mRevisionUser;
    if ($revid !== null) {
        $this->mRevisionId = $revid;
        $this->mRevisionObject = null;
        $this->mRevisionTimestamp = null;
        $this->mRevisionUser = null;
    }

    wfRunHooks('ParserBeforeStrip', array(&$this, &$text, &$this->mStripState));
    # No more strip!
    wfRunHooks('ParserAfterStrip', array(&$this, &$text, &$this->mStripState));
    $text = $this->internalParse($text);
    wfRunHooks('ParserAfterParse', array(&$this, &$text, &$this->mStripState));

    $text = $this->mStripState->unstripGeneral($text);

    # Clean up special characters, only run once, next-to-last before doBlockLevels.
    # The rules are applied in order: the first two turn the space next to
    # French punctuation / guillemets into a non-breaking space entity, the
    # third undoes that in front of the CSS keyword "!important".
    $fixtags = array(
        # space before ? : ; ! % or a right guillemet, only if preceded by something
        '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
        # space after a left guillemet
        '/(\\302\\253) /' => '\\1&#160;',
        # keep "!important" separated by a normal space
        '/&#160;(!\\s*important)/' => ' \\1',
    );
    $text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);

    $text = $this->doBlockLevels($text, $linestart);

    $this->replaceLinkHolders($text);

    /**
     * The input doesn't get language converted if
     * a) It's disabled
     * b) Content isn't converted
     * c) It's a conversion table
     * d) it is an interface message (which is in the user language)
     */
    if (!($options->getDisableContentConversion() || isset($this->mDoubleUnderscores['nocontentconvert']))) {
        # Run convert unconditionally in 1.18-compatible mode
        global $wgBug34832TransitionalRollback;
        if ($wgBug34832TransitionalRollback || !$this->mOptions->getInterfaceMessage()) {
            # The position of the convert() call should not be changed. it
            # assumes that the links are all replaced and the only thing left
            # is the <nowiki> mark.
            $text = $this->getConverterLanguage()->convert($text);
        }
    }

    /**
     * A converted title will be provided in the output object if title and
     * content conversion are enabled, the article text does not contain
     * a conversion-suppressing double-underscore tag, and no
     * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
     * automatic link conversion.
     */
    if (!($options->getDisableTitleConversion()
        || isset($this->mDoubleUnderscores['nocontentconvert'])
        || isset($this->mDoubleUnderscores['notitleconvert'])
        || $this->mOutput->getDisplayTitle() !== false)
    ) {
        $convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
        if ($convruletitle) {
            $this->mOutput->setTitleText($convruletitle);
        } else {
            $titleText = $this->getConverterLanguage()->convertTitle($title);
            $this->mOutput->setTitleText($titleText);
        }
    }

    $text = $this->mStripState->unstripNoWiki($text);

    wfRunHooks('ParserBeforeTidy', array(&$this, &$text));

    $text = $this->replaceTransparentTags($text);
    $text = $this->mStripState->unstripGeneral($text);

    $text = Sanitizer::normalizeCharReferences($text);

    if ($wgUseTidy && $this->mOptions->getTidy() || $wgAlwaysUseTidy) {
        $text = MWTidy::tidy($text);
    } else {
        # attempt to sanitize at least some nesting problems
        # (bug #2702 and quite a few others)
        $tidyregs = array(
            '/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
            '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>',
            '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3<div\\5>\\6</div>\\8\\9',
            '/<([bi])><\\/\\1>/' => '',
        );

        $text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
    }

    if ($this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit()) {
        $this->limitationWarn('expensive-parserfunction', $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit());
    }

    wfRunHooks('ParserAfterTidy', array(&$this, &$text));

    # Information on include size limits, for the benefit of users who try to skirt them
    if ($this->mOptions->getEnableLimitReport()) {
        $max = $this->mOptions->getMaxIncludeSize();
        $PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$this->mOptions->getExpensiveParserFunctionLimit()}\n";
        $limitReport = "NewPP limit report\n" .
            "Preprocessor visited node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
            "Preprocessor generated node count: " .
            "{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" .
            "Post-expand include size: {$this->mIncludeSizes['post-expand']}/{$max} bytes\n" .
            "Template argument size: {$this->mIncludeSizes['arg']}/{$max} bytes\n" .
            "Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n" .
            $PFreport;
        wfRunHooks('ParserLimitReport', array($this, &$limitReport));
        $text .= "\n<!-- \n{$limitReport}-->\n";

        if ($this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10) {
            wfDebugLog('generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey());
        }
    }
    $this->mOutput->setText($text);

    // Restore the revision context saved at the top.
    $this->mRevisionId = $oldRevisionId;
    $this->mRevisionObject = $oldRevisionObject;
    $this->mRevisionTimestamp = $oldRevisionTimestamp;
    $this->mRevisionUser = $oldRevisionUser;
    wfProfileOut($fname);
    wfProfileOut(__METHOD__);

    return $this->mOutput;
}
/**
 * Apply the setup that depends on a single test's options.
 *
 * Collects global overrides from the test options and its "config" section,
 * installs the tidy driver, content language, user and request context, and
 * returns an object that undoes all of it.
 *
 * @see staticSetup() for more information about setup/teardown
 *
 * @param array $test Test info supplied by TestFileReader
 * @param callable|null $nextTeardown
 * @return ScopedCallback
 */
public function perTestSetup($test, $nextTeardown = null) {
    $teardown = [];

    $this->checkSetupDone('setupDatabase', 'setDatabase');
    $teardown[] = $this->markSetupDone('perTestSetup');

    $opts = $this->parseOptions($test['options']);
    $config = $test['config'];

    // Options that need a default when the test does not mention them.
    $langCode = self::getOptionValue('language', $opts, 'en');
    $variant = self::getOptionValue('variant', $opts, false);
    $maxtoclevel = self::getOptionValue('wgMaxTocLevel', $opts, 999);
    $linkHolderBatchSize = self::getOptionValue('wgLinkHolderBatchSize', $opts, 1000);

    $magicLinkDefaults = ['ISBN' => true, 'PMID' => true, 'RFC' => true];

    $setup = [
        'wgEnableUploads' => self::getOptionValue('wgEnableUploads', $opts, true),
        'wgLanguageCode' => $langCode,
        'wgRawHtml' => self::getOptionValue('wgRawHtml', $opts, false),
        'wgNamespacesWithSubpages' => [0 => isset($opts['subpage'])],
        'wgMaxTocLevel' => $maxtoclevel,
        'wgAllowExternalImages' => self::getOptionValue('wgAllowExternalImages', $opts, true),
        'wgThumbLimits' => [self::getOptionValue('thumbsize', $opts, 180)],
        'wgDefaultLanguageVariant' => $variant,
        'wgLinkHolderBatchSize' => $linkHolderBatchSize,
        // Values given by the test win; the defaults only fill in gaps.
        'wgEnableMagicLinks' => self::getOptionValue('wgEnableMagicLinks', $opts, []) + $magicLinkDefaults,
    ];

    // Each "name=expression" line of the config section overrides a global;
    // the right-hand side is evaluated as PHP.
    if ($config) {
        foreach (explode("\n", $config) as $line) {
            list($var, $value) = explode('=', $line, 2);
            $setup[$var] = eval("return {$value};");
        }
    }

    /** @since 1.20 */
    Hooks::run('ParserTestGlobals', [&$setup]);

    // Create tidy driver (built once and cached on this object)
    $tidy = false;
    if (isset($opts['tidy'])) {
        if ($this->tidyDriver === null) {
            $this->tidyDriver = MWTidy::factory($this->tidySupport->getConfig());
        }
        $tidy = $this->tidyDriver;
    }
    MWTidy::setInstance($tidy);
    $teardown[] = function () {
        MWTidy::destroySingleton();
    };

    // Set content language. This invalidates the magic word cache and title services
    $lang = Language::factory($langCode);
    $setup['wgContLang'] = $lang;
    $reset = function () {
        MagicWord::clearCache();
        $this->resetTitleServices();
    };
    $setup[] = $reset;
    $teardown[] = $reset;

    // Make a user object with the same language
    $user = new User();
    $user->setOption('language', $langCode);
    $setup['wgLang'] = $lang;

    // We (re)set $wgThumbLimits to a single-element array above.
    $user->setOption('thumbsize', 0);

    $setup['wgUser'] = $user;

    // And put both user and language into the context
    $context = RequestContext::getMain();
    $context->setUser($user);
    $context->setLanguage($lang);
    $teardown[] = function () use ($context) {
        // Reset context to the restored globals
        $context->setUser($GLOBALS['wgUser']);
        $context->setLanguage($GLOBALS['wgContLang']);
    };

    $teardown[] = $this->executeSetupSnippets($setup);

    return $this->createTeardownObject($teardown, $nextTeardown);
}
/**
 * Helper function for parse() that transforms half-parsed HTML into fully
 * parsed HTML.
 *
 * Steps, in order: unstrip general markers, special-character clean-up,
 * block levels, link holders, language conversion, unstrip nowiki,
 * transparent tags, char-reference normalization and tidy (or the regex
 * fallback). The ParserAfterUnstrip, ParserBeforeTidy and ParserAfterTidy
 * hooks are only run when $isMain is true.
 *
 * @param string $text
 * @param bool $isMain
 * @param bool $linestart
 * @return string
 */
private function internalParseHalfParsed($text, $isMain = true, $linestart = true) {
    $text = $this->mStripState->unstripGeneral($text);

    if ($isMain) {
        Hooks::run('ParserAfterUnstrip', array(&$this, &$text));
    }

    # Clean up special characters, only run once, next-to-last before doBlockLevels.
    # The rules are applied in order: the first two turn the space next to
    # French punctuation / guillemets into a non-breaking space entity, the
    # third undoes that in front of the CSS keyword "!important".
    $fixtags = array(
        # space before ? : ; ! % or a right guillemet, only if preceded by something
        '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
        # space after a left guillemet
        '/(\\302\\253) /' => '\\1&#160;',
        # keep "!important" separated by a normal space
        '/&#160;(!\\s*important)/' => ' \\1',
    );
    $text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);

    $text = $this->doBlockLevels($text, $linestart);

    $this->replaceLinkHolders($text);

    /**
     * The input doesn't get language converted if
     * a) It's disabled
     * b) Content isn't converted
     * c) It's a conversion table
     * d) it is an interface message (which is in the user language)
     */
    if (!($this->mOptions->getDisableContentConversion() || isset($this->mDoubleUnderscores['nocontentconvert']))) {
        if (!$this->mOptions->getInterfaceMessage()) {
            # The position of the convert() call should not be changed. it
            # assumes that the links are all replaced and the only thing left
            # is the <nowiki> mark.
            $text = $this->getConverterLanguage()->convert($text);
        }
    }

    $text = $this->mStripState->unstripNoWiki($text);

    if ($isMain) {
        Hooks::run('ParserBeforeTidy', array(&$this, &$text));
    }

    $text = $this->replaceTransparentTags($text);
    $text = $this->mStripState->unstripGeneral($text);

    $text = Sanitizer::normalizeCharReferences($text);

    if (MWTidy::isEnabled() && $this->mOptions->getTidy()) {
        $text = MWTidy::tidy($text);
    } else {
        # attempt to sanitize at least some nesting problems
        # (bug #2702 and quite a few others)
        $tidyregs = array(
            '/(<([bi])>)(<([bi])>)?([^<]*)(<\\/?a[^<]*>)([^<]*)(<\\/\\4>)?(<\\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
            '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\\/a>(.*)<\\/a>/' => '\\1\\2</a>\\3</a>\\1\\4</a>',
            '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\\/div>)([^<]*)(<\\/\\2>)/' => '\\1\\3<div\\5>\\6</div>\\8\\9',
            '/<([bi])><\\/\\1>/' => '',
        );

        $text = preg_replace(array_keys($tidyregs), array_values($tidyregs), $text);
    }

    if ($isMain) {
        Hooks::run('ParserAfterTidy', array(&$this, &$text));
    }

    return $text;
}
/**
 * Separate the summary of a wikilog article from its contents.
 *
 * A summary carried by the parser output is preferred. Failing that, the
 * text before the first heading (section zero) is used, provided there is
 * a heading at all.
 *
 * @param $parserOutput ParserOutput object.
 * @return Two-element array with summary and content. Summary may be
 *   NULL if nonexistent.
 */
public static function splitSummaryContent( $parserOutput ) {
    global $wgUseTidy;

    $content = Sanitizer::removeHTMLcomments( $parserOutput->getText() );

    # Parser output contains wikilog output and summary, use it.
    if ( isset( $parserOutput->mExtWikilog ) && $parserOutput->mExtWikilog->mSummary ) {
        $storedSummary = Sanitizer::removeHTMLcomments( $parserOutput->mExtWikilog->mSummary );
        return array( $storedSummary, $content );
    }

    # Otherwise try to extract the summary from the content text.
    $pieces = preg_split( '/<(h[1-6]).*?>.*?<\\/\\1>/i', $content, 2 );
    if ( count( $pieces ) <= 1 ) {
        # Short article with a single section, use no summary and
        # leave to the caller to decide what to do.
        return array( null, $content );
    }

    # Long article with multiple sections, use only the first one.
    $leadSection = $pieces[0];

    # The split may happen on a heading that is not a child of the root
    # element (e.g. <div><h2>...</h2></div>, leaving an open <div> tag).
    # Passing the summary through tidy, when available, handles that.
    if ( $wgUseTidy ) {
        $leadSection = MWTidy::tidy( $leadSection );
    }

    return array( $leadSection, $content );
}
/**
 * Show the special page.
 *
 * Reads the form fields from the request, preprocesses the submitted
 * wikitext (optionally also producing the preprocessor XML), then prints
 * the intro message, the form and, when there was input, the expansion,
 * the optional raw HTML and an HTML preview.
 *
 * @param string|null $subpage Not used by this method
 */
function execute($subpage) {
    global $wgParser, $wgUseTidy, $wgAlwaysUseTidy;

    $this->setHeaders();

    $req = $this->getRequest();
    $titleStr = $req->getText('wpContextTitle');
    // Fall back to this special page's own title when none was given/valid.
    $title = Title::newFromText($titleStr) ?: $this->getPageTitle();
    $input = $req->getText('wpInput');
    $this->generateXML = $req->getBool('wpGenerateXml');
    $this->generateRawHtml = $req->getBool('wpGenerateRawHtml');

    $hasInput = strlen($input) > 0;

    // "Remove comments" defaults to on for the empty form, off on submit.
    $this->removeComments = $req->getBool('wpRemoveComments', !$hasInput);
    $this->removeNowiki = $req->getBool('wpRemoveNowiki', false);

    $output = false;
    if ($hasInput) {
        $options = ParserOptions::newFromContext($this->getContext());
        $options->setRemoveComments($this->removeComments);
        $options->setTidy(true);
        $options->setMaxIncludeSize(self::MAX_INCLUDE_SIZE);

        if ($this->generateXML) {
            $wgParser->startExternalParse($title, $options, OT_PREPROCESS);
            $dom = $wgParser->preprocessToDom($input);
            $xml = method_exists($dom, 'saveXML') ? $dom->saveXML() : $dom->__toString();
        }

        $output = $wgParser->preprocess($input, $title, $options);
    }

    $page = $this->getOutput();
    $page->addWikiMsg('expand_templates_intro');
    $page->addHTML($this->makeForm($titleStr, $input));

    if ($output === false) {
        // Nothing was submitted; only the form is shown.
        return;
    }

    if ($this->generateXML && strlen($output) > 0) {
        $page->addHTML($this->makeOutput($xml, 'expand_templates_xml_output'));
    }

    $expanded = $this->makeOutput($output);

    if ($this->removeNowiki) {
        $nowikiPatterns = array('_<nowiki>_', '_</nowiki>_', '_<nowiki */>_');
        $expanded = preg_replace($nowikiPatterns, '', $expanded);
    }

    if (($wgUseTidy && $options->getTidy()) || $wgAlwaysUseTidy) {
        $expanded = MWTidy::tidy($expanded);
    }

    $page->addHTML($expanded);

    $rawhtml = $this->generateHtml($title, $output);

    if ($this->generateRawHtml && strlen($rawhtml) > 0) {
        $page->addHTML($this->makeOutput($rawhtml, 'expand_templates_html_output'));
    }

    $this->showHtmlPreview($title, $rawhtml, $page);
}
/**
 * Pass text through MWTidy::tidy() when the $wgUseTidy global is true;
 * otherwise hand it back unchanged.
 *
 * @param $text String: the text to tidy
 * @return String
 */
protected function tidy($text) {
    global $wgUseTidy;

    return $wgUseTidy ? MWTidy::tidy($text) : $text;
}
/**
 * Drop the MWTidy singleton, then run the parent tear-down.
 */
protected function tearDown() {
    MWTidy::destroySingleton();

    parent::tearDown();
}
/**
 * Run one parser test case and compare the produced text with the
 * expected result.
 *
 * The test options decide which parser entry point is exercised
 * (pre-save transform, message transform, section get/replace, comment
 * formatting, preload, or a full parse with optional post-processing).
 *
 * @group medium
 * @dataProvider parserTestProvider
 * @param string $desc
 * @param string $input
 * @param string $result
 * @param array $opts
 * @param array $config
 */
public function testParserTest($desc, $input, $result, $opts, $config) {
    $filter = $this->regex;
    if ($filter != '' && !preg_match('/' . $filter . '/', $desc)) {
        $this->assertTrue(true); // XXX: don't flood output with "test made no assertions"
        //$this->markTestSkipped( 'Filtered out by the user' );
        return;
    }

    if (!$this->isWikitextNS(NS_MAIN)) {
        // parser tests frequently assume that the main namespace contains wikitext.
        // @todo When setting up pages, force the content model. Only skip if
        // $wgtContentModelUseDB is false.
        $this->markTestSkipped("Main namespace does not support wikitext,skipping parser test: {$desc}");
    }

    wfDebug("Running parser test: {$desc}\n");

    $opts = $this->parseOptions($opts);
    $context = $this->setupGlobals($opts, $config);

    $user = $context->getUser();
    $options = ParserOptions::newFromContext($context);

    $titleText = isset($opts['title']) ? $opts['title'] : 'Parser test';
    $local = isset($opts['local']);
    $preprocessor = isset($opts['preprocessor']) ? $opts['preprocessor'] : null;
    $parser = $this->getParser($preprocessor);
    $title = Title::newFromText($titleText);

    # Parser test requiring math. Make sure texvc is executable
    # or just skip such tests.
    if (isset($opts['math']) || isset($opts['texvc'])) {
        global $wgTexvc;

        if (!isset($wgTexvc)) {
            $this->markTestSkipped("SKIPPED: \$wgTexvc is not set");
        } elseif (!is_executable($wgTexvc)) {
            $this->markTestSkipped(
                "SKIPPED: texvc binary does not exist or is not executable.\n" .
                "Current configuration is:\n\$wgTexvc = '{$wgTexvc}'"
            );
        }
    }

    if (isset($opts['djvu']) && !$this->djVuSupport->isEnabled()) {
        $this->markTestSkipped("SKIPPED: djvu binaries do not exist or are not executable.\n");
    }

    if (isset($opts['pst'])) {
        $out = $parser->preSaveTransform($input, $title, $user, $options);
    } elseif (isset($opts['msg'])) {
        $out = $parser->transformMsg($input, $options, $title);
    } elseif (isset($opts['section'])) {
        $out = $parser->getSection($input, $opts['section']);
    } elseif (isset($opts['replace'])) {
        list($section, $replace) = array($opts['replace'][0], $opts['replace'][1]);
        $out = $parser->replaceSection($input, $section, $replace);
    } elseif (isset($opts['comment'])) {
        $out = Linker::formatComment($input, $title, $local);
    } elseif (isset($opts['preload'])) {
        $out = $parser->getPreloadText($input, $title, $options);
    } else {
        // Full parse, followed by the optional output adjustments.
        $output = $parser->parse($input, $title, $options, true, true, 1337);
        $tocWanted = !isset($opts['notoc']);
        $output->setTOCEnabled($tocWanted);
        $out = $output->getText();

        if (isset($opts['tidy'])) {
            if (!$this->tidySupport->isEnabled()) {
                $this->markTestSkipped("SKIPPED: tidy extension is not installed.\n");
            } else {
                // Tidy the HTML, then drop trailing whitespace.
                $out = preg_replace('/\\s+$/', '', MWTidy::tidy($out));
            }
        }

        if (isset($opts['showtitle'])) {
            if ($output->getTitleText()) {
                $title = $output->getTitleText();
            }

            $out = "{$title}\n{$out}";
        }

        if (isset($opts['ill'])) {
            $out = implode(' ', $output->getLanguageLinks());
        } elseif (isset($opts['cat'])) {
            $outputPage = $context->getOutput();
            $outputPage->addCategoryLinks($output->getCategories());
            $cats = $outputPage->getCategoryLinks();
            $out = isset($cats['normal']) ? implode(' ', $cats['normal']) : '';
        }

        $parser->mPreprocessor = null;
    }

    $this->teardownGlobals();

    $this->assertEquals($result, $out, $desc);
}