/**
 * Exercise Sanitizer::decodeTagAttributes() against a table of raw attribute
 * strings and the decoded name => value arrays they are expected to produce.
 *
 * Each case is array( expected result, raw attribute text, assertion message ).
 * The expected value is passed to assertEquals() first so that PHPUnit failure
 * output labels "expected" and "actual" correctly.
 */
function testDecodeTagAttributes() {
	$cases = array(
		array(array('foo' => 'bar'), 'foo=bar', 'Unquoted attribute'),
		array(array('foo' => 'bar'), ' foo = bar ', 'Spaced attribute'),
		array(array('foo' => 'bar'), 'foo="bar"', 'Double-quoted attribute'),
		array(array('foo' => 'bar'), 'foo=\'bar\'', 'Single-quoted attribute'),
		array(array('foo' => 'bar', 'baz' => 'foo'), 'foo=\'bar\' baz="foo"', 'Several attributes'),

		array(array(':foo' => 'bar'), ':foo=\'bar\'', 'Leading :'),
		array(array('_foo' => 'bar'), '_foo=\'bar\'', 'Leading _'),
		array(array('foo' => 'bar'), 'Foo=\'bar\'', 'Leading capital'),
		array(array('foo' => 'BAR'), 'FOO=BAR', 'Attribute keys are normalized to lowercase'),

		# Invalid beginning
		array(array(), '-foo=bar', 'Leading - is forbidden'),
		array(array(), '.foo=bar', 'Leading . is forbidden'),
		array(array('foo-bar' => 'bar'), 'foo-bar=bar', 'A - is allowed inside the attribute'),
		array(array('foo-' => 'bar'), 'foo-=bar', 'A - is allowed as last character'),
		array(array('foo.bar' => 'baz'), 'foo.bar=baz', 'A . is allowed inside the attribute'),
		array(array('foo.' => 'baz'), 'foo.=baz', 'A . is allowed as last character'),
		array(array('foo6' => 'baz'), 'foo6=baz', 'Numbers are allowed'),

		# This bit is more relaxed than XML rules, but some extensions use it,
		# like ProofreadPage (see bug 27539)
		array(array('1foo' => 'baz'), '1foo=baz', 'Leading numbers are allowed'),

		array(array(), 'foo$=baz', 'Symbols are not allowed'),
		array(array(), 'foo@=baz', 'Symbols are not allowed'),
		array(array(), 'foo~=baz', 'Symbols are not allowed'),

		array(array('foo' => '1[#^`*%w/('), 'foo=1[#^`*%w/(', 'All kind of characters are allowed as values'),
		array(
			array('foo' => '1[#^`*%\'w/('),
			'foo="1[#^`*%\'w/("',
			'Single quotes are allowed if quoted by double quotes'
		),
		array(
			array('foo' => '1[#^`*%"w/('),
			'foo=\'1[#^`*%"w/(\'',
			'Double quotes are allowed if quoted by single quotes'
		),

		# The input must contain real entities for this case to mean anything;
		# they are expected to come back decoded.
		array(array('foo' => '&"'), 'foo=&amp;&quot;', 'Special chars can be provided as entities'),
		array(array('foo' => '&foobar;'), 'foo=&foobar;', 'Entity-like items are accepted'),
	);

	foreach ($cases as $case) {
		list($expected, $attributes, $message) = $case;
		$this->assertEquals($expected, Sanitizer::decodeTagAttributes($attributes), $message);
	}
}
/**
 * Build the HTML for an external link, combining the caller's attributes with
 * the attributes Linker generates for the "external <linktype>" class.
 *
 * The caller-supplied attributes are merged first and the decoded Linker
 * attributes second, so on a string-key clash the Linker value wins. The
 * merged set is then handed to linkattr_ModifyLink() before being rendered.
 *
 * @param string $url      Link target, inserted as-is into the href attribute
 * @param string $text     Link text, inserted as-is; passed to linkattr_ModifyLink()
 * @param string $link     Receives the generated <a> element
 * @param array  $attribs  Attributes supplied by the caller
 * @param string $linktype Appended to "external " when asking Linker for attributes
 * @return bool Always false (presumably so the hook caller uses $link - confirm against caller)
 */
function linkattr_ExternalLink(&$url, &$text, &$link, &$attribs, $linktype) {
	$attribsText = Linker::getExternalLinkAttributes('external ' . $linktype);

	$linkerAttribs = Sanitizer::decodeTagAttributes($attribsText);
	$mergedattribs = array_merge($attribs, $linkerAttribs);
	linkattr_ModifyLink($text, $mergedattribs, 1);

	// An empty merged set keeps the attribute text produced by Linker.
	if ($mergedattribs) {
		$attribsText = Xml::expandAttributes($mergedattribs);
	}

	$link = '<a href="' . $url . '"' . $attribsText . '>' . $text . '</a>';

	return false;
}
/**
 * Output every <googlemap> section of the requested article as a KML download.
 *
 * The article name comes from the "article" request value, with $params as the
 * default. Normal page output is disabled and KML download headers are sent
 * before anything else, so the plain-text error messages below are delivered
 * under those headers as well.
 *
 * @param string $params Default for the "article" and "map" request values
 * @return bool|null false when the article has no revision, otherwise nothing
 */
function execute($params) {
	global $wgRequest, $wgOut, $wgTitle, $wgUser;
	global $wgContLang, $wgProxyKey, $wgParser;

	$article = $wgRequest->getText('article', $params);
	$map = $wgRequest->getText('map', $params);

	$wgOut->disable();
	header("Cache-Control: no-cache, must-revalidate");
	header("Content-type: application/vnd.google-earth.kml+xml");
	header('Content-Disposition: attachment; filename="' . $article . '.kml"');

	$title = Title::newFromText($article);

	/* Wikia change (Sebastian Marzjan, fogbugz BugID #18043): require a real Title */
	if (!($title instanceof Title)) {
		echo "No article found by the name of {$article}";
		return;
	}

	$revision = Revision::newFromTitle($title);

	/* Wikia change (Sebastian Marzjan, fogbugz BugID #18043): log and bail out when there is no revision */
	if (!$revision instanceof Revision) {
		$errorMessage = 'SpecialGoogleMapsKML.php ' . __LINE__ . ' - no revision for ' . $article . ' / ' . $title->getArticleID();
		Wikia::log(__METHOD__, false, $errorMessage);
		echo "No article revisions found by the name of {$article}";
		return false;
	}

	$defaults = array(
		'icons' => 'http://maps.google.com/mapfiles/kml/pal4/{label}.png',
		'icon' => 'icon57',
	);
	$mapOptions = GoogleMaps::getMapSettings($title, $defaults);
	$iconUrl = str_replace('{label}', $mapOptions['icon'], $mapOptions['icons']);
	$exporter = new GoogleMapsKmlExporter($wgContLang, $iconUrl);

	// Prime the global parser, then set up a second parser bound to the article.
	$wgParser->mOptions = ParserOptions::newFromUser($wgUser);
	$wgParser->mOptions->setEditSection(false);
	$wgParser->mTitle = $wgTitle;
	$wgParser->clearState();

	$localParser = new Parser();
	$localParser->mTitle = $title;
	$localParser->mOptions = $wgParser->mOptions;

	$found = preg_match_all("/<googlemap( .*?|)>(.*?)<\\/googlemap>/s", $revision->getText(), $matches);
	if (!$found) {
		echo "No maps in {$article}!";
		return;
	}

	$exporter->addFileHeader();
	foreach ($matches[2] as $i => $mapBody) {
		$attrs = Sanitizer::decodeTagAttributes($matches[1][$i]);

		if (isset($attrs['version'])) {
			$mapOptions['version'] = $attrs['version'];
		} else {
			$mapOptions['version'] = "0";
		}

		// Maps without a title attribute are numbered from 1.
		if (isset($attrs['title'])) {
			$heading = $attrs['title'];
		} else {
			$heading = "Map #" . ($i + 1);
		}
		$exporter->addHeader($heading);

		GoogleMaps::renderContent($mapBody, $wgParser, $localParser, $exporter, $mapOptions);
		$exporter->addTrailer();
	}
	$exporter->addFileTrailer();

	echo $exporter->render();
}
/**
 * Output every <googlemap> section of the requested article as a KML download.
 *
 * The article name comes from the "article" request value, with $params as the
 * default. Normal page output is disabled and KML download headers are sent
 * before anything else, so the plain-text error messages below are delivered
 * under those headers as well.
 *
 * @param string $params Default for the "article" and "map" request values
 */
function execute($params) {
	global $wgRequest, $wgOut, $wgUser;
	global $wgContLang, $wgProxyKey, $wgParser;

	$article = $wgRequest->getText('article', $params);
	$map = $wgRequest->getText('map', $params);

	$wgOut->disable();
	header("Cache-Control: no-cache, must-revalidate");
	header("Content-type: application/vnd.google-earth.kml+xml");
	header('Content-Disposition: attachment; filename="' . $article . '.kml"');

	$title = Title::newFromText($article);
	if (!$title) {
		echo "No article found by the name of {$article}";
		return;
	}

	// A valid title does not guarantee a stored revision (e.g. the page does
	// not exist); without this guard $revision->getText() below is a fatal error.
	$revision = Revision::newFromTitle($title);
	if (!$revision) {
		echo "No article revisions found by the name of {$article}";
		return;
	}

	$mapOptions = GoogleMaps::getMapSettings($title, array(
		'icons' => 'http://maps.google.com/mapfiles/kml/pal4/{label}.png',
		'icon' => 'icon57',
	));
	$exporter = new GoogleMapsKmlExporter(
		$wgContLang,
		str_replace('{label}', $mapOptions['icon'], $mapOptions['icons'])
	);

	// Both parsers are started with the same options, against this page's title.
	$popts = ParserOptions::newFromUser($wgUser);
	$popts->setEditSection(false);
	$wgParser->startExternalParse($this->getTitle(), $popts, OT_WIKI, true);
	$localParser = new Parser();
	$localParser->startExternalParse($this->getTitle(), $popts, OT_WIKI, true);

	if (!preg_match_all("/<googlemap( .*?|)>(.*?)<\\/googlemap>/s", $revision->getText(), $matches)) {
		echo "No maps in {$article}!";
		return;
	}

	$exporter->addFileHeader();
	foreach ($matches[2] as $i => $mapBody) {
		$attrs = Sanitizer::decodeTagAttributes($matches[1][$i]);
		$mapOptions['version'] = isset($attrs['version']) ? $attrs['version'] : "0";
		// Maps without a title attribute are numbered from 1.
		$exporter->addHeader(isset($attrs['title']) ? $attrs['title'] : "Map #" . ($i + 1));
		GoogleMaps::renderContent($mapBody, $wgParser, $localParser, $exporter, $mapOptions);
		$exporter->addTrailer();
	}
	$exporter->addFileTrailer();

	echo $exporter->render();
}
/**
 * Return the text to be used for a given extension tag.
 * This is the ghost of strip().
 *
 * In HTML output mode the tag is rendered (built-in handling for html,
 * nowiki, math and gallery, otherwise the registered tag hook). In every
 * other mode the tag is rebuilt as source text. Either way the result is
 * stored in the strip state and a unique marker is returned in its place.
 *
 * @param array $params Associative array of parameters:
 *     name       PPNode for the tag name
 *     attr       PPNode for unparsed text where tag attributes are thought to be
 *     attributes Optional associative array of parsed attributes
 *     inner      Contents of extension element
 *     close      PPNode for the closing tag text; read only when not rendering
 *                HTML and the element has inner content
 * @param PPFrame $frame
 * @throws MWException On an <html> tag while $wgRawHtml is off, or a non-callable tag hook
 * @return string The strip marker standing in for the tag
 */
function extensionSubstitution($params, $frame) {
	global $wgRawHtml, $wgContLang;

	$name = $frame->expand($params['name']);
	$attrText = !isset($params['attr']) ? null : $frame->expand($params['attr']);
	$content = !isset($params['inner']) ? null : $frame->expand($params['inner']);

	$marker = "{$this->mUniqPrefix}-{$name}-" . sprintf('%08X', $this->mMarkerIndex++) . self::MARKER_SUFFIX;

	if ($this->ot['html']) {
		$name = strtolower($name);

		// Attributes parsed from the tag text win over pre-parsed ones on a key clash.
		$attributes = Sanitizer::decodeTagAttributes($attrText);
		if (isset($params['attributes'])) {
			$attributes = $attributes + $params['attributes'];
		}

		switch ($name) {
			case 'html':
				if ($wgRawHtml) {
					$output = $content;
					break;
				} else {
					throw new MWException('<html> extension tag encountered unexpectedly');
				}
			case 'nowiki':
				// Break up "-{" and "}-" with numeric entities so the text between
				// them is not picked up as "-{ ... }-" markup later. The entity
				// forms are required: mapping the sequences to themselves is a no-op.
				$content = strtr($content, array('-{' => '-&#123;', '}-' => '&#125;-'));
				$output = Xml::escapeTagsOnly($content);
				break;
			case 'math':
				$output = $wgContLang->armourMath(MathRenderer::renderMath($content, $attributes));
				break;
			case 'gallery':
				$output = $this->renderImageGallery($content, $attributes);
				break;
			default:
				if (isset($this->mTagHooks[$name])) {
					# Workaround for PHP bug 35229 and similar
					if (!is_callable($this->mTagHooks[$name])) {
						throw new MWException("Tag hook for {$name} is not callable\n");
					}
					$output = call_user_func_array(
						$this->mTagHooks[$name],
						array($content, $attributes, $this)
					);
				} else {
					$output = '<span class="error">Invalid tag extension name: ' . htmlspecialchars($name) . '</span>';
				}
		}
	} else {
		// Not rendering HTML: reconstruct the tag as text.
		if (is_null($attrText)) {
			$attrText = '';
		}
		if (isset($params['attributes'])) {
			foreach ($params['attributes'] as $attrName => $attrValue) {
				$attrText .= ' ' . htmlspecialchars($attrName) . '="' . htmlspecialchars($attrValue) . '"';
			}
		}
		if ($content === null) {
			$output = "<{$name}{$attrText}/>";
		} else {
			// isset() treats a missing 'close' key like a null one, without a notice.
			$close = isset($params['close']) ? $frame->expand($params['close']) : '';
			$output = "<{$name}{$attrText}>{$content}{$close}";
		}
	}

	if ($name === 'html' || $name === 'nowiki') {
		$this->mStripState->nowiki->setPair($marker, $output);
	} else {
		$this->mStripState->general->setPair($marker, $output);
	}

	return $marker;
}
/**
 * Normalize a tag-soup attribute fragment for one HTML element into
 * well-formed XML attribute text, dropping attributes that are not wanted.
 * The result is meant to be safe for further wikitext processing.
 *
 * The work is done by three Sanitizer steps run in sequence: decode the raw
 * text, validate the decoded attributes for the element, and safely re-encode
 * what remains. Taken together they are documented to:
 *
 * - Normalize attribute names to lowercase
 * - Discard attributes not on a whitelist for the given element
 * - Turn broken or invalid entities into plaintext
 * - Double-quote all attribute values
 * - Give attributes without values the name as value
 * - Discard double attributes
 * - Discard unsafe style attributes
 * - Prepend a space if there are attributes
 *
 * @param string $text    Raw attribute text
 * @param string $element Element name the attributes belong to
 * @return string Encoded attribute text, or '' when $text is blank
 */
static function fixTagAttributes($text, $element) {
	// Blank input short-circuits before any Sanitizer call.
	if (trim($text) == '') {
		return '';
	}

	return Sanitizer::safeEncodeTagAttributes(
		Sanitizer::validateTagAttributes(
			Sanitizer::decodeTagAttributes($text),
			$element
		)
	);
}
/**
 * Normalize a tag-soup attribute fragment for one HTML element into
 * well-formed XML attribute text, dropping attributes that are not wanted.
 * The result is meant to be safe for further wikitext processing.
 *
 * The raw text is decoded and validated by Sanitizer, then every surviving
 * attribute is written out as name="value". Documented effects:
 *
 * - Normalizes attribute names to lowercase
 * - Discards attributes not on a whitelist for the given element
 * - Turns broken or invalid entities into plaintext
 * - Double-quotes all attribute values
 * - Attributes without values are given the name as attribute
 * - Double attributes are discarded
 * - Unsafe style attributes are discarded
 * - Prepends space if there are attributes
 *
 * @param $text String raw attribute text
 * @param $element String element name the attributes belong to
 * @return String attribute text with a leading space, or '' if nothing is left
 */
static function fixTagAttributes($text, $element) {
	if (trim($text) == '') {
		return '';
	}

	$decoded = Sanitizer::decodeTagAttributes($text);
	$allowed = Sanitizer::validateTagAttributes($decoded, $element);

	$pairs = array();
	foreach ($allowed as $attrName => $attrValue) {
		$pairs[] = htmlspecialchars($attrName) . '="' . Sanitizer::safeEncodeAttribute($attrValue) . '"';
	}

	if (!count($pairs)) {
		return '';
	}

	return ' ' . implode(' ', $pairs);
}
/**
 * Hook to provide syntax highlighting for API pretty-printed output
 *
 * Highlights $text with the lexer registered for $mime, tags the opening
 * <pre> of the result with the "api-pretty-content" class and adds the
 * markup to the context's output.
 *
 * @param IContextSource $context
 * @param string $text
 * @param string $mime
 * @param string $format
 * @return bool true when no lexer is registered for $mime or highlighting
 *  did not succeed; false once the highlighted HTML has been added
 * @since MW 1.24
 */
public static function onApiFormatHighlight(IContextSource $context, $text, $mime, $format) {
	if (!isset(self::$mimeLexers[$mime])) {
		return true;
	}

	$lexer = self::$mimeLexers[$mime];
	$status = self::highlight($text, $lexer);
	if (!$status->isOK()) {
		return true;
	}

	$out = $status->getValue();
	if (preg_match('/^<pre([^>]*)>/i', $out, $m)) {
		$attrs = Sanitizer::decodeTagAttributes($m[1]);
		// The <pre> may carry no class attribute at all; appending to a missing
		// key raises an undefined-index notice and leaves a leading space.
		$attrs['class'] = isset($attrs['class'])
			? $attrs['class'] . ' api-pretty-content'
			: 'api-pretty-content';
		$encodedAttrs = Sanitizer::safeEncodeTagAttributes($attrs);
		$out = '<pre' . $encodedAttrs . '>' . substr($out, strlen($m[0]));
	}

	$output = $context->getOutput();
	$output->addModuleStyles('ext.pygments');
	$output->addHTML('<div dir="ltr">' . $out . '</div>');

	// Inform MediaWiki that we have parsed this page and it shouldn't mess with it.
	return false;
}
/**
 * @deprecated since 1.16 Use link()
 *
 * Make a link for a title which definitely exists. This is faster than makeLinkObj because
 * it doesn't have to do a database query. It's also valid for interwiki titles and special
 * pages.
 *
 * This is a thin wrapper: the legacy string arguments are converted to arrays
 * and passed to link() with the 'known' and 'noclasses' options.
 *
 * @param $title Title object of target page
 * @param $text String: text to replace the title; when empty, linkText( $title ) is used
 * @param $query String: query string, converted to an array with wfCgiToArray()
 * @param $trail String: text after link
 * @param $prefix String: text before link text
 * @param $aprops String: extra attributes to the a-element
 * @param $style String: further attribute text, merged with $aprops
 * @return String: the a-element followed by whatever part of $trail stays outside the link
 */
static function makeKnownLinkObj($title, $text = '', $query = '', $trail = '', $prefix = '', $aprops = '', $style = '') {
	wfProfileIn(__METHOD__);

	if ($text == '') {
		$text = self::linkText($title);
	}

	$extraAttribs = Sanitizer::decodeTagAttributes($aprops);
	$styleAttribs = Sanitizer::decodeTagAttributes($style);
	$attribs = Sanitizer::mergeAttributes($extraAttribs, $styleAttribs);

	$query = wfCgiToArray($query);

	// The part of the trail that belongs inside the link is glued onto the text.
	list($inside, $trail) = self::splitTrail($trail);
	$html = "{$prefix}{$text}{$inside}";

	$ret = self::link($title, $html, $attribs, $query, array('known', 'noclasses')) . $trail;

	wfProfileOut(__METHOD__);
	return $ret;
}
/**
 * AJAX helper called from view mode to save gallery data
 *
 * Finds the <gallery> tag in the current revision of $wgTitle whose
 * md5( inner content . concatenated decoded attribute values ) equals $hash,
 * replaces it with $wikitext and saves the page through EditPage.
 *
 * @author Marooned
 *
 * @param string $hash      md5 identifying the gallery to replace
 * @param string $wikitext  replacement wikitext for the gallery
 * @param string $starttime assigned to EditPage::$starttime for the save
 * @return array 'info' => 'ok' or 'conflict', or 'error' plus 'errorCaption' on failure
 */
public static function saveGalleryDataByHash($hash, $wikitext, $starttime) {
	global $wgTitle, $wgUser;

	wfProfileIn(__METHOD__);
	wfDebug(__METHOD__ . ": {$wikitext}\n");

	$result = array();

	// save changed gallery
	$rev = Revision::newFromTitle($wgTitle);

	// try to fix fatal (article has been removed since user opened the page)
	if (empty($rev)) {
		$result['info'] = 'conflict';
		wfDebug(__METHOD__ . ": revision is empty\n");
		wfProfileOut(__METHOD__);
		return $result;
	}

	$articleWikitext = $rev->getText();
	$gallery = '';

	preg_match_all('%<gallery([^>]*)>(.*?)</gallery>%s', $articleWikitext, $matches, PREG_PATTERN_ORDER);

	// Compare hashes as strings: with loose ==, two different hex digests that
	// both look numeric (e.g. "0e123..." and "0e456...") compare as equal.
	$wantedHash = (string)$hash;
	foreach ($matches[0] as $i => $galleryTag) {
		$attribs = Sanitizer::decodeTagAttributes($matches[1][$i]);
		// count hash from attribs and content
		if (md5($matches[2][$i] . implode('', $attribs)) === $wantedHash) {
			$gallery = $galleryTag;
			break;
		}
	}

	if (empty($gallery)) {
		$result['info'] = 'conflict';
		wfDebug(__METHOD__ . ": conflict found\n");
	} else {
		// NOTE(review): str_replace() swaps every byte-identical occurrence of the
		// matched tag, not only the first one - confirm this is intended.
		$articleWikitext = str_replace($gallery, $wikitext, $articleWikitext);

		// saving
		if ($wgTitle->userCan('edit') && !$wgUser->isBlocked()) {
			// internalAttemptSave() receives $result as its first argument
			$result = null;

			$article = new Article($wgTitle);
			$editPage = new EditPage($article);
			$editPage->edittime = $article->getTimestamp();
			$editPage->starttime = $starttime;
			$editPage->textbox1 = $articleWikitext;
			$editPage->summary = wfMsgForContent('wikiaPhotoGallery-edit-summary');

			// watch all my edits / preserve watchlist (RT #59138)
			if ($wgUser->getOption('watchdefault')) {
				$editPage->watchthis = true;
			} else {
				$editPage->watchthis = $editPage->mTitle->userIsWatching();
			}

			$bot = $wgUser->isAllowed('bot');
			$status = $editPage->internalAttemptSave($result, $bot);
			$retval = $status->value;
			Wikia::log(__METHOD__, "editpage", "Returned value {$retval}");

			switch ($retval) {
				case EditPage::AS_SUCCESS_UPDATE:
				case EditPage::AS_SUCCESS_NEW_ARTICLE:
					$wgTitle->invalidateCache();
					Article::onArticleEdit($wgTitle);
					$result['info'] = 'ok';
					break;

				case EditPage::AS_SPAM_ERROR:
					$result['error'] = wfMsg('spamprotectiontext') . '<p>( Call #4 )</p>';
					break;

				default:
					$result['error'] = wfMsg('wikiaPhotoGallery-edit-abort');
			}
		} else {
			$result['error'] = wfMsg('wikiaPhotoGallery-error-user-rights');
		}

		if (isset($result['error'])) {
			$result['errorCaption'] = wfMsg('wikiaPhotoGallery-error-caption');
		}
		// end of saving

		wfDebug(__METHOD__ . ": saving from view mode done\n");

		// commit (RT #48304)
		$dbw = wfGetDB(DB_MASTER);
		$dbw->commit();
	}

	wfProfileOut(__METHOD__);
	return $result;
}
/**
 * Check one attribute string against the array it should decode to.
 *
 * @dataProvider provideTagAttributesToDecode
 * @covers Sanitizer::decodeTagAttributes
 *
 * @param array  $expected   Decoded attributes the sanitizer should return
 * @param string $attributes Raw attribute text to decode
 * @param string $message    Message shown when the assertion fails
 */
function testDecodeTagAttributes($expected, $attributes, $message = '') {
	$decoded = Sanitizer::decodeTagAttributes($attributes);

	$this->assertEquals($expected, $decoded, $message);
}
/**
 * Replaces all occurrences of HTML-style comments and the given tags
 * in the text with a unique marker and returns the resulting text. The
 * output parameter $matches will be an associative array filled with data
 * in the form:
 *   'UNIQ-xxxxx' => array(
 *     'element',
 *     'tag content',
 *     array( 'param' => 'x' ),
 *     '<element param="x">tag content</element>' ) )
 *
 * Markers are numbered from a counter that is shared by every call.
 *
 * @param $elements list of element names. Comments are always extracted.
 * @param $text Source text string.
 * @param $matches Receives the marker => data map described above.
 * @param $uniq_prefix Prefix placed at the start of every marker.
 * @return the text with each extracted tag or comment replaced by its marker
 *
 * @public
 * @static
 */
function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = '') {
	static $n = 1;

	$matches = array();
	$stripped = '';

	$taglist = implode('|', $elements);
	$start = "/<({$taglist})(\\s+[^>]*?|\\s*?)(\\/?>)|<(!--)/i";

	while ('' != $text) {
		$parts = preg_split($start, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
		$partCount = count($parts);

		// Everything before the next opener is copied through unchanged.
		$stripped .= $parts[0];
		if ($partCount < 5) {
			// No further tag or comment opener
			break;
		}

		if ($partCount > 5) {
			// Comment: only the fourth capture group took part in the match
			$element = $parts[4];
			$attributes = '';
			$close = '';
			$inside = $parts[5];
		} else {
			// Tag: name, attribute text and closing bracket were captured
			list(, $element, $attributes, $close, $inside) = $parts;
		}

		$marker = "{$uniq_prefix}-{$element}-" . sprintf('%08X', $n++) . "-QINU";
		$stripped .= $marker;

		if ($close === '/>') {
			// Empty element tag, <tag />
			$content = null;
			$tail = null;
			$text = $inside;
		} else {
			$end = ($element == '!--') ? '/(-->)/' : "/(<\\/{$element}\\s*>)/i";

			$rest = preg_split($end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE);
			$content = $rest[0];
			if (count($rest) >= 3) {
				$tail = $rest[1];
				$text = $rest[2];
			} else {
				// No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			}
		}

		$matches[$marker] = array(
			$element,
			$content,
			Sanitizer::decodeTagAttributes($attributes),
			"<{$element}{$attributes}{$close}{$content}{$tail}",
		);
	}

	return $stripped;
}
/**
 * Override the title of the page when viewed, provided we've been given a
 * title which will normalise to the canonical title
 *
 * With $wgRestrictDisplayTitle off the sanitized text is always applied.
 * With it on, the text is applied only if, once all tags are stripped, it
 * parses to a fragment-less title equal to the parser's own title.
 *
 * @param $parser Parser: parent parser
 * @param string $text desired title text
 * @return String always the empty string
 */
static function displaytitle($parser, $text = '') {
	global $wgRestrictDisplayTitle;

	// parse a limited subset of wiki markup (just the single quote items)
	$text = $parser->doQuotes($text);

	// remove stripped text (e.g. the UNIQ-QINU stuff) that was generated by tag extensions/whatever
	$markerPattern = '/' . preg_quote($parser->uniqPrefix(), '/') . '.*?'
		. preg_quote(Parser::MARKER_SUFFIX, '/') . '/';
	$text = preg_replace($markerPattern, '', $text);

	// list of disallowed tags for DISPLAYTITLE
	// these will be escaped even though they are allowed in normal wiki text
	$bad = array(
		'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'blockquote', 'ol', 'ul', 'li', 'hr',
		'table', 'tr', 'th', 'td', 'dl', 'dd', 'caption', 'p', 'ruby', 'rb', 'rt', 'rp', 'br',
	);

	// disallow some styles that could be used to bypass $wgRestrictDisplayTitle
	$htmlTagsCallback = null;
	if ($wgRestrictDisplayTitle) {
		$htmlTagsCallback = function (&$params) {
			$decoded = Sanitizer::decodeTagAttributes($params);

			if (isset($decoded['style'])) {
				// this is called later anyway, but we need it right now for the regexes below to be safe
				// calling it twice doesn't hurt
				$decoded['style'] = Sanitizer::checkCss($decoded['style']);

				$hidesContent = preg_match('/(display|user-select|visibility)\\s*:/i', $decoded['style']);
				if ($hidesContent) {
					$decoded['style'] = '/* attempt to bypass $wgRestrictDisplayTitle */';
				}
			}

			$params = Sanitizer::safeEncodeTagAttributes($decoded);
		};
	}

	// only requested titles that normalize to the actual title are allowed through
	// if $wgRestrictDisplayTitle is true (it is by default)
	// mimic the escaping process that occurs in OutputPage::setPageTitle
	$cleaned = Sanitizer::removeHTMLtags($text, $htmlTagsCallback, array(), array(), $bad);
	$text = Sanitizer::normalizeCharReferences($cleaned);
	$title = Title::newFromText(Sanitizer::stripAllTags($text));

	$accepted = !$wgRestrictDisplayTitle
		|| ($title instanceof Title && !$title->hasFragment() && $title->equals($parser->mTitle));
	if ($accepted) {
		$parser->mOutput->setDisplayTitle($text);
	}

	return '';
}
/**
 * Hook to provide syntax highlighting for API pretty-printed output
 *
 * Maps the MIME type to a GeSHi language (javascript for text/javascript and
 * application/json, xml for text/xml), highlights $text, tags the opening
 * <pre> with the "api-pretty-content" class and adds the markup to the
 * context's output.
 *
 * @param IContextSource $context
 * @param string $text
 * @param string $mime
 * @param string $format
 * @return bool true when the MIME type is not handled or highlighting
 *  failed; false once the highlighted HTML has been added
 * @since MW 1.24
 */
public static function apiFormatHighlight(IContextSource $context, $text, $mime, $format) {
	switch ($mime) {
		case 'text/javascript':
		case 'application/json':
			$lang = 'javascript';
			break;

		case 'text/xml':
			$lang = 'xml';
			break;

		default:
			// Don't know how to handle this
			return true;
	}

	$geshi = self::prepare($text, $lang);
	if (!($geshi instanceof GeSHi)) {
		// Bottle out
		return true;
	}

	$out = $geshi->parse_code();
	if ($geshi->error()) {
		// Bottle out
		return true;
	}

	if (preg_match('/^<pre([^>]*)>/i', $out, $m)) {
		$attrs = Sanitizer::decodeTagAttributes($m[1]);
		// The <pre> may carry no class attribute at all; appending to a missing
		// key raises an undefined-index notice and leaves a leading space.
		$attrs['class'] = isset($attrs['class'])
			? $attrs['class'] . ' api-pretty-content'
			: 'api-pretty-content';
		$out = '<pre' . Sanitizer::safeEncodeTagAttributes($attrs) . '>' . substr($out, strlen($m[0]));
	}

	$output = $context->getOutput();
	$output->addModuleStyles(array("ext.geshi.language.{$lang}", 'ext.geshi.local'));
	$output->addHTML("<div dir=\"ltr\">{$out}</div>");

	// Inform MediaWiki that we have parsed this page and it shouldn't mess with it.
	return false;
}
/**
 * Return the text to be used for a given extension tag.
 * This is the ghost of strip().
 *
 * When rendering HTML, or when the tag is a function tag hook and the output
 * type is html or pre, the registered hook is invoked. Otherwise the tag is
 * rebuilt as source text. Unless the marker type is 'none', the result is
 * stored in the strip state and a unique marker is returned in its place.
 *
 * @param array $params Associative array of parameters:
 *     name       PPNode for the tag name
 *     attr       PPNode for unparsed text where tag attributes are thought to be
 *     attributes Optional associative array of parsed attributes
 *     inner      Contents of extension element
 *     close      PPNode for the closing tag text; read only when the tag is
 *                rebuilt as text and has inner content
 * @param $frame PPFrame
 *
 * @throws MWException On a non-callable hook or an invalid marker type
 * @return string The hook output itself (marker type 'none') or a strip marker
 */
function extensionSubstitution($params, $frame) {
	$name = $frame->expand($params['name']);
	$attrText = !isset($params['attr']) ? null : $frame->expand($params['attr']);
	$content = !isset($params['inner']) ? null : $frame->expand($params['inner']);
	$marker = "{$this->mUniqPrefix}-{$name}-" . sprintf('%08X', $this->mMarkerIndex++) . self::MARKER_SUFFIX;

	$isFunctionTag = isset($this->mFunctionTagHooks[strtolower($name)])
		&& ($this->ot['html'] || $this->ot['pre']);
	// Function tag output is returned directly rather than hidden behind a marker.
	$markerType = $isFunctionTag ? 'none' : 'general';

	if ($this->ot['html'] || $isFunctionTag) {
		$name = strtolower($name);

		// Attributes parsed from the tag text win over pre-parsed ones on a key clash.
		$attributes = Sanitizer::decodeTagAttributes($attrText);
		if (isset($params['attributes'])) {
			$attributes = $attributes + $params['attributes'];
		}

		if (isset($this->mTagHooks[$name])) {
			# Workaround for PHP bug 35229 and similar
			if (!is_callable($this->mTagHooks[$name])) {
				throw new MWException("Tag hook for {$name} is not callable\n");
			}
			$output = call_user_func_array(
				$this->mTagHooks[$name],
				array($content, $attributes, $this, $frame)
			);
		} elseif (isset($this->mFunctionTagHooks[$name])) {
			list($callback, ) = $this->mFunctionTagHooks[$name];
			if (!is_callable($callback)) {
				throw new MWException("Tag hook for {$name} is not callable\n");
			}
			// $this can no longer be handed over by reference on PHP 7.1+;
			// give the callback a reference to a local alias instead.
			$parser = $this;
			$output = call_user_func_array($callback, array(&$parser, $frame, $content, $attributes));
		} else {
			$output = '<span class="error">Invalid tag extension name: ' . htmlspecialchars($name) . '</span>';
		}

		if (is_array($output)) {
			# Extract flags to local scope (to override $markerType)
			# NOTE(review): extract() can overwrite any local of this method,
			# not only $markerType - hooks are trusted here.
			$flags = $output;
			$output = $flags[0];
			unset($flags[0]);
			extract($flags);
		}
	} else {
		// Rebuild the tag as text.
		if (is_null($attrText)) {
			$attrText = '';
		}
		if (isset($params['attributes'])) {
			foreach ($params['attributes'] as $attrName => $attrValue) {
				$attrText .= ' ' . htmlspecialchars($attrName) . '="' . htmlspecialchars($attrValue) . '"';
			}
		}
		if ($content === null) {
			$output = "<{$name}{$attrText}/>";
		} else {
			// isset() treats a missing 'close' key like a null one, without a notice.
			$close = isset($params['close']) ? $frame->expand($params['close']) : '';
			$output = "<{$name}{$attrText}>{$content}{$close}";
		}
	}

	if ($markerType === 'none') {
		return $output;
	} elseif ($markerType === 'nowiki') {
		$this->mStripState->addNoWiki($marker, $output);
	} elseif ($markerType === 'general') {
		$this->mStripState->addGeneral($marker, $output);
	} else {
		throw new MWException(__METHOD__ . ': invalid marker type');
	}

	return $marker;
}
/**
 * Return the text to be used for a given extension tag.
 * This is the ghost of strip().
 *
 * With the RTE (Rich Text Editor) parser enabled the tag may be replaced by
 * an RTE placeholder before any hook runs. Otherwise, when rendering HTML, or
 * when the tag is a function tag hook and the output type is html or pre,
 * the registered hook is invoked. In the remaining case the tag is rebuilt
 * as source text. Unless the marker type is 'none', the result is stored in
 * the strip state and a unique marker is returned in its place.
 *
 * @param $params Associative array of parameters:
 *     name       PPNode for the tag name
 *     attr       PPNode for unparsed text where tag attributes are thought to be
 *     attributes Optional associative array of parsed attributes
 *     inner      Contents of extension element
 *     close      PPNode for the closing tag text; read only when the tag is
 *                rebuilt as text and has inner content
 * @param $frame PPFrame
 *
 * @throws MWException On a non-callable hook or an invalid marker type
 * @return string An RTE placeholder, the hook output itself (marker type 'none') or a strip marker
 */
function extensionSubstitution($params, $frame) {
	$name = $frame->expand($params['name']);
	$attrText = !isset($params['attr']) ? null : $frame->expand($params['attr']);
	$content = !isset($params['inner']) ? null : $frame->expand($params['inner']);

	# RTE (Rich Text Editor) - begin
	# @author: Inez Korczyński
	global $wgRTEParserEnabled;
	if (!empty($wgRTEParserEnabled)) {
		$wikitextIdx = RTEMarker::getDataIdx(RTEMarker::EXT_WIKITEXT, $content);

		# Allow parser extensions to generate their own placeholders (instead of default one from RTE)
		# @author: Macbre
		if (wfRunHooks('RTEUseDefaultPlaceholder', array($name, $params, $frame, $wikitextIdx))) {
			if ($wikitextIdx !== null) {
				$dataIdx = RTEData::put('placeholder', array('type' => 'ext', 'wikitextIdx' => $wikitextIdx));
				return RTEMarker::generate(RTEMarker::PLACEHOLDER, $dataIdx);
			}
		} else {
			RTE::log(__METHOD__, "skipped default placeholder for <{$name}>");

			// restore value of $content
			$content = RTEData::get('wikitext', $wikitextIdx);

			// keep inner content of tag
			$content = preg_replace('#^<[^>]+>(.*)<[^>]+>$#s', '\\1', $content);
		}
	}
	# RTE - end

	$marker = "{$this->mUniqPrefix}-{$name}-" . sprintf('%08X', $this->mMarkerIndex++) . self::MARKER_SUFFIX;

	$isFunctionTag = isset($this->mFunctionTagHooks[strtolower($name)])
		&& ($this->ot['html'] || $this->ot['pre']);
	// Function tag output is returned directly rather than hidden behind a marker.
	$markerType = $isFunctionTag ? 'none' : 'general';

	if ($this->ot['html'] || $isFunctionTag) {
		$name = strtolower($name);

		# PLB - begin
		# @author: Tomasz Odrobny
		$this->mCurrentTagName = $name;
		# PLB - end

		// Attributes parsed from the tag text win over pre-parsed ones on a key clash.
		$attributes = Sanitizer::decodeTagAttributes($attrText);
		if (isset($params['attributes'])) {
			$attributes = $attributes + $params['attributes'];
		}

		if (isset($this->mTagHooks[$name])) {
			# Workaround for PHP bug 35229 and similar
			if (!is_callable($this->mTagHooks[$name])) {
				throw new MWException("Tag hook for {$name} is not callable\n");
			}

			wfRunHooks('ParserTagHooksBeforeInvoke', [$name, $marker, $content, $attributes, $this, $frame]);

			$output = call_user_func_array(
				$this->mTagHooks[$name],
				array($content, $attributes, $this, $frame)
			);
		} elseif (isset($this->mFunctionTagHooks[$name])) {
			list($callback, $flags) = $this->mFunctionTagHooks[$name];
			if (!is_callable($callback)) {
				throw new MWException("Tag hook for {$name} is not callable\n");
			}
			// $this can no longer be handed over by reference on PHP 7.1+;
			// give the callback a reference to a local alias instead.
			$parser = $this;
			$output = call_user_func_array($callback, array(&$parser, $frame, $content, $attributes));
		} else {
			$output = '<span class="error">Invalid tag extension name: ' . htmlspecialchars($name) . '</span>';
		}

		if (is_array($output)) {
			# Extract flags to local scope (to override $markerType)
			# NOTE(review): extract() can overwrite any local of this method,
			# not only $markerType - hooks are trusted here.
			$flags = $output;
			$output = $flags[0];
			unset($flags[0]);
			extract($flags);
		}
	} else {
		// Rebuild the tag as text.
		if (is_null($attrText)) {
			$attrText = '';
		}
		if (isset($params['attributes'])) {
			foreach ($params['attributes'] as $attrName => $attrValue) {
				$attrText .= ' ' . htmlspecialchars($attrName) . '="' . htmlspecialchars($attrValue) . '"';
			}
		}
		if ($content === null) {
			$output = "<{$name}{$attrText}/>";
		} else {
			// isset() treats a missing 'close' key like a null one, without a notice.
			$close = isset($params['close']) ? $frame->expand($params['close']) : '';
			$output = "<{$name}{$attrText}>{$content}{$close}";
		}
	}

	if ($markerType === 'none') {
		return $output;
	} elseif ($markerType === 'nowiki') {
		$this->mStripState->addNoWiki($marker, $output);
	} elseif ($markerType === 'general') {
		$this->mStripState->addGeneral($marker, $output);
	} else {
		throw new MWException(__METHOD__ . ': invalid marker type');
	}

	return $marker;
}
/**
 * Override the title of the page when viewed, provided we've been given a
 * title which will normalise to the canonical title
 *
 * With $wgRestrictDisplayTitle on, text that does not normalise to the
 * parser's own title is rejected: a tracking category is added and an error
 * span is returned. Accepted text replaces the display title unless one is
 * already set and $uarg matched "displaytitle_noreplace". When a different
 * title was already set and $uarg matched neither magic word, a
 * duplicate-title error span is returned.
 *
 * @param Parser $parser Parent parser
 * @param string $text Desired title text
 * @param string $uarg Optional argument, matched against the displaytitle_noerror
 *  and displaytitle_noreplace magic words
 * @return string Empty string, or an error message wrapped in <span class="error">
 */
public static function displaytitle($parser, $text = '', $uarg = '') {
	global $wgRestrictDisplayTitle;

	static $magicWords = null;
	if ($magicWords === null) {
		$magicWords = new MagicWordArray(['displaytitle_noerror', 'displaytitle_noreplace']);
	}
	$arg = $magicWords->matchStartToEnd($uarg);

	// parse a limited subset of wiki markup (just the single quote items)
	$text = $parser->doQuotes($text);

	// remove stripped text (e.g. the UNIQ-QINU stuff) that was generated by tag extensions/whatever
	$text = $parser->killMarkers($text);

	// list of disallowed tags for DISPLAYTITLE
	// these will be escaped even though they are allowed in normal wiki text
	$bad = [
		'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'blockquote', 'ol', 'ul', 'li', 'hr',
		'table', 'tr', 'th', 'td', 'dl', 'dd', 'caption', 'p', 'ruby', 'rb', 'rt', 'rtc', 'rp', 'br',
	];

	// disallow some styles that could be used to bypass $wgRestrictDisplayTitle
	$htmlTagsCallback = null;
	if ($wgRestrictDisplayTitle) {
		$htmlTagsCallback = function (&$params) {
			$decoded = Sanitizer::decodeTagAttributes($params);

			if (isset($decoded['style'])) {
				// this is called later anyway, but we need it right now for the regexes below to be safe
				// calling it twice doesn't hurt
				$decoded['style'] = Sanitizer::checkCss($decoded['style']);

				$hidesContent = preg_match('/(display|user-select|visibility)\\s*:/i', $decoded['style']);
				if ($hidesContent) {
					$decoded['style'] = '/* attempt to bypass $wgRestrictDisplayTitle */';
				}
			}

			$params = Sanitizer::safeEncodeTagAttributes($decoded);
		};
	}

	// only requested titles that normalize to the actual title are allowed through
	// if $wgRestrictDisplayTitle is true (it is by default)
	// mimic the escaping process that occurs in OutputPage::setPageTitle
	$cleaned = Sanitizer::removeHTMLtags($text, $htmlTagsCallback, [], [], $bad);
	$text = Sanitizer::normalizeCharReferences($cleaned);
	$title = Title::newFromText(Sanitizer::stripAllTags($text));

	$accepted = !$wgRestrictDisplayTitle
		|| ($title instanceof Title && !$title->hasFragment() && $title->equals($parser->mTitle));

	if (!$accepted) {
		$parser->addTrackingCategory('restricted-displaytitle-ignored');
		$converter = $parser->getConverterLanguage()->getConverter();

		return '<span class="error">'
			. wfMessage(
				'restricted-displaytitle',
				$converter->markNoConversion(wfEscapeWikiText($text))
			)->inContentLanguage()->text()
			. '</span>';
	}

	$old = $parser->mOutput->getProperty('displaytitle');
	if ($old === false || $arg !== 'displaytitle_noreplace') {
		$parser->mOutput->setDisplayTitle($text);
	}

	// Report a clash only when a different title was already set and the
	// caller passed neither magic word.
	if ($old !== false && $old !== $text && !$arg) {
		$converter = $parser->getConverterLanguage()->getConverter();

		return '<span class="error">'
			. wfMessage(
				'duplicate-displaytitle',
				$converter->markNoConversion(wfEscapeWikiText($old)),
				$converter->markNoConversion(wfEscapeWikiText($text))
			)->inContentLanguage()->text()
			. '</span>';
	}

	return '';
}
/**
 * Take a tag soup fragment listing an HTML element's attributes
 * and normalize it to well-formed XML, discarding unwanted attributes.
 * Output is safe for further wikitext processing, with escaping of
 * values that could trigger problems.
 *
 * - Normalizes attribute names to lowercase
 * - Discards attributes not on a whitelist for the given element
 * - Turns broken or invalid entities into plaintext
 * - Double-quotes all attribute values
 * - Attributes without values are given the name as attribute
 * - Double attributes are discarded
 * - Unsafe style attributes are discarded
 * - Prepends space if there are attributes.
 *
 * When $wgRTEParserEnabled is non-empty, a 'style' attribute is additionally
 * emitted as 'data-rte-style' (with encoded apostrophes removed) and the
 * result of RTEParser::encodeAttributesStr( $text ) is appended to the
 * attribute list.
 *
 * @param string $text Raw attribute string of the element
 * @param string $element Element name the attributes belong to
 * @return string Normalized attributes with a leading space, or '' when
 *   there is nothing to output
 */
static function fixTagAttributes( $text, $element ) {
    # Declared once up front; the original repeated this inside the loop.
    global $wgRTEParserEnabled;

    if ( trim( $text ) == '' ) {
        return '';
    }

    $decoded = Sanitizer::decodeTagAttributes( $text );
    $decoded = Sanitizer::fixDeprecatedAttributes( $decoded, $element );
    $stripped = Sanitizer::validateTagAttributes( $decoded, $element );

    $attribs = array();
    foreach ( $stripped as $attribute => $value ) {
        $encAttribute = htmlspecialchars( $attribute );
        $encValue = Sanitizer::safeEncodeAttribute( $value );

        # RTE (Rich Text Editor) - begin
        # @author: Inez Korczyński, macbre
        if ( !empty( $wgRTEParserEnabled ) && $encAttribute === 'style' ) {
            // BugId:2462 - remove apostrophes from style attribute.
            // The value is already encoded at this point, so the apostrophe
            // has to be matched in its entity form.
            // NOTE(review): assumes safeEncodeAttribute() emits apostrophes
            // as &#039; (what htmlspecialchars() produces) - confirm.
            $encValue = str_replace( '&#039;', '', $encValue );
            $attribs[] = "data-rte-style=\"{$encValue}\"";
        }
        # RTE - end

        $attribs[] = "{$encAttribute}=\"{$encValue}\"";
    }

    # RTE (Rich Text Editor) - begin
    # @author: Inez Korczyński
    if ( !empty( $wgRTEParserEnabled ) ) {
        // An empty needle makes strpos() match unconditionally on PHP 8 and
        // raise a warning on older versions, so the marker byte is spelled
        // out as an escape sequence.
        // NOTE(review): the needle was empty in the reviewed source; \x7f is
        // presumed to be the intended marker byte - confirm.
        if ( strpos( $text, "\x7f" ) !== false ) {
            RTE::$edgeCases[] = 'COMPLEX.08';
        }
        $attribs[] = RTEParser::encodeAttributesStr( $text );
    }
    # RTE - end

    return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';
}
/**
 * Dictionary-based conversion.
 * This function would not parse the conversion rules.
 * If you want to parse rules, try to use convert() or
 * convertTo().
 *
 * The text is split into convertible runs and literal pieces (markup,
 * entities, parser markers). All convertible runs are joined with a NUL
 * delimiter and translated in one batch, then interleaved with the literal
 * pieces again.
 *
 * @param string $text The text to be converted
 * @param bool|string $toVariant The target language code; when false the
 *   preferred variant is used
 * @return string The converted text
 */
public function autoConvert( $text, $toVariant = false ) {
    $this->loadTables();

    if ( !$toVariant ) {
        $toVariant = $this->getPreferredVariant();
        if ( !$toVariant ) {
            return $text;
        }
    }

    if ( $this->guessVariant( $text, $toVariant ) ) {
        return $text;
    }

    /* we convert everything except:
       1. HTML markups (anything between < and >)
       2. HTML entities
       3. placeholders created by the parser
    */
    $marker = '|' . Parser::MARKER_PREFIX . '[\\-a-zA-Z0-9]+';

    // this one is needed when the text is inside an HTML markup
    $htmlfix = '|<[^>]+$|^[^<>]*>';

    // disable convert to variants between <code> tags
    $codefix = '<code>.+?<\\/code>|';
    // disable conversion of <script> tags
    $scriptfix = '<script.*?>.*?<\\/script>|';
    // disable conversion of <pre> tags
    $prefix = '<pre.*?>.*?<\\/pre>|';

    $reg = '/' . $codefix . $scriptfix . $prefix .
        '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';

    // Separator between queued pieces. Written as an explicit escape
    // sequence so it cannot be silently lost; with an empty delimiter the
    // blobs could not be split apart again.
    $delimiter = "\000";

    $startPos = 0;
    $sourceBlob = '';
    $literalBlob = '';

    // Guard against delimiter nulls in the input
    $text = str_replace( $delimiter, '', $text );
    // $text is not modified below, so its length is loop-invariant.
    $textLength = strlen( $text );

    $markupMatches = null;
    $elementMatches = null;
    while ( $startPos < $textLength ) {
        if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
            $elementPos = $markupMatches[0][1];
            $element = $markupMatches[0][0];
        } else {
            // No more markup: the remainder is one convertible run.
            $elementPos = $textLength;
            $element = '';
        }

        // Queue the part before the markup for translation in a batch
        $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . $delimiter;

        // Advance to the next position
        $startPos = $elementPos + strlen( $element );

        // Translate any alt or title attributes inside the matched element
        if ( $element !== ''
            && preg_match( '/^(<[^>\\s]*)\\s([^>]*)(.*)$/', $element, $elementMatches )
        ) {
            $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
            $changed = false;
            foreach ( [ 'title', 'alt' ] as $attrName ) {
                if ( !isset( $attrs[$attrName] ) ) {
                    continue;
                }
                $attr = $attrs[$attrName];
                // Don't convert URLs. A match at offset 0 is deliberately
                // treated like "not found": a value starting with '://'
                // has no scheme and is still converted.
                if ( !strpos( $attr, '://' ) ) {
                    $attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
                }

                // Remove HTML tags to avoid disrupting the layout
                $attr = preg_replace( '/<[^>]+>/', '', $attr );
                if ( $attr !== $attrs[$attrName] ) {
                    $attrs[$attrName] = $attr;
                    $changed = true;
                }
            }
            if ( $changed ) {
                $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
                    $elementMatches[3];
            }
        }
        $literalBlob .= $element . $delimiter;
    }

    // Do the main translation batch
    $translatedBlob = $this->translate( $sourceBlob, $toVariant );

    // Put the output back together
    $translatedIter = StringUtils::explode( $delimiter, $translatedBlob );
    $literalIter = StringUtils::explode( $delimiter, $literalBlob );

    $output = '';
    while ( $translatedIter->valid() && $literalIter->valid() ) {
        $output .= $translatedIter->current();
        $output .= $literalIter->current();
        $translatedIter->next();
        $literalIter->next();
    }

    return $output;
}
/**
 * Extract page links or <pagelist/> parameters, plus the configured
 * attributes, from the text of an index page.
 *
 * @param string $text Text of the index page
 * @return array Three elements, in this order:
 *   - the preg_match_all() result (PREG_PATTERN_ORDER) for links into the
 *     page namespace, or null when the text uses <pagelist/>
 *   - the decoded attributes of all <pagelist/> tags, or null when none
 *     is present
 *   - a map of lowercased attribute name => value found in the text
 *     ('' when the attribute is absent)
 */
private static function parse_index_text( $text ) {
    list( $page_namespace, $index_namespace ) = self::getPageAndIndexNamespace();

    $links = null;
    $params = null;

    // check if it is using pagelist
    preg_match_all( "/<pagelist([^<]*?)\/>/is", $text, $m, PREG_PATTERN_ORDER );
    if ( $m[1] ) {
        // The attribute strings of all <pagelist/> tags are decoded together.
        $params = Sanitizer::decodeTagAttributes( implode( '', $m[1] ) );
    } else {
        // No pagelist: collect the explicit links into the page namespace.
        $tag_pattern = "/\[\[$page_namespace:(.*?)(\|(.*?)|)\]\]/i";
        preg_match_all( $tag_pattern, $text, $links, PREG_PATTERN_ORDER );
    }

    // read attributes; the names come from a space-separated system message
    $attributes = array();
    foreach ( explode( ' ', wfMsgForContent( 'proofreadpage_js_attributes' ) ) as $name ) {
        $key = strtolower( $name );
        $found = preg_match( "/\n\|" . $name . "=(.*?)\n(\||\}\})/is", $text, $matches );
        $attributes[$key] = $found ? $matches[1] : '';
    }

    return array( $links, $params, $attributes );
}