/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Side effects visible in this method: it may set edit-section tokens, the
 * new-section flags, the TOC HTML and (when $isMain is true) the section list
 * on $this->mOutput, and it may set $this->mShowToc when __FORCETOC__ is present.
 * The output type is temporarily switched to OT_WIKI and restored afterwards.
 *
 * @param $text String: HTML in which <h1>..<h6> elements are looked up
 * @param $origText String: original, untouched wikitext
 * @param $isMain Boolean: when true, the TOC is inserted into the text and the
 *   section data is stored on the parser output
 * @return String: $text with rebuilt headlines (and the TOC, if enabled)
 * @private
 */
function formatHeadings($text, $origText, $isMain = true) {
	global $wgMaxTocLevel, $wgHtml5, $wgExperimentalHtmlIds;

	# Inhibit editsection links if requested in the page
	if (isset($this->mDoubleUnderscores['noeditsection'])) {
		$showEditLink = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
	}
	if ($showEditLink) {
		$this->mOutput->setEditSectionTokens(true);
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	$matches = array();
	$numMatches = preg_match_all('/<H(?P<level>[1-6])(?P<attrib>.*?' . '>)(?P<header>.*?)<\\/H[1-6] *>/i', $text, $matches);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc && ($numMatches >= 4 || $this->mForceTocPosition);

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if (isset($this->mDoubleUnderscores['newsectionlink'])) {
		$this->mOutput->setNewSection(true);
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if (isset($this->mDoubleUnderscores['nonewsectionlink'])) {
		$this->mOutput->hideNewSection(true);
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if (isset($this->mDoubleUnderscores['forcetoc'])) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$full = '';
	$head = array();
	$sublevelCount = array();
	$levelCount = array();
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = "{$this->mUniqPrefix}-h-(\\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->mTitle->getPrefixedDBkey();
	$oldType = $this->mOutputType;
	$this->setOutputType(self::OT_WIKI);
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom($origText);
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = array();
	# Number of times each (lower-cased) anchor has been used so far
	$refers = array();

	# $matches[3] is the same capture as $matches['header']
	foreach ($matches[3] as $headline) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = array();
		if (preg_match("/^{$markerRegex}/", $headline, $markerMatches)) {
			# The marker carries the serial under which the heading's
			# source title and section index were recorded in mHeadings
			$serial = $markerMatches[1];
			list($titleText, $sectionIndex) = $this->mHeadings[$serial];
			$isTemplate = $titleText != $baseTitleText;
			$headline = preg_replace("/^{$markerRegex}/", "", $headline);
		}

		if ($toclevel) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];

		if ($level > $prevlevel) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ($toclevel < $wgMaxTocLevel) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ($level < $prevlevel && $toclevel > 1) {
			# Decrease TOC level, find level to jump to
			for ($i = $toclevel; $i > 0; $i--) {
				if ($levelCount[$i] == $level) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ($levelCount[$i] < $level) {
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ($i == 0) {
				$toclevel = 1;
			}
			if ($toclevel < $wgMaxTocLevel) {
				if ($prevtoclevel < $wgMaxTocLevel) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent($prevtoclevel - $toclevel);
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ($toclevel < $wgMaxTocLevel) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		# (explicit initialisation instead of error suppression)
		if (!isset($sublevelCount[$toclevel])) {
			$sublevelCount[$toclevel] = 0;
		}
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ($i = 1; $i <= $toclevel; $i++) {
			if (!empty($sublevelCount[$i])) {
				if ($dot) {
					$numbering .= '.';
				}
				$numbering .= $this->getFunctionLang()->formatNum($sublevelCount[$i]);
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth($headline);

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		$safeHeadline = $this->replaceLinkHoldersText($safeHeadline);

		# Strip out HTML (other than plain <sup> and <sub>: bug 8393)
		$tocline = preg_replace(array('#<(?!/?(sup|sub)).*?' . '>#', '#<(/?(sup|sub)).*?' . '>#'), array('', '<$1>'), $safeHeadline);
		$tocline = trim($tocline);

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace('/<.*?' . '>/', '', $safeHeadline);
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace($safeHeadline);

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		if ($wgHtml5 && $wgExperimentalHtmlIds) {
			# For reverse compatibility, provide an id that's
			# HTML4-compatible, like we used to.
			#
			# It may be worth noting, academically, that it's possible for
			# the legacy anchor to conflict with a non-legacy headline
			# anchor on the page. In this case likely the "correct" thing
			# would be to either drop the legacy anchors or make sure
			# they're numbered first. However, this would require people
			# to type in section names like "abc_.D7.93.D7.90.D7.A4"
			# manually, so let's not bother worrying about it.
			$legacyHeadline = Sanitizer::escapeId($safeHeadline, array('noninitial', 'legacy'));
			$safeHeadline = Sanitizer::escapeId($safeHeadline);

			if ($legacyHeadline == $safeHeadline) {
				# No reason to have both (in fact, we can't)
				$legacyHeadline = false;
			}
		} else {
			$legacyHeadline = false;
			$safeHeadline = Sanitizer::escapeId($safeHeadline, 'noninitial');
		}

		# HTML names must be case-insensitively unique (bug 10721).
		# This does not apply to Unicode characters per
		# http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison
		# @todo FIXME: We may be changing them depending on the current locale.
		$arrayKey = strtolower($safeHeadline);
		if ($legacyHeadline === false) {
			$legacyArrayKey = false;
		} else {
			$legacyArrayKey = strtolower($legacyHeadline);
		}

		# count how many in assoc. array so we can track dupes in anchors
		if (isset($refers[$arrayKey])) {
			$refers[$arrayKey]++;
		} else {
			$refers[$arrayKey] = 1;
		}
		# Only count the legacy anchor when there is one. Using false as an
		# array key is cast to integer 0, which is also the key PHP uses for
		# the string "0", so counting it would wrongly bump the counter of a
		# heading whose anchor is "0".
		if ($legacyArrayKey !== false) {
			if (isset($refers[$legacyArrayKey])) {
				$refers[$legacyArrayKey]++;
			} else {
				$refers[$legacyArrayKey] = 1;
			}
		}

		# Don't number the heading if it is the only one (looks silly)
		if (count($matches[3]) > 1 && $this->mOptions->getNumberHeadings()) {
			# the two are different if the line contains a link
			$headline = $numbering . ' ' . $headline;
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$legacyAnchor = $legacyHeadline;
		if ($refers[$arrayKey] > 1) {
			$anchor .= '_' . $refers[$arrayKey];
		}
		if ($legacyHeadline !== false && $refers[$legacyArrayKey] > 1) {
			$legacyAnchor .= '_' . $refers[$legacyArrayKey];
		}
		if ($enoughToc && (!isset($wgMaxTocLevel) || $toclevel < $wgMaxTocLevel)) {
			$toc .= Linker::tocLine($anchor, $tocline, $numbering, $toclevel, $isTemplate ? false : $sectionIndex);
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		while ($node && !$isTemplate) {
			if ($node->getName() === 'h') {
				$bits = $node->splitHeading();
				if ($bits['i'] == $sectionIndex) {
					break;
				}
			}
			# Note: despite the name, the offset is accumulated with mb_strlen()
			$byteOffset += mb_strlen($this->mStripState->unstripBoth($frame->expand($node, PPFrame::RECOVER_ORIG)));
			$node = $node->getNextSibling();
		}
		$tocraw[] = array(
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ($isTemplate ? 'T-' : '') . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => $isTemplate ? null : $byteOffset,
			'anchor' => $anchor
		);

		# give headline the correct <h#> tag
		if ($sectionIndex !== false) {
			// Output edit section links as markers with styles that can be customized by skins
			if ($isTemplate) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editlinkArgs = array($titleText, "T-{$sectionIndex}");
			} else {
				$editlinkArgs = array($this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint);
			}
			// We use a bit of pseudo-xml for editsection markers. The language converter is run later on
			// Using a UNIQ style marker leads to the converter screwing up the tokens when it converts stuff
			// And trying to insert strip tags fails too. At this point all real inputted tags have already been escaped
			// so we don't have to worry about a user trying to input one of these markers directly.
			// We use a page and section attribute to stop the language converter from converting these important bits
			// of data, but put the headline hint inside a content block because the language converter is supposed to
			// be able to convert that piece of data.
			$editlink = '<mw:editsection page="' . htmlspecialchars($editlinkArgs[0]);
			$editlink .= '" section="' . htmlspecialchars($editlinkArgs[1]) . '"';
			if (isset($editlinkArgs[2])) {
				$editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline($level, $matches['attrib'][$headlineCount], $anchor, $headline, $editlink, $legacyAnchor);

		$headlineCount++;
	}

	$this->setOutputType($oldType);

	# Never ever show TOC if no headers
	if ($numVisible < 1) {
		$enoughToc = false;
	}

	if ($enoughToc) {
		if ($prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel) {
			$toc .= Linker::tocUnindent($prevtoclevel - 1);
		}
		$toc = Linker::tocList($toc, $this->mOptions->getUserLang());
		$this->mOutput->setTOCHTML($toc);
	}

	if ($isMain) {
		$this->mOutput->setSections($tocraw);
	}

	# split up and insert constructed headlines
	# The closing-tag part of this pattern must accept the same forms as the
	# preg_match_all() pattern above ("</Hn" followed by optional spaces),
	# because $head[$i] is index-aligned with $blocks.
	$blocks = preg_split('/<H[1-6].*?' . '>.*?<\\/H[1-6] *>/i', $text);
	$i = 0;

	foreach ($blocks as $block) {
		if ($showEditLink && $headlineCount > 0 && $i == 0 && $block !== "\n") {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled

			# Disabled because it broke block formatting
			# For example, a bullet point in the top line
			# $full .= $sk->editSectionLink(0);
		}
		$full .= $block;
		if ($enoughToc && !$i && $isMain && !$this->mForceTocPosition) {
			# Top anchor now in skin
			$full = $full . $toc;
		}

		if (!empty($head[$i])) {
			$full .= $head[$i];
		}
		$i++;
	}
	if ($this->mForceTocPosition) {
		return str_replace('<!--MWTOC-->', $toc, $full);
	} else {
		return $full;
	}
}
/**
 * Get Parser instance that's suitable for passing it to CoreParserFunctions
 * in MessageCache::transform()
 *
 * Parsers are cached in self::$parsersCache, keyed by the interface-message
 * setting and the user language of the given options (objects contribute
 * their class name, other values their serialized form). When the cache
 * holds more than 25 parsers, all of them are released to the ParserPool
 * and the cache is emptied before a new parser is obtained.
 *
 * @author Władysław Bodzek <*****@*****.**>
 *
 * @param ParserOptions $popts
 * @return Parser
 */
function getParserFor(ParserOptions $popts) {
	$interfaceMessage = $popts->getInterfaceMessage();
	$userLanguage = $popts->getUserLang();

	# Build the cache key from the two option values
	$hash = array();
	foreach (array($interfaceMessage, $userLanguage) as $obj) {
		if (is_object($obj)) {
			$hash[] = get_class($obj);
		} else {
			$hash[] = serialize($obj);
		}
	}
	$hash = implode('|', $hash);

	if (!isset(self::$parsersCache[$hash])) {
		if (count(self::$parsersCache) > 25) {
			foreach (self::$parsersCache as $parser) {
				ParserPool::release($parser);
			}
			# Forget the released parsers. Without this they would still be
			# handed out from the cache after being released, would be
			# released again on every later cache miss, and the cache would
			# keep growing past the limit.
			self::$parsersCache = array();
		}

		$parser = ParserPool::get();
		# NOTE(review): the Title here looks like a placeholder for a page
		# that does not exist - confirm against Title's constructor.
		$parser->startExternalParse(new Title('DoesntExistXYZ'), $popts, Parser::OT_PREPROCESS, true);
		self::$parsersCache[$hash] = $parser;
	}

	return self::$parsersCache[$hash];
}