/**
 * Post-process the headings of already-rendered page text.
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link marker to sections unless __NOEDITSECTION__ is present
 * 3) Build a table of contents and insert it (before the first heading, or at
 *    the <!--MWTOC--> placeholder when a TOC position is forced)
 * 4) Auto-anchor headings, de-duplicating anchors case-insensitively
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Side effects visible in this method: it may set edit-section tokens, the
 * new-section flags, the TOC HTML and (when $isMain is true) the section list
 * on $this->mOutput; it may set $this->mShowToc; it temporarily switches the
 * output type to OT_WIKI and restores it; and it runs the 'ParserSectionCreate'
 * hook once per section.
 *
 * @param string $text Text in which headings are already <h1>..<h6> elements
 * @param string $origText Original, untouched wikitext (preprocessed to a DOM
 *   here in order to compute the per-section offsets)
 * @param bool $isMain When true, the section list is stored on the parser
 *   output and the TOC is prepended to the first section
 * @return string The text with formatted headlines and the TOC inserted
 * @private
 */
function formatHeadings($text, $origText, $isMain = true) {
    global $wgMaxTocLevel, $wgExperimentalHtmlIds;

    # Inhibit editsection links if requested in the page
    if (isset($this->mDoubleUnderscores['noeditsection'])) {
        $maybeShowEditLink = $showEditLink = false;
    } else {
        $maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
        $showEditLink = $this->mOptions->getEditSection();
    }
    if ($showEditLink) {
        $this->mOutput->setEditSectionTokens(true);
    }

    # Get all headlines for numbering them and adding funky stuff like [edit]
    # links - this is for later, but we need the number of headlines right now.
    # The '?' and '>' are split across two literals, presumably so that the
    # source never contains a literal PHP close-tag sequence.
    $matches = array();
    $numMatches = preg_match_all(
        '/<H(?P<level>[1-6])(?P<attrib>.*?' . '>)\\s*(?P<header>[\\s\\S]*?)\\s*<\\/H[1-6] *>/i',
        $text,
        $matches
    );

    # if there are fewer than 4 headlines in the article, do not show TOC
    # unless it's been explicitly enabled.
    $enoughToc = $this->mShowToc && ($numMatches >= 4 || $this->mForceTocPosition);

    # Allow user to stipulate that a page should have a "new section"
    # link added via __NEWSECTIONLINK__
    if (isset($this->mDoubleUnderscores['newsectionlink'])) {
        $this->mOutput->setNewSection(true);
    }

    # Allow user to remove the "new section"
    # link via __NONEWSECTIONLINK__
    if (isset($this->mDoubleUnderscores['nonewsectionlink'])) {
        $this->mOutput->hideNewSection(true);
    }

    # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
    # override above conditions and always show TOC above first header
    if (isset($this->mDoubleUnderscores['forcetoc'])) {
        $this->mShowToc = true;
        $enoughToc = true;
    }

    # headline counter
    $headlineCount = 0;
    # number of TOC indent levels actually opened (i.e. below $wgMaxTocLevel)
    $numVisible = 0;

    # Ugh .. the TOC should have neat indentation levels which can be
    # passed to the skin functions. These are determined here
    $toc = '';
    $head = array();
    $sublevelCount = array();
    $levelCount = array();
    $level = 0;
    $prevlevel = 0;
    $toclevel = 0;
    $prevtoclevel = 0;
    $markerRegex = "{$this->mUniqPrefix}-h-(\\d+)-" . self::MARKER_SUFFIX;
    $baseTitleText = $this->mTitle->getPrefixedDBkey();

    # Expand the original wikitext in OT_WIKI mode while walking the DOM;
    # the previous output type is restored after the loop.
    $oldType = $this->mOutputType;
    $this->setOutputType(self::OT_WIKI);
    $frame = $this->getPreprocessor()->newFrame();
    $root = $this->preprocessToDom($origText);
    $node = $root->getFirstChild();
    $byteOffset = 0;
    $tocraw = array();
    # lower-cased anchor => number of times it has been used so far
    $refers = array();

    foreach ($matches[3] as $headline) {
        $isTemplate = false;
        $titleText = false;
        $sectionIndex = false;
        $numbering = '';

        # A leading heading marker carries the serial number under which the
        # originating title and section index were recorded in mHeadings.
        $markerMatches = array();
        if (preg_match("/^{$markerRegex}/", $headline, $markerMatches)) {
            $serial = $markerMatches[1];
            list($titleText, $sectionIndex) = $this->mHeadings[$serial];
            $isTemplate = $titleText != $baseTitleText;
            $headline = preg_replace("/^{$markerRegex}\\s*/", "", $headline);
        }

        if ($toclevel) {
            $prevlevel = $level;
        }
        $level = $matches[1][$headlineCount];

        if ($level > $prevlevel) {
            # Increase TOC level
            $toclevel++;
            $sublevelCount[$toclevel] = 0;
            if ($toclevel < $wgMaxTocLevel) {
                $prevtoclevel = $toclevel;
                $toc .= Linker::tocIndent();
                $numVisible++;
            }
        } elseif ($level < $prevlevel && $toclevel > 1) {
            # Decrease TOC level, find level to jump to
            for ($i = $toclevel; $i > 0; $i--) {
                if ($levelCount[$i] == $level) {
                    # Found last matching level
                    $toclevel = $i;
                    break;
                } elseif ($levelCount[$i] < $level) {
                    # Found first matching level below current level
                    $toclevel = $i + 1;
                    break;
                }
            }
            # The loop ran to completion without a match
            if ($i == 0) {
                $toclevel = 1;
            }
            if ($toclevel < $wgMaxTocLevel) {
                if ($prevtoclevel < $wgMaxTocLevel) {
                    # Unindent only if the previous toc level was shown :p
                    $toc .= Linker::tocUnindent($prevtoclevel - $toclevel);
                    $prevtoclevel = $toclevel;
                } else {
                    $toc .= Linker::tocLineEnd();
                }
            }
        } else {
            # No change in level, end TOC line
            if ($toclevel < $wgMaxTocLevel) {
                $toc .= Linker::tocLineEnd();
            }
        }

        $levelCount[$toclevel] = $level;

        # count number of headlines for each level
        $sublevelCount[$toclevel]++;

        # Build the dotted section number (e.g. "2.1.3") from the per-level counts
        $dot = 0;
        for ($i = 1; $i <= $toclevel; $i++) {
            if (!empty($sublevelCount[$i])) {
                if ($dot) {
                    $numbering .= '.';
                }
                $numbering .= $this->getTargetLanguage()->formatNum($sublevelCount[$i]);
                $dot = 1;
            }
        }

        # The safe header is a version of the header text safe to use for links

        # Remove link placeholders by the link text.
        #     <!--LINK number-->
        # turns into
        #     link text with suffix
        # Do this before unstrip since link text can contain strip markers
        $safeHeadline = $this->replaceLinkHoldersText($headline);

        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
        $safeHeadline = $this->mStripState->unstripBoth($safeHeadline);

        # Strip out HTML (first regex removes any tag not allowed)
        # Allowed tags are:
        # * <sup> and <sub> (bug 8393)
        # * <i> (bug 26375)
        # * <b> (r105284)
        # * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
        #
        # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
        # to allow setting directionality in toc items.
        $tocline = preg_replace(
            array(
                '#<(?!/?(span|sup|sub|i|b)(?: [^>]*)?>).*?' . '>#',
                '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|i|b))(?: .*?)?' . '>#'
            ),
            array('', '<$1>'),
            $safeHeadline
        );
        $tocline = trim($tocline);

        # For the anchor, strip out HTML-y stuff period
        $safeHeadline = preg_replace('/<.*?' . '>/', '', $safeHeadline);
        $safeHeadline = Sanitizer::normalizeSectionNameWhitespace($safeHeadline);

        # Save headline for section edit hint before it's escaped
        $headlineHint = $safeHeadline;

        if ($wgExperimentalHtmlIds) {
            # For reverse compatibility, provide an id that's
            # HTML4-compatible, like we used to.
            #
            # It may be worth noting, academically, that it's possible for
            # the legacy anchor to conflict with a non-legacy headline
            # anchor on the page. In this case likely the "correct" thing
            # would be to either drop the legacy anchors or make sure
            # they're numbered first. However, this would require people
            # to type in section names like "abc_.D7.93.D7.90.D7.A4"
            # manually, so let's not bother worrying about it.
            $legacyHeadline = Sanitizer::escapeId($safeHeadline, array('noninitial', 'legacy'));
            $safeHeadline = Sanitizer::escapeId($safeHeadline);

            if ($legacyHeadline == $safeHeadline) {
                # No reason to have both (in fact, we can't)
                $legacyHeadline = false;
            }
        } else {
            $legacyHeadline = false;
            $safeHeadline = Sanitizer::escapeId($safeHeadline, 'noninitial');
        }

        # HTML names must be case-insensitively unique (bug 10721).
        # This does not apply to Unicode characters per
        # http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison
        # @todo FIXME: We may be changing them depending on the current locale.
        $arrayKey = strtolower($safeHeadline);
        if ($legacyHeadline === false) {
            $legacyArrayKey = false;
        } else {
            $legacyArrayKey = strtolower($legacyHeadline);
        }

        # count how many in assoc. array so we can track dupes in anchors
        if (isset($refers[$arrayKey])) {
            $refers[$arrayKey]++;
        } else {
            $refers[$arrayKey] = 1;
        }
        # Only count the legacy anchor when there is one. PHP casts the array
        # key false to integer 0, which is the same slot as the string key "0";
        # counting unconditionally made every heading without a legacy anchor
        # bump the counter of a heading whose anchor is "0", giving that heading
        # a spurious "_N" suffix.
        if ($legacyArrayKey !== false) {
            if (isset($refers[$legacyArrayKey])) {
                $refers[$legacyArrayKey]++;
            } else {
                $refers[$legacyArrayKey] = 1;
            }
        }

        # Don't number the heading if it is the only one (looks silly)
        if (count($matches[3]) > 1 && $this->mOptions->getNumberHeadings()) {
            # the two are different if the line contains a link
            $headline = Html::element('span', array('class' => 'mw-headline-number'), $numbering)
                . ' ' . $headline;
        }

        # Create the anchor for linking from the TOC to the section
        $anchor = $safeHeadline;
        $legacyAnchor = $legacyHeadline;
        if ($refers[$arrayKey] > 1) {
            $anchor .= '_' . $refers[$arrayKey];
        }
        if ($legacyHeadline !== false && $refers[$legacyArrayKey] > 1) {
            $legacyAnchor .= '_' . $refers[$legacyArrayKey];
        }
        if ($enoughToc && (!isset($wgMaxTocLevel) || $toclevel < $wgMaxTocLevel)) {
            $toc .= Linker::tocLine(
                $anchor,
                $tocline,
                $numbering,
                $toclevel,
                $isTemplate ? false : $sectionIndex
            );
        }

        # Add the section to the section tree
        # Find the DOM node for this header
        $noOffset = $isTemplate || $sectionIndex === false;
        while ($node && !$noOffset) {
            if ($node->getName() === 'h') {
                $bits = $node->splitHeading();
                if ($bits['i'] == $sectionIndex) {
                    break;
                }
            }
            # NOTE(review): despite the name, this accumulates mb_strlen() of the
            # expanded node text; left unchanged so the emitted values stay the same.
            $byteOffset += mb_strlen(
                $this->mStripState->unstripBoth($frame->expand($node, PPFrame::RECOVER_ORIG))
            );
            $node = $node->getNextSibling();
        }
        $tocraw[] = array(
            'toclevel' => $toclevel,
            'level' => $level,
            'line' => $tocline,
            'number' => $numbering,
            'index' => ($isTemplate ? 'T-' : '') . $sectionIndex,
            'fromtitle' => $titleText,
            'byteoffset' => $noOffset ? null : $byteOffset,
            'anchor' => $anchor
        );

        # give headline the correct <h#> tag
        if ($maybeShowEditLink && $sectionIndex !== false) {
            // Output edit section links as markers with styles that can be customized by skins
            if ($isTemplate) {
                # Put a T flag in the section identifier, to indicate to extractSections()
                # that sections inside <includeonly> should be counted.
                $editlinkArgs = array($titleText, "T-{$sectionIndex}");
            } else {
                $editlinkArgs = array($this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint);
            }
            // We use a bit of pseudo-xml for editsection markers. The language converter is run later on.
            // Using a UNIQ style marker leads to the converter screwing up the tokens when it converts stuff,
            // and trying to insert strip tags fails too. At this point all real inputted tags have already
            // been escaped, so we don't have to worry about a user trying to input one of these markers directly.
            // We use a page and section attribute to stop the language converter from converting these
            // important bits of data, but put the headline hint inside a content block because the language
            // converter is supposed to be able to convert that piece of data.
            $editlink = '<mw:editsection page="' . htmlspecialchars($editlinkArgs[0]);
            $editlink .= '" section="' . htmlspecialchars($editlinkArgs[1]) . '"';
            if (isset($editlinkArgs[2])) {
                $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>';
            } else {
                $editlink .= '/>';
            }
        } else {
            $editlink = '';
        }
        $head[$headlineCount] = Linker::makeHeadline(
            $level,
            $matches['attrib'][$headlineCount],
            $anchor,
            $headline,
            $editlink,
            $legacyAnchor
        );

        $headlineCount++;
    }

    $this->setOutputType($oldType);

    # Never ever show TOC if no headers
    if ($numVisible < 1) {
        $enoughToc = false;
    }

    if ($enoughToc) {
        # Close any TOC levels that are still open
        if ($prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel) {
            $toc .= Linker::tocUnindent($prevtoclevel - 1);
        }
        $toc = Linker::tocList($toc, $this->mOptions->getUserLangObj());
        $this->mOutput->setTOCHTML($toc);
        $toc = self::TOC_START . $toc . self::TOC_END;
    }

    if ($isMain) {
        $this->mOutput->setSections($tocraw);
    }

    # split up and insert constructed headlines
    # NOTE(review): unlike the match pattern above, this one does not accept
    # spaces before the closing '>' of the end tag; left unchanged.
    $blocks = preg_split('/<H[1-6].*?' . '>[\\s\\S]*?<\\/H[1-6]>/i', $text);
    $i = 0;

    // build an array of document sections
    $sections = array();
    foreach ($blocks as $block) {
        // $head is zero-based, sections aren't.
        if (empty($head[$i - 1])) {
            $sections[$i] = $block;
        } else {
            $sections[$i] = $head[$i - 1] . $block;
        }

        /**
         * Send a hook, one per section.
         * The idea here is to be able to make section-level DIVs, but to do so in a
         * lower-impact, more correct way than r50769
         *
         * $this : caller
         * $section : the section number
         * &$sectionContent : ref to the content of the section
         * $showEditLinks : boolean describing whether this section has an edit link
         */
        wfRunHooks('ParserSectionCreate', array($this, $i, &$sections[$i], $showEditLink));

        $i++;
    }

    if ($enoughToc && $isMain && !$this->mForceTocPosition) {
        // append the TOC at the beginning
        // Top anchor now in skin
        $sections[0] = $sections[0] . $toc . "\n";
    }

    $full = join('', $sections);

    if ($this->mForceTocPosition) {
        return str_replace('<!--MWTOC-->', $toc, $full);
    } else {
        return $full;
    }
}