Пример #1
0
 /**
  * Post-process the headings found in already-rendered text.
  *
  * This function accomplishes several tasks:
  * 1) Auto-number headings if that option is enabled
  * 2) Add an edit-section marker to sections when edit links are enabled and
  *    the page does not contain __NOEDITSECTION__
  * 3) Build a table of contents and hand it to the parser output
  * 4) Auto-anchor headings
  *
  * It loops through all headlines, collects the necessary data, then splits up the
  * string and re-inserts the newly formatted headlines.
  *
  * Side effects visible in this method: flags, TOC HTML and (when $isMain is
  * true) the section list are stored on $this->mOutput; $this->mShowToc is
  * forced to true by __FORCETOC__; the output type is switched to OT_WIKI
  * while headings are processed and restored afterwards.
  *
  * @param $text String: text containing the <h1>..<h6> elements to process
  * @param $origText String: original, untouched wikitext
  * @param $isMain Boolean: when true, the section list is stored on the output
  *   and the TOC is inserted after the first block of text (unless a forced
  *   TOC position is in use)
  * @return String: $text with every heading replaced by its rebuilt version
  * @private
  */
 function formatHeadings($text, $origText, $isMain = true)
 {
     global $wgMaxTocLevel, $wgHtml5, $wgExperimentalHtmlIds;
     # Inhibit editsection links if requested in the page
     if (isset($this->mDoubleUnderscores['noeditsection'])) {
         $showEditLink = 0;
     } else {
         $showEditLink = $this->mOptions->getEditSection();
     }
     if ($showEditLink) {
         $this->mOutput->setEditSectionTokens(true);
     }
     # Get all headlines for numbering them and adding funky stuff like [edit]
     # links - this is for later, but we need the number of headlines right now
     $matches = array();
     $numMatches = preg_match_all('/<H(?P<level>[1-6])(?P<attrib>.*?' . '>)(?P<header>.*?)<\\/H[1-6] *>/i', $text, $matches);
     # if there are fewer than 4 headlines in the article, do not show TOC
     # unless it's been explicitly enabled.
     $enoughToc = $this->mShowToc && ($numMatches >= 4 || $this->mForceTocPosition);
     # Allow user to stipulate that a page should have a "new section"
     # link added via __NEWSECTIONLINK__
     if (isset($this->mDoubleUnderscores['newsectionlink'])) {
         $this->mOutput->setNewSection(true);
     }
     # Allow user to remove the "new section"
     # link via __NONEWSECTIONLINK__
     if (isset($this->mDoubleUnderscores['nonewsectionlink'])) {
         $this->mOutput->hideNewSection(true);
     }
     # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
     # override above conditions and always show TOC above first header
     if (isset($this->mDoubleUnderscores['forcetoc'])) {
         $this->mShowToc = true;
         $enoughToc = true;
     }
     # headline counter
     $headlineCount = 0;
     $numVisible = 0;
     # Ugh .. the TOC should have neat indentation levels which can be
     # passed to the skin functions. These are determined here
     $toc = '';
     $full = '';
     $head = array();
     $sublevelCount = array();
     $levelCount = array();
     $level = 0;
     $prevlevel = 0;
     $toclevel = 0;
     $prevtoclevel = 0;
     # A headline may start with a marker carrying a serial number that
     # indexes $this->mHeadings (source title and section index).
     $markerRegex = "{$this->mUniqPrefix}-h-(\\d+)-" . self::MARKER_SUFFIX;
     $baseTitleText = $this->mTitle->getPrefixedDBkey();
     # Switch the output type while walking the original wikitext; it is
     # restored right after the headline loop.
     $oldType = $this->mOutputType;
     $this->setOutputType(self::OT_WIKI);
     $frame = $this->getPreprocessor()->newFrame();
     $root = $this->preprocessToDom($origText);
     $node = $root->getFirstChild();
     $byteOffset = 0;
     $tocraw = array();
     # Number of times each lower-cased anchor has been used so far
     $refers = array();
     foreach ($matches[3] as $headline) {
         $isTemplate = false;
         $titleText = false;
         $sectionIndex = false;
         $numbering = '';
         $markerMatches = array();
         if (preg_match("/^{$markerRegex}/", $headline, $markerMatches)) {
             $serial = $markerMatches[1];
             list($titleText, $sectionIndex) = $this->mHeadings[$serial];
             # A heading recorded under a different title than the page
             # being parsed is treated as coming from a template
             $isTemplate = $titleText != $baseTitleText;
             $headline = preg_replace("/^{$markerRegex}/", "", $headline);
         }
         if ($toclevel) {
             $prevlevel = $level;
         }
         $level = $matches[1][$headlineCount];
         if ($level > $prevlevel) {
             # Increase TOC level
             $toclevel++;
             $sublevelCount[$toclevel] = 0;
             if ($toclevel < $wgMaxTocLevel) {
                 $prevtoclevel = $toclevel;
                 $toc .= Linker::tocIndent();
                 $numVisible++;
             }
         } elseif ($level < $prevlevel && $toclevel > 1) {
             # Decrease TOC level, find level to jump to
             for ($i = $toclevel; $i > 0; $i--) {
                 if ($levelCount[$i] == $level) {
                     # Found last matching level
                     $toclevel = $i;
                     break;
                 } elseif ($levelCount[$i] < $level) {
                     # Found first matching level below current level
                     $toclevel = $i + 1;
                     break;
                 }
             }
             # Loop ran to completion without a match: fall back to the top level
             if ($i == 0) {
                 $toclevel = 1;
             }
             if ($toclevel < $wgMaxTocLevel) {
                 if ($prevtoclevel < $wgMaxTocLevel) {
                     # Unindent only if the previous toc level was shown :p
                     $toc .= Linker::tocUnindent($prevtoclevel - $toclevel);
                     $prevtoclevel = $toclevel;
                 } else {
                     $toc .= Linker::tocLineEnd();
                 }
             }
         } else {
             # No change in level, end TOC line
             if ($toclevel < $wgMaxTocLevel) {
                 $toc .= Linker::tocLineEnd();
             }
         }
         $levelCount[$toclevel] = $level;
         # count number of headlines for each level
         if (isset($sublevelCount[$toclevel])) {
             $sublevelCount[$toclevel]++;
         } else {
             $sublevelCount[$toclevel] = 1;
         }
         # Build the dotted section number (e.g. "2.1.3") from the per-level counters
         $dot = 0;
         for ($i = 1; $i <= $toclevel; $i++) {
             if (!empty($sublevelCount[$i])) {
                 if ($dot) {
                     $numbering .= '.';
                 }
                 $numbering .= $this->getFunctionLang()->formatNum($sublevelCount[$i]);
                 $dot = 1;
             }
         }
         # The safe header is a version of the header text safe to use for links
         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
         $safeHeadline = $this->mStripState->unstripBoth($headline);
         # Remove link placeholders by the link text.
         #     <!--LINK number-->
         # turns into
         #     link text with suffix
         $safeHeadline = $this->replaceLinkHoldersText($safeHeadline);
         # Strip out HTML (other than plain <sup> and <sub>: bug 8393)
         $tocline = preg_replace(array('#<(?!/?(sup|sub)).*?' . '>#', '#<(/?(sup|sub)).*?' . '>#'), array('', '<$1>'), $safeHeadline);
         $tocline = trim($tocline);
         # For the anchor, strip out HTML-y stuff period
         $safeHeadline = preg_replace('/<.*?' . '>/', '', $safeHeadline);
         $safeHeadline = Sanitizer::normalizeSectionNameWhitespace($safeHeadline);
         # Save headline for section edit hint before it's escaped
         $headlineHint = $safeHeadline;
         if ($wgHtml5 && $wgExperimentalHtmlIds) {
             # For reverse compatibility, provide an id that's
             # HTML4-compatible, like we used to.
             #
             # It may be worth noting, academically, that it's possible for
             # the legacy anchor to conflict with a non-legacy headline
             # anchor on the page.  In this case likely the "correct" thing
             # would be to either drop the legacy anchors or make sure
             # they're numbered first.  However, this would require people
             # to type in section names like "abc_.D7.93.D7.90.D7.A4"
             # manually, so let's not bother worrying about it.
             $legacyHeadline = Sanitizer::escapeId($safeHeadline, array('noninitial', 'legacy'));
             $safeHeadline = Sanitizer::escapeId($safeHeadline);
             if ($legacyHeadline == $safeHeadline) {
                 # No reason to have both (in fact, we can't)
                 $legacyHeadline = false;
             }
         } else {
             $legacyHeadline = false;
             $safeHeadline = Sanitizer::escapeId($safeHeadline, 'noninitial');
         }
         # HTML names must be case-insensitively unique (bug 10721).
         # This does not apply to Unicode characters per
         # http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison
         # @todo FIXME: We may be changing them depending on the current locale.
         $arrayKey = strtolower($safeHeadline);
         if ($legacyHeadline === false) {
             $legacyArrayKey = false;
         } else {
             $legacyArrayKey = strtolower($legacyHeadline);
         }
         # count how many in assoc. array so we can track dupes in anchors
         if (isset($refers[$arrayKey])) {
             $refers[$arrayKey]++;
         } else {
             $refers[$arrayKey] = 1;
         }
         # Only count the legacy anchor when there is one. Using boolean false
         # as an array key would be cast to integer 0, which is the same slot
         # as a heading whose id is "0" and would give that heading a
         # spurious "_N" suffix.
         if ($legacyArrayKey !== false) {
             if (isset($refers[$legacyArrayKey])) {
                 $refers[$legacyArrayKey]++;
             } else {
                 $refers[$legacyArrayKey] = 1;
             }
         }
         # Don't number the heading if it is the only one (looks silly)
         if (count($matches[3]) > 1 && $this->mOptions->getNumberHeadings()) {
             # the two are different if the line contains a link
             $headline = $numbering . ' ' . $headline;
         }
         # Create the anchor for linking from the TOC to the section
         $anchor = $safeHeadline;
         $legacyAnchor = $legacyHeadline;
         if ($refers[$arrayKey] > 1) {
             $anchor .= '_' . $refers[$arrayKey];
         }
         if ($legacyHeadline !== false && $refers[$legacyArrayKey] > 1) {
             $legacyAnchor .= '_' . $refers[$legacyArrayKey];
         }
         if ($enoughToc && (!isset($wgMaxTocLevel) || $toclevel < $wgMaxTocLevel)) {
             $toc .= Linker::tocLine($anchor, $tocline, $numbering, $toclevel, $isTemplate ? false : $sectionIndex);
         }
         # Add the section to the section tree
         # Find the DOM node for this header, accumulating the length of every
         # node skipped on the way. Template headings are not looked up.
         while ($node && !$isTemplate) {
             if ($node->getName() === 'h') {
                 $bits = $node->splitHeading();
                 if ($bits['i'] == $sectionIndex) {
                     break;
                 }
             }
             # NOTE(review): the value is stored under 'byteoffset' but is
             # measured with mb_strlen(); confirm which unit consumers expect.
             $byteOffset += mb_strlen($this->mStripState->unstripBoth($frame->expand($node, PPFrame::RECOVER_ORIG)));
             $node = $node->getNextSibling();
         }
         $tocraw[] = array('toclevel' => $toclevel, 'level' => $level, 'line' => $tocline, 'number' => $numbering, 'index' => ($isTemplate ? 'T-' : '') . $sectionIndex, 'fromtitle' => $titleText, 'byteoffset' => $isTemplate ? null : $byteOffset, 'anchor' => $anchor);
         # give headline the correct <h#> tag
         if ($sectionIndex !== false) {
             // Output edit section links as markers with styles that can be customized by skins
             if ($isTemplate) {
                 # Put a T flag in the section identifier, to indicate to extractSections()
                 # that sections inside <includeonly> should be counted.
                 $editlinkArgs = array($titleText, "T-{$sectionIndex}");
             } else {
                 $editlinkArgs = array($this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint);
             }
             // We use a bit of pseudo-xml for editsection markers. The language converter is run later on
             // Using a UNIQ style marker leads to the converter screwing up the tokens when it converts stuff
             // And trying to insert strip tags fails too. At this point all real inputted tags have already been escaped
             // so we don't have to worry about a user trying to input one of these markers directly.
             // We use a page and section attribute to stop the language converter from converting these important bits
             // of data, but put the headline hint inside a content block because the language converter is supposed to
             // be able to convert that piece of data.
             $editlink = '<mw:editsection page="' . htmlspecialchars($editlinkArgs[0]);
             $editlink .= '" section="' . htmlspecialchars($editlinkArgs[1]) . '"';
             if (isset($editlinkArgs[2])) {
                 $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>';
             } else {
                 $editlink .= '/>';
             }
         } else {
             $editlink = '';
         }
         $head[$headlineCount] = Linker::makeHeadline($level, $matches['attrib'][$headlineCount], $anchor, $headline, $editlink, $legacyAnchor);
         $headlineCount++;
     }
     $this->setOutputType($oldType);
     # Never ever show TOC if no headers
     if ($numVisible < 1) {
         $enoughToc = false;
     }
     if ($enoughToc) {
         # Close any TOC levels that are still open
         if ($prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel) {
             $toc .= Linker::tocUnindent($prevtoclevel - 1);
         }
         $toc = Linker::tocList($toc, $this->mOptions->getUserLang());
         $this->mOutput->setTOCHTML($toc);
     }
     if ($isMain) {
         $this->mOutput->setSections($tocraw);
     }
     # split up and insert constructed headlines
     # The pattern must match exactly the same strings as the collecting
     # pattern above (including optional spaces in the closing tag), otherwise
     # $blocks and $head get out of step.
     $blocks = preg_split('/<H[1-6].*?' . '>.*?<\\/H[1-6] *>/i', $text);
     $i = 0;
     # No [edit] link is emitted for the top block of text: an earlier attempt
     # was disabled because it broke block formatting (for example, a bullet
     # point in the top line).
     foreach ($blocks as $block) {
         $full .= $block;
         if ($enoughToc && !$i && $isMain && !$this->mForceTocPosition) {
             # Top anchor now in skin
             $full = $full . $toc;
         }
         if (!empty($head[$i])) {
             $full .= $head[$i];
         }
         $i++;
     }
     if ($this->mForceTocPosition) {
         return str_replace('<!--MWTOC-->', $toc, $full);
     } else {
         return $full;
     }
 }
Пример #2
0
 /**
  * Get Parser instance that's suitable for passing it to CoreParserFunctions
  * in MessageCache::transform()
  *
  * Parsers are cached in self::$parsersCache, keyed by the interface-message
  * setting and the user language taken from $popts. Object values contribute
  * only their class name to the key; everything else is serialized. Once the
  * cache holds more than 25 parsers, all of them are released back to
  * ParserPool and the cache is emptied before a new parser is fetched.
  *
  * @author Władysław Bodzek <*****@*****.**>
  *
  * @param ParserOptions $popts
  * @return Parser
  */
 function getParserFor(ParserOptions $popts)
 {
     $interfaceMessage = $popts->getInterfaceMessage();
     $userLanguage = $popts->getUserLang();
     $hash = array();
     foreach (array($interfaceMessage, $userLanguage) as $obj) {
         if (is_object($obj)) {
             # NOTE(review): two different objects of the same class map to
             # the same key - confirm that is intended.
             $hash[] = get_class($obj);
         } else {
             $hash[] = serialize($obj);
         }
     }
     $hash = implode('|', $hash);
     if (!isset(self::$parsersCache[$hash])) {
         if (count(self::$parsersCache) > 25) {
             foreach (self::$parsersCache as $parser) {
                 ParserPool::release($parser);
             }
             # Drop the released parsers from the cache. Without this they
             # could still be returned from here after going back to the
             # pool, and every later cache miss would release them again.
             self::$parsersCache = array();
         }
         $parser = ParserPool::get();
         $parser->startExternalParse(new Title('DoesntExistXYZ'), $popts, Parser::OT_PREPROCESS, true);
         self::$parsersCache[$hash] = $parser;
     }
     return self::$parsersCache[$hash];
 }