/** * Get headings on the page. * @return string[] * First strip out things that look like references. We can't use HTML filtering because * the references come back as <sup> tags without a class. To keep from breaking stuff like * ==Applicability of the strict mass–energy equivalence formula, ''E'' = ''mc''<sup>2</sup>== * we don't remove the whole <sup> tag. We also don't want to strip the <sup> tag and remove * everything that looks like [2] because, I dunno, maybe there is a band named Word [2] Foo * or something. Whatever. So we only strip things that look like <sup> tags wrapping a * reference. And since the data looks like: * Reference in heading <sup>[1]</sup><sup>[2]</sup> * we can not really use HtmlFormatter as we have no suitable selector. */ public function headings() { $headings = []; $ignoredHeadings = $this->getIgnoredHeadings(); foreach ($this->parserOutput->getSections() as $heading) { $heading = $heading['line']; // Some wikis wrap the brackets in a span: // https://en.wikipedia.org/wiki/MediaWiki:Cite_reference_link $heading = preg_replace('/<\\/?span>/', '', $heading); // Normalize [] so the following regexp would work. $heading = preg_replace(['/[/', '/]/'], ['[', ']'], $heading); $heading = preg_replace('/<sup>\\s*\\[\\s*\\d+\\s*\\]\\s*<\\/sup>/is', '', $heading); // Strip tags from the heading or else we'll display them (escaped) in search results $heading = trim(Sanitizer::stripAllTags($heading)); // Note that we don't take the level of the heading into account - all headings are equal. // Except the ones we ignore. if (!in_array($heading, $ignoredHeadings)) { $headings[] = $heading; } } return $headings; }
public function execute() { // The data is hot but user-dependent, like page views, so we set vary cookies $this->getMain()->setCacheMode('anon-public-user-private'); // Get parameters $params = $this->extractRequestParams(); $text = $params['text']; $title = $params['title']; $page = $params['page']; $pageid = $params['pageid']; $oldid = $params['oldid']; if (!is_null($page) && (!is_null($text) || $title != 'API')) { $this->dieUsage('The page parameter cannot be used together with the text and title parameters', 'params'); } $prop = array_flip($params['prop']); if (isset($params['section'])) { $this->section = $params['section']; } else { $this->section = false; } // The parser needs $wgTitle to be set, apparently the // $title parameter in Parser::parse isn't enough *sigh* // TODO: Does this still need $wgTitle? global $wgParser, $wgTitle, $wgLang; // Currently unnecessary, code to act as a safeguard against any change in current behaviour of uselang breaks $oldLang = null; if (isset($params['uselang']) && $params['uselang'] != $wgLang->getCode()) { $oldLang = $wgLang; // Backup wgLang $wgLang = Language::factory($params['uselang']); } $popts = ParserOptions::newFromContext($this->getContext()); $popts->setTidy(true); $popts->enableLimitReport(!$params['disablepp']); $redirValues = null; // Return result $result = $this->getResult(); if (!is_null($oldid) || !is_null($pageid) || !is_null($page)) { if (!is_null($oldid)) { // Don't use the parser cache $rev = Revision::newFromID($oldid); if (!$rev) { $this->dieUsage("There is no revision ID {$oldid}", 'missingrev'); } if (!$rev->userCan(Revision::DELETED_TEXT, $this->getUser())) { $this->dieUsage("You don't have permission to view deleted revisions", 'permissiondenied'); } $titleObj = $rev->getTitle(); $wgTitle = $titleObj; // If for some reason the "oldid" is actually the current revision, it may be cached if ($titleObj->getLatestRevID() === intval($oldid)) { // May get from/save to parser cache $p_result = $this->getParsedSectionOrText($titleObj, $popts, $pageid, isset($prop['wikitext'])); } else { // This is an old revision, so get the text differently $this->text = $rev->getText(Revision::FOR_THIS_USER, $this->getUser()); if ($this->section !== false) { $this->text = $this->getSectionText($this->text, 'r' . $rev->getId()); } // Should we save old revision parses to the parser cache? $p_result = $wgParser->parse($this->text, $titleObj, $popts); } } else { // Not $oldid, but $pageid or $page if ($params['redirects']) { $reqParams = array('action' => 'query', 'redirects' => ''); if (!is_null($pageid)) { $reqParams['pageids'] = $pageid; } else { // $page $reqParams['titles'] = $page; } $req = new FauxRequest($reqParams); $main = new ApiMain($req); $main->execute(); $data = $main->getResultData(); $redirValues = isset($data['query']['redirects']) ? $data['query']['redirects'] : array(); $to = $page; foreach ((array) $redirValues as $r) { $to = $r['to']; } $titleObj = Title::newFromText($to); } else { if (!is_null($pageid)) { $reqParams['pageids'] = $pageid; $titleObj = Title::newFromID($pageid); } else { // $page $to = $page; $titleObj = Title::newFromText($to); } } if (!is_null($pageid)) { if (!$titleObj) { // Still throw nosuchpageid error if pageid was provided $this->dieUsageMsg(array('nosuchpageid', $pageid)); } } elseif (!$titleObj || !$titleObj->exists()) { $this->dieUsage("The page you specified doesn't exist", 'missingtitle'); } $wgTitle = $titleObj; if (isset($prop['revid'])) { $oldid = $titleObj->getLatestRevID(); } // Potentially cached $p_result = $this->getParsedSectionOrText($titleObj, $popts, $pageid, isset($prop['wikitext'])); } } else { // Not $oldid, $pageid, $page. Hence based on $text if (is_null($text)) { $this->dieUsage('The text parameter should be passed with the title parameter. Should you be using the "page" parameter instead?', 'params'); } $this->text = $text; $titleObj = Title::newFromText($title); if (!$titleObj) { $this->dieUsageMsg(array('invalidtitle', $title)); } $wgTitle = $titleObj; if ($this->section !== false) { $this->text = $this->getSectionText($this->text, $titleObj->getText()); } if ($params['pst'] || $params['onlypst']) { $this->pstText = $wgParser->preSaveTransform($this->text, $titleObj, $this->getUser(), $popts); } if ($params['onlypst']) { // Build a result and bail out $result_array = array(); $result_array['text'] = array(); $result->setContent($result_array['text'], $this->pstText); if (isset($prop['wikitext'])) { $result_array['wikitext'] = array(); $result->setContent($result_array['wikitext'], $this->text); } $result->addValue(null, $this->getModuleName(), $result_array); return; } // Not cached (save or load) $p_result = $wgParser->parse($params['pst'] ? $this->pstText : $this->text, $titleObj, $popts); } $result_array = array(); $result_array['title'] = $titleObj->getPrefixedText(); if (!is_null($oldid)) { $result_array['revid'] = intval($oldid); } if ($params['redirects'] && !is_null($redirValues)) { $result_array['redirects'] = $redirValues; } /* Wikia Change Starts */ if (!$p_result instanceof ParserOutput) { \Wikia\Logger\WikiaLogger::instance()->error('ApiParse no ParserOutput', ['pageTitle' => $title]); // generate empty parser output to support code working on it $p_result = new ParserOutput(''); } /* Wikia Change Ends */ if (isset($prop['text'])) { $result_array['text'] = array(); $result->setContent($result_array['text'], $p_result->getText()); } if (!is_null($params['summary'])) { $result_array['parsedsummary'] = array(); $result->setContent($result_array['parsedsummary'], Linker::formatComment($params['summary'], $titleObj)); } if (isset($prop['langlinks'])) { $result_array['langlinks'] = $this->formatLangLinks($p_result->getLanguageLinks()); } if (isset($prop['languageshtml'])) { $languagesHtml = $this->languagesHtml($p_result->getLanguageLinks()); $result_array['languageshtml'] = array(); $result->setContent($result_array['languageshtml'], $languagesHtml); } if (isset($prop['categories'])) { $result_array['categories'] = $this->formatCategoryLinks($p_result->getCategories()); } if (isset($prop['categorieshtml'])) { $categoriesHtml = $this->categoriesHtml($p_result->getCategories()); $result_array['categorieshtml'] = array(); $result->setContent($result_array['categorieshtml'], $categoriesHtml); } if (isset($prop['links'])) { $result_array['links'] = $this->formatLinks($p_result->getLinks()); } if (isset($prop['templates'])) { $result_array['templates'] = $this->formatLinks($p_result->getTemplates()); } if (isset($prop['images'])) { $result_array['images'] = array_keys($p_result->getImages()); } if (isset($prop['externallinks'])) { $result_array['externallinks'] = array_keys($p_result->getExternalLinks()); } if (isset($prop['sections'])) { $result_array['sections'] = $p_result->getSections(); } if (isset($prop['displaytitle'])) { $result_array['displaytitle'] = $p_result->getDisplayTitle() ? $p_result->getDisplayTitle() : $titleObj->getPrefixedText(); } if (isset($prop['headitems']) || isset($prop['headhtml'])) { $context = $this->getContext(); $context->setTitle($titleObj); $context->getOutput()->addParserOutputNoText($p_result); if (isset($prop['headitems'])) { $headItems = $this->formatHeadItems($p_result->getHeadItems()); $css = $this->formatCss($context->getOutput()->buildCssLinksArray()); $scripts = array($context->getOutput()->getHeadScripts()); $result_array['headitems'] = array_merge($headItems, $css, $scripts); } if (isset($prop['headhtml'])) { $result_array['headhtml'] = array(); $result->setContent($result_array['headhtml'], $context->getOutput()->headElement($context->getSkin())); } } if (isset($prop['iwlinks'])) { $result_array['iwlinks'] = $this->formatIWLinks($p_result->getInterwikiLinks()); } if (isset($prop['wikitext'])) { $result_array['wikitext'] = array(); $result->setContent($result_array['wikitext'], $this->text); if (!is_null($this->pstText)) { $result_array['psttext'] = array(); $result->setContent($result_array['psttext'], $this->pstText); } } $result_mapping = array('redirects' => 'r', 'langlinks' => 'll', 'categories' => 'cl', 'links' => 'pl', 'templates' => 'tl', 'images' => 'img', 'externallinks' => 'el', 'iwlinks' => 'iw', 'sections' => 's', 'headitems' => 'hi'); $this->setIndexedTagNames($result_array, $result_mapping); $result->addValue(null, $this->getModuleName(), $result_array); if (!is_null($oldLang)) { $wgLang = $oldLang; // Reset $wgLang to $oldLang } }