/**
 * Extract parts of the text - opening, main and auxiliary.
 *
 * Populates $this->allText, $this->openingText and $this->auxText from the
 * rendered parser output. The work is done at most once: if $this->allText
 * is already non-null the method returns immediately.
 *
 * When the rendered text is empty, $this->allText is set to "" and the
 * opening and auxiliary properties are left untouched.
 */
private function extractWikitextParts()
{
    // Already extracted on a previous call - nothing to do.
    if ($this->allText !== null) {
        return;
    }

    // Turn off edit-section tokens and the table of contents before
    // rendering so they are not part of the text we extract.
    $this->parserOutput->setEditSectionTokens(false);
    $this->parserOutput->setTOCEnabled(false);
    $text = $this->parserOutput->getText();

    if (strlen($text) === 0) {
        // empty text - nothing to seek here
        $this->allText = "";
        return;
    }

    // The opening text is taken from the unmodified rendered HTML, before
    // the break-tag spacing below is applied.
    $this->openingText = $this->extractHeadingBeforeFirstHeading($text);

    // Add extra spacing around break tags so text crammed together like<br>this
    // doesn't make one word.
    $text = str_replace('<br', "\n<br", $text);

    $formatter = new HtmlFormatter($text);

    // Strip elements from the page that we never want in the search text.
    // The result of this filter pass is deliberately discarded.
    $formatter->remove($this->excludedElementSelectors);
    $formatter->filterContent();

    // Strip elements from the page that are auxiliary text. These will still be
    // searched but matches will be ranked lower and non-auxiliary matches will be
    // preferred in highlighting.
    $formatter->remove($this->auxiliaryElementSelectors);
    $auxiliaryElements = $formatter->filterContent();

    // Whatever remains after both filter passes is the main text.
    $this->allText = trim(Sanitizer::stripAllTags($formatter->getText()));

    // Each element returned by the second filter pass becomes its own
    // tag-stripped, trimmed auxiliary text entry.
    foreach ($auxiliaryElements as $auxiliaryElement) {
        $this->auxText[] = trim(Sanitizer::stripAllTags($formatter->getText($auxiliaryElement)));
    }
}