Пример #1
0
 /**
  * Extract parts of the text - opening, main and auxiliary.
  *
  * Takes the text returned by $this->parserOutput->getText() and fills in
  * $this->openingText, $this->allText and $this->auxText. The extraction is
  * performed at most once: when $this->allText is already set the method
  * returns immediately. When the parser output text is empty, only
  * $this->allText is set (to an empty string) and nothing else is touched.
  */
 private function extractWikitextParts()
 {
     if ($this->allText !== null) {
         // Already extracted on a previous call.
         return;
     }
     // Edit-section tokens and the table of contents are switched off before
     // the text is fetched.
     $this->parserOutput->setEditSectionTokens(false);
     $this->parserOutput->setTOCEnabled(false);
     $text = $this->parserOutput->getText();
     if (strlen($text) === 0) {
         $this->allText = "";
         // empty text - nothing to seek here
         return;
     }
     // The opening text is taken from the text as returned by the parser
     // output, before the line-break adjustment below is applied.
     $this->openingText = $this->extractHeadingBeforeFirstHeading($text);
     // Add extra spacing around break tags so text crammed together like<br>this
     // doesn't make one word.
     $text = str_replace('<br', "\n<br", $text);
     $formatter = new HtmlFormatter($text);
     // Strip elements from the page that we never want in the search text.
     // The result of this first filtering pass is intentionally discarded.
     $formatter->remove($this->excludedElementSelectors);
     $formatter->filterContent();
     // Strip elements from the page that are auxiliary text.  These will still be
     // searched but matches will be ranked lower and non-auxiliary matches will be
     // preferred in highlighting.
     $formatter->remove($this->auxiliaryElementSelectors);
     $auxiliaryElements = $formatter->filterContent();
     // What is left after both filtering passes is the main text.
     $this->allText = trim(Sanitizer::stripAllTags($formatter->getText()));
     // Each element returned by the second pass becomes one auxiliary text entry.
     foreach ($auxiliaryElements as $auxiliaryElement) {
         $this->auxText[] = trim(Sanitizer::stripAllTags($formatter->getText($auxiliaryElement)));
     }
 }