/**
 * Populate this search result from a single Elastica result.
 *
 * Copies the document id, title, size, timestamp and word count out of the
 * result, then converts its highlights into escaped snippets for the title,
 * redirect, text, section heading and category.
 *
 * @param \Elastica\ResultSet $results containing all search results (not read by this constructor)
 * @param \Elastica\Result $result containing the given search result
 * @param string $interwiki Interwiki prefix, if any
 */
public function __construct($results, $result, $interwiki = '') {
    // Must run before the title is built: makeTitle() below reads $this->interwiki.
    if ($interwiki) {
        $this->setInterwiki($result, $interwiki);
    }

    $this->docId = $result->getId();
    $this->mTitle = Title::makeTitle($result->namespace, $result->title, '', $this->interwiki);
    if ($this->getTitle()->getNamespace() == NS_FILE) {
        $this->mImage = wfFindFile($this->mTitle);
    }

    $storedFields = $result->getFields();
    if (isset($storedFields['text.word_count'])) {
        $this->wordCount = $storedFields['text.word_count'][0];
    } else {
        // Not every query asks for a word count; report zero when it is absent.
        $this->wordCount = 0;
    }
    $this->byteSize = $result->text_bytes;
    $this->timestamp = new MWTimestamp($result->timestamp);

    $snippets = $result->getHighlights();
    $hasTitleHighlight = isset($snippets['title']);

    if ($hasTitleHighlight) {
        // Namespace 0 gets no prefix; every other namespace is shown as "Namespace:".
        if ($this->getTitle()->getNamespace() === 0) {
            $prefix = '';
        } else {
            $prefix = Util::getNamespaceText($this->getTitle()) . ':';
        }
        $this->titleSnippet = $prefix . $this->escapeHighlightedText($snippets['title'][0]);
    } elseif ($this->mTitle->isExternal()) {
        // Interwiki results never carry title highlights by design, but leaving
        // the title snippet unset produces odd output, so build a plain one.
        $interwikiNs = $this->getInterwikiNamespaceText();
        $plainTitle = $this->mTitle->getText();
        if ($interwikiNs) {
            $this->titleSnippet = $interwikiNs . ':' . $plainTitle;
        } else {
            $this->titleSnippet = $plainTitle;
        }
    }

    if (!$hasTitleHighlight && isset($snippets['redirect.title'])) {
        // Resolve the redirect title from the raw highlight first: escaping
        // the highlight breaks the lookup.
        $knownRedirects = $result->redirect;
        $this->redirectTitle = $this->findRedirectTitle($snippets['redirect.title'][0], $knownRedirects);
        // NOTE(review): "redirectSnipppet" is spelled with three p's. The property
        // declaration and its readers are outside this view, so the name is kept.
        $this->redirectSnipppet = $this->escapeHighlightedText($snippets['redirect.title'][0]);
    }

    $this->textSnippet = $this->escapeHighlightedText($this->pickTextSnippet($snippets));

    if (isset($snippets['heading'])) {
        // The snippet is assigned before findSectionTitle() is called; keep this order.
        $this->sectionSnippet = $this->escapeHighlightedText($snippets['heading'][0]);
        $this->sectionTitle = $this->findSectionTitle();
    }

    if (isset($snippets['category'])) {
        $this->categorySnippet = $this->escapeHighlightedText($snippets['category'][0]);
    }
}
/**
 * Build one update document for each page that exists.
 *
 * Pages that do not exist are logged as a warning and skipped. Depending on
 * $flags the parse-derived data and/or the link hooks are left out of the
 * document.
 *
 * @param \WikiPage[] $pages pages to build documents for
 * @param int $flags bitmask tested against self::INDEX_ON_SKIP, self::SKIP_PARSE,
 *  self::SKIP_LINKS and self::FORCE_PARSE
 * @return array the documents built, in page order, excluding skipped pages
 */
private function buildDocumentsForPages($pages, $flags) {
    global $wgCirrusSearchUpdateConflictRetryCount;

    $parseSkipped = $flags & self::SKIP_PARSE;
    $linksSkipped = $flags & self::SKIP_LINKS;
    $parseForced = $flags & self::FORCE_PARSE;
    $upsertWhenPartial = $flags & self::INDEX_ON_SKIP;

    // A document is "full" only when neither the parse nor the links step is skipped.
    $isFullDocument = !$parseSkipped && !$linksSkipped;

    // Everything is sent as an update so that fields maintained by other processes
    // (such as OtherIndex::updateOtherIndex) are not overwritten. Documents that do
    // not exist yet still need a way in, so upsert is allowed for full documents and
    // for partial ones we were explicitly told may be indexed, which is typically
    // only the first phase of the initial index build.
    // About docAsUpsert merging: it overwrites every field supplied by the doc unless
    // the field is an object in both the doc and the indexed source. That is fine
    // here because all fields are regular types or lists of objects, and lists are
    // overwritten.
    $allowUpsert = $isFullDocument || $upsertWhenPartial;

    $built = array();
    foreach ($pages as $wikiPage) {
        $title = $wikiPage->getTitle();

        if (!$wikiPage->exists()) {
            $logger = LoggerFactory::getInstance('CirrusSearch');
            $logger->warning(
                'Attempted to build a document for a page that doesn\'t exist. This should be caught earlier but wasn\'t. Page: {title}',
                array('title' => $title)
            );
            continue;
        }

        $pageId = $wikiPage->getId();
        $baseData = array(
            'version' => $wikiPage->getLatest(),
            'version_type' => 'external',
            'namespace' => $title->getNamespace(),
            'namespace_text' => Util::getNamespaceText($title),
            'title' => $title->getText(),
            'timestamp' => wfTimestamp(TS_ISO_8601, $wikiPage->getTimestamp()),
        );
        $doc = new \Elastica\Document($pageId, $baseData);
        $doc->setDocAsUpsert($allowUpsert);
        $doc->setRetryOnConflict($wgCirrusSearchUpdateConflictRetryCount);

        if (!$parseSkipped) {
            // Fetch the content and parser output that the builders below work from.
            $parsed = $this->getContentAndParserOutput($wikiPage, $parseForced);
            $content = $parsed[0];
            $parserOutput = $parsed[1];

            // Page data first, then the page text; each builder hands back the document.
            $dataBuilder = new PageDataBuilder($doc, $title, $content, $parserOutput);
            $doc = $dataBuilder->build();

            $pageTextBuilder = new PageTextBuilder($doc, $content, $parserOutput);
            $doc = $pageTextBuilder->build();

            // Files additionally get their metadata.
            if ($title->getNamespace() === NS_FILE) {
                $fileDataBuilder = new FileDataBuilder($doc, $title);
                $doc = $fileDataBuilder->build();
            }

            // Finally give hook subscribers a turn at the parsed document.
            MWHooks::run(
                'CirrusSearchBuildDocumentParse',
                array($doc, $title, $content, $parserOutput, $this->connection)
            );
        }

        if (!$linksSkipped) {
            MWHooks::run('CirrusSearchBuildDocumentLinks', array($doc, $title, $this->connection));
        }

        $built[] = $doc;
    }

    // Fired once per batch with the original page list, including any skipped pages.
    MWHooks::run('CirrusSearchBuildDocumentFinishBatch', array($pages));

    return $built;
}