/**
 * Disallow writes to the specified indexes.
 *
 * @param string[]|null $indexes List of index types to disallow writes to.
 *   null means to prevent indexing in all indexes across all wikis.
 */
public function freezeIndexes( array $indexes = null ) {
	global $wgCirrusSearchUpdateConflictRetryCount;

	if ( $indexes === null ) {
		// Freeze everything, everywhere, with the sentinel name.
		$names = array( self::ALL_INDEXES_FROZEN_NAME );
	} elseif ( count( $indexes ) === 0 ) {
		// Nothing was requested, so there is nothing to freeze.
		return;
	} else {
		$names = $this->indexesToIndexNames( $indexes );
	}

	$this->log->info( "Freezing writes to: " . implode( ',', $names ) );

	// One marker document per index name; upsert so the marker is
	// created if absent and retried on version conflicts.
	$freezeDocs = array();
	foreach ( $names as $indexName ) {
		$marker = new \Elastica\Document( $indexName, array( 'name' => $indexName ) );
		$marker->setDocAsUpsert( true );
		$marker->setRetryOnConflict( $wgCirrusSearchUpdateConflictRetryCount );
		$freezeDocs[] = $marker;
	}

	$client = $this->connection->getClient();
	$type = $this->connection->getFrozenIndexNameType();
	// Elasticsearch has a queue capacity of 50, so a single bulk request
	// holding 50 documents could bump up against the max. Chunk the
	// documents and send the batches sequentially instead.
	foreach ( array_chunk( $freezeDocs, 30 ) as $batch ) {
		$bulk = new \Elastica\Bulk( $client );
		$bulk->setType( $type );
		$bulk->addData( $batch, 'update' );
		$bulk->send();
	}

	// Ensure our freeze is immediately seen (mostly for testing purposes).
	$type->getIndex()->refresh();
}
/**
 * Convert a batch of pages into Elastica documents ready for indexing.
 *
 * @param \WikiPage[] $pages Pages to build documents for
 * @param int $flags Bitfield of self::INDEX_ON_SKIP, self::SKIP_PARSE,
 *   self::SKIP_LINKS and self::FORCE_PARSE controlling how much of the
 *   document is built
 * @return \Elastica\Document[] One document per page that exists; pages
 *   that no longer exist are logged and skipped
 */
private function buildDocumentsForPages( $pages, $flags ) {
	global $wgCirrusSearchUpdateConflictRetryCount;

	// Decode the flag bitfield up front.
	$indexOnSkip = $flags & self::INDEX_ON_SKIP;
	$skipParse = $flags & self::SKIP_PARSE;
	$skipLinks = $flags & self::SKIP_LINKS;
	$forceParse = $flags & self::FORCE_PARSE;
	// A "full" document is one where nothing was skipped.
	$fullDocument = !( $skipParse || $skipLinks );

	$builtDocs = array();
	foreach ( $pages as $page ) {
		$title = $page->getTitle();
		if ( !$page->exists() ) {
			LoggerFactory::getInstance( 'CirrusSearch' )->warning(
				'Attempted to build a document for a page that doesn\'t exist. This should be caught ' .
				"earlier but wasn't. Page: {title}",
				array( 'title' => $title )
			);
			continue;
		}

		$doc = new \Elastica\Document(
			$page->getId(),
			array(
				'version' => $page->getLatest(),
				'version_type' => 'external',
				'namespace' => $title->getNamespace(),
				'namespace_text' => Util::getNamespaceText( $title ),
				'title' => $title->getText(),
				'timestamp' => wfTimestamp( TS_ISO_8601, $page->getTimestamp() ),
			)
		);
		// Everything is sent as an update to prevent overwriting fields
		// maintained in other processes like OtherIndex::updateOtherIndex.
		// But we need a way to index documents that don't already exist, so
		// we're willing to upsert any full documents, or any documents that
		// we've been explicitly told are ok to index when they aren't full
		// (typically only during the first phase of the initial index build).
		// A quick note about docAsUpsert's merging behavior: it overwrites
		// all fields provided by doc unless they are objects in both doc and
		// the indexed source. We're ok with this because all of our fields
		// are either regular types or lists of objects, and lists are
		// overwritten.
		$doc->setDocAsUpsert( $fullDocument || $indexOnSkip );
		$doc->setRetryOnConflict( $wgCirrusSearchUpdateConflictRetryCount );

		if ( !$skipParse ) {
			// Fetch the content and parser output the builders work from.
			list( $content, $parserOutput ) = $this->getContentAndParserOutput( $page, $forceParse );

			// Layer the page metadata onto the document...
			$metadataBuilder = new PageDataBuilder( $doc, $title, $content, $parserOutput );
			$doc = $metadataBuilder->build();

			// ...then the page text itself...
			$textBuilder = new PageTextBuilder( $doc, $content, $parserOutput );
			$doc = $textBuilder->build();

			// ...and, for files, their metadata too.
			if ( $title->getNamespace() === NS_FILE ) {
				$fileBuilder = new FileDataBuilder( $doc, $title );
				$doc = $fileBuilder->build();
			}

			// Then let hooks have a go.
			MWHooks::run( 'CirrusSearchBuildDocumentParse',
				array( $doc, $title, $content, $parserOutput, $this->connection ) );
		}

		if ( !$skipLinks ) {
			MWHooks::run( 'CirrusSearchBuildDocumentLinks',
				array( $doc, $title, $this->connection ) );
		}

		$builtDocs[] = $doc;
	}

	MWHooks::run( 'CirrusSearchBuildDocumentFinishBatch', array( $pages ) );

	return $builtDocs;
}