/**
  * Build the result.
  * @param \Elastica\ResultSet $results containing all search results
  * @param \Elastica\Result $result containing information about the result this class should represent
  * @param string $interwiki Interwiki prefix, if any
  */
 public function __construct($results, $result, $interwiki = '')
 {
     if ($interwiki) {
         $this->setInterwiki($result, $interwiki);
     }
     $this->docId = $result->getId();
     $this->mTitle = Title::makeTitle($result->namespace, $result->title, '', $this->interwiki);
     if ($this->getTitle()->getNamespace() == NS_FILE) {
         $this->mImage = wfFindFile($this->mTitle);
     }

     // The word count field is only present when it was requested; fall back to 0.
     $resultFields = $result->getFields();
     if (isset($resultFields['text.word_count'])) {
         $this->wordCount = $resultFields['text.word_count'][0];
     } else {
         $this->wordCount = 0;
     }
     $this->byteSize = $result->text_bytes;
     $this->timestamp = new MWTimestamp($result->timestamp);

     $snippets = $result->getHighlights();
     $hasTitleHighlight = isset($snippets['title']);
     if ($hasTitleHighlight) {
         // Prefix the highlighted title with its namespace text unless it is namespace 0.
         $prefix = '';
         if ($this->getTitle()->getNamespace() !== 0) {
             $prefix = Util::getNamespaceText($this->getTitle()) . ':';
         }
         $this->titleSnippet = $prefix . $this->escapeHighlightedText($snippets['title'][0]);
     } elseif ($this->mTitle->isExternal()) {
         // Interwiki results carry no title highlight by design, so build a plain
         // title snippet by hand to keep the display sane.
         $interwikiNs = $this->getInterwikiNamespaceText();
         $plainTitle = $this->mTitle->getText();
         $this->titleSnippet = $interwikiNs ? "{$interwikiNs}:{$plainTitle}" : $plainTitle;
     }

     if (!$hasTitleHighlight && isset($snippets['redirect.title'])) {
         // The redirect title must be resolved from the raw highlight: escaping
         // the text first would break the lookup.
         $rawRedirectHighlight = $snippets['redirect.title'][0];
         $this->redirectTitle = $this->findRedirectTitle($rawRedirectHighlight, $result->redirect);
         $this->redirectSnipppet = $this->escapeHighlightedText($rawRedirectHighlight);
     }

     $this->textSnippet = $this->escapeHighlightedText($this->pickTextSnippet($snippets));

     if (isset($snippets['heading'])) {
         $this->sectionSnippet = $this->escapeHighlightedText($snippets['heading'][0]);
         $this->sectionTitle = $this->findSectionTitle();
     }
     if (isset($snippets['category'])) {
         $this->categorySnippet = $this->escapeHighlightedText($snippets['category'][0]);
     }
 }
 /**
  * @param \Exception $e exception caught
  * @param int $errors number of errors
  * @param string $messagePrefix
  * @param string $description
  */
 public function sleepOnRetry(\Exception $e, $errors, $messagePrefix, $description)
 {
     $errorClass = get_class($e);
     // The delay is derived from the number of errors seen so far.
     $delay = Util::backoffDelay($errors);
     $errorMessage = ElasticsearchIntermediary::extractMessage($e);

     $notice = "{$messagePrefix}Caught an error {$description}.  "
         . "Backing off for {$delay} and retrying.  "
         . "Error type is '{$errorClass}' and message is:  {$errorMessage}\n";
     $this->outputIndented($notice);

     sleep($delay);
 }
 /**
  * Extract more like this settings from the i18n message cirrussearch-morelikethis-settings
  */
 private static function overrideMoreLikeThisOptionsFromMessage()
 {
     global $wgCirrusSearchMoreLikeThisConfig, $wgCirrusSearchMoreLikeThisUseFields, $wgCirrusSearchMoreLikeThisAllowedFields, $wgCirrusSearchMoreLikeThisMaxQueryTermsLimit, $wgCirrusSearchMoreLikeThisFields;

     // The parsed message lines are fetched through the cache (600 is passed as TTL).
     $cache = \ObjectCache::newAccelerator(CACHE_NONE);
     $lines = $cache->getWithSetCallback($cache->makeKey('cirrussearch-morelikethis-settings'), 600, function () {
         $source = wfMessage('cirrussearch-morelikethis-settings')->inContentLanguage();
         if ($source && $source->isDisabled()) {
             return array();
         }
         return Util::parseSettingsInMessage($source->plain());
     });

     foreach ($lines as $line) {
         // A line without a "key:value" separator carries no setting. Skip it
         // rather than letting list() read a missing offset.
         if (strpos($line, ':') === false) {
             continue;
         }
         list($k, $v) = explode(':', $line, 2);
         switch ($k) {
             case 'min_doc_freq':
             case 'max_doc_freq':
             case 'max_query_terms':
             case 'min_term_freq':
             case 'min_word_len':
             case 'max_word_len':
                 // Non-negative numbers override the setting; the literal "null" removes it.
                 if (is_numeric($v) && $v >= 0) {
                     $wgCirrusSearchMoreLikeThisConfig[$k] = intval($v);
                 } elseif ($v === 'null') {
                     unset($wgCirrusSearchMoreLikeThisConfig[$k]);
                 }
                 break;
             case 'percent_terms_to_match':
                 // Only values in (0, 1] are accepted; the literal "null" removes the setting.
                 if (is_numeric($v) && $v > 0 && $v <= 1) {
                     $wgCirrusSearchMoreLikeThisConfig[$k] = $v;
                 } elseif ($v === 'null') {
                     unset($wgCirrusSearchMoreLikeThisConfig[$k]);
                 }
                 break;
             case 'fields':
                 // array_intersect() preserves the keys of its first argument, so
                 // re-index to keep the result a plain 0..n-1 list after filtering.
                 $wgCirrusSearchMoreLikeThisFields = array_values(array_intersect(array_map('trim', explode(',', $v)), $wgCirrusSearchMoreLikeThisAllowedFields));
                 break;
             case 'use_fields':
                 if ($v === 'true') {
                     $wgCirrusSearchMoreLikeThisUseFields = true;
                 } elseif ($v === 'false') {
                     $wgCirrusSearchMoreLikeThisUseFields = false;
                 }
                 break;
         }
     }

     // Cap max_query_terms once every line has been applied. This yields the same
     // final value as checking after each line, but no longer reads the key when
     // it is absent (e.g. after a "max_query_terms:null" line unset it). As
     // before, the cap is only enforced when the message supplied at least one line.
     if ($lines
         && isset($wgCirrusSearchMoreLikeThisConfig['max_query_terms'])
         && $wgCirrusSearchMoreLikeThisConfig['max_query_terms'] > $wgCirrusSearchMoreLikeThisMaxQueryTermsLimit
     ) {
         $wgCirrusSearchMoreLikeThisConfig['max_query_terms'] = $wgCirrusSearchMoreLikeThisMaxQueryTermsLimit;
     }
 }
 /**
  * @return float[]
  */
 public static function getDefaultBoostTemplates()
 {
     // Already resolved during this request: reuse the memoized value.
     if (self::$defaultBoostTemplates !== null) {
         return self::$defaultBoostTemplates;
     }

     $cache = \ObjectCache::newAccelerator(CACHE_NONE);
     $cacheKey = $cache->makeKey('cirrussearch-boost-templates');
     self::$defaultBoostTemplates = $cache->getWithSetCallback($cacheKey, 600, function () {
         $message = wfMessage('cirrussearch-boost-templates')->inContentLanguage();
         if ($message->isDisabled()) {
             return array();
         }
         // Join the setting lines into one string and parse the templates out of it.
         $settingLines = Util::parseSettingsInMessage($message->plain());
         return Util::parseBoostTemplates(implode(' ', $settingLines));
     });

     return self::$defaultBoostTemplates;
 }
 /**
  * Read the cirrussearch-ignored-headings message (content language) and
  * return its parsed setting lines. The result is memoized in a static local.
  * @return array parsed lines, or an empty array when the message is disabled
  */
 private function getIgnoredHeadings()
 {
     static $ignoredHeadings = null;
     if ($ignoredHeadings !== null) {
         return $ignoredHeadings;
     }

     $message = wfMessage('cirrussearch-ignored-headings')->inContentLanguage();
     $ignoredHeadings = array();
     if (!$message->isDisabled()) {
         // Each parsed line is used as-is as a heading.
         $ignoredHeadings = Util::parseSettingsInMessage($message->plain());
     }

     return $ignoredHeadings;
 }
 /**
  * Wraps the complex pool counter interface to force the single call pattern
  * that Cirrus always uses.
  * @param $type same as type parameter on PoolCounter::factory
  * @param $user the user
  * @param $workCallback callback when pool counter is acquired.  Called with
  *   no parameters.
  * @param $errorCallback optional callback called on errors.  Called with
  *   the error string, the key and the user name (empty string for logged in
  *   users) as parameters.  If left undefined defaults to a function that
  *   returns a fatal status and logs a warning.
  */
 public static function doPoolCounterWork($type, $user, $workCallback, $errorCallback = null)
 {
     global $wgCirrusSearchPoolCounterKey;
     // By default the pool counter allows you to lock the same key with
     // multiple types.  That might be useful but it isn't how Cirrus thinks.
     // Instead, all keys are scoped to their type.
     if (!$user) {
         // We don't want to even use the pool counter if there isn't a user.
         return $workCallback();
     }
     // Two keys are used: one derived from the md5 of the user name, and one
     // shared key built from the work type and the configured pool counter key.
     $perUserKey = md5($user->getName());
     $perUserKey = "nowait:CirrusSearch:_per_user:{$perUserKey}";
     $globalKey = "{$type}:{$wgCirrusSearchPoolCounterKey}";
     if ($errorCallback === null) {
         // Default error handling: log a warning and hand back a fatal Status.
         $errorCallback = function ($error, $key, $userName) {
             // {userName}, {key} and {error} are left as placeholders here; their
             // values are supplied through the context array passed to warning().
             $forUserName = $userName ? "for {userName} " : '';
             LoggerFactory::getInstance('CirrusSearch')->warning("Pool error {$forUserName}on {key}:  {error}", array('userName' => $userName, 'key' => $key, 'error' => $error));
             return Status::newFatal('cirrussearch-backend-error');
         };
     }
     // Factory: given a key, returns a closure taking a status object, which
     // forwards the first entry of its errors array to $errorCallback.
     $errorHandler = function ($key) use($errorCallback, $user) {
         return function ($status) use($errorCallback, $key, $user) {
             $status = $status->getErrorsArray();
             // anon usernames are needed within the logs to determine if
             // specific ips (such as large #'s of users behind a proxy)
             // need to be whitelisted. We do not need this information
             // for logged in users and do not store it.
             $userName = $user->isAnon() ? $user->getName() : '';
             return $errorCallback($status[0][0], $key, $userName);
         };
     };
     // Inner unit of work: runs $workCallback under the type-scoped global key.
     $doPerUserWork = function () use($type, $globalKey, $workCallback, $errorHandler) {
         // Now that we have the per user lock lets get the operation lock.
         // Note that this could block, causing the user to wait in line with their lock held.
         $work = new PoolCounterWorkViaCallback($type, $globalKey, array('doWork' => $workCallback, 'error' => $errorHandler($globalKey)));
         return $work->execute();
     };
     // Outer unit of work: takes the per-user key first, then runs the inner work.
     $work = new PoolCounterWorkViaCallback('CirrusSearch-PerUser', $perUserKey, array('doWork' => $doPerUserWork, 'error' => function ($status) use($errorHandler, $perUserKey, $doPerUserWork) {
         // The error callback is always invoked for a per-user error. Its result
         // is only returned when Util::isUserPoolCounterActive() is true;
         // otherwise the inner work is run anyway.
         $errorCallback = $errorHandler($perUserKey);
         $errorResult = $errorCallback($status);
         if (Util::isUserPoolCounterActive()) {
             return $errorResult;
         } else {
             return $doPerUserWork();
         }
     }));
     return $work->execute();
 }
 /**
  * Scroll over the selected index and pass every document (_id, _type and
  * _source) to write(), optionally restricted by the 'filter', 'sourceFields'
  * and 'limit' options.
  */
 public function execute()
 {
     global $wgPoolCounterConf;

     // Make sure we don't flood the pool counter
     unset($wgPoolCounterConf['CirrusSearch-Search']);

     // Set the timeout for maintenance actions
     $this->setConnectionTimeout();

     $this->indexType = $this->getOption('indexType');
     $this->indexBaseName = $this->getOption('baseName', wfWikiId());

     $knownIndexTypes = $this->getConnection()->getAllIndexTypes();
     if (!in_array($this->indexType, $knownIndexTypes)) {
         $this->error('indexType option must be one of ' . implode(', ', $knownIndexTypes), 1);
     }

     $utils = new ConfigUtils($this->getConnection()->getClient(), $this);
     $this->indexIdentifier = $this->getOption('indexIdentifier');

     $limit = (int) $this->getOption('limit', 0);

     $query = new Query();
     $query->setFields(array('_id', '_type', '_source'));
     if ($this->hasOption('sourceFields')) {
         $query->setSource(array('include' => explode(',', $this->getOption('sourceFields'))));
     }
     if ($this->hasOption('filter')) {
         // The filter option is a query string applied on top of a match-all query.
         $filter = new Elastica\Filter\Query(new Elastica\Query\QueryString($this->getOption('filter')));
         $query->setQuery(new \Elastica\Query\Filtered(new \Elastica\Query\MatchAll(), $filter));
     }

     $index = $this->getIndex();
     $result = $index->search($query, array(
         'search_type' => 'scan',
         'scroll' => "15m",
         'size' => $this->inputChunkSize,
     ));

     $responseData = $result->getResponse()->getData();
     $totalDocsInIndex = $responseData['hits']['total'];
     // A positive limit takes precedence over the index total for progress reporting.
     $totalDocsToDump = $totalDocsInIndex;
     if ($limit > 0) {
         $totalDocsToDump = $limit;
     }

     $docsDumped = 0;
     $this->logToStderr = true;
     $this->output("Dumping {$totalDocsToDump} documents ({$totalDocsInIndex} in the index)\n");

     $dumper = $this;
     $scrollId = $result->getResponse()->getScrollId();
     Util::iterateOverScroll($index, $scrollId, '15m', function ($batch) use($dumper, &$docsDumped, $totalDocsToDump) {
         foreach ($batch as $hit) {
             $dumper->write(array(
                 '_id' => $hit->getId(),
                 '_type' => $hit->getType(),
                 '_source' => $hit->getSource(),
             ));
             $docsDumped++;
             $dumper->outputProgress($docsDumped, $totalDocsToDump);
         }
     }, $limit, 5);

     $this->output("Dump done.\n");
 }
 /**
  * @param integer $maxDocs the number of docs in the index
  * @param float[]|null $boostTemplates Array of key values, key is the template name, value the boost factor.
  *        Defaults to Util::getDefaultBoostTemplates()
  */
 public function __construct($maxDocs, $boostTemplates = null)
 {
     $this->maxDocs = $maxDocs;

     // Any falsy value (null, empty array) falls back to the default templates.
     if (!$boostTemplates) {
         $boostTemplates = Util::getDefaultBoostTemplates();
     }
     $this->boostTemplates = $boostTemplates;

     // Incoming links are normalized according to the size of the index; on a
     // very small wiki the computed norm would drop below 1, so floor it at 1.
     $this->incomingLinksNorm = max(1, (int) ($maxDocs * self::INCOMING_LINKS_MAX_DOCS_FACTOR));
 }
 /**
  * Read options and globals into instance state, then run the index
  * validation steps. Elastica exceptions are reported through error().
  * The 'justCacheWarmers' and 'justAllocation' options each run a single
  * validation step and return early.
  */
 public function execute()
 {
     global $wgPoolCounterConf, $wgLanguageCode, $wgCirrusSearchPhraseSuggestUseText, $wgCirrusSearchPrefixSearchStartsWithAnyWord, $wgCirrusSearchBannedPlugins, $wgCirrusSearchOptimizeIndexForExperimentalHighlighter, $wgCirrusSearchMaxShardsPerNode, $wgCirrusSearchRefreshInterval;
     // Make sure we don't flood the pool counter
     unset($wgPoolCounterConf['CirrusSearch-Search']);
     // Set the timeout for maintenance actions
     $this->setConnectionTimeout();
     $utils = new ConfigUtils($this->getConnection()->getClient(), $this);
     // Command line options, with their defaults.
     $this->indexType = $this->getOption('indexType');
     $this->startOver = $this->getOption('startOver', false);
     $this->indexBaseName = $this->getOption('baseName', wfWikiId());
     $this->reindexAndRemoveOk = $this->getOption('reindexAndRemoveOk', false);
     $this->reindexProcesses = $this->getOption('reindexProcesses', wfIsWindows() ? 1 : 5);
     $this->reindexAcceptableCountDeviation = Util::parsePotentialPercent($this->getOption('reindexAcceptableCountDeviation', '5%'));
     $this->reindexChunkSize = $this->getOption('reindexChunkSize', 100);
     $this->reindexRetryAttempts = $this->getOption('reindexRetryAttempts', 5);
     $this->printDebugCheckConfig = $this->getOption('debugCheckConfig', false);
     // Settings copied from configuration globals.
     $this->langCode = $wgLanguageCode;
     $this->prefixSearchStartsWithAny = $wgCirrusSearchPrefixSearchStartsWithAnyWord;
     $this->phraseSuggestUseText = $wgCirrusSearchPhraseSuggestUseText;
     $this->bannedPlugins = $wgCirrusSearchBannedPlugins;
     $this->optimizeIndexForExperimentalHighlighter = $wgCirrusSearchOptimizeIndexForExperimentalHighlighter;
     // The shard limit is looked up per index type; 'unlimited' when none is configured.
     $this->maxShardsPerNode = isset($wgCirrusSearchMaxShardsPerNode[$this->indexType]) ? $wgCirrusSearchMaxShardsPerNode[$this->indexType] : 'unlimited';
     $this->refreshInterval = $wgCirrusSearchRefreshInterval;
     try {
         $indexTypes = $this->getConnection()->getAllIndexTypes();
         if (!in_array($this->indexType, $indexTypes)) {
             $this->error('indexType option must be one of ' . implode(', ', $indexTypes), 1);
         }
         $utils->checkElasticsearchVersion();
         $this->availablePlugins = $utils->scanAvailablePlugins($this->bannedPlugins);
         // Partial runs: validate only one aspect and stop.
         if ($this->getOption('justCacheWarmers', false)) {
             $this->validateCacheWarmers();
             return;
         }
         if ($this->getOption('justAllocation', false)) {
             $this->validateShardAllocation();
             return;
         }
         $this->indexIdentifier = $utils->pickIndexIdentifierFromOption($this->getOption('indexIdentifier', 'current'), $this->getIndexTypeName());
         $this->analysisConfigBuilder = $this->pickAnalyzer($this->langCode, $this->availablePlugins);
         // Full run: each step below is executed in this order.
         $this->validateIndex();
         $this->validateAnalyzers();
         $this->validateMapping();
         $this->validateCacheWarmers();
         $this->validateAlias();
         $this->updateVersions();
         $this->indexNamespaces();
     } catch (\Elastica\Exception\Connection\HttpException $e) {
         // HTTP failures get a short message without a stack trace.
         $message = $e->getMessage();
         $this->output("\nUnexpected Elasticsearch failure.\n");
         $this->error("Http error communicating with Elasticsearch:  {$message}.\n", 1);
     } catch (\Elastica\Exception\ExceptionInterface $e) {
         // Any other Elastica exception is reported with its type, message and trace.
         $type = get_class($e);
         $message = ElasticsearchIntermediary::extractMessage($e);
         $trace = $e->getTraceAsString();
         $this->output("\nUnexpected Elasticsearch failure.\n");
         $this->error("Elasticsearch failed in an unexpected way.  This is always a bug in CirrusSearch.\n" . "Error type: {$type}\n" . "Message: {$message}\n" . "Trace:\n" . $trace, 1);
     }
 }
 /**
  * Collect the expected warmers that are missing from, or differ from, the
  * actual warmers.
  * @param array $expectedWarmers warmers keyed by name
  * @param array $actualWarmers warmers keyed by name
  * @return array the entries of $expectedWarmers without an identical counterpart
  */
 private function diff($expectedWarmers, $actualWarmers)
 {
     $mismatched = array();
     foreach ($expectedWarmers as $name => $expected) {
         // Present and recursively the same: nothing to report for this warmer.
         if (isset($actualWarmers[$name]) && Util::recursiveSame($expected, $actualWarmers[$name])) {
             continue;
         }
         $mismatched[$name] = $expected;
     }
     return $mismatched;
 }
 /**
  * merge top level multi-queries and resolve returned pageIds into Title objects.
  *
  * WARNING: experimental API
  *
  * @param \Elastica\Response $response Response from elasticsearch _suggest api
  * @param array $profiles the suggestion profiles
  * @param int $limit Maximum suggestions to return, -1 for unlimited
  * @return SearchSuggestionSet a set of Suggestions
  */
 protected function postProcessSuggest(\Elastica\Response $response, $profiles, $limit = -1)
 {
     $this->logContext['elasticTookMs'] = intval($response->getQueryTime() * 1000);
     $data = $response->getData();
     unset($data['_shards']);
     // Keep, per page id, only the best scoring suggestion across all profiles.
     $suggestions = array();
     foreach ($data as $name => $results) {
         $discount = $profiles[$name]['discount'];
         foreach ($results as $suggested) {
             foreach ($suggested['options'] as $suggest) {
                 $output = SuggestBuilder::decodeOutput($suggest['text']);
                 if ($output === null) {
                     // Ignore broken output
                     continue;
                 }
                 $pageId = $output['id'];
                 $type = $output['type'];
                 $score = $discount * $suggest['score'];
                 if (!isset($suggestions[$pageId]) || $score > $suggestions[$pageId]->getScore()) {
                     $suggestion = new SearchSuggestion($score, null, null, $pageId);
                     // If it's a title suggestion we have the text
                     if ($type === SuggestBuilder::TITLE_SUGGESTION) {
                         $suggestion->setText($output['text']);
                     }
                     $suggestions[$pageId] = $suggestion;
                 }
             }
         }
     }
     // Sort by descending score. The comparator must return an integer: the
     // scores are discount * score products, and returning their raw difference
     // would be truncated to int, making scores less than 1 apart compare as equal.
     uasort($suggestions, function ($a, $b) {
         $scoreA = $a->getScore();
         $scoreB = $b->getScore();
         if ($scoreA == $scoreB) {
             return 0;
         }
         return $scoreB > $scoreA ? 1 : -1;
     });
     $this->logContext['hitsTotal'] = count($suggestions);
     if ($limit > 0) {
         $suggestions = array_slice($suggestions, 0, $limit, true);
     }
     $this->logContext['hitsReturned'] = count($suggestions);
     $this->logContext['hitsOffset'] = 0;
     // we must fetch redirect data for redirect suggestions
     $missingText = array();
     foreach ($suggestions as $id => $suggestion) {
         if ($suggestion->getText() === null) {
             $missingText[] = $id;
         }
     }
     if (!empty($missingText)) {
         // Experimental.
         //
         // Second pass query to fetch redirects.
         // It's not clear if it's the best option, this will slowdown the whole query
         // when we hit a redirect suggestion.
         // Other option would be to encode redirects as a payload resulting in a
         // very big index...
         // XXX: we support only the content index
         $type = $this->connection->getPageType($this->indexBaseName, Connection::CONTENT_INDEX_TYPE);
         // NOTE: we are already in a poolCounterWork
         // Multi get is not supported by elastica
         $redirResponse = null;
         try {
             $redirResponse = $type->request('_mget', 'GET', array('ids' => $missingText), array('_source_include' => 'redirect'));
             if ($redirResponse->isOk()) {
                 $this->logContext['elasticTook2PassMs'] = intval($redirResponse->getQueryTime() * 1000);
                 $docs = $redirResponse->getData();
                 foreach ($docs['docs'] as $doc) {
                     if (empty($doc['_source']['redirect'])) {
                         continue;
                     }
                     // We use the original query, we should maybe use the variant that generated this result?
                     $text = Util::chooseBestRedirect($this->term, $doc['_source']['redirect']);
                     if (!empty($suggestions[$doc['_id']])) {
                         $suggestions[$doc['_id']]->setText($text);
                     }
                 }
             } else {
                 LoggerFactory::getInstance('CirrusSearch')->warning('Unable to fetch redirects for suggestion {query} with results {ids} : {error}', array('query' => $this->term, 'ids' => serialize($missingText), 'error' => $redirResponse->getError()));
             }
         } catch (\Elastica\Exception\ExceptionInterface $e) {
             // A failed second pass is logged, not fatal: suggestions left without
             // text are dropped by the filter below.
             LoggerFactory::getInstance('CirrusSearch')->warning('Unable to fetch redirects for suggestion {query} with results {ids} : {error}', array('query' => $this->term, 'ids' => serialize($missingText), 'error' => $this->extractMessage($e)));
         }
     }
     return new SearchSuggestionSet(array_filter($suggestions, function ($suggestion) {
         // text should be not empty for suggestions
         return $suggestion->getText() != null;
     }));
 }
 /**
  * Util::chooseBestRedirect() is expected to pick, among a page's redirects,
  * the listed title for each query below.
  */
 public function testChooseBestRedirect()
 {
     $titles = array('Al. Einstein', 'Albert Einstein', 'A. Einstein', 'Einstein, Albert');
     // Each redirect is a title in namespace 0.
     $redirects = array_map(function ($title) {
         return array('title' => $title, 'namespace' => 0);
     }, $titles);

     // query => expected redirect title
     $expectations = array(
         'a' => 'Al. Einstein',
         'al' => 'Al. Einstein',
         'albet' => 'Albert Einstein',
         'Einstein' => 'Einstein, Albert',
         'Ens' => 'Einstein, Albert',
     );
     foreach ($expectations as $query => $expected) {
         $this->assertEquals($expected, Util::chooseBestRedirect($query, $redirects));
     }
 }
 /**
  * Scroll over the NS_MAIN pages of the content index, build suggestion
  * documents for each page and add them to the title suggest type.
  */
 private function indexData()
 {
     // Only pages of the main namespace are used as a source.
     $pageFilter = new Elastica\Filter\BoolAnd(array(
         new Elastica\Filter\Type(Connection::PAGE_TYPE_NAME),
         new Elastica\Filter\Term(array("namespace" => NS_MAIN)),
     ));

     $query = new Query();
     $query->setFields(array('_id', '_type', '_source'));
     // Exclude content fields to save bandwidth
     $query->setSource(array('exclude' => array('text', 'source_text', 'opening_text', 'auxiliary_text')));
     $query->setQuery(new Elastica\Query\Filtered(new Elastica\Query\MatchAll(), $pageFilter));

     // TODO: only content index for now ( we'll have to check how it works with commons )
     $sourceIndex = $this->getConnection()->getIndex($this->indexBaseName, Connection::CONTENT_INDEX_TYPE);
     $result = $sourceIndex->search($query, array(
         'search_type' => 'scan',
         'scroll' => "15m",
         'size' => $this->indexChunkSize,
     ));

     $responseData = $result->getResponse()->getData();
     $totalDocsInIndex = $responseData['hits']['total'];
     $totalDocsToDump = $totalDocsInIndex;

     // The scoring method is chosen by option and given the index size.
     $scoreMethodName = $this->getOption('scoringMethod', 'quality');
     $this->scoreMethod = SuggestScoringMethodFactory::getScoringMethod($scoreMethodName, $totalDocsInIndex);
     $builder = new SuggestBuilder($this->scoreMethod, $this->withGeo);

     $docsDumped = 0;
     $this->output("Indexing {$totalDocsToDump} documents ({$totalDocsInIndex} in the index)\n");

     $indexer = $this;
     $destinationType = $this->getIndex()->getType(Connection::TITLE_SUGGEST_TYPE_NAME);
     $retryAttempts = $this->indexRetryAttempts;
     $scrollId = $result->getResponse()->getScrollId();

     Util::iterateOverScroll($sourceIndex, $scrollId, '15m', function ($batch) use($indexer, &$docsDumped, $totalDocsToDump, $builder, $destinationType, $retryAttempts) {
         // One source page may yield several suggestion documents.
         $suggestDocs = array();
         foreach ($batch as $hit) {
             $docsDumped++;
             foreach ($builder->build($hit->getId(), $hit->getSource()) as $suggest) {
                 $suggestDocs[] = new \Elastica\Document(null, $suggest);
             }
         }
         $indexer->outputProgress($docsDumped, $totalDocsToDump);
         // The whole batch is sent in one call, retried up to $retryAttempts times.
         Util::withRetry($retryAttempts, function () use($destinationType, $suggestDocs) {
             $destinationType->addDocuments($suggestDocs);
         });
     }, 0, $retryAttempts);

     $this->output("Indexing done.\n");
 }
 /**
  * Populate the instance properties from command line options and the
  * $wgFlowSearch* / $wgLanguageCode globals. The order of assignments matters:
  * later lines read $this->utils, $this->indexType, $this->bannedPlugins,
  * $this->langCode and $this->availablePlugins set by earlier ones.
  */
 protected function setProperties()
 {
     global $wgLanguageCode, $wgFlowSearchBannedPlugins, $wgFlowSearchOptimizeIndexForExperimentalHighlighter, $wgFlowSearchIndexAllocation, $wgFlowSearchMaintenanceTimeout, $wgFlowSearchRefreshInterval, $wgFlowSearchMaxShardsPerNode, $wgFlowSearchCacheWarmers;
     $this->connection = Connection::getSingleton();
     $this->utils = new ConfigUtils($this->getClient(), $this);
     $this->indexType = 'flow';
     // only 1 index for Flow
     // Command line options, with their defaults.
     $this->startOver = $this->getOption('startOver', false);
     $this->indexBaseName = $this->getOption('baseName', wfWikiId());
     $this->reindexAndRemoveOk = $this->getOption('reindexAndRemoveOk', false);
     $this->reindexProcesses = $this->getOption('reindexProcesses', wfIsWindows() ? 1 : 5);
     $this->reindexChunkSize = $this->getOption('reindexChunkSize', 100);
     $this->reindexRetryAttempts = $this->getOption('reindexRetryAttempts', 5);
     $this->printDebugCheckConfig = $this->getOption('debugCheckConfig', false);
     // Settings copied from configuration globals.
     $this->langCode = $wgLanguageCode;
     $this->bannedPlugins = $wgFlowSearchBannedPlugins;
     $this->optimizeIndexForExperimentalHighlighter = $wgFlowSearchOptimizeIndexForExperimentalHighlighter;
     $this->indexAllocation = $wgFlowSearchIndexAllocation;
     $this->maintenanceTimeout = $wgFlowSearchMaintenanceTimeout;
     $this->refreshInterval = $wgFlowSearchRefreshInterval;
     // Per index type settings, with a fallback when the type has no entry.
     $this->maxShardsPerNode = isset($wgFlowSearchMaxShardsPerNode[$this->indexType]) ? $wgFlowSearchMaxShardsPerNode[$this->indexType] : 'unlimited';
     $this->cacheWarmers = isset($wgFlowSearchCacheWarmers[$this->indexType]) ? $wgFlowSearchCacheWarmers[$this->indexType] : array();
     $this->indexIdentifier = $this->utils->pickIndexIdentifierFromOption($this->getOption('indexIdentifier', 'current'), $this->getIndexTypeName());
     $this->reindexAcceptableCountDeviation = Util::parsePotentialPercent($this->getOption('reindexAcceptableCountDeviation', '5%'));
     $this->availablePlugins = $this->utils->scanAvailablePlugins($this->bannedPlugins);
     $this->analysisConfigBuilder = $this->pickAnalyzer($this->langCode, $this->availablePlugins);
     // True only when reindexAndRemoveOk is set and either startOver was
     // requested or the index does not exist yet.
     $this->tooFewReplicas = $this->reindexAndRemoveOk && ($this->startOver || !$this->getIndex()->exists());
 }
 /**
  * @return float[]
  */
 public static function getDefaultBoostTemplates()
 {
     // Memoized in a static local once computed.
     static $defaultBoostTemplates = null;
     if ($defaultBoostTemplates !== null) {
         return $defaultBoostTemplates;
     }

     $message = wfMessage('cirrussearch-boost-templates')->inContentLanguage();
     $defaultBoostTemplates = array();
     if (!$message->isDisabled()) {
         // Join the setting lines into one string and parse the templates out of it.
         $settingLines = Util::parseSettingsInMessage($message->plain());
         $defaultBoostTemplates = self::parseBoostTemplates(implode(' ', $settingLines));
     }

     return $defaultBoostTemplates;
 }
 /**
  * {@inheritDoc}
  */
 public function buildMainQuery(array $fields, $queryString, $phraseSlop)
 {
     // Split the "field^boost" specs into plain fields (name ends with
     // ".plain") and stem fields (everything else).
     $byKind = array('plain' => array(), 'stem' => array());
     foreach ($fields as $spec) {
         list($name, $weight) = explode('^', $spec, 2);
         $kind = Util::endsWith($name, '.plain') ? 'plain' : 'stem';
         $byKind[$kind][] = array('field' => $name, 'boost' => $weight);
     }
     $plainFields = $byKind['plain'];
     $stemFields = $byKind['stem'];

     $query = new \Elastica\Query\Bool();
     $query->setMinimumNumberShouldMatch(1);

     // The plain fields always get a common terms clause.
     $this->attachCommonTermsClause($query, $plainFields, $queryString, $this->profile);

     // The stem clause depends on how many stem fields there are.
     if (count($stemFields) !== 1) {
         $this->attachMultiFieldsStemClause($query, $stemFields, $queryString);
     } else {
         $this->attachSingleFieldStemClause($query, $stemFields[0], $queryString);
     }

     return $query;
 }
 /**
  * @param \WikiPage[] $pages
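  * @param int $flags bitfield of INDEX_ON_SKIP, SKIP_PARSE, SKIP_LINKS and FORCE_PARSE
  * @return array the documents built, one per page that exists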
  */
 private function buildDocumentsForPages($pages, $flags)
 {
     global $wgCirrusSearchUpdateConflictRetryCount;

     $indexOnSkip = $flags & self::INDEX_ON_SKIP;
     $skipParse = $flags & self::SKIP_PARSE;
     $skipLinks = $flags & self::SKIP_LINKS;
     $forceParse = $flags & self::FORCE_PARSE;

     // A document is "full" only when neither the parse nor the links step is skipped.
     $fullDocument = !$skipParse && !$skipLinks;
     // Everything is sent as an update to prevent overwriting fields maintained in other
     // processes like OtherIndex::updateOtherIndex.  But documents that don't exist yet
     // still need a way in: upserting is allowed for full documents, and for partial ones
     // when we've been explicitly told that is ok (typically just during the first phase
     // of the initial index build).
     // About docAsUpsert's merging behavior: it overwrites all fields provided by doc
     // unless they are objects in both doc and the indexed source.  That is fine here
     // because all fields are either regular types or lists of objects, and lists are
     // overwritten.
     $upsertAllowed = $fullDocument || $indexOnSkip;

     $documents = array();
     foreach ($pages as $page) {
         $title = $page->getTitle();
         if (!$page->exists()) {
             LoggerFactory::getInstance('CirrusSearch')->warning('Attempted to build a document for a page that doesn\'t exist.  This should be caught ' . "earlier but wasn't.  Page: {title}", array('title' => $title));
             continue;
         }

         $pageId = $page->getId();
         $baseFields = array(
             'version' => $page->getLatest(),
             'version_type' => 'external',
             'namespace' => $title->getNamespace(),
             'namespace_text' => Util::getNamespaceText($title),
             'title' => $title->getText(),
             'timestamp' => wfTimestamp(TS_ISO_8601, $page->getTimestamp()),
         );
         $doc = new \Elastica\Document($pageId, $baseFields);
         $doc->setDocAsUpsert($upsertAllowed);
         $doc->setRetryOnConflict($wgCirrusSearchUpdateConflictRetryCount);

         if (!$skipParse) {
             // Get text to index, based on content and parser output
             list($content, $parserOutput) = $this->getContentAndParserOutput($page, $forceParse);

             // Page data first, then the page text itself
             $pageBuilder = new PageDataBuilder($doc, $title, $content, $parserOutput);
             $doc = $pageBuilder->build();
             $textBuilder = new PageTextBuilder($doc, $content, $parserOutput);
             $doc = $textBuilder->build();

             // Files get their metadata added as well
             if ($title->getNamespace() === NS_FILE) {
                 $fileBuilder = new FileDataBuilder($doc, $title);
                 $doc = $fileBuilder->build();
             }

             // Then let hooks have a go
             MWHooks::run('CirrusSearchBuildDocumentParse', array($doc, $title, $content, $parserOutput, $this->connection));
         }

         if (!$skipLinks) {
             MWHooks::run('CirrusSearchBuildDocumentLinks', array($doc, $title, $this->connection));
         }

         $documents[] = $doc;
     }

     // Hooks get one final call per batch, with the pages that were passed in.
     MWHooks::run('CirrusSearchBuildDocumentFinishBatch', array($pages));

     return $documents;
 }
 /**
  * @param SearchContext $context
  * @param float $weight
  */
 public function __construct(SearchContext $context, $weight)
 {
     parent::__construct($context, $weight);

     // Prefer the boost templates given in the query string. Only a strict null
     // triggers the fallback: an empty array may be returned in the case of a
     // syntax error, and it is kept as-is.
     // @todo: verify that this is what we want: in case of a syntax error
     // we disable default boost templates.
     $fromQuery = $context->getBoostTemplatesFromQuery();
     $this->boostTemplates = $fromQuery === null
         ? Util::getDefaultBoostTemplates()
         : $fromQuery;
 }