/**
 * Feed revisions to every registered search index updater, in batches
 * of at most $this->mBatchSize.
 *
 * Options read from the command line:
 *  - fromId:    id to start from (turned into a UUID)
 *  - toId:      id to stop at (turned into a UUID)
 *  - namespace: passed to the updater's query conditions as-is
 *  - limit:     maximum number of revisions to fetch per updater;
 *               omitted (or empty/0) means no limit
 */
public function execute()
 {
     global $wgFlowSearchMaintenanceTimeout;
     // Set the timeout for maintenance actions
     Connection::getSingleton()->setTimeout2($wgFlowSearchMaintenanceTimeout);

     // the raw options are the same for every updater: read them only once
     $rawFromId = $this->getOption('fromId', null);
     $rawToId = $this->getOption('toId', null);
     $namespace = $this->getOption('namespace', null);
     $limit = $this->getOption('limit', null);

     /** @var Updater[] $updaters */
     $updaters = Container::get('searchindex.updaters');
     foreach ($updaters as $updaterType => $updater) {
         // every updater starts over from the requested range & limit
         $fromId = $rawFromId ? UUID::create($rawFromId) : null;
         $toId = $rawToId ? UUID::create($rawToId) : null;
         // null means "no limit"; anything else is the number of revisions
         // we're still allowed to fetch for this updater
         $remaining = $limit ? (int) $limit : null;
         $total = 0;
         while (true) {
             // if a limit was provided, we should make sure to not fetch
             // more revisions than asked for
             $options = array('LIMIT' => $this->mBatchSize);
             if ($remaining !== null) {
                 // compare against null instead of relying on truthiness:
                 // once the remainder hits exactly 0 (limit is a multiple
                 // of the batch size) we must stop, not drop the limit
                 $options['LIMIT'] = min($remaining, $this->mBatchSize);
                 if ($options['LIMIT'] <= 0) {
                     break;
                 }
                 // since we do this in batches, we'll subtract the size of
                 // each batch until the limit is reached
                 $remaining -= $options['LIMIT'];
             }
             $conditions = $updater->buildQueryConditions($fromId, $toId, $namespace);
             $revisions = $updater->getRevisions($conditions, $options);
             // stop if we're all out of revisions
             if (!$revisions) {
                 break;
             }
             $total += $updater->updateRevisions($revisions, null, null);
             $this->output("Indexed {$total} {$updaterType} document(s)\n");
             // prepare for next batch, starting at the next id
             // prevFromId will default to around unix epoch - there can be
             // no data before that
             $prevFromId = $fromId ?: UUID::getComparisonUUID('1');
             $fromId = $this->getNextFromId($revisions);
             // make sure we don't get stuck in an infinite loop
             $diff = $prevFromId->getTimestampObj()->diff($fromId->getTimestampObj());
             // invert will be 1 if going from $prevFromId to $fromId is a
             // negative time period, which means that the next batch would
             // start *before* the batch we've just processed
             if ($diff->invert) {
                 $this->error('Got stuck in an infinite loop.' . "\n" . 'workflow_last_update_timestamp is likely incorrect ' . 'for some workflows.' . "\n" . 'Run maintenance/FlowFixWorkflowLastUpdateTimestamp.php ' . 'to automatically fix those.', 1);
             }
             // prevent memory from being filled up
             Container::get('storage')->clear();
         }
     }
 }
 /**
  * We want to retrieve the total amount of search word hits
  * (static::termsAggregation) but our search terms may not be how
  * ElasticSearch stores the words in its index.
  * Elastic will "analyze" text (perform stemming, etc) and store
  * the terms in a normalized way.
  * AFAICT, there is not really a way to get to that information
  * from within a search query.
  *
  * Luckily, since 1.0, Elastic supports _termvector, which gives
  * you statistics about the terms in your document.
  * Since 1.4, Elastic supports feeding _termvector documents to
  * analyze.
  * We're going to (ab)use this by letting it respond with term
  * information on a bogus document that contains only our current
  * search terms.
  * So we'll give it a document with just our keywords for the
  * column that we're searching in (revisions.text) and Elastic will
  * use that column's configuration to analyze the text we feed it.
  * It will then respond with the normalized terms & their stats.
  *
  * @param string $terms
  * @return array
  */
 protected function getTerms($terms)
 {
     // PREG_SPLIT_NO_EMPTY: leading/trailing whitespace must not turn into
     // empty "terms" in the bogus document we're about to send
     $terms = preg_split('/\\s+/', $terms, -1, PREG_SPLIT_NO_EMPTY);
     if (!$terms) {
         // nothing to analyze (or the split failed): don't bother Elastic
         return array();
     }
     // _termvectors only works on a type, but our types are
     // configured exactly the same so it doesn't matter which
     $types = Connection::getAllTypes();
     $searchable = Connection::getFlowIndex($this->indexBaseName);
     $searchable = $searchable->getType(array_pop($types));
     $query = array('doc' => array('revisions' => array('text' => $terms)), "fields" => array("revisions.text"));
     // Elastica has no abstraction over _termvector like it has
     // for _query, so just do the request ourselves
     $response = $searchable->request('_termvector', Request::POST, $query, array());
     $data = $response->getData();
     // the response may not hold any term info for the field (e.g. when
     // nothing is left of the input after analysis): that means no terms,
     // it should not result in undefined index notices
     if (!isset($data['term_vectors']['revisions.text']['terms']) || !is_array($data['term_vectors']['revisions.text']['terms'])) {
         return array();
     }
     // the normalized terms are the keys; the values are their stats
     return array_keys($data['term_vectors']['revisions.text']['terms']);
 }
 /**
  * @param \Elastica\Document[] $documents
  * @param string|null $shardTimeout Timeout in Elasticsearch time format (1m, 15s, ...)
  */
 protected function sendDocuments(array $documents, $shardTimeout = null)
 {
     // no documents, no request
     if (!$documents) {
         return;
     }
     try {
         // all documents go out in 1 request, via the bulk api
         $client = Connection::getSingleton()->getClient2();
         $bulk = new \Elastica\Bulk($client);
         if ($shardTimeout !== null) {
             $bulk->setShardTimeout($shardTimeout);
         }
         $type = Connection::getFlowIndex(wfWikiId())->getType($this->getTypeName());
         $bulk->setType($type);
         $bulk->addDocuments($documents);
         $bulk->send();
     } catch (\Exception $e) {
         // a failed update only results in a warning that lists the ids
         // of all documents in the batch; the exception is not rethrown
         $failedIds = array();
         foreach ($documents as $document) {
             $failedIds[] = $document->getId();
         }
         $warning = __METHOD__ . ': Failed updating documents (' . implode(',', $failedIds) . '): ' . $e->getMessage();
         wfWarn($warning);
     }
 }
 /**
  * Set the search type to search in.
  * false is allowed (means we'll search *all* types)
  *
  * @param string|false $type
  * @throws InvalidInputException
  */
 public function setType($type)
 {
     // false (= search all types) is valid on top of the known types
     $allowedTypes = array_merge(Connection::getAllTypes(), array(false));
     // strict comparison: with false in the list of allowed values, a loose
     // in_array would also let '', '0', 0 & null through
     if (!in_array($type, $allowedTypes, true)) {
         throw new InvalidInputException('Invalid search type requested', 'invalid-input');
     }
     $this->type = $type;
 }
 /**
  * Returns whatever Connection::getAllIndices() reports.
  *
  * @return mixed Result of Connection::getAllIndices(), passed through as-is
  */
 protected function getAllIndices()
 {
     $indices = Connection::getAllIndices();
     return $indices;
 }
 /**
  * Describes the parameters this module accepts.
  *
  * @return string[] Descriptions, keyed by parameter name
  */
 public function getParamDescription()
 {
     $prefix = $this->getModulePrefix();
     // all known types, as type1|type2|...
     $typeList = implode('|', Connection::getAllTypes());

     $descriptions = array();
     $descriptions['term'] = 'Search term';
     $descriptions['title'] = "Title of the boards to search in. Cannot be used together with {$prefix}pageid";
     $descriptions['pageid'] = "ID of the boards to search in. Cannot be used together with {$prefix}title";
     $descriptions['namespaces'] = 'Namespaces to search in';
     $descriptions['moderationState'] = 'Search for revisions in (a) particular moderation state(s)';
     $descriptions['sort'] = 'What to order the search results by';
     $descriptions['type'] = 'Desired type of results (' . $typeList . ')';
     $descriptions['offset'] = 'Offset value to start fetching results at';
     $descriptions['limit'] = 'Amount of results to fetch';

     return $descriptions;
 }