/**
  * Checks that Connection::namespacesInIndexType() returns the expected
  * value once $wgContentNamespaces and $wgCirrusSearchNamespaceMappings
  * have been overridden with the values supplied by the data provider.
  *
  * The two globals are put back to their previous values before the
  * assertion runs, so the overrides do not leak into later tests.
  *
  * @dataProvider provideNamespacesInIndexType
  * @param mixed $contentNamespaces Value assigned to $wgContentNamespaces
  * @param mixed $namespaceMappings Value assigned to $wgCirrusSearchNamespaceMappings
  * @param mixed $indexType Argument passed to namespacesInIndexType()
  * @param mixed $expected Value namespacesInIndexType() is expected to return
  */
 public function testNamespacesInIndexType($contentNamespaces, $namespaceMappings, $indexType, $expected)
 {
     global $wgContentNamespaces, $wgCirrusSearchNamespaceMappings;
     // Remember what was configured before this test touched the globals.
     $savedContentNamespaces = $wgContentNamespaces;
     $savedNamespaceMappings = $wgCirrusSearchNamespaceMappings;
     $wgContentNamespaces = $contentNamespaces;
     $wgCirrusSearchNamespaceMappings = $namespaceMappings;
     $config = ConfigFactory::getDefaultInstance()->makeConfig('CirrusSearch');
     $conn = new Connection($config);
     $actual = $conn->namespacesInIndexType($indexType);
     // Restore before asserting: a failed assertion throws, and the
     // overrides must not survive into whichever test runs next.
     $wgContentNamespaces = $savedContentNamespaces;
     $wgCirrusSearchNamespaceMappings = $savedNamespaceMappings;
     $this->assertEquals($expected, $actual);
 }
 /**
  * Destroy the clients of both the current and the old connection, then
  * re-apply the connection timeout.
  */
 private function destroyClients()
 {
     foreach (array($this->connection, $this->oldConnection) as $conn) {
         $conn->destroyClient();
     }
     // Destroying a connection resets its timeouts, so set them again.
     $this->setConnectionTimeout();
 }
 /**
  * Return the connection held by this object, creating it on first use
  * from the CirrusSearch config and the cluster chosen by decideCluster().
  *
  * @return Connection
  */
 public function getConnection()
 {
     if ($this->connection !== null) {
         return $this->connection;
     }
     // First call: build the pooled connection and remember it.
     $config = ConfigFactory::getDefaultInstance()->makeConfig('CirrusSearch');
     $this->connection = Connection::getPool($config, $this->decideCluster($config));
     return $this->connection;
 }
 /**
  * Work out which cluster connections should be used.
  *
  * When the 'cluster' parameter is set, only the connection already held
  * by this object is returned. Otherwise one pooled connection is returned
  * for every key of the CirrusSearchClusters configuration value.
  *
  * @return array Map of cluster name to connection
  */
 protected function decideClusters()
 {
     $config = ConfigFactory::getDefaultInstance()->makeConfig('CirrusSearch');
     if ($this->params['cluster'] === null) {
         // No specific cluster requested: use every configured cluster.
         $clusterNames = array_keys($config->get('CirrusSearchClusters'));
         $connections = array();
         foreach ($clusterNames as $name) {
             $connections[$name] = Connection::getPool($config, $name);
         }
         return $connections;
     }
     // parent::__construct initialized the correct connection
     return array($this->connection->getClusterName() => $this->connection);
 }
 /**
  * Return a connection, either for an explicitly named cluster or the
  * lazily created default held by this object.
  *
  * @param string|null $cluster Cluster name; any falsy value selects the
  *  default connection
  * @return Connection
  */
 public function getConnection($cluster = null)
 {
     if (!$cluster) {
         // Default connection: created once, then reused.
         if ($this->connection === null) {
             $config = ConfigFactory::getDefaultInstance()->makeConfig('CirrusSearch');
             $this->connection = Connection::getPool($config, $this->decideCluster($config));
         }
         return $this->connection;
     }
     $config = ConfigFactory::getDefaultInstance()->makeConfig('CirrusSearch');
     $clusterConfig = $config->getElement('CirrusSearchClusters', $cluster);
     if (!$clusterConfig) {
         // NOTE(review): error() with a second argument of 1 presumably
         // aborts the script - confirm against the parent class.
         $this->error('Unknown cluster.', 1);
     }
     // A connection for a named cluster is not stored on this object.
     return Connection::getPool($config, $cluster);
 }
 /**
  * Set up the job: derive its name from the class name, enable duplicate
  * removal and obtain the connection for the requested cluster.
  *
  * @param mixed $title Passed through to the parent constructor
  * @param array $params Job parameters; 'cluster' defaults to null
  */
 public function __construct($title, $params)
 {
     // Make sure a 'cluster' key is always present.
     $params = $params + array('cluster' => null);
     // eg: DeletePages -> cirrusSearchDeletePages
     $shortName = str_replace('CirrusSearch\\Job\\', '', get_class($this));
     parent::__construct('cirrusSearch' . $shortName, $title, $params);
     // All CirrusSearch jobs are reasonably expensive. Most involve parsing,
     // and it is ok to remove duplicate _unclaimed_ cirrus jobs. Once a
     // cirrus job is claimed it can't be deduplicated, or else the search
     // index will end up with out of date data. Luckily, this is how the
     // JobQueue implementations work.
     $this->removeDuplicates = true;
     // When the 'cluster' parameter is provided the job must only operate on
     // the specified cluster; take special care to ensure nested jobs get the
     // correct cluster set. When set to null all clusters should be written to.
     $this->connection = Connection::getPool(
         ConfigFactory::getDefaultInstance()->makeConfig('CirrusSearch'),
         $params['cluster']
     );
 }
 /**
  * Assemble the log context for the last request made through this
  * object's connection, append it to the shared list of contexts and
  * return it.
  *
  * These values end up serialized into Avro which has strict typing
  * requirements. float !== int !== string.
  *
  * @param float $took Number of milliseconds the request took
  * @return array
  */
 private function buildLogContext($took)
 {
     $client = $this->connection->getClient();
     $request = $client->getLastRequest();
     $response = $client->getLastResponse();
     // Take over the context accumulated so far and reset it.
     $context = $this->logContext;
     $this->logContext = array();
     // Array union: keys already present in the accumulated context win.
     $context += array(
         'tookMs' => intval($took),
         'source' => self::getExecutionContext(),
         'executor' => self::getExecutionId(),
         'identity' => self::generateIdentToken(),
     );
     if ($response) {
         $requestBody = $request->getData();
         $responseBody = $response->getData();
         // The first segment of the request path is logged as the index.
         $pathSegments = explode('/', $request->getPath());
         $context['index'] = $pathSegments[0];
         if (isset($responseBody['took'])) {
             $context['elasticTookMs'] = intval($responseBody['took']);
         }
         if (isset($responseBody['hits']['total'])) {
             $context['hitsTotal'] = intval($responseBody['hits']['total']);
         }
         if (isset($responseBody['hits']['hits'])) {
             // The offset falls back to 0 when the request carried no 'from'.
             if (isset($requestBody['from'])) {
                 $from = $requestBody['from'];
             } else {
                 $from = 0;
             }
             $context['hitsReturned'] = count($responseBody['hits']['hits']);
             $context['hitsOffset'] = intval($from);
         }
         $namespacePath = array('query', 'filtered', 'filter', 'terms', 'namespace');
         if ($this->_isset($requestBody, $namespacePath)) {
             $context['namespaces'] = array_map(
                 'intval',
                 $requestBody['query']['filtered']['filter']['terms']['namespace']
             );
         }
         if (isset($responseBody['suggest']['suggest'][0]['options'][0]['text'])) {
             $context['suggestion'] = $responseBody['suggest']['suggest'][0]['options'][0]['text'];
         }
     }
     if (self::$logContexts === null) {
         // First context collected: schedule the deferred reporting
         // callback once, then start the list.
         DeferredUpdates::addCallableUpdate(function () {
             ElasticsearchIntermediary::reportLogContexts();
         });
         self::$logContexts = array();
     }
     self::$logContexts[] = $context;
     return $context;
 }
 /**
  * Try to detect language using langdetect plugin
  * See: https://github.com/jprante/elasticsearch-langdetect
  *
  * A language is returned only when the response lists exactly one
  * language, or exactly two where the first is more than twice as
  * probable as the second. Every other outcome yields null.
  *
  * @param string $text
  * @return string|null Language name or null
  */
 public static function detectLanguage($text)
 {
     $client = Connection::getClient();
     try {
         $response = $client->request("_langdetect", Request::POST, $text);
     } catch (ResponseException $e) {
         // This happens when language detection is not configured
         $logger = LoggerFactory::getInstance('CirrusSearch');
         $logger->warning("Could not connect to language detector: {exception}", array("exception" => $e->getMessage()));
         return null;
     }
     if (!$response->isOk()) {
         return null;
     }
     $data = $response->getData();
     if (!$data || empty($data['languages'])) {
         return null;
     }
     $candidates = $data['languages'];
     $candidateCount = count($candidates);
     if ($candidateCount == 1) {
         // TODO: add minimal threshold
         return $candidates[0]['language'];
     }
     // FIXME: here I'm just winging it, should be something
     // that makes sense for multiple languages
     if ($candidateCount == 2 && $candidates[0]['probability'] > 2 * $candidates[1]['probability']) {
         return $candidates[0]['language'];
     }
     return null;
 }
 /**
  * Re-queue job that failed, or drop the job if it has failed
  * too many times
  *
  * The re-queued job is a clone of this one with its error count
  * incremented, pinned to the cluster of the given connection, and
  * delayed by the value of backoffDelay().
  *
  * @param Connection $conn Connection whose cluster reported the failure
  */
 private function requeueError(Connection $conn)
 {
     $clusterName = $conn->getClusterName();
     if ($this->params['errorCount'] >= self::MAX_ERROR_RETRY) {
         // Retry budget used up: log and drop the job.
         LoggerFactory::getInstance('CirrusSearchChangeFailed')->warning("Dropping failing ElasticaWrite job for DataSender::{method} in cluster {cluster} after repeated failure", array('method' => $this->params['method'], 'cluster' => $clusterName));
         return;
     }
     $delay = self::backoffDelay($this->params['errorCount']);
     $retryJob = clone $this;
     $retryJob->params['errorCount']++;
     $retryJob->params['cluster'] = $clusterName;
     $retryJob->setDelay($delay);
     // Individual failures should have already logged specific errors,
     // so only the requeue itself is recorded here.
     LoggerFactory::getInstance('CirrusSearch')->info("ElasticaWrite job reported failure on cluster {cluster}. Requeueing job with delay of {delay}.", array('cluster' => $clusterName, 'delay' => $delay));
     JobQueueGroup::singleton()->push($retryJob);
 }