/**
 * Checks that Connection::namespacesInIndexType() returns the expected
 * value for a given pair of namespace-related globals.
 *
 * The two globals are overridden only while the value is computed and are
 * put back afterwards, so the configuration used by one data set does not
 * leak into tests that run later.
 *
 * @dataProvider provideNamespacesInIndexType
 * @param mixed $contentNamespaces value installed as $wgContentNamespaces
 * @param mixed $namespaceMappings value installed as $wgCirrusSearchNamespaceMappings
 * @param mixed $indexType argument handed to namespacesInIndexType()
 * @param mixed $expected value namespacesInIndexType() has to return
 */
public function testNamespacesInIndexType($contentNamespaces, $namespaceMappings, $indexType, $expected) {
    global $wgContentNamespaces, $wgCirrusSearchNamespaceMappings;

    // Remember what was configured before this test touched anything
    $savedContentNamespaces = $wgContentNamespaces;
    $savedNamespaceMappings = $wgCirrusSearchNamespaceMappings;

    $wgContentNamespaces = $contentNamespaces;
    $wgCirrusSearchNamespaceMappings = $namespaceMappings;

    try {
        $config = ConfigFactory::getDefaultInstance()->makeConfig('CirrusSearch');
        $conn = new Connection($config);
        $actual = $conn->namespacesInIndexType($indexType);
    } catch (\Exception $e) {
        // Restore by hand before letting the failure surface
        $wgContentNamespaces = $savedContentNamespaces;
        $wgCirrusSearchNamespaceMappings = $savedNamespaceMappings;
        throw $e;
    }

    // Restore before asserting: a failed assertion throws and would
    // otherwise skip the cleanup.
    $wgContentNamespaces = $savedContentNamespaces;
    $wgCirrusSearchNamespaceMappings = $savedNamespaceMappings;

    $this->assertEquals($expected, $actual);
}
/**
 * Tear down the client held by the current and by the old connection.
 */
private function destroyClients() {
    foreach (array($this->connection, $this->oldConnection) as $conn) {
        $conn->destroyClient();
    }

    // The timeouts are lost when the clients are destroyed, so apply them again
    $this->setConnectionTimeout();
}
/**
 * Return the connection for this object, creating it on first use.
 *
 * The connection is built from the 'CirrusSearch' config and the cluster
 * picked by decideCluster(), then kept in $this->connection for reuse.
 *
 * @return mixed whatever Connection::getPool() returned
 */
public function getConnection() {
    if ($this->connection !== null) {
        return $this->connection;
    }

    $searchConfig = ConfigFactory::getDefaultInstance()->makeConfig('CirrusSearch');
    $this->connection = Connection::getPool($searchConfig, $this->decideCluster($searchConfig));

    return $this->connection;
}
/**
 * Work out which cluster connections this object should operate on.
 *
 * When the 'cluster' parameter is set, only the connection already held in
 * $this->connection is returned. Otherwise one pooled connection is
 * returned for every key of the CirrusSearchClusters setting.
 *
 * @return array connections keyed by cluster name
 */
protected function decideClusters() {
    if ($this->params['cluster'] !== null) {
        // parent::__construct initialized the correct connection
        $name = $this->connection->getClusterName();
        return array($name => $this->connection);
    }

    // Only this path reads the configuration, so it is looked up here
    // rather than before the early return above.
    $config = ConfigFactory::getDefaultInstance()->makeConfig('CirrusSearch');

    $connections = array();
    foreach (array_keys($config->get('CirrusSearchClusters')) as $name) {
        $connections[$name] = Connection::getPool($config, $name);
    }

    return $connections;
}
/**
 * Return a connection, either for an explicitly named cluster or the
 * default one for this object.
 *
 * Without a cluster name the connection is created on first use (from the
 * cluster picked by decideCluster()) and kept in $this->connection. With a
 * cluster name a pooled connection for that cluster is returned each time
 * and $this->connection is left untouched.
 *
 * @param mixed $cluster name of the cluster to connect to; any falsy value
 *  selects the default connection
 * @return mixed whatever Connection::getPool() returned
 */
public function getConnection($cluster = null) {
    if (!$cluster) {
        if ($this->connection === null) {
            $defaultConfig = ConfigFactory::getDefaultInstance()->makeConfig('CirrusSearch');
            $this->connection = Connection::getPool($defaultConfig, $this->decideCluster($defaultConfig));
        }

        return $this->connection;
    }

    $searchConfig = ConfigFactory::getDefaultInstance()->makeConfig('CirrusSearch');
    $clusterSettings = $searchConfig->getElement('CirrusSearchClusters', $cluster);
    if (!$clusterSettings) {
        // The name has no entry in CirrusSearchClusters
        $this->error('Unknown cluster.', 1);
    }

    return Connection::getPool($searchConfig, $cluster);
}
/**
 * Set up the job: derive its name from the class, enable duplicate removal
 * and open the connection for the requested cluster.
 *
 * @param mixed $title forwarded unchanged to the parent constructor
 * @param array $params job parameters; 'cluster' is added as null when absent
 */
public function __construct($title, $params) {
    // Make sure the 'cluster' key exists without overriding a supplied value
    $params = $params + array('cluster' => null);

    // The job name is the class name with its namespace swapped for a prefix,
    // eg: DeletePages -> cirrusSearchDeletePages
    $shortName = str_replace('CirrusSearch\\Job\\', '', get_class($this));
    parent::__construct('cirrusSearch' . $shortName, $title, $params);

    // Every CirrusSearch job is fairly expensive (most of them parse), so
    // dropping duplicate _unclaimed_ jobs is fine. A claimed job must not be
    // deduplicated, otherwise the search index ends up with stale data.
    // Luckily, this is how the JobQueue implementations work.
    $this->removeDuplicates = true;

    // With a 'cluster' parameter the job must only touch that cluster, and
    // nested jobs need the same cluster set. A null cluster means all
    // clusters should be written to.
    $this->connection = Connection::getPool(
        ConfigFactory::getDefaultInstance()->makeConfig('CirrusSearch'),
        $params['cluster']
    );
}
/**
 * Assemble the log context for the most recent request made through the
 * client, queue it for reporting and return it.
 *
 * The values end up serialized into Avro, which is strictly typed
 * (float !== int !== string), hence the explicit intval() calls.
 *
 * @param float $took Number of milliseconds the request took
 * @return array
 */
private function buildLogContext($took) {
    $client = $this->connection->getClient();
    $request = $client->getLastRequest();
    $response = $client->getLastResponse();

    // Take over whatever context was collected so far and start a fresh one
    $context = $this->logContext;
    $this->logContext = array();

    // `+=` only fills in keys that were not already collected
    $context += array(
        'tookMs' => intval($took),
        'source' => self::getExecutionContext(),
        'executor' => self::getExecutionId(),
        'identity' => self::generateIdentToken(),
    );

    if ($response) {
        $requestData = $request->getData();
        $responseData = $response->getData();

        // The first segment of the request path is logged as the index
        $pathSegments = explode('/', $request->getPath());
        $context['index'] = $pathSegments[0];

        if (isset($responseData['took'])) {
            $context['elasticTookMs'] = intval($responseData['took']);
        }

        if (isset($responseData['hits']['total'])) {
            $context['hitsTotal'] = intval($responseData['hits']['total']);
        }

        if (isset($responseData['hits']['hits'])) {
            $context['hitsReturned'] = count($responseData['hits']['hits']);
            $context['hitsOffset'] = intval(isset($requestData['from']) ? $requestData['from'] : 0);
        }

        $namespaceFilterPath = array('query', 'filtered', 'filter', 'terms', 'namespace');
        if ($this->_isset($requestData, $namespaceFilterPath)) {
            $context['namespaces'] = array_map(
                'intval',
                $requestData['query']['filtered']['filter']['terms']['namespace']
            );
        }

        // Only the text of the first option of the first suggestion is kept
        if (isset($responseData['suggest']['suggest'][0]['options'][0]['text'])) {
            $context['suggestion'] = $responseData['suggest']['suggest'][0]['options'][0]['text'];
        }
    }

    if (self::$logContexts === null) {
        // First context collected: register the deferred reporting callback
        DeferredUpdates::addCallableUpdate(function () {
            ElasticsearchIntermediary::reportLogContexts();
        });
        self::$logContexts = array();
    }
    self::$logContexts[] = $context;

    return $context;
}
/**
 * Ask the langdetect plugin which language a piece of text is written in.
 * See: https://github.com/jprante/elasticsearch-langdetect
 *
 * A language is returned only when the plugin reports exactly one
 * candidate, or two candidates where the first is more than twice as
 * probable as the second.
 *
 * @param string $text
 * @return string|NULL Language name or null
 */
public static function detectLanguage($text) {
    $client = Connection::getClient();

    try {
        $response = $client->request("_langdetect", Request::POST, $text);
    } catch (ResponseException $e) {
        // This happens when language detection is not configured
        LoggerFactory::getInstance('CirrusSearch')->warning(
            "Could not connect to language detector: {exception}",
            array("exception" => $e->getMessage())
        );
        return null;
    }

    if (!$response->isOk()) {
        return null;
    }

    $payload = $response->getData();
    if (!$payload || empty($payload['languages'])) {
        return null;
    }

    $candidates = $payload['languages'];
    switch (count($candidates)) {
        case 1:
            // TODO: add minimal threshold
            return $candidates[0]['language'];
        case 2:
            // FIXME: this is a rough heuristic; multiple languages need
            // handling that actually makes sense
            if ($candidates[0]['probability'] > 2 * $candidates[1]['probability']) {
                return $candidates[0]['language'];
            }
            break;
    }

    return null;
}
/**
 * Push a delayed copy of this failed job back onto the queue, or give up
 * and log a warning once the error count has reached MAX_ERROR_RETRY.
 *
 * @param Connection $conn connection whose cluster name is logged and
 *  stored on the requeued job
 */
private function requeueError(Connection $conn) {
    $clusterName = $conn->getClusterName();
    $failures = $this->params['errorCount'];

    if ($failures >= self::MAX_ERROR_RETRY) {
        LoggerFactory::getInstance('CirrusSearchChangeFailed')->warning(
            "Dropping failing ElasticaWrite job for DataSender::{method} in cluster {cluster} after repeated failure",
            array(
                'method' => $this->params['method'],
                'cluster' => $clusterName,
            )
        );
        return;
    }

    $delay = self::backoffDelay($failures);

    // The retry is a copy of this job with a higher error count and the
    // cluster set to the one from $conn
    $retry = clone $this;
    $retry->params['errorCount']++;
    $retry->params['cluster'] = $clusterName;
    $retry->setDelay($delay);

    // Individual failures should have already logged specific errors
    LoggerFactory::getInstance('CirrusSearch')->info(
        "ElasticaWrite job reported failure on cluster {cluster}. Requeueing job with delay of {delay}.",
        array(
            'cluster' => $clusterName,
            'delay' => $delay,
        )
    );

    JobQueueGroup::singleton()->push($retry);
}