/**
 * Check that the index has the configured number of shards.
 *
 * A mismatch is not corrected here: the method reports it and returns a
 * fatal status whose message tells the operator how to rebuild the index.
 *
 * @return Status Good when the counts match, fatal otherwise.
 */
public function validate() {
    $this->outputIndented("\tValidating number of shards...");

    $indexSettings = $this->index->getSettings()->get();
    $actualShardCount = $indexSettings['number_of_shards'];

    // Loose comparison, so a numeric string and an integer of the same value match.
    if ($actualShardCount == $this->shardCount) {
        $this->output("ok\n");

        return Status::newGood();
    }

    $this->output("is {$actualShardCount} but should be " . $this->shardCount . "...cannot correct!\n");

    return Status::newFatal(new RawMessage(
        "Number of shards is incorrect and cannot be changed without a rebuild. You can solve this\n" .
        "problem by running this program again with either --startOver or --reindexAndRemoveOk. Make\n" .
        "sure you understand the consequences of either choice.. This script will now continue to\n" .
        "validate everything else."
    ));
}
/**
 * Check the index's analysis settings against the configuration produced
 * by the analysis config builder.
 *
 * A difference is reported but not corrected; the fatal status explains
 * which parameters to re-run the script with.
 *
 * @return Status Good when checkConfig() accepts the settings, fatal otherwise.
 */
public function validate() {
    $this->outputIndented("Validating analyzers...");

    $currentSettings = $this->index->getSettings()->get();
    $requiredAnalyzers = $this->analysisConfigBuilder->buildConfig();
    $analysisMatches = $this->checkConfig($currentSettings['analysis'], $requiredAnalyzers);

    if ($analysisMatches) {
        $this->output("ok\n");

        return Status::newGood();
    }

    $this->output("cannot correct\n");

    return Status::newFatal(new RawMessage(
        "This script encountered an index difference that requires that the index be\n" .
        "copied, indexed to, and then the old index removed. Re-run this script with the\n" .
        "--reindexAndRemoveOk --indexIdentifier=now parameters to do this."
    ));
}
/**
 * Check the index's auto_expand_replicas setting and rewrite it when it
 * differs from the configured replica range.
 *
 * @return Status Always good: a mismatch is corrected in place.
 */
public function validate() {
    $this->outputIndented("\tValidating replica range...");

    $indexSettings = $this->index->getSettings()->get();

    // An index without the setting is compared as the string 'false'.
    $actualReplicaCount = 'false';
    if (isset($indexSettings['auto_expand_replicas'])) {
        $actualReplicaCount = $indexSettings['auto_expand_replicas'];
    }

    if ($actualReplicaCount != $this->replicaCount) {
        $this->output("is {$actualReplicaCount} but should be " . $this->replicaCount . '...');
        $this->index->getSettings()->set(array('auto_expand_replicas' => $this->replicaCount));
        $this->output("corrected\n");
    } else {
        $this->output("ok\n");
    }

    return Status::newGood();
}
/**
 * Index two documents and verify that a MoreLikeThis query over the
 * 'email' and 'content' fields returns both of them.
 *
 * @group functional
 */
public function testSearch() {
    $index = new Index($this->_getClient(), 'test');
    $index->create(array(), true);
    $index->getSettings()->setNumberOfReplicas(0);

    $type = new Type($index, 'helloworldmlt');

    // Both fields share the same stored, analyzed string definition.
    $analyzedString = array('store' => 'yes', 'type' => 'string', 'index' => 'analyzed');
    $mapping = new Mapping($type, array('email' => $analyzedString, 'content' => $analyzedString));
    $mapping->setSource(array('enabled' => false));
    $type->setMapping($mapping);

    $fixtures = array(
        1000 => 'This is a sample post. Hello World Fuzzy Like This!',
        1001 => 'This is a fake nospam email address for gmail',
    );
    foreach ($fixtures as $id => $content) {
        $type->addDocument(new Document($id, array('email' => '*****@*****.**', 'content' => $content)));
    }

    // Refresh the index before searching.
    $index->refresh();

    $mltQuery = new MoreLikeThis();
    $mltQuery->setLike('fake gmail sample');
    $mltQuery->setFields(array('email', 'content'));
    $mltQuery->setMaxQueryTerms(3);
    $mltQuery->setMinDocFrequency(1);
    $mltQuery->setMinTermFrequency(1);

    $query = new Query();
    $query->setQuery($mltQuery);

    $resultSet = $type->search($query);
    // The returned data is not inspected.
    $resultSet->getResponse()->getData();

    $this->assertEquals(2, $resultSet->count());
}
/**
 * Dump everything from the live index into the one being worked on.
 *
 * Bulk-loading settings are applied to the index and the affected indexes
 * are frozen before any copying starts. Once copying has finished the
 * settings are reverted and the indexes thawed. With more than one process
 * the copy is forked and the per-type document counts of the old and new
 * types are compared afterwards.
 *
 * @param int $processes number of processes; values above 1 fork children
 * @param int $refreshInterval refresh interval, in seconds, set on the index once copying is done
 * @param int $retryAttempts handed through to reindexInternal()
 * @param int $chunkSize handed through to reindexInternal()
 * @param float $acceptableCountDeviation largest tolerated relative difference
 *  between old and new document counts (multi-process runs only)
 */
public function reindex($processes = 1, $refreshInterval = 1, $retryAttempts = 5, $chunkSize = 100, $acceptableCountDeviation = 0.05) {
    global $wgCirrusSearchWikimediaExtraPlugin;

    // Set some settings that should help io load during bulk indexing. We'll have to
    // optimize after this to consolidate down to a proper number of shards but that
    // is worth the price. total_shards_per_node will help to make sure that each shard
    // has as few neighbors as possible.
    $indexSettings = $this->index->getSettings();
    $shardsPerNodeLimit = $this->decideMaxShardsPerNodeForReindex();
    $indexSettings->set(array(
        'refresh_interval' => -1,
        'merge.policy.segments_per_tier' => 40,
        'merge.policy.max_merge_at_once' => 40,
        'routing.allocation.total_shards_per_node' => $shardsPerNodeLimit,
    ));

    $dataSender = new DataSender($this->connection);
    $indexesToFreeze = $this->connection->indexToIndexTypes($this->types);
    $dataSender->freezeIndexes($indexesToFreeze);

    if ($processes > 1) {
        if (empty($wgCirrusSearchWikimediaExtraPlugin['id_hash_mod_filter'])) {
            $this->error("Can't use multiple processes without \$wgCirrusSearchWikimediaExtraPlugin[ 'id_hash_mod_filter' ] = true", 1);
        }

        $forkController = new ForkController($processes);
        $forkResult = $forkController->start();

        // We don't want to share sockets between forks, so destroy the client.
        $this->connection->destroyClient();
        // Destroying the client resets the timeout so we have to reinstate it.
        $this->setConnectionTimeout();

        switch ($forkResult) {
            case 'child':
                foreach ($this->types as $i => $type) {
                    $this->reindexInternal(
                        $type,
                        $this->oldTypes[$i],
                        $processes,
                        $forkController->getChildNumber(),
                        $chunkSize,
                        $retryAttempts
                    );
                }
                die(0);
            case 'done':
                break;
            default:
                $this->error("Unexpected result while forking: {$forkResult}", 1);
        }

        $this->outputIndented("Verifying counts...");
        // We can't verify counts are exactly equal because they won't be - we still push
        // updates into the old index while reindexing the new one.
        foreach ($this->types as $i => $type) {
            $oldCount = (float) $this->oldTypes[$i]->count();
            $this->index->refresh();
            $newCount = (float) $type->count();

            // An empty old type is treated as having no deviation at all.
            $difference = 0;
            if ($oldCount > 0) {
                $difference = abs($oldCount - $newCount) / $oldCount;
            }

            if ($difference > $acceptableCountDeviation) {
                $this->output("Not close enough! old={$oldCount} new={$newCount} difference={$difference}\n");
                $this->error(
                    'Failed to load index - counts not close enough. ' .
                    "old={$oldCount} new={$newCount} difference={$difference}. " .
                    'Check for warnings above.',
                    1
                );
            }
        }
        $this->output("done\n");
    } else {
        foreach ($this->types as $i => $type) {
            $this->reindexInternal($type, $this->oldTypes[$i], 1, 1, $chunkSize, $retryAttempts);
        }
    }

    // Revert settings changed just for reindexing.
    $indexSettings->set(array(
        'refresh_interval' => $refreshInterval . 's',
        'merge.policy' => $this->mergeSettings,
    ));

    $dataSender->thawIndexes($indexesToFreeze);
}
/**
 * Check the index's routing.allocation.total_shards_per_node setting and
 * rewrite it when it differs from the configured maximum.
 *
 * @return Status Always good: a mismatch is corrected in place.
 */
public function validate() {
    $this->outputIndented("\tValidating max shards per node...");

    $indexSettings = $this->index->getSettings()->get();

    // Elasticsearch uses negative numbers or an unset value to represent unlimited.
    // The word 'unlimited' is used here because it is easier to read.
    $actualMaxShardsPerNode = 'unlimited';
    if (isset($indexSettings['routing']['allocation']['total_shards_per_node'])) {
        $actualMaxShardsPerNode = $indexSettings['routing']['allocation']['total_shards_per_node'];
    }
    if ($actualMaxShardsPerNode < 0) {
        $actualMaxShardsPerNode = 'unlimited';
    }

    $expectedMaxShardsPerNode = $this->maxShardsPerNode;
    if ($actualMaxShardsPerNode == $expectedMaxShardsPerNode) {
        $this->output("ok\n");

        return Status::newGood();
    }

    $this->output("is {$actualMaxShardsPerNode} but should be {$expectedMaxShardsPerNode}...");

    // Translate the readable word back into the negative value before storing it.
    $valueToStore = $expectedMaxShardsPerNode;
    if ($expectedMaxShardsPerNode === 'unlimited') {
        $valueToStore = -1;
    }
    $this->index->getSettings()->set(array('routing.allocation.total_shards_per_node' => $valueToStore));
    $this->output("corrected\n");

    return Status::newGood();
}
/**
 * Index a single document and verify that the query string 'test*'
 * returns exactly one hit.
 */
public function testSearch() {
    $index = new Index($this->_getClient(), 'test');
    $index->create(array(), true);
    $index->getSettings()->setNumberOfReplicas(0);

    $type = new Type($index, 'helloworld');

    $fields = array(
        'email' => '*****@*****.**',
        'username' => 'hanswurst',
        'test' => array('2', '3', '5'),
    );
    $type->addDocument(new Document(1, $fields));

    // Refresh the index before searching.
    $index->refresh();

    $resultSet = $type->search(new QueryString('test*'));

    $this->assertEquals(1, $resultSet->count());
}
/**
 * Index a single document and verify that a FuzzyLikeThis query for
 * "sample gmail" over 'email' and 'content' returns exactly one hit.
 */
public function testSearch() {
    $index = new Index($this->_getClient(), 'test');
    $index->create(array(), true);
    $index->getSettings()->setNumberOfReplicas(0);

    $type = new Type($index, 'helloworldfuzzy');

    // Both fields share the same stored, analyzed string definition.
    $analyzedString = array('store' => 'yes', 'type' => 'string', 'index' => 'analyzed');
    $mapping = new Mapping($type, array('email' => $analyzedString, 'content' => $analyzedString));
    $mapping->setSource(array('enabled' => false));
    $type->setMapping($mapping);

    $fields = array(
        'email' => '*****@*****.**',
        'content' => 'This is a sample post. Hello World Fuzzy Like This!',
    );
    $type->addDocument(new Document(1000, $fields));

    // Refresh the index before searching.
    $index->refresh();

    $fltQuery = new FuzzyLikeThis();
    $fltQuery->setLikeText("sample gmail");
    $fltQuery->addFields(array("email", "content"));
    $fltQuery->setMinSimilarity(0.3);
    $fltQuery->setMaxQueryTerms(3);

    $resultSet = $type->search($fltQuery);

    $this->assertEquals(1, $resultSet->count());
}
/**
 * Write one routing allocation setting to the index.
 *
 * @param string $type key placed under routing.allocation
 * @param mixed $allocation value stored under that key
 */
private function set($type, $allocation) {
    $routing = array('allocation' => array($type => $allocation));
    $this->index->getSettings()->set(array('routing' => $routing));
}
/**
 * Verify that toggling the read-only setting blocks and then re-allows
 * document writes.
 *
 * One document is added while the index is writable, a second is rejected
 * while it is read-only, and a third is added after the flag is cleared,
 * leaving two documents in the type.
 */
public function testSetReadOnly() {
    // Clear the read-only flag on 'elastica_test' before the index is (re)created.
    $staleIndex = new Index($this->_getClient(), 'elastica_test');
    $staleIndex->getSettings()->setReadOnly(false);

    $index = $this->_createIndex();
    $type = $index->getType('test');

    $writableDoc = new Document(null, array('hello' => 'world'));
    $rejectedDoc = new Document(null, array('hello' => 'world'));
    $finalDoc = new Document(null, array('hello' => 'world'));

    // Add a document to the normal index.
    $type->addDocument($writableDoc);
    $this->assertFalse((bool) $index->getSettings()->get('blocks.read_only'));

    // Try to add a document to the read-only index.
    $index->getSettings()->setReadOnly(true);
    $this->assertTrue((bool) $index->getSettings()->get('blocks.read_only'));

    try {
        $type->addDocument($rejectedDoc);
        $this->fail('Should throw exception because of read only');
    } catch (ResponseException $exception) {
        $errorText = $exception->getMessage();
        $this->assertContains('ClusterBlockException', $errorText);
        $this->assertContains('index read-only', $errorText);
    }

    // Remove read only, then add a document.
    $response = $index->getSettings()->setReadOnly(false);
    $this->assertTrue($response->isOk());

    $type->addDocument($finalDoc);
    $index->refresh();

    $this->assertEquals(2, $type->count());
}
/**
 * Verify that setting the 'analyzer' parameter on a FuzzyLikeThis query
 * changes its results.
 *
 * The index defines a custom 'searchAnalyzer' with a stop filter for the
 * word 'The'. The query for "The" is expected to return one hit without
 * the analyzer and none once the analyzer is set.
 */
public function testSearchSetAnalyzer() {
    $stopFilter = array('type' => 'stop', 'stopwords' => array('The'));
    $searchAnalyzer = array('type' => 'custom', 'tokenizer' => 'standard', 'filter' => array('myStopWords'));
    $analysis = array(
        'analyzer' => array('searchAnalyzer' => $searchAnalyzer),
        'filter' => array('myStopWords' => $stopFilter),
    );

    $index = new Index($this->_getClient(), 'test');
    $index->create(array('analysis' => $analysis), true);
    $index->getSettings()->setNumberOfReplicas(0);

    $type = new Type($index, 'helloworldfuzzy');

    // Both fields share the same stored, analyzed string definition.
    $analyzedString = array('store' => 'yes', 'type' => 'string', 'index' => 'analyzed');
    $mapping = new Mapping($type, array('email' => $analyzedString, 'content' => $analyzedString));
    $mapping->setSource(array('enabled' => false));
    $type->setMapping($mapping);

    $fixtures = array(
        1000 => 'The Fuzzy Test!',
        1001 => 'Elastica Fuzzy Test',
    );
    foreach ($fixtures as $id => $content) {
        $type->addDocument(new Document($id, array('email' => '*****@*****.**', 'content' => $content)));
    }

    // Refresh the index before searching.
    $index->refresh();

    $fltQuery = new FuzzyLikeThis();
    $fltQuery->addFields(array("email", "content"));
    $fltQuery->setLikeText("The");
    $fltQuery->setMinSimilarity(0.1);
    $fltQuery->setMaxQueryTerms(3);

    // Before the analyzer is applied the query should return 1 result.
    $withoutAnalyzer = $type->search($fltQuery);
    $this->assertEquals(1, $withoutAnalyzer->count());

    // With the analyzer applied the query should return nothing.
    $fltQuery->setParam('analyzer', 'searchAnalyzer');
    $withAnalyzer = $type->search($fltQuery);
    $this->assertEquals(0, $withAnalyzer->count());
}