/**
 * Builds suggestions for the "Ulm" page (four redirects, one coordinate) and
 * checks the exact output of SuggestBuilder::build().
 *
 * Two suggestion documents are expected: one for the title, weighted with the
 * incoming links score, and one grouping every redirect, weighted with that
 * score multiplied by SuggestBuilder::REDIRECT_DISCOUNT. Each document holds
 * the 'suggest' and 'suggest-stop' entries plus their '-geo' counterparts,
 * which additionally carry the page location as context.
 */
public function testUlm()
{
    $score = 10;
    $redirScore = (int) ($score * SuggestBuilder::REDIRECT_DISCOUNT);

    $location = array('lat' => 48.3985, 'lon' => 9.9918);
    $redirectTitles = array('UN/LOCODE:DEULM', 'Ulm, Germany', 'Ulm displaced persons camp', 'Söflingen');

    // Input document: every redirect lives in namespace 0.
    $redirects = array();
    foreach ($redirectTitles as $redirectTitle) {
        $redirects[] = array('title' => $redirectTitle, 'namespace' => 0);
    }
    $doc = array(
        'title' => 'Ulm',
        'redirect' => $redirects,
        'coordinates' => array(
            array(
                'coord' => $location,
                'region' => "BW",
                'dim' => 10000,
                'name' => "",
                'primary' => true,
                'type' => "city",
                'globe' => "earth",
                'country' => "DE",
            ),
        ),
        'incoming_links' => $score,
    );

    // Expected output: the geo variants are the plain suggestion with a
    // trailing 'context' key (array union keeps the existing key order).
    $geoContext = array('context' => array('location' => $location));
    $plainSuggestions = array(
        array('input' => array('Ulm'), 'output' => '1:t:Ulm', 'weight' => $score),
        array('input' => $redirectTitles, 'output' => '1:r', 'weight' => $redirScore),
    );
    $expected = array();
    foreach ($plainSuggestions as $plain) {
        $withGeo = $plain + $geoContext;
        $expected[] = array(
            'suggest' => $plain,
            'suggest-stop' => $plain,
            'suggest-geo' => $withGeo,
            'suggest-stop-geo' => $withGeo,
        );
    }

    $builder = new SuggestBuilder(SuggestScoringMethodFactory::getScoringMethod('incomingLinks', 1));
    $suggestions = $builder->build(1, $doc);

    $this->assertSame($expected, $suggestions);
}
/**
 * Scans the content index and indexes the suggestions built from each page.
 *
 * Runs a scan/scroll search over main-namespace pages of the content index
 * (heavy text fields are excluded from the returned source), builds the
 * suggestion documents of every hit with a SuggestBuilder and bulk-adds them
 * to the title suggest type of the target index. The scoring method is picked
 * from the 'scoringMethod' option (default 'quality') and stored in
 * $this->scoreMethod. Progress is reported after every scroll batch.
 */
private function indexData()
{
    $query = new Query();
    $query->setFields(array('_id', '_type', '_source'));
    // Exclude content fields to save bandwidth
    $query->setSource(array('exclude' => array('text', 'source_text', 'opening_text', 'auxiliary_text')));
    $query->setQuery(new Elastica\Query\Filtered(
        new Elastica\Query\MatchAll(),
        new Elastica\Filter\BoolAnd(array(
            new Elastica\Filter\Type(Connection::PAGE_TYPE_NAME),
            new Elastica\Filter\Term(array("namespace" => NS_MAIN)),
        ))
    ));

    $scrollOptions = array(
        'search_type' => 'scan',
        'scroll' => "15m",
        'size' => $this->indexChunkSize,
    );

    // TODO: only content index for now ( we'll have to check how it works with commons )
    $sourceIndex = $this->getConnection()->getIndex($this->indexBaseName, Connection::CONTENT_INDEX_TYPE);
    $result = $sourceIndex->search($query, $scrollOptions);

    // The initial search response carries the total hit count used for scoring and progress.
    $totalDocsInIndex = $result->getResponse()->getData();
    $totalDocsInIndex = $totalDocsInIndex['hits']['total'];
    $totalDocsToDump = $totalDocsInIndex;

    $scoreMethodName = $this->getOption('scoringMethod', 'quality');
    $this->scoreMethod = SuggestScoringMethodFactory::getScoringMethod($scoreMethodName, $totalDocsInIndex);
    $builder = new SuggestBuilder($this->scoreMethod, $this->withGeo);

    $docsDumped = 0;
    $this->output("Indexing {$totalDocsToDump} documents ({$totalDocsInIndex} in the index)\n");

    // $this is handed to the closure through a local alias.
    $self = $this;
    $destinationType = $this->getIndex()->getType(Connection::TITLE_SUGGEST_TYPE_NAME);
    $retryAttempts = $this->indexRetryAttempts;

    Util::iterateOverScroll(
        $sourceIndex,
        $result->getResponse()->getScrollId(),
        '15m',
        function ($results) use ($self, &$docsDumped, $totalDocsToDump, $builder, $destinationType, $retryAttempts) {
            $suggestDocs = array();
            foreach ($results as $result) {
                $docsDumped++;
                $suggests = $builder->build($result->getId(), $result->getSource());
                foreach ($suggests as $suggest) {
                    $suggestDocs[] = new \Elastica\Document(null, $suggest);
                }
            }
            $self->outputProgress($docsDumped, $totalDocsToDump);

            // Elastica throws on an empty bulk request; a batch that produced
            // no suggestions must be skipped instead of aborting the whole run.
            if (empty($suggestDocs)) {
                return;
            }

            Util::withRetry($retryAttempts, function () use ($destinationType, $suggestDocs) {
                $destinationType->addDocuments($suggestDocs);
            });
        },
        0,
        $retryAttempts
    );

    $this->output("Indexing done.\n");
}