public function testMultipleCoordinates()
 {
     // Fixture: two "river" coordinates on earth, only the first flagged as primary.
     $firstCoord = array('lat' => 0.7077777777777799, 'lon' => -50.089444444444);
     $secondCoord = array('lat' => -15.518055555556, 'lon' => -71.765277777778);
     $page = array(
         'coordinates' => array(
             array(
                 'coord' => $firstCoord,
                 'region' => null,
                 'dim' => 10000,
                 'name' => "",
                 'primary' => true,
                 'type' => "river",
                 'globe' => "earth",
                 'country' => "BR",
             ),
             array(
                 'coord' => $secondCoord,
                 'region' => null,
                 'dim' => 10000,
                 'name' => "",
                 'primary' => false,
                 'type' => "river",
                 'globe' => "earth",
                 'country' => "BR",
             ),
         ),
     );
     $scoring = SuggestScoringMethodFactory::getScoringMethod('incomingLinks', 1);
     $suggestBuilder = new SuggestBuilder($scoring);

     // Single primary (the first entry).
     $this->assertSame($firstCoord, $suggestBuilder->findPrimaryCoordinates($page));

     // Both entries flagged primary.
     $page['coordinates'][1]['primary'] = true;
     $this->assertSame(
         $firstCoord,
         $suggestBuilder->findPrimaryCoordinates($page),
         "With two primaries coord we choose the first one"
     );

     // Only the second entry flagged primary.
     $page['coordinates'][0]['primary'] = false;
     $this->assertSame(
         $secondCoord,
         $suggestBuilder->findPrimaryCoordinates($page),
         "Choose primary coord even if it's not the first one."
     );

     // No entry flagged primary.
     $page['coordinates'][1]['primary'] = false;
     $this->assertSame(
         $firstCoord,
         $suggestBuilder->findPrimaryCoordinates($page),
         "Choose first coord if there's no primary."
     );

     // First entry is primary but located on another globe.
     $page['coordinates'][0]['primary'] = true;
     $page['coordinates'][0]['globe'] = 'Magrathea';
     $this->assertSame(
         $secondCoord,
         $suggestBuilder->findPrimaryCoordinates($page),
         "Choose first coord on earth."
     );

     // Neither entry is on earth.
     $page['coordinates'][1]['globe'] = 'Magrathea';
     $this->assertNull(
         $suggestBuilder->findPrimaryCoordinates($page),
         "No coord if none is on earth."
     );
 }
 /**
  * Merge top level multi-queries and turn the returned page ids into suggestions.
  *
  * WARNING: experimental API
  *
  * Every option found in the response is decoded with
  * SuggestBuilder::decodeOutput(); options that cannot be decoded are skipped.
  * For each page id only the option with the highest discounted score is kept.
  * Suggestions that carry no text after the first pass (non-title suggestions)
  * trigger a second "_mget" request to fetch the page redirects, and the text
  * is chosen with Util::chooseBestRedirect() against $this->term.
  *
  * Writes 'elasticTookMs', 'hitsTotal', 'hitsReturned', 'hitsOffset' and, when
  * the second pass succeeds, 'elasticTook2PassMs' into $this->logContext.
  *
  * @param \Elastica\Response $response Response from elasticsearch _suggest api
  * @param array $profiles the suggestion profiles, keyed by suggestion name;
  *  the 'discount' entry of each profile is multiplied with the raw score
  * @param int $limit Maximum suggestions to return; any value <= 0 (the
  *  default is -1) means unlimited
  * @return SearchSuggestionSet the suggestions that ended up with a non-empty
  *  text, ordered by descending score
  */
 protected function postProcessSuggest(\Elastica\Response $response, $profiles, $limit = -1)
 {
     $this->logContext['elasticTookMs'] = intval($response->getQueryTime() * 1000);
     $data = $response->getData();
     // '_shards' is response metadata, not a named suggestion
     unset($data['_shards']);
     $suggestions = array();
     foreach ($data as $name => $results) {
         $discount = $profiles[$name]['discount'];
         foreach ($results as $suggested) {
             foreach ($suggested['options'] as $suggest) {
                 $output = SuggestBuilder::decodeOutput($suggest['text']);
                 if ($output === null) {
                     // Ignore broken output
                     continue;
                 }
                 $pageId = $output['id'];
                 $type = $output['type'];
                 $score = $discount * $suggest['score'];
                 // Keep only the best scoring suggestion per page
                 if (!isset($suggestions[$pageId]) || $score > $suggestions[$pageId]->getScore()) {
                     $suggestion = new SearchSuggestion($score, null, null, $pageId);
                     // If it's a title suggestion we have the text
                     if ($type === SuggestBuilder::TITLE_SUGGESTION) {
                         $suggestion->setText($output['text']);
                     }
                     $suggestions[$pageId] = $suggestion;
                 }
             }
         }
     }
     // Sort by descending score. The comparator must return an integer:
     // returning the raw difference of two float scores would be cast to int,
     // so any two scores less than 1 apart would be treated as equal.
     uasort($suggestions, function ($a, $b) {
         $scoreA = $a->getScore();
         $scoreB = $b->getScore();
         if ($scoreA == $scoreB) {
             return 0;
         }
         return $scoreB > $scoreA ? 1 : -1;
     });
     $this->logContext['hitsTotal'] = count($suggestions);
     if ($limit > 0) {
         // preserve keys: suggestions are looked up by page id below
         $suggestions = array_slice($suggestions, 0, $limit, true);
     }
     $this->logContext['hitsReturned'] = count($suggestions);
     $this->logContext['hitsOffset'] = 0;
     // we must fetch redirect data for redirect suggestions
     $missingText = array();
     foreach ($suggestions as $id => $suggestion) {
         if ($suggestion->getText() === null) {
             $missingText[] = $id;
         }
     }
     if (!empty($missingText)) {
         // Experimental.
         //
         // Second pass query to fetch redirects.
         // It's not clear if it's the best option, this will slowdown the whole query
         // when we hit a redirect suggestion.
         // Other option would be to encode redirects as a payload resulting in a
         // very big index...
         // XXX: we support only the content index
         $type = $this->connection->getPageType($this->indexBaseName, Connection::CONTENT_INDEX_TYPE);
         // NOTE: we are already in a poolCounterWork
         // Multi get is not supported by elastica
         $redirResponse = null;
         try {
             $redirResponse = $type->request('_mget', 'GET', array('ids' => $missingText), array('_source_include' => 'redirect'));
             if ($redirResponse->isOk()) {
                 $this->logContext['elasticTook2PassMs'] = intval($redirResponse->getQueryTime() * 1000);
                 $docs = $redirResponse->getData();
                 foreach ($docs['docs'] as $doc) {
                     if (empty($doc['_source']['redirect'])) {
                         continue;
                     }
                     // We use the original query, we should maybe use the variant that generated this result?
                     $text = Util::chooseBestRedirect($this->term, $doc['_source']['redirect']);
                     if (!empty($suggestions[$doc['_id']])) {
                         $suggestions[$doc['_id']]->setText($text);
                     }
                 }
             } else {
                 // Best effort: a failed second pass only drops the text-less suggestions
                 LoggerFactory::getInstance('CirrusSearch')->warning('Unable to fetch redirects for suggestion {query} with results {ids} : {error}', array('query' => $this->term, 'ids' => serialize($missingText), 'error' => $redirResponse->getError()));
             }
         } catch (\Elastica\Exception\ExceptionInterface $e) {
             LoggerFactory::getInstance('CirrusSearch')->warning('Unable to fetch redirects for suggestion {query} with results {ids} : {error}', array('query' => $this->term, 'ids' => serialize($missingText), 'error' => $this->extractMessage($e)));
         }
     }
     return new SearchSuggestionSet(array_filter($suggestions, function ($suggestion) {
         // text should be not empty for suggestions
         return $suggestion->getText() != null;
     }));
 }
 /**
  * Scan the content index and feed the suggestions built from each page
  * into the title suggest type of the destination index.
  *
  * Only main namespace pages are read, and the large text fields are
  * excluded from the fetched source. Sets $this->scoreMethod as a side effect.
  */
 private function indexData()
 {
     $query = new Query();
     $query->setFields(array('_id', '_type', '_source'));
     // Exclude content fields to save bandwidth
     $excludedFields = array('text', 'source_text', 'opening_text', 'auxiliary_text');
     $query->setSource(array('exclude' => $excludedFields));
     // Restrict the scan to pages of the main namespace
     $matchAll = new Elastica\Query\MatchAll();
     $pageTypeFilter = new Elastica\Filter\Type(Connection::PAGE_TYPE_NAME);
     $mainNamespaceFilter = new Elastica\Filter\Term(array("namespace" => NS_MAIN));
     $filters = new Elastica\Filter\BoolAnd(array($pageTypeFilter, $mainNamespaceFilter));
     $query->setQuery(new Elastica\Query\Filtered($matchAll, $filters));

     $scrollOptions = array(
         'search_type' => 'scan',
         'scroll' => "15m",
         'size' => $this->indexChunkSize,
     );
     // TODO: only content index for now ( we'll have to check how it works with commons )
     $sourceIndex = $this->getConnection()->getIndex($this->indexBaseName, Connection::CONTENT_INDEX_TYPE);
     $scan = $sourceIndex->search($query, $scrollOptions);
     $scanResponse = $scan->getResponse();
     $scanData = $scanResponse->getData();
     $totalDocsInIndex = $scanData['hits']['total'];
     $totalDocsToDump = $totalDocsInIndex;

     // The scoring method is given the total number of scanned documents
     $this->scoreMethod = SuggestScoringMethodFactory::getScoringMethod(
         $this->getOption('scoringMethod', 'quality'),
         $totalDocsInIndex
     );
     $builder = new SuggestBuilder($this->scoreMethod, $this->withGeo);
     $docsDumped = 0;
     $this->output("Indexing {$totalDocsToDump} documents ({$totalDocsInIndex} in the index)\n");

     // $this is handed to the closure through a plain variable
     $self = $this;
     $destinationType = $this->getIndex()->getType(Connection::TITLE_SUGGEST_TYPE_NAME);
     $retryAttempts = $this->indexRetryAttempts;

     // Called once per scroll batch: build the suggest docs and bulk index them
     $processBatch = function ($batch) use ($self, &$docsDumped, $totalDocsToDump, $builder, $destinationType, $retryAttempts) {
         $suggestDocs = array();
         foreach ($batch as $hit) {
             $docsDumped++;
             foreach ($builder->build($hit->getId(), $hit->getSource()) as $suggest) {
                 $suggestDocs[] = new \Elastica\Document(null, $suggest);
             }
         }
         $self->outputProgress($docsDumped, $totalDocsToDump);
         Util::withRetry($retryAttempts, function () use ($destinationType, $suggestDocs) {
             $destinationType->addDocuments($suggestDocs);
         });
     };
     Util::iterateOverScroll($sourceIndex, $scanResponse->getScrollId(), '15m', $processBatch, 0, $retryAttempts);
     $this->output("Indexing done.\n");
 }
 /**
  * Datasets for the output encoder test.
  *
  * Each dataset holds a decoded structure (or null) followed by an output
  * string; presumably consumed as (expected decode result, encoded output).
  *
  * @return array datasets keyed by a descriptive name
  */
 public function provideOutputEncoder()
 {
     $cases = array();

     // Output produced by the title encoder
     $cases['title'] = array(
         array('id' => 123, 'type' => SuggestBuilder::TITLE_SUGGESTION, 'text' => 'This is a title'),
         SuggestBuilder::encodeTitleOutput(123, "This is a title"),
     );

     // Output produced by the redirect encoder (no text)
     $cases['redirect'] = array(
         array('id' => 123, 'type' => SuggestBuilder::REDIRECT_SUGGESTION),
         SuggestBuilder::encodeRedirectOutput(123),
     );

     // Outputs paired with null
     $cases['Garbage'] = array(null, 'Garbage');
     $cases['Broken title'] = array(null, '123:t');
     $cases['Partial encoding'] = array(null, '123:');
     $cases['null output'] = array(null, null);

     return $cases;
 }