/**
 * Merge top level multi-queries and resolve the returned page ids into
 * SearchSuggestion objects.
 *
 * WARNING: experimental API
 *
 * For every page id only the best scoring option is kept, the score being the
 * raw suggester score multiplied by the 'discount' of the profile entry that
 * produced it. Suggestions that carry no text (anything that is not a title
 * suggestion) trigger a second request that fetches the page redirects; the
 * text is then chosen among them using the user query stored in $this->term.
 * Suggestions still without text after that pass are dropped.
 *
 * @param \Elastica\Response $response Response from elasticsearch _suggest api
 * @param array $profiles the suggestion profiles, keyed by suggestion name,
 *  each entry providing a 'discount' factor
 * @param int $limit Maximum suggestions to return, -1 for unlimited
 * @return SearchSuggestionSet a set of Suggestions
 */
protected function postProcessSuggest(\Elastica\Response $response, $profiles, $limit = -1) {
    $this->logContext['elasticTookMs'] = intval($response->getQueryTime() * 1000);
    $data = $response->getData();
    // '_shards' is not a suggestion entry, drop it before iterating
    unset($data['_shards']);

    $suggestions = array();
    foreach ($data as $name => $results) {
        $discount = $profiles[$name]['discount'];
        foreach ($results as $suggested) {
            foreach ($suggested['options'] as $suggest) {
                $output = SuggestBuilder::decodeOutput($suggest['text']);
                if ($output === null) {
                    // Ignore broken output
                    continue;
                }
                $pageId = $output['id'];
                $type = $output['type'];

                $score = $discount * $suggest['score'];
                // Keep only the best scoring suggestion for a given page
                if (!isset($suggestions[$pageId]) ||
                    $score > $suggestions[$pageId]->getScore()
                ) {
                    $suggestion = new SearchSuggestion($score, null, null, $pageId);
                    // If it's a title suggestion we have the text
                    if ($type === SuggestBuilder::TITLE_SUGGESTION) {
                        $suggestion->setText($output['text']);
                    }
                    $suggestions[$pageId] = $suggestion;
                }
            }
        }
    }

    // Sort by descending score.
    // The comparator result is cast to int by PHP, so returning the raw float
    // difference would make scores less than 1 apart compare as equal.
    uasort($suggestions, function ($a, $b) {
        $scoreA = $a->getScore();
        $scoreB = $b->getScore();
        if ($scoreA == $scoreB) {
            return 0;
        }
        return $scoreA < $scoreB ? 1 : -1;
    });

    $this->logContext['hitsTotal'] = count($suggestions);

    if ($limit > 0) {
        // preserve_keys: the array is keyed by page id
        $suggestions = array_slice($suggestions, 0, $limit, true);
    }

    $this->logContext['hitsReturned'] = count($suggestions);
    $this->logContext['hitsOffset'] = 0;

    // we must fetch redirect data for redirect suggestions
    $missingText = array();
    foreach ($suggestions as $id => $suggestion) {
        if ($suggestion->getText() === null) {
            $missingText[] = $id;
        }
    }

    if (!empty($missingText)) {
        // Experimental.
        //
        // Second pass query to fetch redirects.
        // It's not clear if it's the best option, this will slowdown the whole query
        // when we hit a redirect suggestion.
        // Other option would be to encode redirects as a payload resulting in a
        // very big index...

        // XXX: we support only the content index
        $type = $this->connection->getPageType($this->indexBaseName, Connection::CONTENT_INDEX_TYPE);
        // NOTE: we are already in a poolCounterWork
        // Multi get is not supported by elastica
        $redirResponse = null;
        try {
            $redirResponse = $type->request(
                '_mget',
                'GET',
                array('ids' => $missingText),
                array('_source_include' => 'redirect')
            );
            if ($redirResponse->isOk()) {
                $this->logContext['elasticTook2PassMs'] = intval($redirResponse->getQueryTime() * 1000);
                $docs = $redirResponse->getData();
                foreach ($docs['docs'] as $doc) {
                    if (empty($doc['_source']['redirect'])) {
                        continue;
                    }
                    // We use the original query, we should maybe use the variant that generated this result?
                    $text = Util::chooseBestRedirect($this->term, $doc['_source']['redirect']);
                    // Only update suggestions we actually asked for
                    if (!empty($suggestions[$doc['_id']])) {
                        $suggestions[$doc['_id']]->setText($text);
                    }
                }
            } else {
                LoggerFactory::getInstance('CirrusSearch')->warning(
                    'Unable to fetch redirects for suggestion {query} with results {ids} : {error}',
                    array(
                        'query' => $this->term,
                        'ids' => serialize($missingText),
                        'error' => $redirResponse->getError()
                    )
                );
            }
        } catch (\Elastica\Exception\ExceptionInterface $e) {
            // Best effort: a failure of the second pass only costs us the
            // suggestions that have no text, they are filtered out below.
            LoggerFactory::getInstance('CirrusSearch')->warning(
                'Unable to fetch redirects for suggestion {query} with results {ids} : {error}',
                array(
                    'query' => $this->term,
                    'ids' => serialize($missingText),
                    'error' => $this->extractMessage($e)
                )
            );
        }
    }

    return new SearchSuggestionSet(array_filter($suggestions, function ($suggestion) {
        // text should be not empty for suggestions
        return $suggestion->getText() != null;
    }));
}
/**
 * Merge top level multi-queries and resolve returned pageIds into Title objects.
 *
 * WARNING: experimental API
 *
 * Each option text is expected to look like "pageId:type[:text]". For every
 * page id only the best scoring option is kept, the score being the raw
 * suggester score multiplied by the 'discount' of the profile entry that
 * produced it. Options without text trigger a second request that fetches the
 * page redirects, the text being chosen among them with the user query.
 * Suggestions still without text after that pass are dropped.
 *
 * @param string $query the user query
 * @param \Elastica\Response $response Response from elasticsearch _suggest api
 * @param array $profile the suggestion profile, keyed by suggestion name,
 *  each entry providing a 'discount' factor
 * @param int $limit Maximum suggestions to return, -1 for unlimited
 * @return array[] List of suggestions ordered by descending score, each one
 *  being an array with the keys 'title' (Title), 'pageId' and 'score'
 */
protected function postProcessSuggest($query, \Elastica\Response $response, $profile, $limit = -1) {
    $this->logContext['elasticTookMs'] = intval($response->getQueryTime() * 1000);
    $data = $response->getData();
    // '_shards' is not a suggestion entry, drop it before iterating
    unset($data['_shards']);

    $suggestions = array();
    foreach ($data as $name => $results) {
        $discount = $profile[$name]['discount'];
        foreach ($results as $suggested) {
            foreach ($suggested['options'] as $suggest) {
                $output = explode(':', $suggest['text'], 3);
                if (count($output) < 2) {
                    // Ignore broken output
                    continue;
                }
                $pageId = $output[0];
                $type = $output[1];

                $score = $discount * $suggest['score'];
                // Keep only the best scoring suggestion for a given page
                if (!isset($suggestions[$pageId]) ||
                    $score > $suggestions[$pageId]['score']
                ) {
                    $suggestion = array(
                        'score' => $score,
                        'pageId' => $pageId
                    );
                    // If it's a title suggestion we have the text
                    if ($type === 't' && count($output) == 3) {
                        $suggestion['text'] = $output[2];
                    }
                    $suggestions[$pageId] = $suggestion;
                }
            }
        }
    }

    // Sort by descending score.
    // The comparator result is cast to int by PHP, so returning the raw float
    // difference would make scores less than 1 apart compare as equal.
    uasort($suggestions, function ($a, $b) {
        if ($a['score'] == $b['score']) {
            return 0;
        }
        return $a['score'] < $b['score'] ? 1 : -1;
    });

    $this->logContext['hitsTotal'] = count($suggestions);

    if ($limit > 0) {
        // preserve_keys: the array is keyed by page id
        $suggestions = array_slice($suggestions, 0, $limit, true);
    }

    $this->logContext['hitsReturned'] = count($suggestions);
    $this->logContext['hitsOffset'] = 0;

    // we must fetch redirect data for redirect suggestions
    $missingText = array();
    foreach ($suggestions as $id => $suggestion) {
        if (!isset($suggestion['text'])) {
            $missingText[] = $id;
        }
    }

    if (!empty($missingText)) {
        // Experimental.
        //
        // Second pass query to fetch redirects.
        // It's not clear if it's the best option, this will slowdown the whole query
        // when we hit a redirect suggestion.
        // Other option would be to encode redirects as a payload resulting in a
        // very big index...

        // XXX: we support only the content index
        $type = $this->connection->getPageType($this->indexBaseName, Connection::CONTENT_INDEX_TYPE);
        // NOTE: we are already in a poolCounterWork
        // Multi get is not supported by elastica
        $redirResponse = null;
        try {
            $redirResponse = $type->request(
                '_mget',
                'GET',
                array('ids' => $missingText),
                array('_source_include' => 'redirect')
            );
            if ($redirResponse->isOk()) {
                $this->logContext['elasticTook2PassMs'] = intval($redirResponse->getQueryTime() * 1000);
                $docs = $redirResponse->getData();
                $docs = $docs['docs'];
                foreach ($docs as $doc) {
                    $id = $doc['_id'];
                    if (empty($doc['_source']['redirect'])) {
                        continue;
                    }
                    if (!isset($suggestions[$id])) {
                        // Never create an entry for an id we did not ask for:
                        // it would have neither 'pageId' nor 'score' and would
                        // break the result building below.
                        continue;
                    }
                    $text = Util::chooseBestRedirect($query, $doc['_source']['redirect']);
                    $suggestions[$id]['text'] = $text;
                }
            } else {
                LoggerFactory::getInstance('CirrusSearch')->warning(
                    'Unable to fetch redirects for suggestion {query} with results {ids} : {error}',
                    array(
                        'query' => $query,
                        'ids' => serialize($missingText),
                        'error' => $redirResponse->getError()
                    )
                );
            }
        } catch (\Elastica\Exception\ExceptionInterface $e) {
            // Best effort: a failure of the second pass only costs us the
            // suggestions that have no text, they are skipped below.
            LoggerFactory::getInstance('CirrusSearch')->warning(
                'Unable to fetch redirects for suggestion {query} with results {ids} : {error}',
                array(
                    'query' => $query,
                    'ids' => serialize($missingText),
                    'error' => $this->extractMessage($e)
                )
            );
        }
    }

    $retval = array();
    foreach ($suggestions as $suggestion) {
        if (!isset($suggestion['text'])) {
            // We were unable to find a text to display
            // Maybe a page with redirects when we built the suggester index
            // but now without redirects?
            continue;
        }
        $retval[] = array(
            'title' => Title::makeTitle(0, $suggestion['text']),
            'pageId' => $suggestion['pageId'],
            'score' => $suggestion['score']
        );
    }

    return $retval;
}
/**
 * Checks which redirect title Util::chooseBestRedirect() returns for several
 * user queries run against the same list of redirects.
 */
public function testChooseBestRedirect() {
    $titles = array('Al. Einstein', 'Albert Einstein', 'A. Einstein', 'Einstein, Albert');

    // Wrap every title into the redirect structure (title + namespace 0)
    $redirects = array_map(function ($title) {
        return array('title' => $title, 'namespace' => 0);
    }, $titles);

    // array( user query, expected redirect title )
    $cases = array(
        array('a', 'Al. Einstein'),
        array('al', 'Al. Einstein'),
        array('albet', 'Albert Einstein'),
        array('Einstein', 'Einstein, Albert'),
        array('Ens', 'Einstein, Albert'),
    );

    foreach ($cases as $case) {
        list($userQuery, $expectedTitle) = $case;
        $this->assertEquals($expectedTitle, Util::chooseBestRedirect($userQuery, $redirects));
    }
}