/**
 * Powers full-text-like searches including prefix search.
 *
 * Assembles the Elasticsearch request from the state accumulated on this
 * object (query, filters, highlighting, suggest, rescores, sort), runs it
 * through the pool counter and transforms the response with the configured
 * results type.
 *
 * Side effects visible in this method: $this->query, $this->resultsType,
 * $this->filters, $this->highlightQuery and $this->rescore may all be
 * rewritten while the request is being built.
 *
 * @param string $type Kind of search ('regex' and 'prefix' get dedicated pool
 *  counters; anything else uses the general search pool). Also recorded as a
 *  'stats' param on the query and used as {queryType} in log messages.
 * @param string $for Text being searched for; only used for logging and
 *  descriptions in this method.
 * @return Status(mixed) results from the query transformed by the resultsType
 */
private function search($type, $for) {
    // Fold any non-text queries into the main query as additional "must" clauses.
    if ($this->nonTextQueries) {
        $bool = new \Elastica\Query\Bool();
        if ($this->query !== null) {
            $bool->addMust($this->query);
        }
        foreach ($this->nonTextQueries as $nonTextQuery) {
            $bool->addMust($nonTextQuery);
        }
        $this->query = $bool;
    }

    if ($this->resultsType === null) {
        $this->resultsType = new FullTextResultsType(FullTextResultsType::HIGHLIGHT_ALL);
    }

    // Default null queries now so the rest of the method can assume it is not null.
    if ($this->query === null) {
        $this->query = new \Elastica\Query\MatchAll();
    }

    // Fully qualified like every other Elastica reference in this method so that
    // name resolution does not depend on the file's namespace/use statements.
    $query = new \Elastica\Query();
    $query->setParam('_source', $this->resultsType->getSourceFiltering());
    $query->setParam('fields', $this->resultsType->getFields());

    $extraIndexes = array();
    $indexType = $this->pickIndexTypeFromNamespaces();
    if ($this->namespaces) {
        $extraIndexes = $this->getAndFilterExtraIndexes();
        if ($this->needNsFilter($extraIndexes, $indexType)) {
            $this->filters[] = new \Elastica\Filter\Terms('namespace', $this->namespaces);
        }
    }

    // Wrap $this->query in a filtered query if there are any filters
    $unifiedFilter = Filters::unify($this->filters, $this->notFilters);
    if ($unifiedFilter !== null) {
        $this->query = new \Elastica\Query\Filtered($this->query, $unifiedFilter);
    }

    // Call installBoosts right after we're done munging the query to include filters
    // so any rescores installBoosts adds to the query are done against filtered results.
    $this->installBoosts();

    $query->setQuery($this->query);

    $highlight = $this->resultsType->getHighlightingConfiguration($this->highlightSource);
    if ($highlight) {
        // Fuzzy queries work _terribly_ with the plain highlighter so just drop any field that is
        // forcing the plain highlighter all together. Do this here because this works so badly
        // that no ResultsType should be able to use the plain highlighter for these queries.
        if ($this->fuzzyQuery) {
            $highlight['fields'] = array_filter($highlight['fields'], function ($field) {
                // Fields that do not declare a type are kept (they are not forcing
                // the plain highlighter); isset() avoids an undefined index notice.
                return !isset($field['type']) || $field['type'] !== 'plain';
            });
        }
        if (!empty($this->nonTextHighlightQueries)) {
            // We have some phrase_prefix queries, so let's include them in the
            // generated highlight_query.
            $bool = new \Elastica\Query\Bool();
            if ($this->highlightQuery) {
                $bool->addShould($this->highlightQuery);
            }
            foreach ($this->nonTextHighlightQueries as $nonTextHighlightQuery) {
                $bool->addShould($nonTextHighlightQuery);
            }
            $this->highlightQuery = $bool;
        }
        if ($this->highlightQuery) {
            $highlight['highlight_query'] = $this->highlightQuery->toArray();
        }
        $query->setHighlight($highlight);
    }

    if ($this->suggest) {
        $query->setParam('suggest', $this->suggest);
        $query->addParam('stats', 'suggest');
    }
    if ($this->offset) {
        $query->setFrom($this->offset);
    }
    if ($this->limit) {
        $query->setSize($this->limit);
    }

    if ($this->sort != 'relevance') {
        // Clear rescores if we aren't using relevance as the search sort because they aren't used.
        $this->rescore = array();
    }

    if ($this->rescore) {
        // rescore_query has to be in array form before we send it to Elasticsearch but it is way
        // easier to work with if we leave it in query form until now
        $modifiedRescore = array();
        foreach ($this->rescore as $rescore) {
            $rescore['query']['rescore_query'] = $rescore['query']['rescore_query']->toArray();
            $modifiedRescore[] = $rescore;
        }
        $query->setParam('rescore', $modifiedRescore);
    }

    $query->addParam('stats', $type);

    switch ($this->sort) {
        case 'relevance':
            // The default; no explicit sort is sent.
            break;
        case 'title_asc':
            $query->setSort(array('title.keyword' => 'asc'));
            break;
        case 'title_desc':
            $query->setSort(array('title.keyword' => 'desc'));
            break;
        case 'incoming_links_asc':
            $query->setSort(array('incoming_links' => array('order' => 'asc', 'missing' => '_first')));
            break;
        case 'incoming_links_desc':
            $query->setSort(array('incoming_links' => array('order' => 'desc', 'missing' => '_last')));
            break;
        default:
            // Unknown sorts are logged and the query is sent without an explicit sort.
            LoggerFactory::getInstance('CirrusSearch')->warning(
                "Invalid sort type: {sort}",
                array('sort' => $this->sort)
            );
    }

    $queryOptions = array();
    if ($this->config->get('CirrusSearchMoreAccurateScoringMode')) {
        $queryOptions['search_type'] = 'dfs_query_then_fetch';
    }

    // Pick the pool counter and the shard timeout based on the kind of search.
    switch ($type) {
        case 'regex':
            $poolCounterType = 'CirrusSearch-Regex';
            $queryOptions['timeout'] = $this->config->getElement('CirrusSearchSearchShardTimeout', 'regex');
            break;
        case 'prefix':
            $poolCounterType = 'CirrusSearch-Prefix';
            $queryOptions['timeout'] = $this->config->getElement('CirrusSearchSearchShardTimeout', 'default');
            break;
        default:
            $poolCounterType = 'CirrusSearch-Search';
            $queryOptions['timeout'] = $this->config->getElement('CirrusSearchSearchShardTimeout', 'default');
    }
    $this->connection->setTimeout($queryOptions['timeout']);

    // Setup the search
    $pageType = $this->connection->getPageType($this->indexBaseName, $indexType);
    $search = $pageType->createSearch($query, $queryOptions);
    foreach ($extraIndexes as $i) {
        $search->addIndex($i);
    }

    // The description is a message template; {queryType} and {query} are
    // filled in from $logContext.
    $description = "{queryType} search for '{query}'";
    $logContext = array('queryType' => $type, 'query' => $for);

    // Dry run: hand back the request that would have been sent instead of running it.
    if ($this->returnQuery) {
        return Status::newGood(array(
            'description' => $this->formatDescription($description, $logContext),
            'path' => $search->getPath(),
            'params' => $search->getOptions(),
            'query' => $query->toArray(),
            'options' => $queryOptions,
        ));
    }

    if ($this->returnExplain && $this->returnResult) {
        $query->setExplain(true);
    }

    // Perform the search. $this and its members are copied into locals so the
    // closures can reach them without binding $this.
    $searcher = $this;
    $user = $this->user;
    $result = Util::doPoolCounterWork(
        $poolCounterType,
        $this->user,
        function () use ($searcher, $search, $description, $logContext) {
            try {
                $searcher->start($description, $logContext);
                return $searcher->success($search->search());
            } catch (\Elastica\Exception\ExceptionInterface $e) {
                return $searcher->failure($e);
            }
        },
        function ($error, $key, $userName) use ($type, $description, $user, $logContext) {
            $forUserName = $userName ? "for {userName} " : '';
            // Log the actual pool counter key; previously the literal string
            // 'key' was logged, which made the {key} placeholder useless.
            LoggerFactory::getInstance('CirrusSearch')->warning(
                "Pool error {$forUserName}on key {key} during {$description}: {error}",
                $logContext + array('userName' => $userName, 'key' => $key, 'error' => $error)
            );
            if ($error === 'pool-queuefull') {
                // Per-user pool exhausted: tell this user specifically to back off.
                if (strpos($key, 'nowait:CirrusSearch:_per_user') === 0) {
                    $loggedIn = $user->isLoggedIn() ? 'logged-in' : 'anonymous';
                    return Status::newFatal("cirrussearch-too-busy-for-you-{$loggedIn}-error");
                }
                if ($type === 'regex') {
                    return Status::newFatal('cirrussearch-regex-too-busy-error');
                }
                return Status::newFatal('cirrussearch-too-busy-error');
            }
            return Status::newFatal('cirrussearch-backend-error');
        }
    );

    if ($result->isOK()) {
        $responseData = $result->getValue()->getResponse()->getData();

        // Raw mode: return the untransformed Elasticsearch response.
        if ($this->returnResult) {
            return Status::newGood(array(
                'description' => $this->formatDescription($description, $logContext),
                'path' => $search->getPath(),
                'result' => $responseData,
            ));
        }

        $result->setResult(true, $this->resultsType->transformElasticsearchResult(
            $this->suggestPrefixes,
            $this->suggestSuffixes,
            $result->getValue(),
            $this->searchContext->isSearchContainedSyntax()
        ));

        if (isset($responseData['timed_out']) && $responseData['timed_out']) {
            LoggerFactory::getInstance('CirrusSearch')->warning(
                "{$description} timed out and only returned partial results!",
                $logContext
            );
            // A timeout with nothing to show is treated as a backend failure;
            // partial results are returned with a warning attached.
            if ($result->getValue()->numRows() === 0) {
                return Status::newFatal('cirrussearch-backend-error');
            } else {
                $result->warning('cirrussearch-timed-out');
            }
        }
    }

    return $result;
}
/**
 * Fetch completion suggestions for a piece of text through the _suggest endpoint.
 * See https://www.elastic.co/guide/en/elasticsearch/reference/1.6/search-suggesters-completion.html
 *
 * WARNING: experimental API
 *
 * The term, its variants and the context are stored on this object before the
 * suggest request is built; the request itself runs under the
 * 'CirrusSearch-Search' pool counter.
 *
 * @param string $text Search term
 * @param string[]|null $variants Search term variants
 *  (usually issued from $wgContLang->autoConvertToAllVariants( $text ) )
 * @param array $context
 * @return Status
 */
public function suggest($text, $variants = null, $context = null) {
    $this->setTermAndVariants($text, $variants);
    $this->context = $context;

    list($profiles, $suggest) = $this->buildQuery();

    // The same shard timeout is applied to the request options and the connection.
    $shardTimeout = $this->config->getElement('CirrusSearchSearchShardTimeout', 'default');
    $queryOptions = array('timeout' => $shardTimeout);
    $this->connection->setTimeout($shardTimeout);

    $index = $this->connection->getIndex($this->indexBaseName, Connection::TITLE_SUGGEST_TYPE);
    $logContext = array(
        'query' => $text,
        'queryType' => $this->queryType,
    );

    // Copied into locals so the closure does not need $this bound.
    $searcher = $this;
    $limit = $this->limit;

    $work = function () use ($searcher, $index, $suggest, $logContext, $queryOptions, $profiles, $limit) {
        $searcher->start("{queryType} search for '{query}'", $logContext);
        try {
            $response = $index->request("_suggest", Request::POST, $suggest, $queryOptions);
            if (!$response->isOk()) {
                // Unsuccessful responses are handed back untouched.
                return $response;
            }
            $processed = $searcher->postProcessSuggest($response, $profiles, $limit);
            return $searcher->success($processed);
        } catch (\Elastica\Exception\ExceptionInterface $e) {
            return $searcher->failure($e);
        }
    };

    return Util::doPoolCounterWork('CirrusSearch-Search', $this->user, $work);
}