/** * * @param Report $report * @param number $limit * @param string $sort * @param string $order * @param boolean $lazy * @param boolean $active * @param boolean $silent * * @return ArrayCollection $data */ public function generateResults(Report $report, $limit = 0, $sort = '_score', $order = 'desc', $lazy = true, $active = true, $silent = false) { $data = new ArrayCollection(); $request = $this->getServiceLocator()->get('Request'); $silent = $request->getQuery('debug') ? false : $silent; if ($report instanceof Report) { $agent = $report->getAgent(); $lead_query = new \Agent\Elastica\Query\BoolQuery(); if ($active) { $lead_query->addMust(new Elastica\Query\Match('active', 1)); } $client = $this->getElasticaClient(); if ($agent && $client) { $filters = $agent->getFilter(); if ($filters) { $lead_query = $this->applyFilters($lead_query, $filters); } $criteria = $agent->getCriteria(false); if ($criteria) { try { $query = new Elastica\Query(); /* @var $criterion \Agent\Entity\AgentCriterion */ foreach ($criteria as $i => $criterion) { $lead_query = $this->buildQuery($lead_query, $criterion); } $query->setQuery($lead_query); $size = $limit ? $limit : 1000; $query->setParam('track_scores', true); $query->addSort([$sort => ['order' => $order]]); $query->setSize($size); $results = $this->runQuery($query, $lazy, $silent); $total = isset($results['total']) ? $results['total'] : false; if ($total && $results['results']) { foreach ($results['results'] as $result) { $data->add($result); } } if ($total > $size) { $limit = $limit ?: $total; for ($page = 1; $page < ceil($limit / $size); $page++) { $query->setFrom($page * $size); $results = $this->runQuery($query, $silent); if ($results['results']) { foreach ($results as $result) { if (is_array($result)) { foreach ($result as $r) { if ($r instanceof Result) { $data->add($r); } } } elseif ($result instanceof Result) { $data->add($result); } } } } } } catch (\Exception $e) { if (!$silent) { $this->getFlashMessenger()->addErrorMessage($e->getMessage()); } } } } } return $data; }
/** * Powers full-text-like searches including prefix search. * * @param string $type * @param string $for * @return Status(mixed) results from the query transformed by the resultsType */ private function search($type, $for) { if ($this->nonTextQueries) { $bool = new \Elastica\Query\Bool(); if ($this->query !== null) { $bool->addMust($this->query); } foreach ($this->nonTextQueries as $nonTextQuery) { $bool->addMust($nonTextQuery); } $this->query = $bool; } if ($this->resultsType === null) { $this->resultsType = new FullTextResultsType(FullTextResultsType::HIGHLIGHT_ALL); } // Default null queries now so the rest of the method can assume it is not null. if ($this->query === null) { $this->query = new \Elastica\Query\MatchAll(); } $query = new Elastica\Query(); $query->setParam('_source', $this->resultsType->getSourceFiltering()); $query->setParam('fields', $this->resultsType->getFields()); $extraIndexes = array(); $indexType = $this->pickIndexTypeFromNamespaces(); if ($this->namespaces) { $extraIndexes = $this->getAndFilterExtraIndexes(); if ($this->needNsFilter($extraIndexes, $indexType)) { $this->filters[] = new \Elastica\Filter\Terms('namespace', $this->namespaces); } } // Wrap $this->query in a filtered query if there are any filters $unifiedFilter = Filters::unify($this->filters, $this->notFilters); if ($unifiedFilter !== null) { $this->query = new \Elastica\Query\Filtered($this->query, $unifiedFilter); } // Call installBoosts right after we're done munging the query to include filters // so any rescores installBoosts adds to the query are done against filtered results. $this->installBoosts(); $query->setQuery($this->query); $highlight = $this->resultsType->getHighlightingConfiguration($this->highlightSource); if ($highlight) { // Fuzzy queries work _terribly_ with the plain highlighter so just drop any field that is forcing // the plain highlighter all together. Do this here because this works so badly that no // ResultsType should be able to use the plain highlighter for these queries. if ($this->fuzzyQuery) { $highlight['fields'] = array_filter($highlight['fields'], function ($field) { return $field['type'] !== 'plain'; }); } if (!empty($this->nonTextHighlightQueries)) { // We have some phrase_prefix queries, so let's include them in the // generated highlight_query. $bool = new \Elastica\Query\Bool(); if ($this->highlightQuery) { $bool->addShould($this->highlightQuery); } foreach ($this->nonTextHighlightQueries as $nonTextHighlightQuery) { $bool->addShould($nonTextHighlightQuery); } $this->highlightQuery = $bool; } if ($this->highlightQuery) { $highlight['highlight_query'] = $this->highlightQuery->toArray(); } $query->setHighlight($highlight); } if ($this->suggest) { $query->setParam('suggest', $this->suggest); $query->addParam('stats', 'suggest'); } if ($this->offset) { $query->setFrom($this->offset); } if ($this->limit) { $query->setSize($this->limit); } if ($this->sort != 'relevance') { // Clear rescores if we aren't using relevance as the search sort because they aren't used. $this->rescore = array(); } if ($this->rescore) { // rescore_query has to be in array form before we send it to Elasticsearch but it is way easier to work // with if we leave it in query for until now $modifiedRescore = array(); foreach ($this->rescore as $rescore) { $rescore['query']['rescore_query'] = $rescore['query']['rescore_query']->toArray(); $modifiedRescore[] = $rescore; } $query->setParam('rescore', $modifiedRescore); } $query->addParam('stats', $type); switch ($this->sort) { case 'relevance': break; // The default // The default case 'title_asc': $query->setSort(array('title.keyword' => 'asc')); break; case 'title_desc': $query->setSort(array('title.keyword' => 'desc')); break; case 'incoming_links_asc': $query->setSort(array('incoming_links' => array('order' => 'asc', 'missing' => '_first'))); break; case 'incoming_links_desc': $query->setSort(array('incoming_links' => array('order' => 'desc', 'missing' => '_last'))); break; default: LoggerFactory::getInstance('CirrusSearch')->warning("Invalid sort type: {sort}", array('sort' => $this->sort)); } $queryOptions = array(); if ($this->config->get('CirrusSearchMoreAccurateScoringMode')) { $queryOptions['search_type'] = 'dfs_query_then_fetch'; } switch ($type) { case 'regex': $poolCounterType = 'CirrusSearch-Regex'; $queryOptions['timeout'] = $this->config->getElement('CirrusSearchSearchShardTimeout', 'regex'); break; case 'prefix': $poolCounterType = 'CirrusSearch-Prefix'; $queryOptions['timeout'] = $this->config->getElement('CirrusSearchSearchShardTimeout', 'default'); break; default: $poolCounterType = 'CirrusSearch-Search'; $queryOptions['timeout'] = $this->config->getElement('CirrusSearchSearchShardTimeout', 'default'); } $this->connection->setTimeout($queryOptions['timeout']); // Setup the search $pageType = $this->connection->getPageType($this->indexBaseName, $indexType); $search = $pageType->createSearch($query, $queryOptions); foreach ($extraIndexes as $i) { $search->addIndex($i); } $description = "{queryType} search for '{query}'"; $logContext = array('queryType' => $type, 'query' => $for); if ($this->returnQuery) { return Status::newGood(array('description' => $this->formatDescription($description, $logContext), 'path' => $search->getPath(), 'params' => $search->getOptions(), 'query' => $query->toArray(), 'options' => $queryOptions)); } if ($this->returnExplain && $this->returnResult) { $query->setExplain(true); } // Perform the search $searcher = $this; $user = $this->user; $result = Util::doPoolCounterWork($poolCounterType, $this->user, function () use($searcher, $search, $description, $logContext) { try { $searcher->start($description, $logContext); return $searcher->success($search->search()); } catch (\Elastica\Exception\ExceptionInterface $e) { return $searcher->failure($e); } }, function ($error, $key, $userName) use($type, $description, $user, $logContext) { $forUserName = $userName ? "for {userName} " : ''; LoggerFactory::getInstance('CirrusSearch')->warning("Pool error {$forUserName}on key {key} during {$description}: {error}", $logContext + array('userName' => $userName, 'key' => 'key', 'error' => $error)); if ($error === 'pool-queuefull') { if (strpos($key, 'nowait:CirrusSearch:_per_user') === 0) { $loggedIn = $user->isLoggedIn() ? 'logged-in' : 'anonymous'; return Status::newFatal("cirrussearch-too-busy-for-you-{$loggedIn}-error"); } if ($type === 'regex') { return Status::newFatal('cirrussearch-regex-too-busy-error'); } return Status::newFatal('cirrussearch-too-busy-error'); } return Status::newFatal('cirrussearch-backend-error'); }); if ($result->isOK()) { $responseData = $result->getValue()->getResponse()->getData(); if ($this->returnResult) { return Status::newGood(array('description' => $this->formatDescription($description, $logContext), 'path' => $search->getPath(), 'result' => $responseData)); } $result->setResult(true, $this->resultsType->transformElasticsearchResult($this->suggestPrefixes, $this->suggestSuffixes, $result->getValue(), $this->searchContext->isSearchContainedSyntax())); if (isset($responseData['timed_out']) && $responseData['timed_out']) { LoggerFactory::getInstance('CirrusSearch')->warning("{$description} timed out and only returned partial results!", $logContext); if ($result->getValue()->numRows() === 0) { return Status::newFatal('cirrussearch-backend-error'); } else { $result->warning('cirrussearch-timed-out'); } } } return $result; }