/**
 * Search revisions with provided term.
 *
 * Builds a full-text query string over 'revisions.text', adds a terms
 * aggregation and a highlight configuration to $this->query, and runs the
 * resulting search inside a PoolCounterWorkViaCallback.
 *
 * @param string $term Term to search
 * @return Status Good status wrapping the search result, or a fatal status
 *  when the search throws or the pool reports an error
 */
public function searchText($term)
{
    // full-text search
    $queryString = new QueryString($term);
    $queryString->setFields(array('revisions.text'));
    $this->query->setQuery($queryString);

    // add aggregation to determine exact amount of matching search terms
    $terms = $this->getTerms($term);
    $this->query->addAggregation($this->termsAggregation($terms));

    // @todo: abstract-away this config? (core/cirrus also has this - share it somehow?)
    $this->query->setHighlight(array(
        'fields' => array(
            static::HIGHLIGHT_FIELD => array(
                'type' => 'plain',
                'order' => 'score',
                'number_of_fragments' => 1,
                'fragment_size' => 10000,
            ),
        ),
        'pre_tags' => array(static::HIGHLIGHT_PRE),
        'post_tags' => array(static::HIGHLIGHT_POST),
    ));

    // @todo: support insource: queries (and perhaps others)
    $searchable = Connection::getFlowIndex($this->indexBaseName);
    if ($this->type !== false) {
        // Narrow the search to a single type when one was configured.
        $searchable = $searchable->getType($this->type);
    }
    $search = $searchable->createSearch($this->query);

    // @todo: PoolCounter config at PoolCounterSettings-eqiad.php
    // @todo: do we want this class to extend from ElasticsearchIntermediary and use
    //        its success & failure methods (like CirrusSearch/Searcher does)?

    // Perform the search
    $work = new PoolCounterWorkViaCallback('Flow-Search', "_elasticsearch", array(
        'doWork' => function () use ($search) {
            try {
                $result = $search->search();
                return Status::newGood($result);
            } catch (ExceptionInterface $e) {
                // strpos() returns int 0 when the needle sits at the very start of
                // the message, which is falsy; compare against false explicitly so
                // that case is still recognised.
                if (strpos($e->getMessage(), 'dynamic scripting for [groovy] disabled') !== false) {
                    // known issue with default ES config, let's display a more helpful message
                    return Status::newFatal(new \RawMessage("Couldn't complete search: dynamic scripting needs to be enabled. " . "Please add 'script.disable_dynamic: false' to your elasticsearch.yml"));
                }
                return Status::newFatal('flow-error-search');
            }
        },
        'error' => function (Status $status) {
            $errors = $status->getErrorsArray();
            wfLogWarning('Pool error searching Elasticsearch: ' . $errors[0][0]);
            return Status::newFatal('flow-error-search');
        },
    ));

    return $work->execute();
}
/**
 * Runs a highlighted match query on the 'text' field for the given search.
 *
 * The search input is wrapped in a bool/must match query, matches are
 * highlighted with <mark> tags, at most 50 hits are requested, and
 * onSearch() is invoked with the search and its results before returning.
 *
 * @param Search $search
 * @return Result[]
 */
public function search(Search $search)
{
    $textMatch = new Match();
    $textMatch->setField('text', $search->getInput());

    $mustMatch = new Bool();
    $mustMatch->addMust($textMatch);

    $highlightConfig = [
        'pre_tags' => ['<mark>'],
        'post_tags' => ['</mark>'],
        'fields' => [
            'text' => [
                'highlight_query' => [$mustMatch->toArray()],
            ],
        ],
    ];

    $elasticaQuery = new Query();
    $elasticaQuery->setQuery($mustMatch);
    $elasticaQuery->setHighlight($highlightConfig);

    $resultSet = $this->getIndex()->search($elasticaQuery, 50);
    $hits = $resultSet->getResults();

    $this->onSearch($search, $hits);

    return $hits;
}
/**
 * Indexes two documents sharing the same phrase and checks that a phrase
 * match on 'ruflin' returns both, each with the expected highlighted fragment.
 *
 * @group functional
 */
public function testHightlightSearch()
{
    $index = $this->_createIndex();
    $type = $index->getType('helloworld');

    $phrase = 'My name is ruflin';
    $docs = array(
        new Document(1, array(
            'id' => 1,
            'phrase' => $phrase,
            'username' => 'hanswurst',
            'test' => array('2', '3', '5'),
        )),
        new Document(2, array(
            'id' => 2,
            'phrase' => $phrase,
            'username' => 'peter',
            'test' => array('2', '3', '5'),
        )),
    );
    $type->addDocuments($docs);

    $query = new Query(new Query\MatchPhrase('phrase', 'ruflin'));
    $query->setHighlight(array(
        'pre_tags' => array('<em class="highlight">'),
        'post_tags' => array('</em>'),
        'fields' => array(
            'phrase' => array(
                'fragment_size' => 200,
                'number_of_fragments' => 1,
            ),
        ),
    ));

    // Make the freshly added documents visible to the search.
    $index->refresh();

    $resultSet = $type->search($query);

    $expectedHighlight = array(
        'phrase' => array(0 => 'My name is <em class="highlight">ruflin</em>'),
    );
    foreach ($resultSet as $hit) {
        $this->assertEquals($expectedHighlight, $hit->getHighlights());
    }

    $this->assertEquals(2, $resultSet->count());
}
/**
 * Runs a query-string search against the 'amun' index and exposes the
 * result set and its paging to the template.
 *
 * The page size comes from the 'count' URL parameter (default 8, capped at
 * 16) and the offset from 'startIndex'. Both are normalised to non-negative
 * integers before they are handed to the query, so values such as "3.5",
 * "abc", a missing parameter or a negative offset never reach Elasticsearch.
 *
 * When no search string was submitted nothing is assigned and the method
 * returns without a value.
 *
 * @httpMethod GET
 * @path /
 */
public function doIndex()
{
    $url = new Url($this->base->getSelf());

    // Page size: integer in [1, 16], falling back to 8 when absent or invalid.
    $count = (int) $url->getParam('count');
    if ($count <= 0) {
        $count = 8;
    } elseif ($count > 16) {
        $count = 16;
    }

    // Offset: never negative, 0 when absent or not numeric.
    $startIndex = max(0, (int) $url->getParam('startIndex'));

    $search = $this->get->search('string');

    if (!empty($search)) {
        // Limit the search term to 64 bytes.
        // NOTE(review): strlen()/substr() are byte-wise, so a multi-byte character
        // at the boundary may be cut in half - confirm whether that matters here.
        $search = strlen($search) > 64 ? substr($search, 0, 64) : $search;

        $queryString = new QueryString();
        //$queryString->setDefaultOperator('AND');
        $queryString->setQuery($search);

        $query = new Query();
        $query->setQuery($queryString);
        $query->setFrom($startIndex);
        $query->setLimit($count);
        $query->setHighlight(array(
            'pre_tags' => array('<mark>'),
            'post_tags' => array('</mark>'),
            'fields' => array(
                'title' => new \stdClass(),
                'content' => new \stdClass(),
            ),
        ));

        // get elasticsearch client
        $client = new Client(array(
            'host' => $this->registry['search.host'],
            'port' => $this->registry['search.port'],
        ));
        $index = $client->getIndex('amun');
        $searchResult = $index->search($query);

        $result = new ResultSet($searchResult->getTotalHits(), $startIndex, $count);

        foreach ($searchResult as $row) {
            $data = $row->getData();
            $data['url'] = $this->config['psx_url'] . '/' . $this->config['psx_dispatch'] . $data['path'];
            // The '@' prefix makes DateTime parse the stored value as a Unix timestamp.
            $data['date'] = new DateTime('@' . $data['date']);

            // if we have a highlight, overwrite the title or content with it
            $highlights = $row->getHighlights();
            if (isset($highlights['title'])) {
                $data['title'] = implode(' ... ', $highlights['title']);
            }
            if (isset($highlights['content'])) {
                $data['content'] = implode(' ... ', $highlights['content']);
            }

            $result->addData($data);
        }

        $this->template->assign('resultSearch', $result);

        $paging = new Paging($url, $result);
        $this->template->assign('pagingSearch', $paging, 0);

        return $result;
    }
}
/**
 * Powers full-text-like searches including prefix search.
 *
 * Assembles the Elastica query from the state accumulated on this object
 * (query, non-text queries, filters, highlighting, suggest, rescores, sort,
 * offset and limit), runs it through Util::doPoolCounterWork() and converts
 * the response with the configured results type.
 *
 * @param string $type Query type: 'regex' and 'prefix' select their own pool
 *  counter, any other value uses 'CirrusSearch-Search'. Also added to the
 *  query's 'stats' parameter.
 * @param string $for Text used as the '{query}' value in the log description
 * @return Status(mixed) results from the query transformed by the resultsType
 */
private function search($type, $for)
{
    if ($this->nonTextQueries) {
        $bool = new \Elastica\Query\Bool();
        if ($this->query !== null) {
            $bool->addMust($this->query);
        }
        foreach ($this->nonTextQueries as $nonTextQuery) {
            $bool->addMust($nonTextQuery);
        }
        $this->query = $bool;
    }

    if ($this->resultsType === null) {
        $this->resultsType = new FullTextResultsType(FullTextResultsType::HIGHLIGHT_ALL);
    }

    // Default null queries now so the rest of the method can assume it is not null.
    if ($this->query === null) {
        $this->query = new \Elastica\Query\MatchAll();
    }

    $query = new Elastica\Query();
    $query->setParam('_source', $this->resultsType->getSourceFiltering());
    $query->setParam('fields', $this->resultsType->getFields());

    $extraIndexes = array();
    $indexType = $this->pickIndexTypeFromNamespaces();
    if ($this->namespaces) {
        $extraIndexes = $this->getAndFilterExtraIndexes();
        if ($this->needNsFilter($extraIndexes, $indexType)) {
            $this->filters[] = new \Elastica\Filter\Terms('namespace', $this->namespaces);
        }
    }

    // Wrap $this->query in a filtered query if there are any filters
    $unifiedFilter = Filters::unify($this->filters, $this->notFilters);
    if ($unifiedFilter !== null) {
        $this->query = new \Elastica\Query\Filtered($this->query, $unifiedFilter);
    }

    // Call installBoosts right after we're done munging the query to include filters
    // so any rescores installBoosts adds to the query are done against filtered results.
    $this->installBoosts();

    $query->setQuery($this->query);

    $highlight = $this->resultsType->getHighlightingConfiguration($this->highlightSource);
    if ($highlight) {
        // Fuzzy queries work _terribly_ with the plain highlighter so just drop any field that is
        // forcing the plain highlighter all together. Do this here because this works so badly
        // that no ResultsType should be able to use the plain highlighter for these queries.
        if ($this->fuzzyQuery) {
            $highlight['fields'] = array_filter($highlight['fields'], function ($field) {
                return $field['type'] !== 'plain';
            });
        }
        if (!empty($this->nonTextHighlightQueries)) {
            // We have some phrase_prefix queries, so let's include them in the
            // generated highlight_query.
            $bool = new \Elastica\Query\Bool();
            if ($this->highlightQuery) {
                $bool->addShould($this->highlightQuery);
            }
            foreach ($this->nonTextHighlightQueries as $nonTextHighlightQuery) {
                $bool->addShould($nonTextHighlightQuery);
            }
            $this->highlightQuery = $bool;
        }
        if ($this->highlightQuery) {
            $highlight['highlight_query'] = $this->highlightQuery->toArray();
        }
        $query->setHighlight($highlight);
    }

    if ($this->suggest) {
        $query->setParam('suggest', $this->suggest);
        $query->addParam('stats', 'suggest');
    }
    if ($this->offset) {
        $query->setFrom($this->offset);
    }
    if ($this->limit) {
        $query->setSize($this->limit);
    }

    if ($this->sort != 'relevance') {
        // Clear rescores if we aren't using relevance as the search sort because they aren't used.
        $this->rescore = array();
    }

    if ($this->rescore) {
        // rescore_query has to be in array form before we send it to Elasticsearch but it is way
        // easier to work with if we leave it in query form until now
        $modifiedRescore = array();
        foreach ($this->rescore as $rescore) {
            $rescore['query']['rescore_query'] = $rescore['query']['rescore_query']->toArray();
            $modifiedRescore[] = $rescore;
        }
        $query->setParam('rescore', $modifiedRescore);
    }

    $query->addParam('stats', $type);

    switch ($this->sort) {
        case 'relevance':
            break; // The default
        case 'title_asc':
            $query->setSort(array('title.keyword' => 'asc'));
            break;
        case 'title_desc':
            $query->setSort(array('title.keyword' => 'desc'));
            break;
        case 'incoming_links_asc':
            $query->setSort(array('incoming_links' => array('order' => 'asc', 'missing' => '_first')));
            break;
        case 'incoming_links_desc':
            $query->setSort(array('incoming_links' => array('order' => 'desc', 'missing' => '_last')));
            break;
        default:
            LoggerFactory::getInstance('CirrusSearch')->warning("Invalid sort type: {sort}", array('sort' => $this->sort));
    }

    $queryOptions = array();
    if ($this->config->get('CirrusSearchMoreAccurateScoringMode')) {
        $queryOptions['search_type'] = 'dfs_query_then_fetch';
    }

    switch ($type) {
        case 'regex':
            $poolCounterType = 'CirrusSearch-Regex';
            $queryOptions['timeout'] = $this->config->getElement('CirrusSearchSearchShardTimeout', 'regex');
            break;
        case 'prefix':
            $poolCounterType = 'CirrusSearch-Prefix';
            $queryOptions['timeout'] = $this->config->getElement('CirrusSearchSearchShardTimeout', 'default');
            break;
        default:
            $poolCounterType = 'CirrusSearch-Search';
            $queryOptions['timeout'] = $this->config->getElement('CirrusSearchSearchShardTimeout', 'default');
    }
    $this->connection->setTimeout($queryOptions['timeout']);

    // Setup the search
    $pageType = $this->connection->getPageType($this->indexBaseName, $indexType);
    $search = $pageType->createSearch($query, $queryOptions);
    foreach ($extraIndexes as $i) {
        $search->addIndex($i);
    }

    $description = "{queryType} search for '{query}'";
    $logContext = array('queryType' => $type, 'query' => $for);

    if ($this->returnQuery) {
        // Caller only wants to see what would be sent; do not run the search.
        return Status::newGood(array(
            'description' => $this->formatDescription($description, $logContext),
            'path' => $search->getPath(),
            'params' => $search->getOptions(),
            'query' => $query->toArray(),
            'options' => $queryOptions,
        ));
    }

    if ($this->returnExplain && $this->returnResult) {
        $query->setExplain(true);
    }

    // Perform the search
    $searcher = $this;
    $user = $this->user;
    $result = Util::doPoolCounterWork(
        $poolCounterType,
        $this->user,
        function () use ($searcher, $search, $description, $logContext) {
            try {
                $searcher->start($description, $logContext);
                return $searcher->success($search->search());
            } catch (\Elastica\Exception\ExceptionInterface $e) {
                return $searcher->failure($e);
            }
        },
        function ($error, $key, $userName) use ($type, $description, $user, $logContext) {
            $forUserName = $userName ? "for {userName} " : '';
            // Pass the actual pool key as the value of the {key} placeholder; the
            // literal string 'key' would hide which pool rejected the request.
            LoggerFactory::getInstance('CirrusSearch')->warning(
                "Pool error {$forUserName}on key {key} during {$description}: {error}",
                $logContext + array('userName' => $userName, 'key' => $key, 'error' => $error)
            );

            if ($error === 'pool-queuefull') {
                if (strpos($key, 'nowait:CirrusSearch:_per_user') === 0) {
                    $loggedIn = $user->isLoggedIn() ? 'logged-in' : 'anonymous';
                    return Status::newFatal("cirrussearch-too-busy-for-you-{$loggedIn}-error");
                }
                if ($type === 'regex') {
                    return Status::newFatal('cirrussearch-regex-too-busy-error');
                }
                return Status::newFatal('cirrussearch-too-busy-error');
            }
            return Status::newFatal('cirrussearch-backend-error');
        }
    );

    if ($result->isOK()) {
        $responseData = $result->getValue()->getResponse()->getData();

        if ($this->returnResult) {
            // Caller asked for the raw response data rather than transformed results.
            return Status::newGood(array(
                'description' => $this->formatDescription($description, $logContext),
                'path' => $search->getPath(),
                'result' => $responseData,
            ));
        }

        $result->setResult(true, $this->resultsType->transformElasticsearchResult(
            $this->suggestPrefixes,
            $this->suggestSuffixes,
            $result->getValue(),
            $this->searchContext->isSearchContainedSyntax()
        ));

        if (isset($responseData['timed_out']) && $responseData['timed_out']) {
            LoggerFactory::getInstance('CirrusSearch')->warning("{$description} timed out and only returned partial results!", $logContext);
            if ($result->getValue()->numRows() === 0) {
                // A timeout that produced no rows is reported as a backend error.
                return Status::newFatal('cirrussearch-backend-error');
            } else {
                $result->warning('cirrussearch-timed-out');
            }
        }
    }

    return $result;
}
/**
 * Builds the Elastica query for a search request.
 *
 * Keywords (if any) become an AND query string over 'title' and
 * 'description'; a selected location (if any) becomes a geo-distance filter
 * and switches the sort to nearest-first. With neither, everything matches.
 * Paging is applied only when both $start and $limit are given.
 *
 * @param array $params Reads 'keywords', 'location_id' and 'radius'
 * @param int|null $start Offset of the first hit
 * @param int|null $limit Maximum number of hits
 * @return ElasticaQuery
 */
private function prepareQuery($params, $start = null, $limit = null)
{
    $textQuery = null;
    $geoFilter = null;
    // Default ordering: best score first.
    $ordering = ['_score' => 'desc'];

    // Keywords are searched in both title and description
    if (!empty($params['keywords'])) {
        $textQuery = new QueryString($params['keywords']);
        $textQuery->setDefaultOperator('AND')->setFields(['title', 'description']);
    }

    // Add a location filter if a location was selected from autosuggest
    if (!empty($params['location_id'])) {
        $place = Location::find($params['location_id']);

        $centre = ['lat' => $place->lat, 'lon' => $place->lon];
        $geoFilter = new GeoDistance('location', $centre, $params['radius'] . 'mi');

        // Sort by nearest hit
        $ordering = [
            '_geo_distance' => [
                'jobs.location' => [(float) $place->lon, (float) $place->lat],
                'order' => 'asc',
                'unit' => 'mi',
            ],
        ];
    }

    // Neither keyword nor location supplied: return everything
    if (empty($params['keywords']) && empty($params['location_id'])) {
        $textQuery = new MatchAll();
    }

    // Combine query and filter into a filtered query
    $filtered = new Filtered($textQuery, $geoFilter);
    $elasticaQuery = new ElasticaQuery($filtered);
    $elasticaQuery->addSort($ordering);

    // Offset and limit
    if ($start !== null && $limit !== null) {
        $elasticaQuery->setFrom($start)->setSize($limit);
    }

    // Highlight configuration
    $elasticaQuery->setHighlight([
        'order' => 'score',
        'fields' => [
            'title' => ['fragment_size' => 100],
            'description' => ['fragment_size' => 200],
        ],
    ]);

    return $elasticaQuery;
}
/**
 * Simple query-string search on the "_log" index.
 *
 * The words are normalised, searched with the default operator "and" and
 * highlighted on the "body" field. Either the date/user/context aggregation
 * is attached or paging is applied, never both.
 *
 * @param string  $operator    'And' or 'Or'. Not read by this method: the default
 *                             operator is always set to "and".
 * @param string  $words       data
 * @param integer $start       the beginning of the paging
 * @param integer $limit       the interval of the paging
 * @param array   $names_types the restrictive type(s) where the search takes place.
 * @param bool    $aggregation whether the search is aggregated or not.
 *
 * @return \Elastica\ResultSet
 */
function searchQueryString($operator, $words, $start = 0, $limit = 30, $names_types = null, $aggregation = false)
{
    $normalized_words = CSearch::normalizeEncoding($words);

    // Query-string query over the normalised words.
    $string_query = new Elastica\Query\QueryString($normalized_words);
    $string_query->setDefaultOperator("and");

    // The query object that is actually sent.
    $query = new Elastica\Query($string_query);

    if (!$aggregation) {
        // Pagination: where to start and how many hits to return.
        $query->setFrom($start);
        $query->setLimit($limit);
    } else {
        // Aggregate by date, then by user within each date, then by context
        // ("_type") within each user.
        $date_agg    = new CSearchAggregation("Terms", "date_log", "date", 10);
        $user_agg    = new CSearchAggregation("Terms", "user_id", "user_id", 10);
        $context_agg = new CSearchAggregation("Terms", "contexte", "_type", 10);

        $user_agg->_aggregation->addAggregation($context_agg->_aggregation);
        $date_agg->_aggregation->addAggregation($user_agg->_aggregation);
        $query->addAggregation($date_agg->_aggregation);
    }

    // Highlight
    $body_highlight = array(
        "pre_tags"            => array(" <em> <strong> "),
        "post_tags"           => array(" </strong> </em>"),
        "fragment_size"       => 80,
        "number_of_fragments" => 10,
    );
    $query->setHighlight(array("fields" => array("body" => $body_highlight)));

    // Search on the log index.
    $index_name = CAppUI::conf("search index_name") . "_log";
    $log_index  = $this->loadIndex($index_name);

    $search = new \Elastica\Search($this->_client);
    $search->addIndex($log_index);
    if ($names_types) {
        $search->addTypes($names_types);
    }

    return $search->search($query);
}
/**
 * Builds the automatic search query for a favourite within a stay.
 *
 * Both the stay query and the favourite's words (on "body" and "title")
 * must match. The first 30 hits are requested and "body" is highlighted
 * using the favourite's words.
 *
 * @param CSearchThesaurusEntry $favori The favourite
 * @param CSejour               $sejour The stay
 *
 * @return Query
 */
function querySearchAuto($favori, $sejour)
{
    // Query restricting the search to the stay
    $stay_query = new Elastica\Query\QueryString();
    $stay_query->setQuery($this->constructWordsWithSejour($sejour->_id));
    $stay_query->setDefaultOperator("and");

    // Query for the favourite's words
    $words_query = new Elastica\Query\QueryString();
    $words_query->setQuery($this->normalizeEncoding($favori->entry));
    $words_query->setFields(array("body", "title"));
    $words_query->setDefaultOperator("and");

    // Both queries are mandatory
    $bool_query = new Elastica\Query\Bool();
    $bool_query->addMust($stay_query);
    $bool_query->addMust($words_query);

    $query = new Query($bool_query);

    // Pagination: start at the first hit, return at most 30
    $query->setFrom(0);
    $query->setLimit(30);

    // Highlight
    $highlight_query = array(
        "bool" => array(
            "must" => array(
                "match" => array(
                    "body" => array(
                        "query" => $this->normalizeEncoding($favori->entry),
                    ),
                ),
            ),
            "minimum_should_match" => 1,
        ),
    );
    $body_highlight = array(
        "fragment_size"       => 50,
        "number_of_fragments" => 3,
        "highlight_query"     => $highlight_query,
    );
    $query->setHighlight(
        array(
            "pre_tags"  => array(" <em> <strong> "),
            "post_tags" => array(" </strong> </em>"),
            "fields"    => array("body" => $body_highlight),
        )
    );

    return $query;
}