setHighlight() public method

Sets highlight arguments for the query.
public setHighlight ( array $highlightArgs )
$highlightArgs array All highlight arguments to set on the query
 /**
  * Search revisions with provided term.
  *
  * Builds a query-string search restricted to the 'revisions.text' field,
  * adds a terms aggregation built from the search terms, configures
  * highlighting and runs the search inside a 'Flow-Search' PoolCounter job.
  *
  * @param string $term Term to search
  * @return Status Good status wrapping the search result, or a fatal status
  *  when the search itself or the pool counter failed
  */
 public function searchText($term)
 {
     // full-text search
     $queryString = new QueryString($term);
     $queryString->setFields(array('revisions.text'));
     $this->query->setQuery($queryString);
     // add aggregation to determine exact amount of matching search terms
     $terms = $this->getTerms($term);
     $this->query->addAggregation($this->termsAggregation($terms));
     // @todo: abstract-away this config? (core/cirrus also has this - share it somehow?)
     $this->query->setHighlight(array(
         'fields' => array(
             static::HIGHLIGHT_FIELD => array(
                 'type' => 'plain',
                 'order' => 'score',
                 'number_of_fragments' => 1,
                 'fragment_size' => 10000,
             ),
         ),
         'pre_tags' => array(static::HIGHLIGHT_PRE),
         'post_tags' => array(static::HIGHLIGHT_POST),
     ));
     // @todo: support insource: queries (and perhaps others)
     $searchable = Connection::getFlowIndex($this->indexBaseName);
     // Narrow the search down to a single type when one was requested
     if ($this->type !== false) {
         $searchable = $searchable->getType($this->type);
     }
     $search = $searchable->createSearch($this->query);
     // @todo: PoolCounter config at PoolCounterSettings-eqiad.php
     // @todo: do we want this class to extend from ElasticsearchIntermediary and use its success & failure methods (like CirrusSearch/Searcher does)?
     // Perform the search
     $work = new PoolCounterWorkViaCallback('Flow-Search', "_elasticsearch", array('doWork' => function () use($search) {
         try {
             $result = $search->search();
             return Status::newGood($result);
         } catch (ExceptionInterface $e) {
             // strpos() returns the match offset, which is 0 (falsy) when the
             // message starts with the needle, so compare against false explicitly.
             if (strpos($e->getMessage(), 'dynamic scripting for [groovy] disabled') !== false) {
                 // known issue with default ES config, let's display a more helpful message
                 return Status::newFatal(new \RawMessage("Couldn't complete search: dynamic scripting needs to be enabled. " . "Please add 'script.disable_dynamic: false' to your elasticsearch.yml"));
             }
             return Status::newFatal('flow-error-search');
         }
     }, 'error' => function (Status $status) {
         // Log the first pool error, but only expose a generic message to the caller
         $errors = $status->getErrorsArray();
         wfLogWarning('Pool error searching Elasticsearch: ' . $errors[0][0]);
         return Status::newFatal('flow-error-search');
     }));
     return $work->execute();
 }
Example #2
0
 /**
  * Runs a match query on the 'text' field for the input of the given search
  * and returns the hits, with highlighted terms wrapped in <mark> tags.
  *
  * The results are also handed to onSearch() before being returned.
  *
  * @param Search $search
  * @return Result[]
  */
 public function search(Search $search)
 {
     // A bool query holding a single mandatory match on the 'text' field
     $boolQuery = new Bool();
     $textMatch = new Match();
     $textMatch->setField('text', $search->getInput());
     $boolQuery->addMust($textMatch);

     $elasticaQuery = new Query();
     $elasticaQuery->setQuery($boolQuery);

     // Highlight the 'text' field using the same bool query
     $highlightArgs = [
         'pre_tags' => ['<mark>'],
         'post_tags' => ['</mark>'],
         'fields' => [
             'text' => [
                 'highlight_query' => [$boolQuery->toArray()],
             ],
         ],
     ];
     $elasticaQuery->setHighlight($highlightArgs);

     // 50 is passed as the second search argument (presumably the result limit)
     $resultSet = $this->getIndex()->search($elasticaQuery, 50);
     $hits = $resultSet->getResults();
     $this->onSearch($search, $hits);

     return $hits;
 }
 /**
  * Indexes two documents sharing the same phrase and checks that a phrase
  * match on 'ruflin' comes back highlighted with the configured tags for
  * both of them.
  *
  * @group functional
  */
 public function testHightlightSearch()
 {
     $index = $this->_createIndex();
     $type = $index->getType('helloworld');
     $phrase = 'My name is ruflin';

     // Two documents that differ only in id and username
     $documents = array(
         new Document(1, array('id' => 1, 'phrase' => $phrase, 'username' => 'hanswurst', 'test' => array('2', '3', '5'))),
         new Document(2, array('id' => 2, 'phrase' => $phrase, 'username' => 'peter', 'test' => array('2', '3', '5'))),
     );
     $type->addDocuments($documents);

     $query = new Query(new Query\MatchPhrase('phrase', 'ruflin'));
     $highlightArgs = array(
         'pre_tags' => array('<em class="highlight">'),
         'post_tags' => array('</em>'),
         'fields' => array(
             'phrase' => array('fragment_size' => 200, 'number_of_fragments' => 1),
         ),
     );
     $query->setHighlight($highlightArgs);

     // The index is refreshed before the search is issued
     $index->refresh();
     $resultSet = $type->search($query);

     $expected = array('phrase' => array(0 => 'My name is <em class="highlight">ruflin</em>'));
     foreach ($resultSet as $hit) {
         $this->assertEquals($expected, $hit->getHighlights());
     }
     $this->assertEquals(2, $resultSet->count());
 }
Example #4
0
 /**
  * Runs a query-string search against the 'amun' index for the submitted
  * search term and assigns the result set and its paging to the template.
  *
  * Returns the result set when a search term was submitted; nothing is
  * searched, assigned or returned otherwise.
  *
  * @httpMethod GET
  * @path /
  */
 public function doIndex()
 {
     $url = new Url($this->base->getSelf());
     // Paging values come straight from the request URL, so normalise them to
     // integers before they reach the query: 1..16 results per page (default 8)
     // and a non-negative offset.
     $count = (int) $url->getParam('count');
     if ($count <= 0) {
         $count = 8;
     }
     $count = min($count, 16);
     $startIndex = max(0, (int) $url->getParam('startIndex'));
     $search = $this->get->search('string');
     if (!empty($search)) {
         // Cap the term at 64 bytes.
         // NOTE(review): substr() counts bytes, so a multi-byte character can be cut at the limit.
         $search = strlen($search) > 64 ? substr($search, 0, 64) : $search;
         $queryString = new QueryString();
         //$queryString->setDefaultOperator('AND');
         $queryString->setQuery($search);
         $query = new Query();
         $query->setQuery($queryString);
         $query->setFrom($startIndex);
         $query->setLimit($count);
         // Empty objects request highlighting of 'title' and 'content' without per-field options
         $query->setHighlight(array(
             'pre_tags' => array('<mark>'),
             'post_tags' => array('</mark>'),
             'fields' => array('title' => new \stdClass(), 'content' => new \stdClass()),
         ));
         // get elasticsearch client
         $client = new Client(array('host' => $this->registry['search.host'], 'port' => $this->registry['search.port']));
         $index = $client->getIndex('amun');
         $searchResult = $index->search($query);
         $result = new ResultSet($searchResult->getTotalHits(), $startIndex, $count);
         foreach ($searchResult as $row) {
             $data = $row->getData();
             $data['url'] = $this->config['psx_url'] . '/' . $this->config['psx_dispatch'] . $data['path'];
             // The '@' prefix makes DateTime parse the stored value as a Unix timestamp
             $data['date'] = new DateTime('@' . $data['date']);
             // if we have a highlight, overwrite the title or content with it
             $highlights = $row->getHighlights();
             if (isset($highlights['title'])) {
                 $data['title'] = implode(' ... ', $highlights['title']);
             }
             if (isset($highlights['content'])) {
                 $data['content'] = implode(' ... ', $highlights['content']);
             }
             $result->addData($data);
         }
         $this->template->assign('resultSearch', $result);
         $paging = new Paging($url, $result);
         $this->template->assign('pagingSearch', $paging, 0);
         return $result;
     }
 }
 /**
  * Powers full-text-like searches including prefix search.
  *
  * Assembles the final Elastica query from the state collected on this
  * object (query, filters, highlighting, suggest, paging, rescores and sort),
  * runs it under a pool counter and transforms a successful response through
  * the configured results type.
  *
  * @param string $type Kind of search. 'regex' and 'prefix' pick their own pool
  *  counter type (and 'regex' its own shard timeout); anything else uses the
  *  defaults. The value is also added to the query as a 'stats' param.
  * @param string $for Logged as the 'query' value of the log context
  * @return Status(mixed) results from the query transformed by the resultsType
  */
 private function search($type, $for)
 {
     // Fold the non-text queries into the main query as mandatory clauses
     if ($this->nonTextQueries) {
         $bool = new \Elastica\Query\Bool();
         if ($this->query !== null) {
             $bool->addMust($this->query);
         }
         foreach ($this->nonTextQueries as $nonTextQuery) {
             $bool->addMust($nonTextQuery);
         }
         $this->query = $bool;
     }
     if ($this->resultsType === null) {
         $this->resultsType = new FullTextResultsType(FullTextResultsType::HIGHLIGHT_ALL);
     }
     // Default null queries now so the rest of the method can assume it is not null.
     if ($this->query === null) {
         $this->query = new \Elastica\Query\MatchAll();
     }
     // Fully qualified like every other Elastica reference in this method
     $query = new \Elastica\Query();
     $query->setParam('_source', $this->resultsType->getSourceFiltering());
     $query->setParam('fields', $this->resultsType->getFields());
     $extraIndexes = array();
     $indexType = $this->pickIndexTypeFromNamespaces();
     if ($this->namespaces) {
         $extraIndexes = $this->getAndFilterExtraIndexes();
         if ($this->needNsFilter($extraIndexes, $indexType)) {
             $this->filters[] = new \Elastica\Filter\Terms('namespace', $this->namespaces);
         }
     }
     // Wrap $this->query in a filtered query if there are any filters
     $unifiedFilter = Filters::unify($this->filters, $this->notFilters);
     if ($unifiedFilter !== null) {
         $this->query = new \Elastica\Query\Filtered($this->query, $unifiedFilter);
     }
     // Call installBoosts right after we're done munging the query to include filters
     // so any rescores installBoosts adds to the query are done against filtered results.
     $this->installBoosts();
     $query->setQuery($this->query);
     $highlight = $this->resultsType->getHighlightingConfiguration($this->highlightSource);
     if ($highlight) {
         // Fuzzy queries work _terribly_ with the plain highlighter so just drop any field that is forcing
         // the plain highlighter all together.  Do this here because this works so badly that no
         // ResultsType should be able to use the plain highlighter for these queries.
         if ($this->fuzzyQuery) {
             $highlight['fields'] = array_filter($highlight['fields'], function ($field) {
                 // Fields without an explicit type are kept; isset() avoids an undefined index notice
                 return !isset($field['type']) || $field['type'] !== 'plain';
             });
         }
         if (!empty($this->nonTextHighlightQueries)) {
             // We have some phrase_prefix queries, so let's include them in the
             // generated highlight_query.
             $bool = new \Elastica\Query\Bool();
             if ($this->highlightQuery) {
                 $bool->addShould($this->highlightQuery);
             }
             foreach ($this->nonTextHighlightQueries as $nonTextHighlightQuery) {
                 $bool->addShould($nonTextHighlightQuery);
             }
             $this->highlightQuery = $bool;
         }
         if ($this->highlightQuery) {
             $highlight['highlight_query'] = $this->highlightQuery->toArray();
         }
         $query->setHighlight($highlight);
     }
     if ($this->suggest) {
         $query->setParam('suggest', $this->suggest);
         $query->addParam('stats', 'suggest');
     }
     if ($this->offset) {
         $query->setFrom($this->offset);
     }
     if ($this->limit) {
         $query->setSize($this->limit);
     }
     if ($this->sort != 'relevance') {
         // Clear rescores if we aren't using relevance as the search sort because they aren't used.
         $this->rescore = array();
     }
     if ($this->rescore) {
         // rescore_query has to be in array form before we send it to Elasticsearch but it is way easier to work
         // with if we leave it in query form until now
         $modifiedRescore = array();
         foreach ($this->rescore as $rescore) {
             $rescore['query']['rescore_query'] = $rescore['query']['rescore_query']->toArray();
             $modifiedRescore[] = $rescore;
         }
         $query->setParam('rescore', $modifiedRescore);
     }
     $query->addParam('stats', $type);
     switch ($this->sort) {
         case 'relevance':
             // The default: no explicit sort is set on the query
             break;
         case 'title_asc':
             $query->setSort(array('title.keyword' => 'asc'));
             break;
         case 'title_desc':
             $query->setSort(array('title.keyword' => 'desc'));
             break;
         case 'incoming_links_asc':
             $query->setSort(array('incoming_links' => array('order' => 'asc', 'missing' => '_first')));
             break;
         case 'incoming_links_desc':
             $query->setSort(array('incoming_links' => array('order' => 'desc', 'missing' => '_last')));
             break;
         default:
             LoggerFactory::getInstance('CirrusSearch')->warning("Invalid sort type: {sort}", array('sort' => $this->sort));
     }
     $queryOptions = array();
     if ($this->config->get('CirrusSearchMoreAccurateScoringMode')) {
         $queryOptions['search_type'] = 'dfs_query_then_fetch';
     }
     // Pick the pool counter type and the shard timeout from the kind of search
     switch ($type) {
         case 'regex':
             $poolCounterType = 'CirrusSearch-Regex';
             $queryOptions['timeout'] = $this->config->getElement('CirrusSearchSearchShardTimeout', 'regex');
             break;
         case 'prefix':
             $poolCounterType = 'CirrusSearch-Prefix';
             $queryOptions['timeout'] = $this->config->getElement('CirrusSearchSearchShardTimeout', 'default');
             break;
         default:
             $poolCounterType = 'CirrusSearch-Search';
             $queryOptions['timeout'] = $this->config->getElement('CirrusSearchSearchShardTimeout', 'default');
     }
     $this->connection->setTimeout($queryOptions['timeout']);
     // Setup the search
     $pageType = $this->connection->getPageType($this->indexBaseName, $indexType);
     $search = $pageType->createSearch($query, $queryOptions);
     foreach ($extraIndexes as $i) {
         $search->addIndex($i);
     }
     // The {placeholders} are filled from $logContext by the logger / formatDescription()
     $description = "{queryType} search for '{query}'";
     $logContext = array('queryType' => $type, 'query' => $for);
     // Only describe the request that would be sent, without running it
     if ($this->returnQuery) {
         return Status::newGood(array('description' => $this->formatDescription($description, $logContext), 'path' => $search->getPath(), 'params' => $search->getOptions(), 'query' => $query->toArray(), 'options' => $queryOptions));
     }
     if ($this->returnExplain && $this->returnResult) {
         $query->setExplain(true);
     }
     // Perform the search
     $searcher = $this;
     $user = $this->user;
     $result = Util::doPoolCounterWork($poolCounterType, $this->user, function () use($searcher, $search, $description, $logContext) {
         try {
             $searcher->start($description, $logContext);
             return $searcher->success($search->search());
         } catch (\Elastica\Exception\ExceptionInterface $e) {
             return $searcher->failure($e);
         }
     }, function ($error, $key, $userName) use($type, $description, $user, $logContext) {
         $forUserName = $userName ? "for {userName} " : '';
         // Log the actual pool key that failed, not a fixed placeholder string
         LoggerFactory::getInstance('CirrusSearch')->warning("Pool error {$forUserName}on key {key} during {$description}:  {error}", $logContext + array('userName' => $userName, 'key' => $key, 'error' => $error));
         if ($error === 'pool-queuefull') {
             if (strpos($key, 'nowait:CirrusSearch:_per_user') === 0) {
                 $loggedIn = $user->isLoggedIn() ? 'logged-in' : 'anonymous';
                 return Status::newFatal("cirrussearch-too-busy-for-you-{$loggedIn}-error");
             }
             if ($type === 'regex') {
                 return Status::newFatal('cirrussearch-regex-too-busy-error');
             }
             return Status::newFatal('cirrussearch-too-busy-error');
         }
         return Status::newFatal('cirrussearch-backend-error');
     });
     if ($result->isOK()) {
         $responseData = $result->getValue()->getResponse()->getData();
         // Hand back the raw response data instead of transformed results
         if ($this->returnResult) {
             return Status::newGood(array('description' => $this->formatDescription($description, $logContext), 'path' => $search->getPath(), 'result' => $responseData));
         }
         $result->setResult(true, $this->resultsType->transformElasticsearchResult($this->suggestPrefixes, $this->suggestSuffixes, $result->getValue(), $this->searchContext->isSearchContainedSyntax()));
         if (isset($responseData['timed_out']) && $responseData['timed_out']) {
             LoggerFactory::getInstance('CirrusSearch')->warning("{$description} timed out and only returned partial results!", $logContext);
             // A timeout with no rows at all is reported as a backend error,
             // partial results only get a warning attached.
             if ($result->getValue()->numRows() === 0) {
                 return Status::newFatal('cirrussearch-backend-error');
             } else {
                 $result->warning('cirrussearch-timed-out');
             }
         }
     }
     return $result;
 }
Example #6
0
 /**
  * Builds the filtered, sorted and highlighted query for the given search
  * parameters.
  *
  * Reads 'keywords', 'location_id' and 'radius' from $params. Paging is only
  * applied when both $start and $limit are given.
  *
  * @return ElasticaQuery
  */
 private function prepareQuery($params, $start = null, $limit = null)
 {
     $hasKeywords = !empty($params['keywords']);
     $hasLocation = !empty($params['location_id']);

     $textQuery = null;
     $geoFilter = null;
     // Order by relevance unless a location is selected below
     $ordering = ['_score' => 'desc'];

     if ($hasKeywords) {
         // Keywords have to match in the title or in the description
         $textQuery = new QueryString($params['keywords']);
         $textQuery->setDefaultOperator('AND')->setFields(['title', 'description']);
     }

     if ($hasLocation) {
         // A location picked from autosuggest limits hits to the given radius (in miles)
         $place = Location::find($params['location_id']);
         $centre = ['lat' => $place->lat, 'lon' => $place->lon];
         $geoFilter = new GeoDistance('location', $centre, $params['radius'] . 'mi');
         // ... and the nearest hit comes first
         $ordering = [
             '_geo_distance' => [
                 'jobs.location' => [(float) $place->lon, (float) $place->lat],
                 'order' => 'asc',
                 'unit' => 'mi',
             ],
         ];
     }

     if (!$hasKeywords && !$hasLocation) {
         // Nothing to narrow the search down with: match everything
         $textQuery = new MatchAll();
     }

     // Query and filter are always combined in a filtered query
     $elasticaQuery = new ElasticaQuery(new Filtered($textQuery, $geoFilter));
     $elasticaQuery->addSort($ordering);

     if ($start !== null && $limit !== null) {
         $elasticaQuery->setFrom($start)->setSize($limit);
     }

     $highlightArgs = [
         'order' => 'score',
         'fields' => [
             'title' => ['fragment_size' => 100],
             'description' => ['fragment_size' => 200],
         ],
     ];
     $elasticaQuery->setHighlight($highlightArgs);

     return $elasticaQuery;
 }
Example #7
0
 /**
  * Simple query-string search on the log index.
  *
  * The words are always combined with the "and" operator; $operator is not
  * read by this method. $start and $limit are only applied when
  * $aggregation is false.
  *
  * @param string  $operator    'And' or 'Or' (currently unused)
  * @param string  $words       text to search for
  * @param integer $start       offset of the first result of the page
  * @param integer $limit       number of results per page
  * @param array   $names_types type(s) the search is restricted to, if any
  * @param bool    $aggregation whether to aggregate the results instead of paging them
  *
  * @return \Elastica\ResultSet
  */
 function searchQueryString($operator, $words, $start = 0, $limit = 30, $names_types = null, $aggregation = false)
 {
     $normalized = CSearch::normalizeEncoding($words);

     // A query-string query wrapped in the actual search query
     $string_query = new Elastica\Query\QueryString($normalized);
     $string_query->setDefaultOperator("and");
     $query = new Elastica\Query($string_query);

     if (!$aggregation) {
         // Pagination: where to start and how many results to return
         $query->setFrom($start);
         $query->setLimit($limit);
     } else {
         // Aggregate by log date first, then by user and finally by context
         // (the document type), each level nested in the previous one.
         $by_date = new CSearchAggregation("Terms", "date_log", "date", 10);
         $by_user = new CSearchAggregation("Terms", "user_id", "user_id", 10);
         $by_context = new CSearchAggregation("Terms", "contexte", "_type", 10);
         $by_user->_aggregation->addAggregation($by_context->_aggregation);
         $by_date->_aggregation->addAggregation($by_user->_aggregation);
         $query->addAggregation($by_date->_aggregation);
     }

     // Highlight matches found in the body
     $body_highlight = array(
         "pre_tags" => array(" <em> <strong> "),
         "post_tags" => array(" </strong> </em>"),
         "fragment_size" => 80,
         "number_of_fragments" => 10,
     );
     $query->setHighlight(array("fields" => array("body" => $body_highlight)));

     // Search on the log index
     $index_name = CAppUI::conf("search index_name") . "_log";
     $log_index = $this->loadIndex($index_name);
     $search = new \Elastica\Search($this->_client);
     $search->addIndex($log_index);
     if ($names_types) {
         $search->addTypes($names_types);
     }

     return $search->search($query);
 }
Example #8
0
 /**
  * Builds the automatic search query for a thesaurus entry within a stay.
  *
  * Both the stay clause and the entry clause are mandatory; the first 30
  * results are requested and matches in the body are highlighted.
  *
  * @param CSearchThesaurusEntry $favori The thesaurus entry (favourite)
  * @param CSejour               $sejour The stay
  *
  * @return Query
  */
 function querySearchAuto($favori, $sejour)
 {
     $must_clauses = new Elastica\Query\Bool();

     // Clause on the stay
     $stay_clause = new Elastica\Query\QueryString();
     $stay_clause->setQuery($this->constructWordsWithSejour($sejour->_id));
     $stay_clause->setDefaultOperator("and");
     $must_clauses->addMust($stay_clause);

     // Clause on the favourite entry, looked up in the body and the title
     $entry_clause = new Elastica\Query\QueryString();
     $entry_clause->setQuery($this->normalizeEncoding($favori->entry));
     $entry_clause->setFields(array("body", "title"));
     $entry_clause->setDefaultOperator("and");
     $must_clauses->addMust($entry_clause);

     $query = new Query($must_clauses);

     // Pagination: start at the first result, return at most 30
     $query->setFrom(0);
     $query->setLimit(30);

     // Highlight the body using a dedicated match query on the entry text
     $highlight_query = array(
         "bool" => array(
             "must" => array(
                 "match" => array(
                     "body" => array("query" => $this->normalizeEncoding($favori->entry)),
                 ),
             ),
             "minimum_should_match" => 1,
         ),
     );
     $body_highlight = array(
         "fragment_size" => 50,
         "number_of_fragments" => 3,
         "highlight_query" => $highlight_query,
     );
     $query->setHighlight(
         array(
             "pre_tags" => array(" <em> <strong> "),
             "post_tags" => array(" </strong> </em>"),
             "fields" => array("body" => $body_highlight),
         )
     );

     return $query;
 }