Example #1
0
 /**
  * Build the Elastica query for a search request.
  *
  * Keys read from $params (all optional):
  *  - 'keywords':    query-string text matched against title and description (AND operator)
  *  - 'location_id': id handed to Location::find(); when a location is found the hits are
  *                   filtered by distance and sorted nearest-first
  *  - 'radius':      distance around the location, suffixed with 'mi'
  *
  * When neither a keyword query nor a location filter can be built, all documents match.
  *
  * @param array    $params search criteria as described above
  * @param int|null $start  offset of the first hit; only applied when $limit is also given
  * @param int|null $limit  maximum number of hits; only applied when $start is also given
  *
  * @return ElasticaQuery
  */
 private function prepareQuery($params, $start = null, $limit = null)
 {
     $query = null;
     $filter = null;
     $sort = ['_score' => 'desc'];
     // We'd like to search in both title and description for keywords
     if (!empty($params['keywords'])) {
         $query = new QueryString($params['keywords']);
         $query->setDefaultOperator('AND')->setFields(['title', 'description']);
     }
     // Add location filter if a location is selected from autosuggest
     $location = !empty($params['location_id']) ? Location::find($params['location_id']) : null;
     // A stale or unknown location id yields no record; skip the geo filter instead of
     // dereferencing a missing location.
     if ($location) {
         $filter = new GeoDistance('location', ['lat' => $location->lat, 'lon' => $location->lon], $params['radius'] . 'mi');
         // Sort by nearest hit
         $sort = ['_geo_distance' => ['jobs.location' => [(float) $location->lon, (float) $location->lat], 'order' => 'asc', 'unit' => 'mi']];
     }
     // If neither a keyword query nor a location filter could be built, then return all
     if ($query === null && $filter === null) {
         $query = new MatchAll();
     }
     // We need a filtered query
     $elasticaQuery = new ElasticaQuery(new Filtered($query, $filter));
     $elasticaQuery->addSort($sort);
     // Offset and limits
     if (!is_null($start) && !is_null($limit)) {
         $elasticaQuery->setFrom($start)->setSize($limit);
     }
     // Set up the highlight
     $elasticaQuery->setHighlight(['order' => 'score', 'fields' => ['title' => ['fragment_size' => 100], 'description' => ['fragment_size' => 200]]]);
     return $elasticaQuery;
 }
 /**
  * A CustomFiltersScore built from only an inner query must serialise to a
  * 'custom_filters_score' entry that carries the inner query's array form.
  */
 public function testConstructor()
 {
     $innerQuery = new QueryString('elastica');
     $scoreQuery = new CustomFiltersScore($innerQuery);

     $expectedArray = array(
         'custom_filters_score' => array(
             'query' => $innerQuery->toArray(),
         ),
     );

     $this->assertEquals($expectedArray, $scoreQuery->toArray());
 }
 /**
  * Search the 'StanhomeRhBundle:Customer' repository on the 'nom' field.
  *
  * @param string $searchString   query-string expression to look for
  * @param mixed  $maxNbResponses forwarded as the second argument of the repository's find()
  * @return mixed whatever the repository's find() returns
  */
 public function search($searchString, $maxNbResponses)
 {
     $nameQuery = new QueryString();
     $nameQuery->setQuery($searchString);
     $nameQuery->setFields(array('nom'));

     $customers = $this->searchService->getRepository('StanhomeRhBundle:Customer');

     return $customers->find($nameQuery, $maxNbResponses);
 }
 /**
  * Run a query-string search (default operator AND) over $this->fields in the
  * repository named by $this->repository.
  *
  * @param string   $searchString query-string expression
  * @param int|null $limitNumber  when not null, forwarded as the second argument of find()
  * @return mixed whatever the repository's find() returns
  */
 public function search($searchString, $limitNumber = null)
 {
     $fieldQuery = new QueryString();
     $fieldQuery->setQuery($searchString);
     $fieldQuery->setDefaultOperator('AND');
     $fieldQuery->setFields($this->fields);

     $repo = $this->searchService->getRepository($this->repository);

     // Only pass the limit along when the caller supplied one.
     return $limitNumber === null
         ? $repo->find($fieldQuery)
         : $repo->find($fieldQuery, $limitNumber);
 }
 /**
  * Render the search page for the 'q' query parameter.
  *
  * A missing or empty 'q' raises the not-found exception. If the finder throws an
  * HttpException, the page is still rendered with an empty result collection,
  * 'search_down' set to true and 'last_search' replaced by 'Search is down'.
  *
  * @param Request $request
  * @return mixed result of render()
  */
 public function searchAction(Request $request)
 {
     $lastSearch = $request->query->get('q');
     if (!$lastSearch) {
         throw $this->createNotFoundException('No query provided.');
     }

     $query = new QueryString($lastSearch);
     $query->setDefaultOperator('AND');

     $searchDown = false;
     try {
         $hits = $this->get('fos_elastica.finder.accard')->findHybrid($query, 25);
         $collection = new SearchCollection($query, $hits);
     } catch (HttpException $e) {
         // Search backend unreachable: show the page anyway, flagged as down.
         $searchDown = true;
         $lastSearch = 'Search is down';
         $collection = new SearchCollection($query, array());
     }

     $viewData = array(
         'last_search' => $lastSearch,
         'results' => $collection,
         'search_down' => $searchDown,
     );

     return $this->render('AccardWebBundle:Frontend:search.html.twig', $viewData);
 }
Example #6
0
 /**
  * Full-text search over the 'amun' Elasticsearch index.
  *
  * Reads 'count' (page size, default 8, capped at 16) and 'startIndex' (offset)
  * from the current URL and the search term from the 'search' GET parameter.
  * The term is truncated to 64 bytes. Highlighted title/content fragments
  * replace the stored values in each row.
  *
  * Returns the populated ResultSet, or nothing when no search term was given.
  *
  * @httpMethod GET
  * @path /
  */
 public function doIndex()
 {
     $url = new Url($this->base->getSelf());
     // Normalise the paging parameters to integers so that null, negative or
     // non-numeric request values never reach Elasticsearch.
     $count = (int) $url->getParam('count');
     $count = $count > 0 ? min($count, 16) : 8;
     $startIndex = max(0, (int) $url->getParam('startIndex'));
     $search = $this->get->search('string');
     if (empty($search)) {
         return null;
     }
     // NOTE(review): byte-wise truncation; a multi-byte character may be cut at the boundary.
     $search = strlen($search) > 64 ? substr($search, 0, 64) : $search;
     $queryString = new QueryString();
     //$queryString->setDefaultOperator('AND');
     $queryString->setQuery($search);
     $query = new Query();
     $query->setQuery($queryString);
     $query->setFrom($startIndex);
     $query->setLimit($count);
     $query->setHighlight(array('pre_tags' => array('<mark>'), 'post_tags' => array('</mark>'), 'fields' => array('title' => new \stdClass(), 'content' => new \stdClass())));
     // get elasticsearch client
     $client = new Client(array('host' => $this->registry['search.host'], 'port' => $this->registry['search.port']));
     $index = $client->getIndex('amun');
     $searchResult = $index->search($query);
     $result = new ResultSet($searchResult->getTotalHits(), $startIndex, $count);
     foreach ($searchResult as $row) {
         $data = $row->getData();
         $data['url'] = $this->config['psx_url'] . '/' . $this->config['psx_dispatch'] . $data['path'];
         // The stored date is used as a unix timestamp ('@' prefix).
         $data['date'] = new DateTime('@' . $data['date']);
         // if we have a highlight, overwrite the title or content
         $highlights = $row->getHighlights();
         if (isset($highlights['title'])) {
             $data['title'] = implode(' ... ', $highlights['title']);
         }
         if (isset($highlights['content'])) {
             $data['content'] = implode(' ... ', $highlights['content']);
         }
         $result->addData($data);
     }
     $this->template->assign('resultSearch', $result);
     $paging = new Paging($url, $result);
     $this->template->assign('pagingSearch', $paging, 0);
     return $result;
 }
 /**
  * Run the search for $this->searchTerms and return one page of results plus paging data.
  *
  * The page number is taken from $_GET['page'] when present. Unless $searchClosed is
  * the string 'true', documents whose status.name is Closed, Rejected or Resolved
  * are excluded.
  *
  * @param string $searchClosed 'true' to include closed/rejected/resolved entries
  * @return array with keys pagesToLinkTo, currentPage, prev, next, totalResults,
  *               startingAtItem, endingAtItem, results, maxScore, aggs
  * @throws Exception
  */
 public function doSearch($searchClosed)
 {
     $this->connection = new ElasticSearchConnection();
     $this->connection->init();
     $this->whereClause = new Query\QueryString();
     $this->whereClause->setQuery($this->searchTerms);
     $this->utility = new Util();
     if (isset($_GET['page'])) {
         // Pages are 1-based; page 0, a negative page or a non-numeric value would
         // produce a negative "from" offset below, so clamp to the first page.
         $this->currentPage = max(1, intval($_GET['page']));
     }
     $this->fieldMapping = $this->configurationManager->getConfiguration(ConfigurationManager::CONFIGURATION_TYPE_SETTINGS, 'WMDB.Forger.SearchTermMapping');
     $elasticaQuery = new Query();
     if ($searchClosed === 'true') {
         $elasticaQuery->setQuery($this->whereClause);
     } else {
         // Hide finished work: exclude the three terminal status names.
         $boolSearch = new Query\Bool();
         $boolSearch->addMust($this->whereClause);
         $boolSearch->addMustNot(['term' => ['status.name' => 'Closed']]);
         $boolSearch->addMustNot(['term' => ['status.name' => 'Rejected']]);
         $boolSearch->addMustNot(['term' => ['status.name' => 'Resolved']]);
         $elasticaQuery->setQuery($boolSearch);
     }
     $elasticaQuery->setSize($this->perPage);
     // Offset of the first item on the current page.
     $elasticaQuery->setFrom($this->currentPage * $this->perPage - $this->perPage);
     $usedFilters = $this->addFilters();
     if ($usedFilters !== false) {
         $elasticaQuery->setPostFilter($usedFilters);
     }
     $this->addAggregations($elasticaQuery);
     $elasticaResultSet = $this->connection->getIndex()->search($elasticaQuery);
     $results = $elasticaResultSet->getResults();
     $maxScore = $elasticaResultSet->getMaxScore();
     $aggs = $elasticaResultSet->getAggregations();
     $this->totalHits = $elasticaResultSet->getTotalHits();
     $out = array('pagesToLinkTo' => $this->getPages(), 'currentPage' => $this->currentPage, 'prev' => $this->currentPage - 1, 'next' => $this->currentPage < ceil($this->totalHits / $this->perPage) ? $this->currentPage + 1 : 0, 'totalResults' => $this->totalHits, 'startingAtItem' => $this->currentPage * $this->perPage - ($this->perPage - 1), 'endingAtItem' => $this->currentPage * $this->perPage, 'results' => $results, 'maxScore' => $maxScore, 'aggs' => $aggs);
     // The last page may be only partially filled.
     if (intval($this->totalHits) <= intval($out['endingAtItem'])) {
         $out['endingAtItem'] = intval($this->totalHits);
     }
     return $out;
 }
Example #8
0
 /**
  * Search the index for one version/language pair.
  *
  * @param string $lang    language part of the type name
  * @param string $version version part of the type name
  * @param array  $options 'query' (default ''), 'page' (default 1) and 'sort'
  *                        (default ['_score']; not read by this method)
  * @return array with keys 'page', 'total' and 'data' (rows of title, url, contents)
  */
 public function search($lang, $version, $options = [])
 {
     $options += ['query' => '', 'page' => 1, 'sort' => ['_score']];

     // This is a bit dangerous, but this class only has one real method.
     $this->name($version . '-' . $lang);

     $query = $this->query();

     $queryString = new QueryString($options['query']);
     $queryString
         ->setPhraseSlop(2)
         ->setFields(['contents', 'title^3'])
         ->setDefaultOperator('AND')
         ->setFuzzyMinSim('0.7');

     $highlightConfig = [
         'pre_tags' => [''],
         'post_tags' => [''],
         'fields' => [
             'contents' => ['fragment_size' => 100, 'number_of_fragments' => 3],
         ],
     ];
     $matchEverything = function ($builder) {
         return $builder->matchAll();
     };

     $query
         ->page($options['page'], 25)
         ->highlight($highlightConfig)
         ->where($matchEverything)
         ->query($queryString);

     $hits = $query->all();

     // Reduce every hit to the three values the caller needs.
     $data = $hits->map(function ($hit) {
         $fragments = $hit->highlights() ? $hit->highlights()['contents'] : '';

         return [
             'title' => $hit->title ?: '',
             'url' => $hit->url,
             'contents' => $fragments,
         ];
     });

     return [
         'page' => $options['page'] ?: 1,
         'total' => $hits->getTotalHits(),
         'data' => $data,
     ];
 }
 /**
  * Search revisions with provided term.
  *
  * Runs a query-string search on 'revisions.text', adds a terms aggregation and
  * highlighting to $this->query, and executes the search through a pool counter.
  *
  * @param string $term Term to search
  * @return Status whatever the pool counter work's execute() yields; the callbacks
  *                here produce a good Status wrapping the search result, or a
  *                fatal Status on failure
  */
 public function searchText($term)
 {
     // full-text search
     $queryString = new QueryString($term);
     $queryString->setFields(array('revisions.text'));
     $this->query->setQuery($queryString);
     // add aggregation to determine exact amount of matching search terms
     $terms = $this->getTerms($term);
     $this->query->addAggregation($this->termsAggregation($terms));
     // @todo: abstract-away this config? (core/cirrus also has this - share it somehow?)
     $this->query->setHighlight(array('fields' => array(static::HIGHLIGHT_FIELD => array('type' => 'plain', 'order' => 'score', 'number_of_fragments' => 1, 'fragment_size' => 10000)), 'pre_tags' => array(static::HIGHLIGHT_PRE), 'post_tags' => array(static::HIGHLIGHT_POST)));
     // @todo: support insource: queries (and perhaps others)
     $searchable = Connection::getFlowIndex($this->indexBaseName);
     if ($this->type !== false) {
         $searchable = $searchable->getType($this->type);
     }
     $search = $searchable->createSearch($this->query);
     // @todo: PoolCounter config at PoolCounterSettings-eqiad.php
     // @todo: do we want this class to extend from ElasticsearchIntermediary and use its success & failure methods (like CirrusSearch/Searcher does)?
     // Perform the search
     $work = new PoolCounterWorkViaCallback('Flow-Search', "_elasticsearch", array('doWork' => function () use($search) {
         try {
             $result = $search->search();
             return Status::newGood($result);
         } catch (ExceptionInterface $e) {
             // strpos() returns 0 (falsy) when the message starts with the needle,
             // so the result must be compared against false explicitly.
             if (strpos($e->getMessage(), 'dynamic scripting for [groovy] disabled') !== false) {
                 // known issue with default ES config, let's display a more helpful message
                 return Status::newFatal(new \RawMessage("Couldn't complete search: dynamic scripting needs to be enabled. " . "Please add 'script.disable_dynamic: false' to your elasticsearch.yml"));
             }
             return Status::newFatal('flow-error-search');
         }
     }, 'error' => function (Status $status) {
         $status = $status->getErrorsArray();
         wfLogWarning('Pool error searching Elasticsearch: ' . $status[0][0]);
         return Status::newFatal('flow-error-search');
     }));
     $result = $work->execute();
     return $result;
 }
Example #10
0
 /**
  * Build the automatic search query for a favourite within a stay.
  *
  * Both clauses are mandatory: one query string built from the stay id and one
  * built from the favourite's entry over the "body" and "title" fields. The query
  * returns the first 30 hits and highlights the body.
  *
  * @param CSearchThesaurusEntry $favori The favourite
  * @param CSejour               $sejour The stay
  *
  * @return Query
  */
 function querySearchAuto($favori, $sejour)
 {
     $must_all = new Elastica\Query\Bool();

     // Clause for the stay
     $stay_clause = new Elastica\Query\QueryString();
     $stay_clause->setQuery($this->constructWordsWithSejour($sejour->_id));
     $stay_clause->setDefaultOperator("and");
     $must_all->addMust($stay_clause);

     // Clause for the favourite's words
     $words_clause = new Elastica\Query\QueryString();
     $words_clause->setQuery($this->normalizeEncoding($favori->entry));
     $words_clause->setFields(array("body", "title"));
     $words_clause->setDefaultOperator("and");
     $must_all->addMust($words_clause);

     $query = new Query($must_all);

     // Pagination: start at the first hit, return at most 30
     $query->setFrom(0);
     $query->setLimit(30);

     // Highlight: body fragments, matched against the favourite's words only
     $body_match = array("match" => array("body" => array("query" => $this->normalizeEncoding($favori->entry))));
     $body_highlight = array(
         "fragment_size" => 50,
         "number_of_fragments" => 3,
         "highlight_query" => array("bool" => array("must" => $body_match, "minimum_should_match" => 1)),
     );
     $query->setHighlight(array(
         "pre_tags" => array(" <em> <strong> "),
         "post_tags" => array(" </strong> </em>"),
         "fields" => array("body" => $body_highlight),
     ));

     return $query;
 }
 /**
  * Send a query-string search to Elasticsearch.
  *
  * Connection details, index and type come from the 'wpels_settings' option.
  *
  * @param string $search_query query-string expression
  * @return array|WP_Error ids of the matching documents, or a WP_Error carrying the
  *                        message of any exception raised along the way
  * @since 0.1
  */
 public function search($search_query)
 {
     try {
         $settings = get_option('wpels_settings');
         $es_client = $this->_create_client($settings);
         if (!$es_client) {
             throw new Exception('Couldn\'t make Elasticsearch Client. Parameter is not enough.');
         }

         $es_index = $es_client->getIndex($settings['index']);
         $es_type = $es_index->getType($settings['type']);

         $query_string = new QueryString();
         $query_string->setQuery($search_query);

         $hits = $es_type->search(Query::create($query_string));

         // Collect the document id of every hit.
         $ids = array();
         foreach ($hits as $hit) {
             $ids[] = $hit->getID();
         }

         return $ids;
     } catch (Exception $e) {
         return new WP_Error('Elasticsearch Search Error', $e->getMessage());
     }
 }
Example #12
0
 /**
  * Search contents.
  *
  * Reads 's' (search text), 'type', 'paged' (1-based page number) and 'perpage'
  * from $_REQUEST and queries the index with a query string (default operator OR).
  * Outside of a search page, or with an empty request, index or search text, the
  * empty default structure is returned.
  *
  * @return array $elasticsearches Combination of the query parameters ('query'), the
  *               total hit count ('total'), the hit types ('types') and the hits
  *               grouped by type ('results'). 'paged' is the zero-based page index.
  *
  * @since 1.5.0
  */
 public function searchContents()
 {
     //Return array
     $return = array('query' => array('search' => '', 'type' => '', 'paged' => 0, 'perpage' => 0), 'total' => 0, 'types' => array(), 'results' => array());
     //Check page
     if (!is_search()) {
         return $return;
     }
     //Get query vars
     $request = isset($_REQUEST) ? $_REQUEST : array();
     $results = array();
     $types = array();
     //Check request
     if (empty($request)) {
         return $return;
     }
     //Get Elasticsearch datas
     $index = $this->getIndex();
     //Check index (empty() also covers null)
     if (empty($index)) {
         return $return;
     }
     //Get search datas
     $search = isset($request['s']) ? str_replace('\\"', '"', $request['s']) : '';
     //Nothing to search for
     if (empty($search)) {
         return $return;
     }
     //Get search datas
     $type = isset($request['type']) ? $request['type'] : '';
     //Zero-based page index, never negative even for paged=0 or garbage input
     $paged = !empty($request['paged']) ? max(0, (int) $request['paged'] - 1) : 0;
     //Page size: request value when it is a positive integer, configured default otherwise
     $perpage = isset($request['perpage']) ? (int) $request['perpage'] : 0;
     if ($perpage <= 0) {
         $perpage = (int) TeaThemeOptions::getOption('posts_per_page', 10);
     }
     //Build query string
     $es_querystring = new QueryString();
     //'And' or 'Or' default: 'Or'
     $es_querystring->setDefaultOperator('OR');
     $es_querystring->setQuery($search);
     //Create the actual search object with some data.
     $es_query = new Query();
     $es_query->setQuery($es_querystring);
     //Start: "from" is a document offset, not a page index, so page N starts at N * perpage
     $es_query->setFrom($paged * $perpage);
     //How many
     $es_query->setLimit($perpage);
     //Search!
     $es_resultset = $index->search($es_query);
     //Retrieve data
     $es_results = $es_resultset->getResults();
     //Check results
     if (empty($es_results)) {
         $return['query']['search'] = str_replace(' ', '+', $search);
         return $return;
     }
     //Iterate to group all hits by type
     foreach ($es_results as $res) {
         $typ = $res->getType();
         //Save type
         $types[$typ] = $typ;
         //Save datas
         $results[$typ][] = array('id' => $res->getId(), 'score' => $res->getScore(), 'source' => $res->getSource());
     }
     //Get total
     $total = $es_resultset->getTotalHits();
     //Return everything
     $return = array('query' => array('search' => str_replace(' ', '+', $search), 'type' => $type, 'paged' => $paged, 'perpage' => $perpage), 'total' => $total, 'types' => $types, 'results' => $results);
     return $return;
 }
Example #13
0
 /**
  * setTimezone() must store the zone under 'time_zone' next to the query text
  * and hand back a QueryString instance.
  *
  * @group unit
  */
 public function testSetTimezone()
 {
     $zone = 'Europe/Paris';
     $rangeText = 'date:[2012 TO 2014]';

     $queryString = new QueryString($rangeText);
     $queryString->setTimezone($zone);

     $expectedArray = array(
         'query_string' => array(
             'query' => $rangeText,
             'time_zone' => $zone,
         ),
     );
     $this->assertEquals($expectedArray, $queryString->toArray());

     $returned = $queryString->setTimezone($zone);
     $this->assertInstanceOf('Elastica\\Query\\QueryString', $returned);
 }
Example #14
0
 /**
  * Search the log index (configured index name suffixed with "_log").
  *
  * The words are unescaped with stripcslashes() and UTF-8 normalised before being
  * used as a query string. At most the first 1000 hits are requested.
  *
  * @param string $operator    default operator of the query string ('And' or 'Or')
  * @param string $words       the words to search for
  * @param mixed  $names_types when truthy, passed to the search's addTypes() to
  *                            restrict the searched types
  *
  * @return \Elastica\ResultSet
  */
 function searchQueryLogDetails($operator, $words, $names_types = null)
 {
     $normalized = CmbString::normalizeUtf8(stripcslashes($words));

     // String query using the requested default operator
     $query_string = new QueryString();
     $query_string->setDefaultOperator($operator);
     $query_string->setQuery($normalized);

     // Wrap it in the actual query object: first hit onwards, 1000 hits at most
     $query = new Query();
     $query->setQuery($query_string);
     $query->setFrom(0);
     $query->setLimit(1000);

     // Target the log index
     $this->_index = $this->loadIndex(CAppUI::conf("search index_name") . "_log");
     $log_search = new \Elastica\Search($this->_client);
     $log_search->addIndex($this->_index);
     if ($names_types) {
         $log_search->addTypes($names_types);
     }

     return $log_search->search($query);
 }
 /**
  * Handing an array to setQuery() is expected to raise an InvalidException.
  *
  * @expectedException \Elastica\Exception\InvalidException
  */
 public function testSetQueryInvalid()
 {
     $notAString = array();

     $queryString = new QueryString();
     $queryString->setQuery($notAString);
 }
 /**
  * Build a single-hit query that locates a locality from the given location data.
  *
  * Keys of $location that are acted upon: 'locality', 'city', 'state', 'zip', plus
  * 'phone' and 'ipaddress', which are only used to derive a state when none of
  * 'state', 'zip' and 'locality' is set. Other keys are ignored. Clauses are added
  * in the iteration order of $location.
  *
  * @param array $location location data as described above
  *
  * @return Elastica\Query $localityQuery bool query wrapped in a query with size 1
  */
 public static function getLocalityQuery($location)
 {
     $query = new Agent\Elastica\Query\BoolQuery();
     // Method name used for the zip clause below; it is never reassigned here.
     $method = 'addMust';
     // Without state, zip and locality, try to derive a state: first from the phone
     // number, then from the IP address.
     if (!isset($location['state']) && !isset($location['zip']) && !isset($location['locality'])) {
         foreach (['phone', 'ipaddress'] as $field) {
             // empty($location['state']) skips the second source once the first one
             // has produced a state.
             if (isset($location[$field]) && empty($location['state'])) {
                 switch ($field) {
                     case 'ipaddress':
                         // Look the address up via self::$geo and take the record's region as state.
                         $geo = self::$geo;
                         $loc = $geo->getRecord($location['ipaddress']);
                         if ($loc instanceof Record) {
                             $state = $loc->getRegion();
                             if ($state) {
                                 $location['state'] = $state;
                             }
                         }
                         break;
                     case 'phone':
                         // The first element of the parsed number is handed to
                         // area_code_to_state() (presumably the area code).
                         $phone = Helper::parse_phonenumber($location['phone'], 'array');
                         if ($phone) {
                             $state = Helper::area_code_to_state($phone[0]);
                             if ($state) {
                                 $location['state'] = $state;
                             }
                         }
                         break;
                 }
             }
         }
     }
     foreach ($location as $field => $value) {
         switch ($field) {
             case 'locality':
                 // Ignored when a zip is present. The value is an array or a
                 // comma-separated string and must have exactly two parts, taken as
                 // latitude and longitude in that order.
                 if (!isset($location['zip'])) {
                     $fields = ['latitude', 'longitude'];
                     $values = is_array($value) ? $value : explode(",", $value);
                     $latlon = count($values) == 2 ? array_combine($fields, $values) : false;
                     if ($latlon) {
                         // Required nested query on "location" matching both coordinates.
                         $path = "location";
                         $nested = new Elastica\Query\Nested();
                         $nested->setPath($path);
                         $bool = new Elastica\Query\BoolQuery();
                         foreach ($latlon as $dim => $coord) {
                             $bool->addMust(new Elastica\Query\Match("{$path}.{$dim}", $coord));
                         }
                         $nested->setQuery($bool);
                         $query->addMust($nested);
                     }
                 }
                 break;
             case 'city':
                 // Optional (should) clause, skipped when a locality is given.
                 if (!isset($location['locality'])) {
                     $query->addShould(new Elastica\Query\Match($field, $value));
                 }
                 break;
             case 'state':
                 // Skipped when a locality is given. Each state value becomes a nested
                 // query on path 'state' searching state.abbrev and state.full.
                 if (!isset($location['locality'])) {
                     $fields = ['state.abbrev', 'state.full'];
                     $values = is_array($value) ? $value : [$value];
                     foreach ($values as $state) {
                         $querystring = new Elastica\Query\QueryString($state);
                         $querystring->setFields($fields);
                         $nested = new Elastica\Query\Nested();
                         $nested->setQuery($querystring);
                         $nested->setPath($field);
                         // Several candidate states are alternatives; a single state is required.
                         if (count($values) > 1) {
                             $query->addShould($nested);
                         } else {
                             $query->addMust($nested);
                         }
                     }
                 }
                 break;
             case 'zip':
                 // Required match on the zip field.
                 $query->{$method}(new Elastica\Query\Match($field, $value));
                 break;
         }
     }
     $localityQuery = new Elastica\Query($query);
     // Only one hit is requested.
     $localityQuery->setSize(1);
     return $localityQuery;
 }
 /**
  * Build the query-string query used for full-text search over the given fields.
  *
  * The query uses the AND operator, auto-generated phrase queries, a fuzzy prefix
  * length of 2 and the 'top_terms_boost_1024' rewrite. Leading wildcards and the
  * maximum number of determinized states come from the configuration. The result
  * is passed through wrapInSaferIfPossible() before being returned.
  *
  * @param string[] $fields      fields to search
  * @param string   $queryString query-string expression
  * @param int      $phraseSlop  phrase slop
  * @param boolean  $isRescore   forwarded to wrapInSaferIfPossible()
  * @return \Elastica\Query\Simple
  */
 private function buildSearchTextQueryForFields(array $fields, $queryString, $phraseSlop, $isRescore)
 {
     $allowLeadingWildcard = $this->config->get('CirrusSearchAllowLeadingWildcard');
     $maxStates = $this->config->get('CirrusSearchQueryStringMaxDeterminizedStates');

     $textQuery = new \Elastica\Query\QueryString($queryString);
     $textQuery->setFields($fields);
     $textQuery->setDefaultOperator('AND');
     $textQuery->setAutoGeneratePhraseQueries(true);
     $textQuery->setPhraseSlop($phraseSlop);
     $textQuery->setFuzzyPrefixLength(2);
     $textQuery->setRewrite('top_terms_boost_1024');
     $textQuery->setAllowLeadingWildcard($allowLeadingWildcard);

     if ($maxStates !== null) {
         // Requires ES 1.4+
         $textQuery->setParam('max_determinized_states', $maxStates);
     }

     return $this->wrapInSaferIfPossible($textQuery, $isRescore);
 }
Example #18
0
 /**
  * A delta sync must leave the index holding exactly the initial data merged with
  * the updated data, minus every record flagged with '__deleted', and must query
  * the data source and last-sync handler exactly once each.
  */
 public function testDeltaSync()
 {
     // @TODO: ensure records flagged as deleted are removed from the index
     // start from a fresh index that already contains some documents
     $this->createEsIndex();
     $seeded = $this->populateEsIndex();

     $source = Phake::mock('Renegare\\ES\\Tests\\Mock\\TestModel');
     $indexName = $this->es_test_index_name;
     $docType = $this->es_test_doc_type;
     $lastSync = new \DateTime();
     $delta = $this->getFakeDeltaSyncData($seeded);

     // what we expect to find in the index afterwards, keyed by document id
     $remaining = array_merge($seeded, $delta);

     // pull out the records flagged to be deleted ... if any!
     $flaggedDeleted = array();
     foreach ($remaining as $docId => $doc) {
         if (!isset($doc['__deleted'])) {
             continue;
         }
         $flaggedDeleted[$docId] = $doc;
         unset($remaining[$docId]);
     }
     $remainingCount = count($remaining);

     Phake::when($source)->getData($indexName, $docType, $lastSync)->thenReturn($delta);
     Phake::when($source)->getLastSync($indexName, $docType)->thenReturn($lastSync);

     // the same mock acts as data source and as last-sync handler
     $this->manager->setDataSource($source);
     $this->manager->setLastSyncHandler($source);
     $this->manager->sync(ElasticSearchManager::SYNC_DELTA, 'test', 'test_type');

     // make sure our data source is called correctly
     Phake::verify($source, Phake::times(1))->getData($indexName, $docType, $lastSync);
     Phake::verify($source, Phake::times(1))->getLastSync($indexName, $docType);
     // needs more thought as the date time does not always match and fails
     Phake::verify($source, Phake::times(1))->setLastSync($indexName, $docType);

     // make sure that all and only the expected data is in es
     // we are using Elastica Library here and not the ESManager ... #ethical
     $matchAllString = new QueryString();
     $matchAllString->setQuery('*');
     $matchAll = new Query();
     $matchAll->setQuery($matchAllString);
     // ensure we get everything back!
     $matchAll->setFrom(0);
     $matchAll->setLimit($remainingCount);

     // Search on the index.
     $found = $this->es_client->getIndex($indexName)->search($matchAll);
     $this->assertEquals($remainingCount, $found->getTotalHits());

     foreach ($found->getResults() as $hit) {
         $hitData = $hit->getData();
         $hitId = $hit->getId();
         $this->assertArrayHasKey($hitId, $remaining);
         $this->assertEquals(serialize($remaining[$hitId]), serialize($hitData));
         // ticking the record off ensures a duplicate would be caught
         unset($remaining[$hitId]);
     }
     // every expected record must have been seen
     $this->assertEquals(0, count($remaining));

     // delete test index
     $this->deleteEsIndex();
 }
    /**
     * Search articles with provided term.
     * @param $term string term to search
     * @param boolean $showSuggestion should this search suggest alternative searches that might be better?
     * @return Status(mixed) status containing results defined by resultsType on success
     */
    public function searchText($term, $showSuggestion)
    {
        $checkLengthStatus = self::checkTextSearchRequestLength($term);
        if (!$checkLengthStatus->isOk()) {
            return $checkLengthStatus;
        }
        // Transform Mediawiki specific syntax to filters and extra (pre-escaped) query string
        $searcher = $this;
        $originalTerm = $term;
        $searchContainedSyntax = false;
        $this->term = $term;
        $this->boostLinks = $this->config->get('CirrusSearchBoostLinks');
        $searchType = 'full_text';
        // Handle title prefix notation
        $prefixPos = strpos($this->term, 'prefix:');
        if ($prefixPos !== false) {
            $value = substr($this->term, 7 + $prefixPos);
            $value = trim($value, '"');
            // Trim quotes in case the user wanted to quote the prefix
            if (strlen($value) > 0) {
                $searchContainedSyntax = true;
                $this->term = substr($this->term, 0, max(0, $prefixPos - 1));
                $this->suggestSuffixes[] = ' prefix:' . $value;
                // Suck namespaces out of $value
                $cirrusSearchEngine = new CirrusSearch();
                $cirrusSearchEngine->setConnection($this->connection);
                $value = trim($cirrusSearchEngine->replacePrefixes($value));
                $this->namespaces = $cirrusSearchEngine->namespaces;
                // If the namespace prefix wasn't the entire prefix filter then add a filter for the title
                if (strpos($value, ':') !== strlen($value) - 1) {
                    $value = str_replace('_', ' ', $value);
                    $prefixQuery = new \Elastica\Query\Match();
                    $prefixQuery->setFieldQuery('title.prefix', $value);
                    $this->filters[] = new \Elastica\Filter\Query($prefixQuery);
                }
            }
        }
        $preferRecentDecayPortion = $this->config->get('CirrusSearchPreferRecentDefaultDecayPortion');
        $preferRecentHalfLife = $this->config->get('CirrusSearchPreferRecentDefaultHalfLife');
        $unspecifiedDecayPortion = $this->config->get('CirrusSearchPreferRecentUnspecifiedDecayPortion');
        // Matches "prefer-recent:" and then an optional floating point number <= 1 but >= 0 (decay
        // portion) and then an optional comma followed by another floating point number >= 0 (half life)
        $this->extractSpecialSyntaxFromTerm('/prefer-recent:(1|0?(?:\\.\\d+)?)?(?:,(\\d*\\.?\\d+))? ?/', function ($matches) use($unspecifiedDecayPortion, &$preferRecentDecayPortion, &$preferRecentHalfLife, &$searchContainedSyntax) {
            if (isset($matches[1]) && strlen($matches[1])) {
                $preferRecentDecayPortion = floatval($matches[1]);
            } else {
                $preferRecentDecayPortion = $unspecifiedDecayPortion;
            }
            if (isset($matches[2])) {
                $preferRecentHalfLife = floatval($matches[2]);
            }
            $searchContainedSyntax = true;
            return '';
        });
        $this->preferRecentDecayPortion = $preferRecentDecayPortion;
        $this->preferRecentHalfLife = $preferRecentHalfLife;
        $this->extractSpecialSyntaxFromTerm('/^\\s*local:/', function ($matches) use($searcher) {
            $searcher->limitSearchToLocalWiki(true);
            return '';
        });
        // Handle other filters
        $filters = $this->filters;
        $notFilters = $this->notFilters;
        $boostTemplates = self::getDefaultBoostTemplates();
        $highlightSource = array();
        $this->extractSpecialSyntaxFromTerm('/(?<not>-)?insource:\\/(?<pattern>(?:[^\\\\\\/]|\\\\.)+)\\/(?<insensitive>i)? ?/', function ($matches) use($searcher, &$filters, &$notFilters, &$searchContainedSyntax, &$searchType, &$highlightSource) {
            if (!$searcher->config->get('CirrusSearchEnableRegex')) {
                return;
            }
            $searchContainedSyntax = true;
            $searchType = 'regex';
            $insensitive = !empty($matches['insensitive']);
            $filterDestination =& $filters;
            if (!empty($matches['not'])) {
                $filterDestination =& $notFilters;
            } else {
                $highlightSource[] = array('pattern' => $matches['pattern'], 'locale' => $searcher->config->get('LanguageCode'), 'insensitive' => $insensitive);
            }
            $regex = $searcher->config->getElement('CirrusSearchWikimediaExtraPlugin', 'regex');
            if ($regex && in_array('use', $regex)) {
                $filter = new SourceRegex($matches['pattern'], 'source_text', 'source_text.trigram');
                if (isset($regex['max_inspect'])) {
                    $filter->setMaxInspect($regex['max_inspect']);
                } else {
                    $filter->setMaxInspect(10000);
                }
                $filter->setMaxDeterminizedStates($searcher->config->get('CirrusSearchRegexMaxDeterminizedStates'));
                if (isset($regex['max_ngrams_extracted'])) {
                    $filter->setMaxNgramExtracted($regex['max_ngrams_extracted']);
                }
                $filter->setCaseSensitive(!$insensitive);
                $filter->setLocale($this->config->get('LanguageCode'));
                $filterDestination[] = $filter;
            } else {
                // Without the extra plugin we need to use groovy to attempt the regex.
                // Its less good but its something.
                $script = <<<GROOVY
import org.apache.lucene.util.automaton.*;
sourceText = _source.get("source_text");
if (sourceText == null) {
\tfalse;
} else {
\tif (automaton == null) {
\t\tif (insensitive) {
\t\t\tlocale = new Locale(language);
\t\t\tpattern = pattern.toLowerCase(locale);
\t\t}
\t\tregexp = new RegExp(pattern, RegExp.ALL ^ RegExp.AUTOMATON);
\t\tautomaton = new CharacterRunAutomaton(regexp.toAutomaton());
\t}
\tif (insensitive) {
\t\tsourceText = sourceText.toLowerCase(locale);
\t}
\tautomaton.run(sourceText);
}

GROOVY;
                $filterDestination[] = new \Elastica\Filter\Script(new \Elastica\Script($script, array('pattern' => '.*(' . $matches['pattern'] . ').*', 'insensitive' => $insensitive, 'language' => $searcher->config->get('LanguageCode'), 'automaton' => null, 'locale' => null), 'groovy'));
            }
        });
        // Match filters that look like foobar:thing or foobar:"thing thing"
        // The {7,15} keeps this from having horrible performance on big strings
        $escaper = $this->escaper;
        $fuzzyQuery = $this->fuzzyQuery;
        $isEmptyQuery = false;
        $this->extractSpecialSyntaxFromTerm('/(?<key>[a-z\\-]{7,15}):\\s*(?<value>"(?<quoted>(?:[^"]|(?<=\\\\)")+)"|(?<unquoted>\\S+)) ?/', function ($matches) use($searcher, $escaper, &$filters, &$notFilters, &$boostTemplates, &$searchContainedSyntax, &$fuzzyQuery, &$highlightSource, &$isEmptyQuery) {
            $key = $matches['key'];
            $quotedValue = $matches['value'];
            $value = $matches['quoted'] !== '' ? str_replace('\\"', '"', $matches['quoted']) : $matches['unquoted'];
            $filterDestination =& $filters;
            $keepText = true;
            if ($key[0] === '-') {
                $key = substr($key, 1);
                $filterDestination =& $notFilters;
                $keepText = false;
            }
            switch ($key) {
                case 'boost-templates':
                    $boostTemplates = Searcher::parseBoostTemplates($value);
                    if ($boostTemplates === null) {
                        $boostTemplates = Searcher::getDefaultBoostTemplates();
                    }
                    $searchContainedSyntax = true;
                    return '';
                case 'hastemplate':
                    // We emulate template syntax here as best as possible,
                    // so things in NS_MAIN are prefixed with ":" and things
                    // in NS_TEMPLATE don't have a prefix at all. Since we
                    // don't actually index templates like that, munge the
                    // query here
                    if (strpos($value, ':') === 0) {
                        $value = substr($value, 1);
                    } else {
                        $title = Title::newFromText($value);
                        if ($title && $title->getNamespace() == NS_MAIN) {
                            $value = Title::makeTitle(NS_TEMPLATE, $title->getDBkey())->getPrefixedText();
                        }
                    }
                    $filterDestination[] = $searcher->matchPage('template', $value);
                    $searchContainedSyntax = true;
                    return '';
                case 'linksto':
                    $filterDestination[] = $searcher->matchPage('outgoing_link', $value, true);
                    $searchContainedSyntax = true;
                    return '';
                case 'incategory':
                    $categories = array_slice(explode('|', $value), 0, $searcher->config->get('CirrusSearchMaxIncategoryOptions'));
                    $categoryFilters = $searcher->matchPageCategories($categories);
                    if ($categoryFilters === null) {
                        $isEmptyQuery = true;
                    } else {
                        $filterDestination[] = $categoryFilters;
                    }
                    $searchContainedSyntax = true;
                    return '';
                case 'insource':
                    $updateReferences = Filters::insource($escaper, $searcher->getSearchContext(), $quotedValue);
                    $updateReferences($fuzzyQuery, $filterDestination, $highlightSource, $searchContainedSyntax);
                    return '';
                case 'intitle':
                    $updateReferences = Filters::intitle($escaper, $searcher->getSearchContext(), $quotedValue);
                    $updateReferences($fuzzyQuery, $filterDestination, $highlightSource, $searchContainedSyntax);
                    return $keepText ? "{$quotedValue} " : '';
                default:
                    return $matches[0];
            }
        });
        if ($isEmptyQuery) {
            return Status::newGood(new SearchResultSet(true));
        }
        $this->filters = $filters;
        $this->notFilters = $notFilters;
        $this->boostTemplates = $boostTemplates;
        $this->searchContext->setSearchContainedSyntax($searchContainedSyntax);
        $this->fuzzyQuery = $fuzzyQuery;
        $this->highlightSource = $highlightSource;
        $this->term = $this->escaper->escapeQuotes($this->term);
        $this->term = trim($this->term);
        // Match quoted phrases including those containing escaped quotes
        // Those phrases can optionally be followed by ~ then a number (this is the phrase slop)
        // That can optionally be followed by a ~ (this matches stemmed words in phrases)
        // The following all match: "a", "a boat", "a\"boat", "a boat"~, "a boat"~9, "a boat"~9~, -"a boat", -"a boat"~9~
        $slop = $this->config->get('CirrusSearchPhraseSlop');
        $query = self::replacePartsOfQuery($this->term, '/(?<![\\]])(?<negate>-|!)?(?<main>"((?:[^"]|(?<=\\\\)")+)"(?<slop>~\\d+)?)(?<fuzzy>~)?/', function ($matches) use($searcher, $escaper, $slop) {
            $negate = $matches['negate'][0] ? 'NOT ' : '';
            $main = $escaper->fixupQueryStringPart($matches['main'][0]);
            if (!$negate && !isset($matches['fuzzy']) && !isset($matches['slop']) && preg_match('/^"([^"*]+)[*]"/', $main, $matches)) {
                $phraseMatch = new Elastica\Query\Match();
                $phraseMatch->setFieldQuery("all.plain", $matches[1]);
                $phraseMatch->setFieldType("all.plain", "phrase_prefix");
                $this->nonTextQueries[] = $phraseMatch;
                $phraseHighlightMatch = new Elastica\Query\QueryString();
                $phraseHighlightMatch->setQuery($matches[1] . '*');
                $phraseHighlightMatch->setFields(array('all.plain'));
                $this->nonTextHighlightQueries[] = $phraseHighlightMatch;
                return array();
            }
            if (!isset($matches['fuzzy'])) {
                if (!isset($matches['slop'])) {
                    $main = $main . '~' . $slop['precise'];
                }
                // Got to collect phrases that don't use the all field so we can highlight them.
                // The highlighter locks phrases to the fields that specify them.  It doesn't do
                // that with terms.
                return array('escaped' => $negate . $searcher->switchSearchToExact($main, true), 'nonAll' => $negate . $searcher->switchSearchToExact($main, false));
            }
            return array('escaped' => $negate . $main);
        });
        // Find prefix matches and force them to only match against the plain analyzed fields.  This
        // prevents prefix matches from getting confused by stemming.  Users really don't expect stemming
        // in prefix queries.
        $query = self::replaceAllPartsOfQuery($query, '/\\w+\\*(?:\\w*\\*?)*/u', function ($matches) use($searcher, $escaper) {
            $term = $escaper->fixupQueryStringPart($matches[0][0]);
            return array('escaped' => $searcher->switchSearchToExactForWildcards($term), 'nonAll' => $searcher->switchSearchToExactForWildcards($term));
        });
        $escapedQuery = array();
        $nonAllQuery = array();
        $nearMatchQuery = array();
        foreach ($query as $queryPart) {
            if (isset($queryPart['escaped'])) {
                $escapedQuery[] = $queryPart['escaped'];
                if (isset($queryPart['nonAll'])) {
                    $nonAllQuery[] = $queryPart['nonAll'];
                } else {
                    $nonAllQuery[] = $queryPart['escaped'];
                }
                continue;
            }
            if (isset($queryPart['raw'])) {
                $fixed = $this->escaper->fixupQueryStringPart($queryPart['raw']);
                $escapedQuery[] = $fixed;
                $nonAllQuery[] = $fixed;
                $nearMatchQuery[] = $queryPart['raw'];
                continue;
            }
            LoggerFactory::getInstance('CirrusSearch')->warning('Unknown query part: {queryPart}', array('queryPart' => serialize($queryPart)));
        }
        // Actual text query
        list($queryStringQueryString, $this->fuzzyQuery) = $escaper->fixupWholeQueryString(implode(' ', $escapedQuery));
        // Note that no escaping is required for near_match's match query.
        $nearMatchQuery = implode(' ', $nearMatchQuery);
        if ($queryStringQueryString !== '') {
            if (preg_match('/(?<!\\\\)[?*+~"!|-]|AND|OR|NOT/', $queryStringQueryString)) {
                $this->searchContext->setSearchContainedSyntax(true);
                // We're unlikey to make good suggestions for query string with special syntax in them....
                $showSuggestion = false;
            }
            $fields = array_merge($this->buildFullTextSearchFields(1, '.plain', true), $this->buildFullTextSearchFields($this->config->get('CirrusSearchStemmedWeight'), '', true));
            $nearMatchFields = $this->buildFullTextSearchFields($this->config->get('CirrusSearchNearMatchWeight'), '.near_match', true);
            $this->query = $this->buildSearchTextQuery($fields, $nearMatchFields, $queryStringQueryString, $nearMatchQuery);
            // The highlighter doesn't know about the weightinging from the all fields so we have to send
            // it a query without the all fields.  This swaps one in.
            if ($this->config->getElement('CirrusSearchAllFields', 'use')) {
                $nonAllFields = array_merge($this->buildFullTextSearchFields(1, '.plain', false), $this->buildFullTextSearchFields($this->config->get('CirrusSearchStemmedWeight'), '', false));
                list($nonAllQueryString, ) = $escaper->fixupWholeQueryString(implode(' ', $nonAllQuery));
                $this->highlightQuery = $this->buildSearchTextQueryForFields($nonAllFields, $nonAllQueryString, 1, false, true);
            } else {
                $nonAllFields = $fields;
            }
            // Only do a phrase match rescore if the query doesn't include any quotes and has a space.
            // Queries without spaces are either single term or have a phrase query generated.
            // Queries with the quote already contain a phrase query and we can't build phrase queries
            // out of phrase queries at this point.
            if ($this->config->get('CirrusSearchPhraseRescoreBoost') > 1.0 && $this->config->get('CirrusSearchPhraseRescoreWindowSize') && !$this->searchContext->isSearchContainedSyntax() && strpos($queryStringQueryString, '"') === false && strpos($queryStringQueryString, ' ') !== false) {
                $rescoreFields = $fields;
                if (!$this->config->get('CirrusSearchAllFieldsForRescore')) {
                    $rescoreFields = $nonAllFields;
                }
                $this->rescore[] = array('window_size' => $this->config->get('CirrusSearchPhraseRescoreWindowSize'), 'query' => array('rescore_query' => $this->buildSearchTextQueryForFields($rescoreFields, '"' . $queryStringQueryString . '"', $this->config->getElement('CirrusSearchPhraseSlop', 'boost'), true), 'query_weight' => 1.0, 'rescore_query_weight' => $this->config->get('CirrusSearchPhraseRescoreBoost')));
            }
            $showSuggestion = $showSuggestion && $this->offset == 0;
            if ($showSuggestion) {
                $this->suggest = array('text' => $this->term, 'suggest' => $this->buildSuggestConfig('suggest'));
            }
            $result = $this->search($searchType, $originalTerm);
            if (!$result->isOK() && $this->isParseError($result)) {
                // Elasticsearch has reported a parse error and we've already logged it when we built the status
                // so at this point all we can do is retry the query as a simple query string query.
                $this->query = new \Elastica\Query\Simple(array('simple_query_string' => array('fields' => $fields, 'query' => $queryStringQueryString, 'default_operator' => 'AND')));
                $this->rescore = array();
                // Not worth trying in this state.
                $result = $this->search('degraded_full_text', $originalTerm);
                // If that doesn't work we're out of luck but it should.  There no guarantee it'll work properly
                // with the syntax we've built above but it'll do _something_ and we'll still work on fixing all
                // the parse errors that come in.
            }
        } else {
            $result = $this->search($searchType, $originalTerm);
            // No need to check for a parse error here because we don't actually create a query for
            // Elasticsearch to parse
        }
        return $result;
    }
 /**
  * Indexes a single document and checks that a boosted query-string query
  * for "test" returns exactly one hit.
  *
  * @group functional
  */
 public function testSetBoost()
 {
     $index = $this->_createIndex();

     $boostedQuery = new QueryString('test');
     $boostedQuery->setBoost(9.300000000000001);

     // Store one document whose "name" field contains the searched word.
     $type = $index->getType('test');
     $type->addDocument(new Document('', array('name' => 'test')));
     $index->refresh();

     $hits = $index->search($boostedQuery)->count();
     $this->assertEquals(1, $hits);
 }
 /**
  * Builds a query-string query restricted to a single field and returns a
  * callable that applies it to the caller's search state.
  *
  * The value is passed through the escaper's fixupQueryStringPart() and then
  * fixupWholeQueryString(); the resulting string is handed to $fieldF to pick
  * the field to search.
  *
  * @param Escaper $escaper used to fix up the value and to read the
  *  allow-leading-wildcard setting
  * @param SearchContext $context used to wrap the built query via
  *  wrapInSaferIfPossible()
  * @param string $value raw value to search for
  * @param bool $updateHighlightSourceRef when true the returned callable also
  *  appends array('query' => <wrapped query>) to the highlight source
  * @param callable $fieldF called with the fixed-up query string; its return
  *  value is used as the only field of the query
  * @return callable takes four by-reference arguments (fuzzy query flag,
  *  filter destination, highlight source, search-contained-syntax flag) and
  *  updates each of them
  */
 private static function insourceOrIntitle($escaper, $context, $value, $updateHighlightSourceRef, $fieldF)
 {
     $fixedPart = $escaper->fixupQueryStringPart($value);
     list($fixedQueryString, $fuzzyFlag) = $escaper->fixupWholeQueryString($fixedPart);
     $targetField = $fieldF($fixedQueryString);

     $fieldQuery = new \Elastica\Query\QueryString($fixedQueryString);
     $fieldQuery->setFields(array($targetField));
     $fieldQuery->setDefaultOperator('AND');
     $fieldQuery->setAllowLeadingWildcard($escaper->getAllowLeadingWildcard());
     $fieldQuery->setFuzzyPrefixLength(2);
     $fieldQuery->setRewrite('top_terms_boost_1024');

     $saferQuery = $context->wrapInSaferIfPossible($fieldQuery, false);

     // The caller passes its own state variables by reference; everything
     // computed above is captured by value.
     return function (&$fuzzyQueryOut, &$filterDestinationOut, &$highlightSourceOut, &$searchContainedSyntaxOut) use($fuzzyFlag, $saferQuery, $updateHighlightSourceRef) {
         $fuzzyQueryOut = $fuzzyFlag;
         $filterDestinationOut[] = new \Elastica\Filter\Query($saferQuery);
         if ($updateHighlightSourceRef) {
             $highlightSourceOut[] = array('query' => $saferQuery);
         }
         $searchContainedSyntaxOut = true;
     };
 }
Example #22
0
 /**
  * Runs a query-string search on the "<search index_name>_log" index.
  *
  * @param string  $operator    'And' or 'Or'. NOTE(review): this argument is
  *                             never read here; the default operator is
  *                             always set to "and".
  * @param string  $words       text to search for (encoding is normalized first)
  * @param integer $start       paging offset, only applied when $aggregation is false
  * @param integer $limit       page size, only applied when $aggregation is false
  * @param array   $names_types the type(s) the search is restricted to, if any
  * @param bool    $aggregation when true, nested aggregations are added
  *                             instead of paging
  *
  * @return \Elastica\ResultSet
  */
 function searchQueryString($operator, $words, $start = 0, $limit = 30, $names_types = null, $aggregation = false)
 {
     // Query-string query built from the normalized words.
     $stringQuery = new Elastica\Query\QueryString(CSearch::normalizeEncoding($words));
     $stringQuery->setDefaultOperator("and");

     $query = new Elastica\Query($stringQuery);

     if (!$aggregation) {
         // Paging: offset first, then page size.
         $query->setFrom($start);
         $query->setLimit($limit);
     } else {
         // Three "Terms" aggregations nested as date_log > user_id > contexte.
         $byDate = new CSearchAggregation("Terms", "date_log", "date", 10);
         $byUser = new CSearchAggregation("Terms", "user_id", "user_id", 10);
         $byContext = new CSearchAggregation("Terms", "contexte", "_type", 10);
         $byUser->_aggregation->addAggregation($byContext->_aggregation);
         $byDate->_aggregation->addAggregation($byUser->_aggregation);
         $query->addAggregation($byDate->_aggregation);
     }

     // Highlight matches found in the "body" field.
     $bodyHighlight = array(
         "pre_tags" => array(" <em> <strong> "),
         "post_tags" => array(" </strong> </em>"),
         "fragment_size" => 80,
         "number_of_fragments" => 10,
     );
     $query->setHighlight(array("fields" => array("body" => $bodyHighlight)));

     // Load the log index before creating the search object, as the original does.
     $logIndex = $this->loadIndex(CAppUI::conf("search index_name") . "_log");
     $search = new \Elastica\Search($this->_client);
     $search->addIndex($logIndex);
     if ($names_types) {
         $search->addTypes($names_types);
     }

     return $search->search($query);
 }