/**
 * Finds entries whose translated names fuzzily match a term, optionally
 * restricted to a single city.
 *
 * @param mixed $term  Text handed to the fuzzy-like-this query as its "like text".
 * @param mixed $city  Value matched against 'city.slug'; any falsy value disables the restriction.
 * @param int   $limit Forwarded unchanged to find() as its second argument.
 *
 * @return mixed Whatever $this->find() returns.
 */
public function autocomplete($term, $city = null, $limit = 10)
{
    // TODO: check validated
    $nameQuery = new \Elastica\Query\FuzzyLikeThis();
    $nameQuery->addFields(['nameTranslatableRU', 'nameTranslatableEN']);
    $nameQuery->setLikeText($term);

    $conditions = new \Elastica\Query\Bool();

    // The city clause is registered before the name clause so the order of
    // the "must" clauses in the generated query stays the same.
    if (!empty($city)) {
        $cityQuery = new \Elastica\Query\Match();
        $cityQuery->setFieldQuery('city.slug', $city);
        $conditions->addMust($cityQuery);
    }
    $conditions->addMust($nameQuery);

    $wrapped = new \Elastica\Query\Filtered($conditions);

    return $this->find(\Elastica\Query::create($wrapped), $limit);
}
/**
 * Finds entries whose translated names fuzzily match a term.
 *
 * @param mixed $term  Text handed to the fuzzy-like-this query as its "like text".
 * @param int   $limit Forwarded unchanged to find() as its second argument.
 *
 * @return mixed Whatever $this->find() returns.
 */
public function autocomplete($term, $limit = 10)
{
    $likeThis = new \Elastica\Query\FuzzyLikeThis();
    $likeThis->addFields(['nameTranslatableRU', 'nameTranslatableEN']);
    $likeThis->setLikeText($term);

    // The fuzzy query is wrapped in a filtered query (no filter attached)
    // before being turned into the top-level query object.
    $wrapped = new \Elastica\Query\Filtered($likeThis);

    return $this->find(\Elastica\Query::create($wrapped), $limit);
}
/**
 * Runs a fuzzy-like-this search for the given text on the index of the
 * query's store and returns the raw result set.
 *
 * The searched fields come from the 'elasticgento_catalogsearch/data' helper;
 * only 'entity_id' and 'name' are requested back for each hit.
 *
 * @param Hackathon_ElasticgentoCore_Model_Resource_Client $searchAdapter
 * @param string                                           $queryText
 * @param Mage_CatalogSearch_Model_Query                   $query
 *
 * @return Elastica\ResultSet
 */
protected function fetchSearchResultFromElasticSearch($searchAdapter, $queryText, $query)
{
    $searchableFields = Mage::helper('elasticgento_catalogsearch/data')->getSearchableElasticSearchFieldNames();

    $fuzzy = new \Elastica\Query\FuzzyLikeThis();
    $fuzzy->addFields($searchableFields);
    $fuzzy->setLikeText($queryText);

    $search = new Elastica\Query();
    $search->setQuery($fuzzy);
    $search->setFields(['entity_id', 'name']);

    $index = $searchAdapter->getIndex($query->getStoreId());

    return $index->search($search);
}
/**
 * Builds the base query for a search string.
 *
 * An empty string yields a match-all query. Otherwise the string is passed
 * through the helper's _searchQuery() and a bool query is returned holding,
 * as "should" clauses, an optional fuzzy-like-this query (only when fuzzy
 * querying is enabled) followed by a query-string query with boost '2'.
 *
 * @param string $q
 * @return \Elastica\Query\AbstractQuery
 */
protected function _getBaseQuery($q)
{
    if (empty($q)) {
        return new \Elastica\Query\MatchAll();
    }

    $searchText = $this->_helper->_searchQuery($q);
    $combined = new \Elastica\Query\Bool();

    if ($this->isFuzzyQueryEnabled()) {
        $fuzzyClause = new \Elastica\Query\FuzzyLikeThis();
        $fuzzyClause
            ->addFields($this->_getSearchFields(true, $searchText))
            ->setLikeText($searchText)
            ->setMinSimilarity($this->getFuzzyMinSimilarity())
            ->setPrefixLength($this->getFuzzyPrefixLength())
            ->setMaxQueryTerms($this->getFuzzyMaxQueryTerms())
            ->setBoost($this->getFuzzyQueryBoost());
        $combined->addShould($fuzzyClause);
    }

    $textClause = new \Elastica\Query\QueryString($searchText);
    $textClause->setFields($this->_getSearchFields(true, $searchText));
    $textClause->setDefaultOperator($this->getQueryOperator());
    $textClause->setBoost('2');
    $combined->addShould($textClause);

    return $combined;
}
/**
 * Builds a search query from free-text words and optional restrictions.
 *
 * Every active criterion becomes a "must" clause of a single bool query, in
 * this order: date, words (fuzzy or query-string form), then either the
 * details expression or the prat_id clause plus the optional sejour clause.
 * Depending on $aggregation the query is paginated or given aggregations,
 * and highlighting of the "body" field is configured when words are present.
 *
 * @param string  $words         searched words; passed through stripcslashes() and CmbString::normalizeUtf8()
 * @param integer $start         pagination offset, only applied when $aggregation is falsy
 * @param integer $limit         page size, only applied when $aggregation is falsy
 * @param bool    $aggregation   falsy: paginated search; loosely equal to "by_type": aggregation on _type;
 *                               anything else: aggregation on object_ref_class > object_ref_id > _type
 * @param integer $sejour_id     forwarded to constructWordsWithPrat() and constructWordsWithSejour()
 * @param string  $specific_user forwarded to constructWordsWithPrat()
 * @param bool    $details       when truthy, used as a query-string expression instead of the prat/sejour clauses
 * @param string  $date          when truthy, used as a query-string expression
 * @param bool    $fuzzy_search  when truthy, the words are searched with a fuzzy-like-this query
 *
 * @return \Elastica\Query
 */
function searchQueryString(
    $words,
    $start = 0,
    $limit = 30,
    $aggregation = false,
    $sejour_id = null,
    $specific_user = null,
    $details = null,
    $date = null,
    $fuzzy_search = null
) {
    // Prepare the search terms. Both helpers are always called, even when
    // $details later makes their results unused.
    $prat_ids     = $this->constructWordsWithPrat($specific_user, $sejour_id);
    $sejour_terms = $this->constructWordsWithSejour($sejour_id);
    $words        = CmbString::normalizeUtf8(stripcslashes($words));

    $must = new Elastica\Query\Bool();

    // Date clause
    if ($date) {
        $date_clause = new Elastica\Query\QueryString();
        $date_clause->setQuery($date);
        $date_clause->setDefaultOperator("and");
        $must->addMust($date_clause);
    }

    // Words clause
    if ($words) {
        $search_fields = array("body", "title");

        if ($fuzzy_search) {
            $words_clause = new Elastica\Query\FuzzyLikeThis();
            $words_clause->addFields($search_fields);
            $words_clause->setLikeText($words);
            $words_clause->setMinSimilarity(0.3);
            $words_clause->setMaxQueryTerms(3);
        } else {
            $words_clause = new Elastica\Query\QueryString($words);
            $words_clause->setFields($search_fields);
            $words_clause->setDefaultOperator("and");
        }

        $must->addMust($words_clause);
    }

    if ($details) {
        // Details clause
        $details_clause = new Elastica\Query\QueryString();
        $details_clause->setQuery($details);
        $details_clause->setDefaultOperator("and");
        $must->addMust($details_clause);
    } else {
        // prat_id clause
        $prat_clause = new Elastica\Query\QueryString();
        $prat_clause->setQuery("prat_id:({$prat_ids})");
        $prat_clause->setDefaultField("prat_id");
        $must->addMust($prat_clause);

        // Sejour clause
        if ($sejour_terms) {
            $sejour_clause = new Elastica\Query\QueryString();
            $sejour_clause->setQuery($sejour_terms);
            $sejour_clause->setDefaultOperator("and");
            $must->addMust($sejour_clause);
        }
    }

    $query = new Query($must);

    if (!$aggregation) {
        // Pagination
        $query->setFrom($start); // Where to start
        $query->setLimit($limit);
    } elseif ($aggregation != "by_type") {
        // Loose comparison on purpose: boolean true compares equal to
        // "by_type" and therefore does not reach this branch.
        //
        // First aggregate by the class of the referenced objects, then
        // sub-aggregate by the id of the referenced objects.
        $by_class = new CSearchAggregation("Terms", "ref_class", "object_ref_class", 10);
        $by_id    = new CSearchAggregation("Terms", "sub_ref_id", "object_ref_id", 100);
        $by_type  = new CSearchAggregation("Terms", "sub_ref_type", "_type", 100);

        // Nest from the innermost aggregation outwards.
        $by_id->_aggregation->addAggregation($by_type->_aggregation);
        $by_class->_aggregation->addAggregation($by_id->_aggregation);
        $query->addAggregation($by_class->_aggregation);
    } else {
        $by_type = new CSearchAggregation("Terms", "ref_type", "_type", 100);
        $query->addAggregation($by_type->_aggregation);
    }

    // Highlight
    if ($words) {
        $highlight_query = array(
            "bool" => array(
                "must"                 => array(
                    "match" => array(
                        "body" => array("query" => $words)
                    )
                ),
                "minimum_should_match" => 1
            )
        );

        $body_settings = array(
            "fragment_size"       => 50,
            "number_of_fragments" => 3,
            "highlight_query"     => $highlight_query
        );

        $query->setHighlight(
            array(
                "pre_tags"  => array(" <em> <strong> "),
                "post_tags" => array(" </strong> </em>"),
                "fields"    => array("body" => $body_settings)
            )
        );
    }

    return $query;
}
/**
 * Fetches translation suggestions for a text using two queries.
 *
 * The first query finds documents in the source language whose 'content'
 * fuzzily matches $text and scores them by Levenshtein distance. The second
 * query fetches the documents whose ids are "<source id>/<target language>"
 * for the matches found.
 *
 * The connection timeout is set to 10 (presumably seconds - confirm against
 * the Elastica connection API) while the queries run and is restored
 * afterwards, including when a query throws.
 *
 * @param string $sourceLanguage Value the 'language' field is filtered on.
 * @param string $targetLanguage Suffix appended to each source id for the second query.
 * @param string $text           Text to find suggestions for.
 *
 * @return array[] Suggestions with the keys source, target, context, quality,
 *                 wiki, location and uri, ordered by descending quality
 *                 (uasort() keeps the original array keys).
 *
 * @throws \Exception Any exception raised while querying is rethrown once the
 *                    timeout has been restored.
 */
protected function doQuery($sourceLanguage, $targetLanguage, $text)
{
    /* Two query system:
     * 1) Find all strings in source language that match text
     * 2) Do another query for translations for those strings
     */
    $connection = $this->getClient()->getConnection();
    $oldTimeout = $connection->getTimeout();
    $connection->setTimeout(10);

    // Catch-and-rethrow (rather than finally) restores the timeout on failure
    // without requiring PHP 5.5+.
    try {
        $fuzzyQuery = new \Elastica\Query\FuzzyLikeThis();
        $fuzzyQuery->setLikeText($text);
        $fuzzyQuery->addFields(array('content'));

        $boostQuery = new \Elastica\Query\FunctionScore();
        if ($this->useWikimediaExtraPlugin()) {
            $boostQuery->addFunction(
                'levenshtein_distance_score',
                array('text' => $text, 'field' => 'content')
            );
        } else {
            // Heredoc body and terminator stay at column 0 so the script text
            // is unchanged and the syntax is valid on PHP versions before 7.3.
            $groovyScript = <<<GROOVY
import org.apache.lucene.search.spell.*
new LevensteinDistance().getDistance(srctxt, _source['content'])
GROOVY;
            $script = new \Elastica\Script(
                $groovyScript,
                array('srctxt' => $text),
                \Elastica\Script::LANG_GROOVY
            );
            $boostQuery->addScriptScoreFunction($script);
        }
        $boostQuery->setBoostMode(\Elastica\Query\FunctionScore::BOOST_MODE_REPLACE);

        // Wrap the fuzzy query so it can be used as a filter.
        // This is slightly faster, as ES can throw away the scores by this query.
        $fuzzyFilter = new \Elastica\Filter\Query();
        $fuzzyFilter->setQuery($fuzzyQuery);
        $boostQuery->setFilter($fuzzyFilter);

        // Use filtered query to wrap function score and language filter
        $filteredQuery = new \Elastica\Query\Filtered();
        $languageFilter = new \Elastica\Filter\Term();
        $languageFilter->setTerm('language', $sourceLanguage);
        $filteredQuery->setFilter($languageFilter);
        $filteredQuery->setQuery($boostQuery);

        // The whole query
        $query = new \Elastica\Query();
        $query->setQuery($filteredQuery);

        // The interface usually displays three best candidates. These might
        // come from more than three source things, if the translations are
        // the same. In other words suggestions are grouped by the suggested
        // translation. This algorithm might not find all suggestions, if the
        // top N best matching source texts don't have equivalent translations
        // in the target language, but worse matches which we did not fetch do.
        // This code tries to balance between doing too many or too big queries
        // and not fetching enough results to show all possible suggestions.
        $sizeFirst = 100;
        $sizeSecond = $sizeFirst * 5;

        $query->setFrom(0);
        $query->setSize($sizeFirst);
        $query->setParam('_source', array('content'));
        $cutoff = isset($this->config['cutoff']) ? $this->config['cutoff'] : 0.65;
        $query->setParam('min_score', $cutoff);
        $query->setSort(array('_score', '_uid'));

        // This query is doing two unrelated things:
        // 1) Collect the message contents and scores so that they can
        //    be accessed later for the translations we found.
        // 2) Build the query string for the query that fetches the translations.
        $contents = $scores = $terms = array();
        do {
            $resultset = $this->getType()->search($query);

            if (count($resultset) === 0) {
                break;
            }

            foreach ($resultset->getResults() as $result) {
                $data = $result->getData();
                $score = $result->getScore();

                // Strip the trailing "/<segment>" from the document id.
                $sourceId = preg_replace('~/[^/]+$~', '', $result->getId());
                $contents[$sourceId] = $data['content'];
                $scores[$sourceId] = $score;
                $terms[] = "{$sourceId}/{$targetLanguage}";
            }

            // Check if it looks like that we are hitting the long tail already.
            // Otherwise, we'll do a query to fetch some more to reach a "sane"
            // breaking point, i.e. include all suggestions with same content
            // for reliable used X times statistics.
            if (count(array_unique($scores)) > 5) {
                break;
            }

            // Okay, We are now in second iteration of the loop. We already got
            // lots of suggestions. We will give up for now even if it means we
            // return in some sense incomplete results.
            if (count($resultset) === $sizeSecond) {
                break;
            }

            // After the first query, the smallest score is the new threshold.
            // $score still holds the score of the last result of this page.
            $query->setParam('min_score', $score);
            $query->setFrom($query->getParam('size') + $query->getParam('from'));
            $query->setSize($sizeSecond);

            // Break if we already got all hits
        } while ($resultset->getTotalHits() > count($contents));

        $suggestions = array();

        // Skip second query if first query found nothing.
        if ($terms !== array()) {
            $idQuery = new \Elastica\Query\Terms();
            $idQuery->setTerms('_id', $terms);

            $query = new \Elastica\Query($idQuery);
            $query->setSize(25);
            $query->setParam('_source', array('wiki', 'uri', 'content', 'localid'));
            $resultset = $this->getType()->search($query);

            foreach ($resultset->getResults() as $result) {
                $data = $result->getData();

                // Construct the matching source id
                $sourceId = preg_replace('~/[^/]+$~', '', $result->getId());

                $suggestions[] = array(
                    'source' => $contents[$sourceId],
                    'target' => $data['content'],
                    'context' => $data['localid'],
                    'quality' => $scores[$sourceId],
                    'wiki' => $data['wiki'],
                    'location' => $data['localid'] . '/' . $targetLanguage,
                    'uri' => $data['uri']
                );
            }

            // Ensure results are in quality order
            uasort($suggestions, function ($a, $b) {
                if ($a['quality'] === $b['quality']) {
                    return 0;
                }

                return $a['quality'] < $b['quality'] ? 1 : -1;
            });
        }
    } catch (\Exception $e) {
        // Do not leave the temporary timeout on the connection when a query fails.
        $connection->setTimeout($oldTimeout);
        throw $e;
    }

    $connection->setTimeout($oldTimeout);

    return $suggestions;
}