예제 #1
0
 /**
  * Fuzzy autocomplete over the translated name fields, optionally limited to one city.
  *
  * @param string      $term  text matched fuzzily against nameTranslatableRU and nameTranslatableEN
  * @param string|null $city  when truthy, hits must also match this value on city.slug
  * @param int         $limit forwarded unchanged to $this->find()
  *
  * @return mixed the value returned by $this->find()
  */
 public function autocomplete($term, $city = null, $limit = 10)
 {
     // TODO CHECK VALIDATED
     $nameMatcher = new \Elastica\Query\FuzzyLikeThis();
     $nameMatcher->addFields(['nameTranslatableRU', 'nameTranslatableEN']);
     $nameMatcher->setLikeText($term);

     // Every clause is a "must": the optional city clause first, then the fuzzy name clause.
     $conditions = new \Elastica\Query\Bool();
     if ($city) {
         $cityMatcher = new \Elastica\Query\Match();
         $cityMatcher->setFieldQuery('city.slug', $city);
         $conditions->addMust($cityMatcher);
     }
     $conditions->addMust($nameMatcher);

     // The bool query is wrapped in a filtered query that carries no filter.
     $wrapped = new \Elastica\Query\Filtered($conditions);

     return $this->find(\Elastica\Query::create($wrapped), $limit);
 }
예제 #2
0
 /**
  * Fuzzy autocomplete over the translated name fields.
  *
  * @param string $term  text matched fuzzily against nameTranslatableRU and nameTranslatableEN
  * @param int    $limit forwarded unchanged to $this->find()
  *
  * @return mixed the value returned by $this->find()
  */
 public function autocomplete($term, $limit = 10)
 {
     $nameMatcher = new \Elastica\Query\FuzzyLikeThis();
     $nameMatcher->addFields(['nameTranslatableRU', 'nameTranslatableEN']);
     $nameMatcher->setLikeText($term);

     // The fuzzy query is wrapped in a filtered query that carries no filter.
     $wrapped = new \Elastica\Query\Filtered($nameMatcher);
     $search = \Elastica\Query::create($wrapped);

     return $this->find($search, $limit);
 }
예제 #3
0
 /**
  * Runs a fuzzy-like-this search for the given text on the index selected by the
  * query's store id, requesting only the entity_id and name fields.
  *
  * @param Hackathon_ElasticgentoCore_Model_Resource_Client $searchAdapter provides getIndex()
  * @param string $queryText text handed to the fuzzy-like-this query
  * @param Mage_CatalogSearch_Model_Query $query its store id selects the index
  *
  * @return Elastica\ResultSet
  */
 protected function fetchSearchResultFromElasticSearch($searchAdapter, $queryText, $query)
 {
     $request = new Elastica\Query();

     // Fuzzy match over every field the helper reports as searchable.
     $fuzzyMatcher = new \Elastica\Query\FuzzyLikeThis();
     $searchableFields = Mage::helper('elasticgento_catalogsearch/data')->getSearchableElasticSearchFieldNames();
     $fuzzyMatcher->addFields($searchableFields);
     $fuzzyMatcher->setLikeText($queryText);
     $request->setQuery($fuzzyMatcher);

     // Restrict the fields returned for each hit.
     $request->setFields(['entity_id', 'name']);

     $index = $searchAdapter->getIndex($query->getStoreId());

     return $index->search($request);
 }
예제 #4
0
 /**
  * Builds the base query for a search string.
  *
  * An empty string yields a match-all query. Otherwise the string is passed through
  * $this->_helper->_searchQuery() and a bool query is returned with these "should"
  * clauses: an optional fuzzy-like-this clause (when fuzzy querying is enabled) and
  * a query-string clause boosted by '2'.
  *
  * @param string $q raw search string
  * @return \Elastica\Query\AbstractQuery
  */
 protected function _getBaseQuery($q)
 {
     // Note: empty() is also true for the string "0", which therefore matches everything.
     if (empty($q)) {
         return new \Elastica\Query\MatchAll();
     }

     $q = $this->_helper->_searchQuery($q);
     $combined = new \Elastica\Query\Bool();

     if ($this->isFuzzyQueryEnabled()) {
         $fuzzyClause = new \Elastica\Query\FuzzyLikeThis();
         $fuzzyClause->addFields($this->_getSearchFields(true, $q));
         $fuzzyClause->setLikeText($q);
         $fuzzyClause->setMinSimilarity($this->getFuzzyMinSimilarity());
         $fuzzyClause->setPrefixLength($this->getFuzzyPrefixLength());
         $fuzzyClause->setMaxQueryTerms($this->getFuzzyMaxQueryTerms());
         $fuzzyClause->setBoost($this->getFuzzyQueryBoost());
         $combined->addShould($fuzzyClause);
     }

     // The query-string clause is always present.
     $textClause = new \Elastica\Query\QueryString($q);
     $textClause->setFields($this->_getSearchFields(true, $q));
     $textClause->setDefaultOperator($this->getQueryOperator());
     $textClause->setBoost('2');
     $combined->addShould($textClause);

     return $combined;
 }
예제 #5
0
 /**
  * Builds a search query from free-text words plus optional date, details,
  * prat_id and sejour restrictions, with either paging or aggregations.
  *
  * All restrictions are combined as "must" clauses of a single bool query.
  *
  * @param string  $words         data
  * @param integer $start         the beginning of the paging
  * @param integer $limit         the interval of the paging
  * @param mixed   $aggregation   falsy: no aggregation, paging is applied;
  *                               a value loosely equal to "by_type" (this includes
  *                               boolean true): a single aggregation on _type;
  *                               any other truthy value: nested aggregations on
  *                               object_ref_class > object_ref_id > _type
  * @param integer $sejour_id     the id of the sejour
  * @param string  $specific_user the ids of users selected
  * @param string  $details       query-string restriction; when truthy it replaces
  *                               the prat_id and sejour restrictions
  * @param string  $date          query-string restriction on the date
  * @param bool    $fuzzy_search  use a fuzzy-like-this query for the words
  *
  * @return \Elastica\Query
  */
 function searchQueryString($words, $start = 0, $limit = 30, $aggregation = false, $sejour_id = null, $specific_user = null, $details = null, $date = null, $fuzzy_search = null)
 {
     // Prepare the inputs used by the search.
     $prats = $this->constructWordsWithPrat($specific_user, $sejour_id);
     $sejour = $this->constructWordsWithSejour($sejour_id);
     $words = CmbString::normalizeUtf8(stripcslashes($words));

     $must_clauses = new Elastica\Query\Bool();

     // Date restriction.
     if ($date) {
         $date_clause = new Elastica\Query\QueryString();
         $date_clause->setQuery($date);
         $date_clause->setDefaultOperator("and");
         $must_clauses->addMust($date_clause);
     }

     // Words: fuzzy or plain query string, both over body and title.
     if ($words && $fuzzy_search) {
         $fuzzy_clause = new Elastica\Query\FuzzyLikeThis();
         $fuzzy_clause->addFields(array("body", "title"));
         $fuzzy_clause->setLikeText($words);
         $fuzzy_clause->setMinSimilarity(0.3);
         $fuzzy_clause->setMaxQueryTerms(3);
         $must_clauses->addMust($fuzzy_clause);
     } elseif ($words) {
         $words_clause = new Elastica\Query\QueryString($words);
         $words_clause->setFields(array("body", "title"));
         $words_clause->setDefaultOperator("and");
         $must_clauses->addMust($words_clause);
     }

     if ($details) {
         // Details restriction.
         $details_clause = new Elastica\Query\QueryString();
         $details_clause->setQuery($details);
         $details_clause->setDefaultOperator("and");
         $must_clauses->addMust($details_clause);
     } else {
         // prat_id restriction.
         $prat_clause = new Elastica\Query\QueryString();
         $prat_clause->setQuery("prat_id:({$prats})");
         $prat_clause->setDefaultField("prat_id");
         $must_clauses->addMust($prat_clause);

         // Sejour restriction.
         if ($sejour) {
             $sejour_clause = new Elastica\Query\QueryString();
             $sejour_clause->setQuery($sejour);
             $sejour_clause->setDefaultOperator("and");
             $must_clauses->addMust($sejour_clause);
         }
     }

     $query = new Query($must_clauses);

     if (!$aggregation) {
         // No aggregation: apply paging (offset, then page size).
         $query->setFrom($start);
         $query->setLimit($limit);
     } elseif ($aggregation != "by_type") {
         // Aggregate first by the class of the referenced objects, then
         // sub-aggregate by referenced object id and, below that, by type.
         $by_class = new CSearchAggregation("Terms", "ref_class", "object_ref_class", 10);
         $by_id = new CSearchAggregation("Terms", "sub_ref_id", "object_ref_id", 100);
         $by_id_type = new CSearchAggregation("Terms", "sub_ref_type", "_type", 100);
         $by_id->_aggregation->addAggregation($by_id_type->_aggregation);
         $by_class->_aggregation->addAggregation($by_id->_aggregation);
         $query->addAggregation($by_class->_aggregation);
     } else {
         // Aggregate by type only.
         $by_type = new CSearchAggregation("Terms", "ref_type", "_type", 100);
         $query->addAggregation($by_type->_aggregation);
     }

     // Highlight the words in the body field.
     if ($words) {
         $body_highlight = array(
             "fragment_size" => 50,
             "number_of_fragments" => 3,
             "highlight_query" => array(
                 "bool" => array(
                     "must" => array("match" => array("body" => array("query" => $words))),
                     "minimum_should_match" => 1,
                 ),
             ),
         );
         $query->setHighlight(
             array(
                 "pre_tags" => array(" <em> <strong> "),
                 "post_tags" => array(" </strong> </em>"),
                 "fields" => array("body" => $body_highlight),
             )
         );
     }

     return $query;
 }
    /**
     * Finds translation suggestions for a text.
     *
     * Two query system:
     * 1) Find all strings in source language that match text
     * 2) Do another query for translations for those strings
     *
     * The connection timeout is set to 10 while the queries run and is put back
     * to its previous value afterwards, including when a query throws an exception.
     *
     * @param string $sourceLanguage value matched against the 'language' field in the first query
     * @param string $targetLanguage appended to each source id to build the ids for the second query
     * @param string $text           text to find similar source strings for
     *
     * @return array[] suggestions sorted by descending 'quality'; each has the keys
     *                 source, target, context, quality, wiki, location and uri.
     *                 Array keys are preserved by the sort, so they need not be sequential.
     *
     * @throws \Exception rethrows whatever the underlying queries throw
     */
    protected function doQuery($sourceLanguage, $targetLanguage, $text)
    {
        $connection = $this->getClient()->getConnection();
        $oldTimeout = $connection->getTimeout();
        $connection->setTimeout(10);

        try {
            $fuzzyQuery = new \Elastica\Query\FuzzyLikeThis();
            $fuzzyQuery->setLikeText($text);
            $fuzzyQuery->addFields(array('content'));
            $boostQuery = new \Elastica\Query\FunctionScore();
            if ($this->useWikimediaExtraPlugin()) {
                $boostQuery->addFunction('levenshtein_distance_score', array('text' => $text, 'field' => 'content'));
            } else {
                $groovyScript = <<<GROOVY
import org.apache.lucene.search.spell.*
new LevensteinDistance().getDistance(srctxt, _source['content'])
GROOVY;
                $script = new \Elastica\Script($groovyScript, array('srctxt' => $text), \Elastica\Script::LANG_GROOVY);
                $boostQuery->addScriptScoreFunction($script);
            }
            // The function score replaces the score of the wrapped query.
            $boostQuery->setBoostMode(\Elastica\Query\FunctionScore::BOOST_MODE_REPLACE);
            // Wrap the fuzzy query so it can be used as a filter.
            // This is slightly faster, as ES can throw away the scores by this query.
            $fuzzyFilter = new \Elastica\Filter\Query();
            $fuzzyFilter->setQuery($fuzzyQuery);
            $boostQuery->setFilter($fuzzyFilter);
            // Use filtered query to wrap function score and language filter
            $filteredQuery = new \Elastica\Query\Filtered();
            $languageFilter = new \Elastica\Filter\Term();
            $languageFilter->setTerm('language', $sourceLanguage);
            $filteredQuery->setFilter($languageFilter);
            $filteredQuery->setQuery($boostQuery);
            // The whole query
            $query = new \Elastica\Query();
            $query->setQuery($filteredQuery);
            // The interface usually displays three best candidates. These might
            // come from more than three source things, if the translations are
            // the same. In other words suggestions are grouped by the suggested
            // translation. This algorithm might not find all suggestions, if the
            // top N best matching source texts don't have equivalent translations
            // in the target language, but worse matches which we did not fetch do.
            // This code tries to balance between doing too many or too big queries
            // and not fetching enough results to show all possible suggestions.
            $sizeFirst = 100;
            $sizeSecond = $sizeFirst * 5;
            $query->setFrom(0);
            $query->setSize($sizeFirst);
            $query->setParam('_source', array('content'));
            $cutoff = isset($this->config['cutoff']) ? $this->config['cutoff'] : 0.65;
            $query->setParam('min_score', $cutoff);
            $query->setSort(array('_score', '_uid'));
            // This loop is doing two unrelated things:
            // 1) Collect the message contents and scores so that they can
            //    be accessed later for the translations we found.
            // 2) Build the list of ids for the query that fetches the translations.
            $contents = $scores = $terms = array();
            do {
                $resultset = $this->getType()->search($query);
                if (count($resultset) === 0) {
                    break;
                }
                foreach ($resultset->getResults() as $result) {
                    $data = $result->getData();
                    $score = $result->getScore();
                    // Drop the last "/segment" of the document id to get the source id.
                    $sourceId = preg_replace('~/[^/]+$~', '', $result->getId());
                    $contents[$sourceId] = $data['content'];
                    $scores[$sourceId] = $score;
                    $terms[] = "{$sourceId}/{$targetLanguage}";
                }
                // Check if it looks like that we are hitting the long tail already.
                // Otherwise, we'll do a query to fetch some more to reach a "sane"
                // breaking point, i.e. include all suggestions with same content
                // for reliable used X times statistics.
                if (count(array_unique($scores)) > 5) {
                    break;
                }
                // Okay, We are now in second iteration of the loop. We already got
                // lots of suggestions. We will give up for now even if it means we
                // return in some sense incomplete results.
                if (count($resultset) === $sizeSecond) {
                    break;
                }
                // After the first query, the smallest score is the new threshold.
                // ($score still holds the score of the last result of this page.)
                $query->setParam('min_score', $score);
                $query->setFrom($query->getParam('size') + $query->getParam('from'));
                $query->setSize($sizeSecond);
                // Break if we already got all hits
            } while ($resultset->getTotalHits() > count($contents));
            $suggestions = array();
            // Skip second query if first query found nothing.
            if ($terms !== array()) {
                $idQuery = new \Elastica\Query\Terms();
                $idQuery->setTerms('_id', $terms);
                $query = new \Elastica\Query($idQuery);
                $query->setSize(25);
                $query->setParam('_source', array('wiki', 'uri', 'content', 'localid'));
                $resultset = $this->getType()->search($query);
                foreach ($resultset->getResults() as $result) {
                    $data = $result->getData();
                    // Construct the matching source id
                    $sourceId = preg_replace('~/[^/]+$~', '', $result->getId());
                    $suggestions[] = array('source' => $contents[$sourceId], 'target' => $data['content'], 'context' => $data['localid'], 'quality' => $scores[$sourceId], 'wiki' => $data['wiki'], 'location' => $data['localid'] . '/' . $targetLanguage, 'uri' => $data['uri']);
                }
                // Ensure results are in quality order (highest quality first)
                uasort($suggestions, function ($a, $b) {
                    if ($a['quality'] === $b['quality']) {
                        return 0;
                    }
                    return $a['quality'] < $b['quality'] ? 1 : -1;
                });
            }
        } catch (\Exception $e) {
            // A failed query must not leave the temporary timeout on the connection.
            // Catch-and-rethrow is used so this also works where "finally" is unavailable.
            $connection->setTimeout($oldTimeout);
            throw $e;
        }

        $connection->setTimeout($oldTimeout);
        return $suggestions;
    }