/**
 * Adds one weight function per configured template boost to the given
 * function score query.
 *
 * Every entry of $this->boostTemplates maps a template name to a weight;
 * the weight is multiplied by $this->weight and applied to documents whose
 * 'template' field matches that name. Nothing is added when no template
 * boosts are configured.
 *
 * @param FunctionScore $functionScore Query that receives the weight functions.
 */
public function append(FunctionScore $functionScore)
{
    // Bail out early: there is nothing to contribute without template boosts.
    if (!$this->boostTemplates) {
        return;
    }

    foreach ($this->boostTemplates as $templateName => $templateWeight) {
        // Match on the template name ...
        $templateMatch = new \Elastica\Query\Match();
        $templateMatch->setFieldQuery('template', $templateName);

        // ... wrapped as a cached filter that selects the documents to boost.
        $templateFilter = new \Elastica\Filter\Query();
        $templateFilter->setQuery($templateMatch);
        $templateFilter->setCached(true);

        $functionScore->addWeightFunction($templateWeight * $this->weight, $templateFilter);
    }
}
Example #2
0
 // Build the search query, narrowed by the optional "collection" and
 // "speaker" request parameters. Every requested facet is added to a single
 // AND filter so the facets restrict the result set together; building a
 // separate filtered query per facet would let the last facet silently
 // replace the earlier ones.
 // NOTE(review): $queryString is not defined in this part of the script; it
 // is presumably the Elastica query built from the user's search terms.
 $elasticaFilterAnd = new Elastica\Filter\BoolAnd();
 $hasFilter = false;
 if (isset($_GET['collection']) && $_GET['collection'] !== 'all') {
     // Restrict to the requested collection; 'all' means "do not filter".
     $query_collection = new Elastica\Query\Match();
     $query_collection->setFieldQuery('content_type', $_GET['collection']);
     $elasticaTypeOfFilter = new Elastica\Filter\Query();
     $elasticaTypeOfFilter->setQuery($query_collection);
     $elasticaFilterAnd->addFilter($elasticaTypeOfFilter);
     $hasFilter = true;
 }
 if (isset($_GET['speaker'])) {
     // Restrict to the requested speaker, matched as a phrase.
     $query_collection = new Elastica\Query\Match();
     $query_collection->setFieldQuery('speaker', $_GET['speaker'])->setFieldParam('speaker', 'type', 'phrase');
     $elasticaTypeOfFilter = new Elastica\Filter\Query();
     $elasticaTypeOfFilter->setQuery($query_collection);
     $elasticaFilterAnd->addFilter($elasticaTypeOfFilter);
     $hasFilter = true;
 }
 if ($hasFilter) {
     $filteredQuery = new Elastica\Query\Filtered($queryString, $elasticaFilterAnd);
 } elseif (!isset($filteredQuery)) {
     // No facet was requested and nothing assigned $filteredQuery earlier:
     // search with the plain query instead of reading an undefined variable,
     // which would produce a query that ignores the search terms.
     $filteredQuery = $queryString;
 }
 $query = new Elastica\Query($filteredQuery);
 //SORT BY
 if (isset($_GET['sort-by'])) {
     if (isset($_GET['sort-by-direction'])) {
         $sortdirection = $_GET['sort-by-direction'];
     } else {
         $sortdirection = "desc";
     }
     $sort = array($_GET['sort-by'] => array("order" => $sortdirection));
     //order by clause
    /**
     * Looks up translation suggestions for a piece of text.
     *
     * Two query system:
     * 1) Find all strings in the source language that match the text.
     * 2) Do another query for the translations of those strings.
     *
     * The connection timeout is set to 10 for the duration of the lookup and
     * is always restored afterwards, including when a search throws.
     *
     * @param string $sourceLanguage Value the 'language' field of candidate source strings must have.
     * @param string $targetLanguage Language suffix appended to source ids to find their translations.
     * @param string $text Text for which similar source strings are searched.
     * @return array[] Suggestions ordered by descending quality (original keys are
     *  preserved). Each suggestion has the keys 'source', 'target', 'context',
     *  'quality', 'wiki', 'location' and 'uri'.
     */
    protected function doQuery($sourceLanguage, $targetLanguage, $text)
    {
        $connection = $this->getClient()->getConnection();
        $oldTimeout = $connection->getTimeout();
        $connection->setTimeout(10);
        $suggestions = array();
        // Everything that can throw runs inside try/finally so the connection
        // never keeps the temporary timeout after a failed search.
        try {
            $fuzzyQuery = new \Elastica\Query\FuzzyLikeThis();
            $fuzzyQuery->setLikeText($text);
            $fuzzyQuery->addFields(array('content'));
            $boostQuery = new \Elastica\Query\FunctionScore();
            if ($this->useWikimediaExtraPlugin()) {
                $boostQuery->addFunction('levenshtein_distance_score', array('text' => $text, 'field' => 'content'));
            } else {
                $groovyScript = <<<GROOVY
import org.apache.lucene.search.spell.*
new LevensteinDistance().getDistance(srctxt, _source['content'])
GROOVY;
                $script = new \Elastica\Script($groovyScript, array('srctxt' => $text), \Elastica\Script::LANG_GROOVY);
                $boostQuery->addScriptScoreFunction($script);
            }
            // The function score replaces the fuzzy query's own score.
            $boostQuery->setBoostMode(\Elastica\Query\FunctionScore::BOOST_MODE_REPLACE);
            // Wrap the fuzzy query so it can be used as a filter.
            // This is slightly faster, as ES can throw away the scores by this query.
            $fuzzyFilter = new \Elastica\Filter\Query();
            $fuzzyFilter->setQuery($fuzzyQuery);
            $boostQuery->setFilter($fuzzyFilter);
            // Use filtered query to wrap function score and language filter
            $filteredQuery = new \Elastica\Query\Filtered();
            $languageFilter = new \Elastica\Filter\Term();
            $languageFilter->setTerm('language', $sourceLanguage);
            $filteredQuery->setFilter($languageFilter);
            $filteredQuery->setQuery($boostQuery);
            // The whole query
            $query = new \Elastica\Query();
            $query->setQuery($filteredQuery);
            // The interface usually displays three best candidates. These might
            // come from more than three source things, if the translations are
            // the same. In other words suggestions are grouped by the suggested
            // translation. This algorithm might not find all suggestions, if the
            // top N best matching source texts don't have equivalent translations
            // in the target language, but worse matches which we did not fetch do.
            // This code tries to balance between doing too many or too big queries
            // and not fetching enough results to show all possible suggestions.
            $sizeFirst = 100;
            $sizeSecond = $sizeFirst * 5;
            $query->setFrom(0);
            $query->setSize($sizeFirst);
            $query->setParam('_source', array('content'));
            // Minimum score a source string needs; configurable, defaults to 0.65.
            $cutoff = isset($this->config['cutoff']) ? $this->config['cutoff'] : 0.65;
            $query->setParam('min_score', $cutoff);
            $query->setSort(array('_score', '_uid'));
            // This loop is doing two unrelated things:
            // 1) Collect the message contents and scores so that they can
            //    be accessed later for the translations we found.
            // 2) Build the list of ids for the query that fetches the translations.
            $contents = $scores = $terms = array();
            do {
                $resultset = $this->getType()->search($query);
                if (count($resultset) === 0) {
                    break;
                }
                foreach ($resultset->getResults() as $result) {
                    $data = $result->getData();
                    $score = $result->getScore();
                    // Strip the last path segment of the document id to get the source id.
                    $sourceId = preg_replace('~/[^/]+$~', '', $result->getId());
                    $contents[$sourceId] = $data['content'];
                    $scores[$sourceId] = $score;
                    $terms[] = "{$sourceId}/{$targetLanguage}";
                }
                // Check if it looks like that we are hitting the long tail already.
                // Otherwise, we'll do a query to fetch some more to reach a "sane"
                // breaking point, i.e. include all suggestions with same content
                // for reliable used X times statistics.
                if (count(array_unique($scores)) > 5) {
                    break;
                }
                // Okay, We are now in second iteration of the loop. We already got
                // lots of suggestions. We will give up for now even if it means we
                // return in some sense incomplete results.
                if (count($resultset) === $sizeSecond) {
                    break;
                }
                // After the first query, the smallest score is the new threshold.
                // ($score still holds the score of the last result processed above.)
                $query->setParam('min_score', $score);
                $query->setFrom($query->getParam('size') + $query->getParam('from'));
                $query->setSize($sizeSecond);
                // Break if we already got all hits
            } while ($resultset->getTotalHits() > count($contents));
            // Skip second query if first query found nothing.
            if ($terms !== array()) {
                $idQuery = new \Elastica\Query\Terms();
                $idQuery->setTerms('_id', $terms);
                $query = new \Elastica\Query($idQuery);
                $query->setSize(25);
                $query->setParam('_source', array('wiki', 'uri', 'content', 'localid'));
                $resultset = $this->getType()->search($query);
                foreach ($resultset->getResults() as $result) {
                    $data = $result->getData();
                    // Construct the matching source id
                    $sourceId = preg_replace('~/[^/]+$~', '', $result->getId());
                    $suggestions[] = array(
                        'source' => $contents[$sourceId],
                        'target' => $data['content'],
                        'context' => $data['localid'],
                        'quality' => $scores[$sourceId],
                        'wiki' => $data['wiki'],
                        'location' => $data['localid'] . '/' . $targetLanguage,
                        'uri' => $data['uri'],
                    );
                }
                // Ensure results are in quality order (best first); keys are kept.
                uasort($suggestions, function ($a, $b) {
                    if ($a['quality'] === $b['quality']) {
                        return 0;
                    }
                    return $a['quality'] < $b['quality'] ? 1 : -1;
                });
            }
        } finally {
            $connection->setTimeout($oldTimeout);
        }
        return $suggestions;
    }