Ejemplo n.º 1
0
    /**
     * Translates a generic Query object into a flat list of search arguments
     * built with the Lucene query classes.
     *
     * The first element of the returned list is always a Boolean query. When
     * the Query carries a query string, that string is either parsed by
     * QueryParser or, for the literal "*", turned into a Wildcard query, and
     * the result is attached to the Boolean query with the sign flag `true`
     * (presumably "required" - confirm against Boolean::addSubquery()).
     *
     * Every sort entry then appends three more elements: the sort field, the
     * SORT_REGULAR flag and SORT_ASC or SORT_DESC.
     *
     * Side effect: when a query string is present, the default search field
     * (Lucene) and the default boolean operator (QueryParser) are overwritten
     * through static setters.
     *
     * @param Query $query
     *
     * @return array Boolean query followed by (field, sort type, sort order) triples
     */
    public function mapQuery(Query $query)
    {
        $booleanQuery = new Boolean();

        if ($query->hasQueryString()) {
            Lucene::setDefaultSearchField($query->getQueryString()->getDefaultField());

            // Anything other than OPERATOR_AND falls back to OR.
            if ($query->getQueryString()->getDefaultOperator() == Query::OPERATOR_AND) {
                QueryParser::setDefaultOperator(QueryParser::B_AND);
            } else {
                QueryParser::setDefaultOperator(QueryParser::B_OR);
            }

            $searchText = $query->getQueryString()->getQuery();

            if ($searchText !== '*') {
                $clause = QueryParser::parse($searchText);
            } else {
                // A bare "*" is not handed to the parser; it becomes a
                // wildcard term with the minimum prefix length set to 0.
                $clause = new Wildcard(new Term($searchText));
                $clause->setMinPrefixLength(0);
            }

            $booleanQuery->addSubquery($clause, true);
        }

        $findArguments = array($booleanQuery);

        foreach ($query->getSort() as $sortSpec) {
            // Each sort spec is read as a single "field => direction" pair.
            $field = key($sortSpec);
            $direction = current($sortSpec);

            if ($direction == 'asc') {
                $order = SORT_ASC;
            } else {
                $order = SORT_DESC;
            }

            array_push($findArguments, $field, SORT_REGULAR, $order);
        }

        return $findArguments;
    }
Ejemplo n.º 2
0
 /**
  * Runs the wildcard search '*cont*' against the '_index23Sample' index and
  * checks the id, score and path of each of the 9 expected hits, in order.
  *
  * The wildcard minimum prefix length is static state shared with other
  * tests, so it is restored in a finally block even when an assertion fails
  * or the search throws.
  */
 public function testWildcardQuery()
 {
     $index = Lucene\Lucene::open(dirname(__FILE__) . '/_index23Sample/_files');

     // Expected (id, score, path) for every hit, in result order.
     $expectedResultset = array(
         array(8, 0.328087, 'IndexSource/contributing.html'),
         array(2, 0.318592, 'IndexSource/contributing.patches.html'),
         array(7, 0.260137, 'IndexSource/contributing.bugs.html'),
         array(0, 0.203372, 'IndexSource/contributing.documentation.html'),
         array(1, 0.202366, 'IndexSource/contributing.wishlist.html'),
         array(4, 0.052931, 'IndexSource/copyright.html'),
         array(3, 0.01707, 'IndexSource/about-pear.html'),
         array(5, 0.01015, 'IndexSource/authors.html'),
         array(9, 0.003504, 'IndexSource/core.html'),
     );

     // Remember the current setting, then lower it to 0 for this search
     // (presumably needed because '*cont*' has no literal prefix).
     $wildcardMinPrefix = Query\Wildcard::getMinPrefixLength();
     Query\Wildcard::setMinPrefixLength(0);

     try {
         $hits = $index->find('*cont*');

         // PHPUnit convention: expected value first, actual value second.
         $this->assertEquals(9, count($hits));

         foreach ($hits as $resId => $hit) {
             $this->assertEquals($expectedResultset[$resId][0], $hit->id);
             // Scores are floats; compare with an absolute tolerance.
             $this->assertTrue(abs($hit->score - $expectedResultset[$resId][1]) < 1.0E-6);
             $this->assertEquals($expectedResultset[$resId][2], $hit->path);
         }
     } finally {
         // Always put the static setting back so other tests are unaffected.
         Query\Wildcard::setMinPrefixLength($wildcardMinPrefix);
     }
 }
Ejemplo n.º 3
0
 /**
  * Writes the prefix-length values held in this test case's properties back
  * to the Fuzzy and Wildcard query classes through their static setters.
  *
  * NOTE(review): the properties are presumably captured in setUp(), which is
  * not visible here - confirm.
  */
 public function tearDown()
 {
     Query\Fuzzy::setDefaultPrefixLength($this->_defaultPrefixLength);
     Query\Wildcard::setMinPrefixLength($this->_wildcardMinPrefix);
 }
Ejemplo n.º 4
0
 /**
  * Runs the wildcard search '*cont*' against the '_indexSample' index and
  * checks the id, score and path of each of the 9 expected hits, in order.
  *
  * The wildcard minimum prefix length is static state shared with other
  * tests, so it is restored in a finally block even when an assertion fails
  * or the search throws.
  */
 public function testWildcardQuery()
 {
     $index = Lucene\Lucene::open(__DIR__ . '/_indexSample/_files');

     // Expected (id, score, path) for every hit, in result order.
     $expectedResultset = array(
         array(8, 0.125253, 'IndexSource/contributing.html'),
         array(4, 0.112122, 'IndexSource/copyright.html'),
         array(2, 0.108491, 'IndexSource/contributing.patches.html'),
         array(7, 0.07771599999999999, 'IndexSource/contributing.bugs.html'),
         array(0, 0.05076, 'IndexSource/contributing.documentation.html'),
         array(1, 0.049163, 'IndexSource/contributing.wishlist.html'),
         array(3, 0.036159, 'IndexSource/about-pear.html'),
         array(5, 0.0215, 'IndexSource/authors.html'),
         array(9, 0.007422, 'IndexSource/core.html'),
     );

     // Remember the current setting, then lower it to 0 for this search
     // (presumably needed because '*cont*' has no literal prefix).
     $wildcardMinPrefix = Query\Wildcard::getMinPrefixLength();
     Query\Wildcard::setMinPrefixLength(0);

     try {
         $hits = $index->find('*cont*');

         // PHPUnit convention: expected value first, actual value second.
         $this->assertEquals(9, count($hits));

         foreach ($hits as $resId => $hit) {
             $this->assertEquals($expectedResultset[$resId][0], $hit->id);
             // Scores are floats; compare with an absolute tolerance.
             $this->assertTrue(abs($hit->score - $expectedResultset[$resId][1]) < 1.0E-6);
             $this->assertEquals($expectedResultset[$resId][2], $hit->path);
         }
     } finally {
         // Always put the static setting back so other tests are unaffected.
         Query\Wildcard::setMinPrefixLength($wildcardMinPrefix);
     }
 }
Ejemplo n.º 5
0
 /**
  * Highlights the matches of this query's word through the given highlighter.
  *
  * A word containing '*' or '?' is treated as a wildcard pattern: the pieces
  * between the wildcards are run through the default analyzer, reassembled
  * with their wildcard characters, and the highlighting is delegated to a
  * Query\Wildcard built from that pattern. If any piece yields more than one
  * token, nothing is highlighted.
  *
  * A word without wildcards is tokenized with the default analyzer: no token
  * means nothing is highlighted, one token highlights that single term text,
  * and several tokens highlight the list of their term texts.
  *
  * @param \Zend\Search\Lucene\Search\Highlighter $highlighter  Highlighter object (also contains doc for highlighting)
  */
 protected function _highlightMatches(Highlighter $highlighter)
 {
     // Field detection is skipped on purpose: all fields are expected to be
     // present in the HTML body and are not told apart.
     // Exact term matching is skipped as well: keyword field highlighting is
     // not supported.

     // -------------------------------------
     // Wildcard query recognition
     /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
     $pcreUnicode = (@preg_match('/\\pL/u', 'a') == 1);
     if (!$pcreUnicode) {
         // No unicode-aware PCRE: work on the word in its own encoding.
         $wildcardSource = $this->_word;
         $splitRegex = '/[*?]/';
         $pieceEncoding = $this->_encoding;
     } else {
         // Unicode-aware PCRE: convert the word to UTF-8 and split with /u.
         $wildcardSource = iconv($this->_encoding, 'UTF-8', $this->_word);
         $splitRegex = '/[*?]/u';
         $pieceEncoding = 'UTF-8';
     }

     // Each piece is array(text, offset) because of PREG_SPLIT_OFFSET_CAPTURE.
     $pieces = preg_split($splitRegex, $wildcardSource, -1, PREG_SPLIT_OFFSET_CAPTURE);

     if (count($pieces) > 1) {
         // More than one piece means at least one wildcard was present.
         $wildcardPattern = '';

         foreach ($pieces as $position => $piece) {
             list($pieceText, $pieceOffset) = $piece;

             // The character just before every piece except the first is the
             // wildcard that separated it from the previous piece.
             if ($position != 0) {
                 $wildcardPattern .= $wildcardSource[$pieceOffset - 1];
             }

             // Every piece must be at most a single word for the current analyzer.
             $pieceTokens = Analyzer\Analyzer::getDefault()->tokenize($pieceText, $pieceEncoding);
             if (count($pieceTokens) > 1) {
                 // Give up: nothing is highlighted.
                 return;
             }

             foreach ($pieceTokens as $pieceToken) {
                 $wildcardPattern .= $pieceToken->getTermText();
             }
         }

         $wildcardQuery = new Query\Wildcard(new Index\Term($wildcardPattern, $this->_field));
         $wildcardQuery->_highlightMatches($highlighter);

         return;
     }

     // -------------------------------------
     // One-term, multi-term and "insignificant" query recognition
     $tokens = Analyzer\Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);
     $tokenCount = count($tokens);

     if ($tokenCount == 0) {
         // Insignificant query: nothing to highlight.
         return;
     }

     if ($tokenCount == 1) {
         // One-term query: pass the single term text as a string.
         $termTexts = $tokens[0]->getTermText();
     } else {
         // Multi-term query: pass the term texts as a list.
         $termTexts = array();
         foreach ($tokens as $token) {
             $termTexts[] = $token->getTermText();
         }
     }

     $highlighter->highlight($termTexts);
 }