/**
 * Translates a generic Query object into a flat list of search arguments.
 *
 * The first element of the returned list is a Boolean query. When the given
 * Query carries a query string, the parsed string is attached to that Boolean
 * query through addSubquery() with `true` as second argument (presumably
 * marking it as required - confirm against the Boolean query API).
 * The Boolean query is followed by three entries per sort definition:
 * the sort key, SORT_REGULAR and SORT_ASC or SORT_DESC.
 *
 * Side effects: when a query string is present, the default search field and
 * the query parser's default operator are changed through static calls.
 *
 * @param Query $query
 *
 * @return array
 */
public function mapQuery(Query $query)
{
    $booleanQuery = new Boolean();

    if ($query->hasQueryString()) {
        Lucene::setDefaultSearchField($query->getQueryString()->getDefaultField());

        $combineWithAnd = $query->getQueryString()->getDefaultOperator() == Query::OPERATOR_AND;
        if ($combineWithAnd) {
            QueryParser::setDefaultOperator(QueryParser::B_AND);
        } else {
            QueryParser::setDefaultOperator(QueryParser::B_OR);
        }

        $searchText = $query->getQueryString()->getQuery();

        if ($searchText === "*") {
            // A lone "*" is not handed to the parser; it is wrapped in a
            // Wildcard query with the minimum prefix length lowered to 0.
            // NOTE(review): if setMinPrefixLength() is static, this changes
            // the setting for every later Wildcard query as well - confirm.
            $parsedQuery = new Wildcard(new Term($searchText));
            $parsedQuery->setMinPrefixLength(0);
        } else {
            $parsedQuery = QueryParser::parse($searchText);
        }

        $booleanQuery->addSubquery($parsedQuery, true);
    }

    $searchArguments = array($booleanQuery);

    foreach ($query->getSort() as $sort) {
        // Each sort entry is read through key()/current(), i.e. at the
        // entry's current internal pointer position.
        array_push(
            $searchArguments,
            key($sort),
            SORT_REGULAR,
            current($sort) == 'asc' ? SORT_ASC : SORT_DESC
        );
    }

    return $searchArguments;
}
/**
 * Searches the "_index23Sample" index for the wildcard expression "*cont*"
 * and verifies the number of hits as well as the id, score (within 1.0E-6)
 * and path of every hit, in result order.
 *
 * The Wildcard minimum prefix length is lowered to 0 for the duration of the
 * search and put back afterwards, also when an assertion fails, so that the
 * changed static setting does not leak into other tests.
 */
public function testWildcardQuery()
{
    $index = Lucene\Lucene::open(__DIR__ . '/_index23Sample/_files');

    $wildcardMinPrefix = Query\Wildcard::getMinPrefixLength();
    Query\Wildcard::setMinPrefixLength(0);

    // Each row: array(expected id, expected score, expected path).
    $expectedResultset = array(
        array(8, 0.328087, 'IndexSource/contributing.html'),
        array(2, 0.318592, 'IndexSource/contributing.patches.html'),
        array(7, 0.260137, 'IndexSource/contributing.bugs.html'),
        array(0, 0.203372, 'IndexSource/contributing.documentation.html'),
        array(1, 0.202366, 'IndexSource/contributing.wishlist.html'),
        array(4, 0.052931, 'IndexSource/copyright.html'),
        array(3, 0.01707, 'IndexSource/about-pear.html'),
        array(5, 0.01015, 'IndexSource/authors.html'),
        array(9, 0.003504, 'IndexSource/core.html'),
    );

    // catch-and-rethrow is used instead of "finally" to stay compatible with
    // PHP versions that predate it.
    $failure = null;
    try {
        $hits = $index->find('*cont*');

        // PHPUnit expects (expected, actual); the reverse order produces
        // misleading failure messages.
        $this->assertEquals(count($expectedResultset), count($hits));

        foreach ($hits as $resId => $hit) {
            list($expectedId, $expectedScore, $expectedPath) = $expectedResultset[$resId];

            $this->assertEquals($expectedId, $hit->id);
            $this->assertTrue(abs($hit->score - $expectedScore) < 1.0E-6);
            $this->assertEquals($expectedPath, $hit->path);
        }
    } catch (\Exception $e) {
        $failure = $e;
    }

    Query\Wildcard::setMinPrefixLength($wildcardMinPrefix);

    if ($failure !== null) {
        throw $failure;
    }
}
/**
 * Puts the Wildcard minimum prefix length and the Fuzzy default prefix length
 * back to the values held in $this->_wildcardMinPrefix and
 * $this->_defaultPrefixLength (presumably captured before the test ran -
 * confirm against the fixture set-up).
 */
public function tearDown()
{
    $savedWildcardMinPrefix = $this->_wildcardMinPrefix;
    Query\Wildcard::setMinPrefixLength($savedWildcardMinPrefix);

    $savedFuzzyPrefixLength = $this->_defaultPrefixLength;
    Query\Fuzzy::setDefaultPrefixLength($savedFuzzyPrefixLength);
}
/**
 * Searches the "_indexSample" index for the wildcard expression "*cont*"
 * and verifies the number of hits as well as the id, score (within 1.0E-6)
 * and path of every hit, in result order.
 *
 * The Wildcard minimum prefix length is lowered to 0 for the duration of the
 * search and put back afterwards, also when an assertion fails, so that the
 * changed static setting does not leak into other tests.
 */
public function testWildcardQuery()
{
    $index = Lucene\Lucene::open(__DIR__ . '/_indexSample/_files');

    $wildcardMinPrefix = Query\Wildcard::getMinPrefixLength();
    Query\Wildcard::setMinPrefixLength(0);

    // Each row: array(expected id, expected score, expected path).
    $expectedResultset = array(
        array(8, 0.125253, 'IndexSource/contributing.html'),
        array(4, 0.112122, 'IndexSource/copyright.html'),
        array(2, 0.108491, 'IndexSource/contributing.patches.html'),
        array(7, 0.077716, 'IndexSource/contributing.bugs.html'),
        array(0, 0.05076, 'IndexSource/contributing.documentation.html'),
        array(1, 0.049163, 'IndexSource/contributing.wishlist.html'),
        array(3, 0.036159, 'IndexSource/about-pear.html'),
        array(5, 0.0215, 'IndexSource/authors.html'),
        array(9, 0.007422, 'IndexSource/core.html'),
    );

    // catch-and-rethrow is used instead of "finally" to stay compatible with
    // PHP versions that predate it.
    $failure = null;
    try {
        $hits = $index->find('*cont*');

        // PHPUnit expects (expected, actual); the reverse order produces
        // misleading failure messages.
        $this->assertEquals(count($expectedResultset), count($hits));

        foreach ($hits as $resId => $hit) {
            list($expectedId, $expectedScore, $expectedPath) = $expectedResultset[$resId];

            $this->assertEquals($expectedId, $hit->id);
            $this->assertTrue(abs($hit->score - $expectedScore) < 1.0E-6);
            $this->assertEquals($expectedPath, $hit->path);
        }
    } catch (\Exception $e) {
        $failure = $e;
    }

    Query\Wildcard::setMinPrefixLength($wildcardMinPrefix);

    if ($failure !== null) {
        throw $failure;
    }
}
/**
 * Query specific matches highlighting.
 *
 * Two cases are handled, in this order:
 *  1. The query word contains "*" or "?": a wildcard pattern is rebuilt from
 *     the analyzed pieces of the word and highlighting is delegated to a
 *     Wildcard query. If any piece yields more than one token, nothing is
 *     highlighted.
 *  2. Otherwise the word is tokenized as a whole; zero tokens highlight
 *     nothing, one token is highlighted as a string, several tokens are
 *     highlighted as an array of term texts.
 *
 * Field detection and exact term matching are intentionally not performed:
 * all fields are expected to be present in the HTML body without distinction,
 * and keyword field highlighting is not supported.
 *
 * @param \Zend\Search\Lucene\Search\Highlighter $highlighter Highlighter object (also contains doc for highlighting)
 */
protected function _highlightMatches(Highlighter $highlighter)
{
    // -------------------------------------
    // Wildcard queries

    /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
    // Probe whether the /u modifier and \pL are usable; warnings from the
    // probe are suppressed and anything but a result of 1 selects the
    // non-unicode branch.
    $unicodeProbe = @preg_match('/\\pL/u', 'a');

    if ($unicodeProbe == 1) {
        $searchWord    = iconv($this->_encoding, 'UTF-8', $this->_word);
        $splitRegex    = '/[*?]/u';
        $pieceEncoding = 'UTF-8';
    } else {
        $searchWord    = $this->_word;
        $splitRegex    = '/[*?]/';
        $pieceEncoding = $this->_encoding;
    }

    $pieces = preg_split($splitRegex, $searchWord, -1, PREG_SPLIT_OFFSET_CAPTURE);

    if (count($pieces) > 1) {
        // More than one piece means at least one wildcard character was found.
        $wildcardText = '';

        foreach ($pieces as $position => $piece) {
            // Every piece but the first is preceded by the wildcard character
            // that split it off; that character sits directly before the
            // piece's captured offset.
            if ($position != 0) {
                $wildcardText .= $searchWord[$piece[1] - 1];
            }

            // Each piece has to be a single word in terms of the default analyzer.
            $pieceTokens = Analyzer\Analyzer::getDefault()->tokenize($piece[0], $pieceEncoding);

            if (count($pieceTokens) > 1) {
                // Nothing is highlighted in this case.
                return;
            }

            foreach ($pieceTokens as $pieceToken) {
                $wildcardText .= $pieceToken->getTermText();
            }
        }

        $wildcardQuery = new Query\Wildcard(new Index\Term($wildcardText, $this->_field));
        $wildcardQuery->_highlightMatches($highlighter);

        return;
    }

    // -------------------------------------
    // One-term, multi-term and "insignificant" queries

    $tokens     = Analyzer\Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);
    $tokenCount = count($tokens);

    if ($tokenCount == 0) {
        // Insignificant query: nothing to highlight.
        return;
    }

    if ($tokenCount == 1) {
        $highlighter->highlight($tokens[0]->getTermText());

        return;
    }

    // Several tokens: collect their term texts and highlight them together.
    $termTexts = array();
    foreach ($tokens as $token) {
        $termTexts[] = $token->getTermText();
    }

    $highlighter->highlight($termTexts);
}