/**
 * Runs the fuzzy query 'tesd~0.4' against the sample index and checks the
 * number of hits plus the id, score and path of every hit.
 *
 * Each hit is compared against the expected row stored under the same key.
 * The fuzzy default prefix length is forced to 0 while the test runs and is
 * put back to its previous value afterwards, also when an assertion fails.
 */
public function testFuzzyQuery()
{
    $index = Lucene\Lucene::open(__DIR__ . '/_index23Sample/_files');

    // Remember the current static default so it can be restored on every exit path.
    $defaultPrefixLength = Query\Fuzzy::getDefaultPrefixLength();
    Query\Fuzzy::setDefaultPrefixLength(0);

    try {
        $hits = $index->find('tesd~0.4');

        // PHPUnit convention: expected value first, actual value second.
        $this->assertEquals(9, count($hits));

        // Rows are array(document id, score, path).
        $expectedResultset = array(
            array(2, 0.037139, 'IndexSource/contributing.patches.html'),
            array(0, 0.008735, 'IndexSource/contributing.documentation.html'),
            array(7, 0.002449, 'IndexSource/contributing.bugs.html'),
            array(1, 0.000483, 'IndexSource/contributing.wishlist.html'),
            array(3, 0.000483, 'IndexSource/about-pear.html'),
            array(9, 0.000483, 'IndexSource/core.html'),
            array(5, 0.000414, 'IndexSource/authors.html'),
            array(8, 0.000414, 'IndexSource/contributing.html'),
            array(4, 0.000345, 'IndexSource/copyright.html'),
        );

        foreach ($hits as $resId => $hit) {
            $this->assertEquals($expectedResultset[$resId][0], $hit->id);
            // Scores are floats, so compare with a tolerance instead of exact equality.
            $this->assertTrue(
                abs($hit->score - $expectedResultset[$resId][1]) < 1.0E-6,
                'Unexpected score for hit #' . $resId
            );
            $this->assertEquals($expectedResultset[$resId][2], $hit->path);
        }
    } catch (\Exception $e) {
        // Do not leak the modified static default when the test fails.
        Query\Fuzzy::setDefaultPrefixLength($defaultPrefixLength);
        throw $e;
    }

    Query\Fuzzy::setDefaultPrefixLength($defaultPrefixLength);
}
/**
 * Writes the values held in $this->_wildcardMinPrefix and
 * $this->_defaultPrefixLength back into the static settings of the
 * wildcard and fuzzy query classes.
 */
public function tearDown()
{
    $wildcardMinPrefix = $this->_wildcardMinPrefix;
    Query\Wildcard::setMinPrefixLength($wildcardMinPrefix);

    $fuzzyPrefixLength = $this->_defaultPrefixLength;
    Query\Fuzzy::setDefaultPrefixLength($fuzzyPrefixLength);
}
/**
 * Query specific matches highlighting.
 *
 * Highlights only when the word contains no '*' or '?' wildcard characters
 * and the default analyzer turns it into exactly one token. In every other
 * case the method returns without touching the highlighter.
 *
 * @param Highlighter $highlighter Highlighter object (also contains doc for highlighting)
 */
protected function _highlightMatches(Highlighter $highlighter)
{
    // Field detection is skipped: all fields are expected to be present in the
    // HTML body and are not differentiated.
    // Exact term matching recognition is skipped as well: keyword fields
    // highlighting is not supported.

    // Probe for PCRE unicode support with warnings captured by the error handler.
    // @todo the check for PCRE unicode support may be performed through
    //       Zend_Environment in some future
    ErrorHandler::start(E_WARNING);
    $unicodeProbe = preg_match('/\\pL/u', 'a');
    ErrorHandler::stop();

    // Split the word on wildcard characters; more than one piece means the
    // word is a wildcard pattern.
    $pieces = ($unicodeProbe == 1)
        ? preg_split('/[*?]/u', iconv($this->_encoding, 'UTF-8', $this->_word))
        : preg_split('/[*?]/', $this->_word);

    if (count($pieces) > 1) {
        // Wildcard query: nothing to highlight here.
        return;
    }

    $tokens = Analyzer\Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);

    // Zero tokens: "insignificant" word. Several tokens: fuzzy search is
    // supported only for non-multiple word terms. Either way, do nothing.
    if (count($tokens) != 1) {
        return;
    }

    // Exactly one token: delegate highlighting to a fuzzy query built from it.
    $fuzzyTerm  = new Index\Term($tokens[0]->getTermText(), $this->_field);
    $fuzzyQuery = new Query\Fuzzy($fuzzyTerm, $this->_minimumSimilarity);
    $fuzzyQuery->_highlightMatches($highlighter);
}