예제 #1
0
 /**
  * Runs the fuzzy query 'tesd~0.4' against the sample index and checks the
  * number of hits plus the id, score and path of every hit, in order.
  *
  * The default fuzzy prefix length is process-wide static state. It is forced
  * to 0 for the duration of the test and restored afterwards, including when
  * an assertion fails, so the modified value cannot leak into other tests.
  */
 public function testFuzzyQuery()
 {
     $index = Lucene\Lucene::open(__DIR__ . '/_index23Sample/_files');

     $defaultPrefixLength = Query\Fuzzy::getDefaultPrefixLength();
     Query\Fuzzy::setDefaultPrefixLength(0);

     try {
         $hits = $index->find('tesd~0.4');

         // PHPUnit convention: expected value first, actual value second.
         $this->assertEquals(9, count($hits));

         // Each row is (document id, score, path) in expected ranking order.
         $expectedResultset = array(
             array(2, 0.037139, 'IndexSource/contributing.patches.html'),
             array(0, 0.008735, 'IndexSource/contributing.documentation.html'),
             array(7, 0.002449, 'IndexSource/contributing.bugs.html'),
             array(1, 0.000483, 'IndexSource/contributing.wishlist.html'),
             array(3, 0.000483, 'IndexSource/about-pear.html'),
             array(9, 0.000483, 'IndexSource/core.html'),
             array(5, 0.000414, 'IndexSource/authors.html'),
             array(8, 0.000414, 'IndexSource/contributing.html'),
             array(4, 0.000345, 'IndexSource/copyright.html'),
         );

         foreach ($hits as $resId => $hit) {
             $this->assertEquals($expectedResultset[$resId][0], $hit->id);
             // Scores are floats; compare with an absolute tolerance.
             $this->assertTrue(abs($hit->score - $expectedResultset[$resId][1]) < 1.0E-6);
             $this->assertEquals($expectedResultset[$resId][2], $hit->path);
         }
     } catch (\Exception $e) {
         // Restore the static default before propagating the failure
         // (catch-and-rethrow is used instead of `finally` to stay compatible
         // with PHP versions that predate it).
         Query\Fuzzy::setDefaultPrefixLength($defaultPrefixLength);
         throw $e;
     }

     Query\Fuzzy::setDefaultPrefixLength($defaultPrefixLength);
 }
예제 #2
0
 /**
  * Puts the static query settings back to the values held in
  * $this->_wildcardMinPrefix and $this->_defaultPrefixLength.
  *
  * NOTE(review): presumably those properties are captured in setUp(), which
  * is not visible here — confirm.
  */
 public function tearDown()
 {
     $wildcardMinPrefix   = $this->_wildcardMinPrefix;
     $fuzzyDefaultPrefix  = $this->_defaultPrefixLength;

     Query\Wildcard::setMinPrefixLength($wildcardMinPrefix);
     Query\Fuzzy::setDefaultPrefixLength($fuzzyDefaultPrefix);
 }
예제 #3
0
 /**
  * Highlights the matches of this query in the document held by the highlighter.
  *
  * Highlighting is delegated to a Query\Fuzzy built from the single token of
  * $this->_word. Nothing is highlighted when the word contains a wildcard
  * character ('*' or '?'), yields no tokens, or yields more than one token.
  *
  * @param Highlighter $highlighter  Highlighter object (also contains doc for highlighting)
  */
 protected function _highlightMatches(Highlighter $highlighter)
 {
     // Field detection is skipped: all fields are expected to be present in
     // the HTML body and are not differentiated.
     // Exact term matching recognition is skipped as well: keyword field
     // highlighting is not supported.

     // --- Wildcard detection ---------------------------------------------
     /**
      * @todo check for PCRE unicode support may be performed through Zend_Environment in some future
      */
     // Probe a Unicode-mode pattern; E_WARNING is routed through ErrorHandler
     // while the probe runs.
     ErrorHandler::start(E_WARNING);
     $unicodeProbe = preg_match('/\\pL/u', 'a');
     ErrorHandler::stop();

     // With Unicode-mode PCRE the word is converted to UTF-8 before splitting;
     // otherwise it is split as-is.
     $wildcardParts = ($unicodeProbe == 1)
         ? preg_split('/[*?]/u', iconv($this->_encoding, 'UTF-8', $this->_word))
         : preg_split('/[*?]/', $this->_word);

     if (count($wildcardParts) > 1) {
         // The word contains a wildcard: nothing to highlight here.
         return;
     }

     // --- One-term, multi-term and "insignificant" words -----------------
     $tokens     = Analyzer\Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);
     $tokenCount = count($tokens);

     if ($tokenCount != 1) {
         // Zero tokens: the word is insignificant.
         // Several tokens: fuzzy search is supported only for single-word
         // terms. In both cases there is nothing to highlight.
         return;
     }

     // Exactly one token: build the fuzzy query for it and let it highlight.
     $fuzzyTerm  = new Index\Term($tokens[0]->getTermText(), $this->_field);
     $fuzzyQuery = new Query\Fuzzy($fuzzyTerm, $this->_minimumSimilarity);
     $fuzzyQuery->_highlightMatches($highlighter);
 }