Ejemplo n.º 1
0
 /**
  * Get the shared search index, configuring the search library on first use.
  *
  * The first call builds a case-insensitive analyzer, optionally attaches a
  * stop-words filter and a Morphy filter (each only when the corresponding
  * 'zend_search' config entry exists), installs the analyzer as the default,
  * applies the result-set limit and query-parser encoding, and then opens the
  * index at the configured location, or creates it there when that location
  * cannot be resolved to a real path. The result is cached in self::$index
  * and returned directly by later calls.
  *
  * @return \ZendSearch\Lucene\Index
  * @throws \InvalidArgumentException If a configured stop-words or Morphy
  *                                   dictionaries path cannot be resolved.
  */
 private function index()
 {
     if (!isset(self::$index)) {
         $analyzer = new CaseInsensitive();

         // Optional stop-words filter, loaded from the configured file.
         if ($this->config()->exists('zend_search', 'stop_words')) {
             $stop_word_filter = new StopWords();
             $words = $this->getRealPath($this->config()->get('zend_search', 'stop_words'));
             if ($words !== false) {
                 $stop_word_filter->loadFromFile($words);
             } else {
                 throw new \InvalidArgumentException('Path not found');
             }
             $analyzer->addFilter($stop_word_filter);
         }

         // Optional Morphy filter, built from the configured dictionaries path.
         if ($this->config()->exists('zend_search', 'morphy_dicts')) {
             $morphy_dicts = $this->getRealPath($this->config()->get('zend_search', 'morphy_dicts'));
             if ($morphy_dicts !== false) {
                 $analyzer->addFilter(new Morphy($morphy_dicts, $this->config()->getCharset()));
             } else {
                 throw new \InvalidArgumentException('Path not found');
             }
         }

         // These are static, library-wide settings; they are applied once,
         // just before the index itself is opened or created.
         Analyzer::setDefault($analyzer);
         Lucene::setResultSetLimit($this->limit);
         QueryParser::setDefaultEncoding($this->config()->getCharset());

         // Fixed: this used to read "$this->config() - get(...)", i.e. a
         // subtraction involving a global get() call, not a method call.
         $index = $this->config()->get('zend_search', 'index');
         $path = $this->getRealPath($index);

         // Open the index when its location resolves; otherwise create a new
         // one at the configured (unresolved) location.
         self::$index = $path ? Lucene::open($path) : Lucene::create($index);
     }
     return self::$index;
 }
Ejemplo n.º 2
0
 /**
  * Verify that the UTF-8 case-insensitive analyzer tokenizes Cyrillic text,
  * lower-cases each term, and reports the expected offsets and position
  * increments.
  *
  * The test is marked as skipped (instead of silently passing) when PCRE
  * Unicode support or the mbstring extension is unavailable.
  */
 public function testUtf8CaseInsensitive()
 {
     // "@" is deliberate: this is a capability probe, and any warning the
     // probe itself raises is not of interest here.
     if (@preg_match('/\\pL/u', 'a') != 1) {
         $this->markTestSkipped('PCRE unicode support is turned off');
     }
     if (!function_exists('mb_strtolower')) {
         $this->markTestSkipped('mbstring extension is disabled');
     }

     $analyzer = new Utf8\CaseInsensitive();

     // UTF-8 text with Cyrillic symbols. As the expectations below show, the
     // trailing digits are not part of the resulting terms, and offsets are
     // counted in characters rather than bytes.
     $tokenList = $analyzer->tokenize('Слово1 Слово2 ДругоеСлово', 'UTF-8');

     $this->assertCount(3, $tokenList);

     $this->assertEquals('слово', $tokenList[0]->getTermText());
     $this->assertEquals(0, $tokenList[0]->getStartOffset());
     $this->assertEquals(5, $tokenList[0]->getEndOffset());
     $this->assertEquals(1, $tokenList[0]->getPositionIncrement());

     $this->assertEquals('слово', $tokenList[1]->getTermText());
     $this->assertEquals(7, $tokenList[1]->getStartOffset());
     $this->assertEquals(12, $tokenList[1]->getEndOffset());
     $this->assertEquals(1, $tokenList[1]->getPositionIncrement());

     $this->assertEquals('другоеслово', $tokenList[2]->getTermText());
     $this->assertEquals(14, $tokenList[2]->getStartOffset());
     $this->assertEquals(25, $tokenList[2]->getEndOffset());
     $this->assertEquals(1, $tokenList[2]->getPositionIncrement());
 }