예제 #1
0
 /**
  * Builds the analyzer used for highlighting.
  *
  * Instantiates a CaseInsensitive analyzer and attaches a MorphyFilter
  * to it before handing it back to the caller.
  *
  * @return CaseInsensitive
  */
 private function getAnalyzerForHighlighter()
 {
     // Case-insensitive analyzer for UTF-8 encoded text.
     $highlightAnalyzer = new CaseInsensitive();

     // Attach the morphological filter to the analyzer.
     $morphologyFilter = new MorphyFilter();
     $highlightAnalyzer->addFilter($morphologyFilter);

     return $highlightAnalyzer;
 }
 /**
  * Initializes the analyzer.
  *
  * Runs the parent constructor, registers an AsciiFoldingFilter and sets
  * both the minimum n-gram size and the minimum word size to 2.
  */
 public function __construct()
 {
     parent::__construct();

     $asciiFolding = new AsciiFoldingFilter();
     $this->addFilter($asciiFolding);

     // The minimum word size always mirrors the minimum n-gram size.
     $this->_minWordSize = $this->_minNGramSize = 2;
 }
예제 #3
0
 /**
  * Checks that Utf8Num\CaseInsensitive tokenizes a Cyrillic UTF-8 string
  * into three lower-cased tokens with the expected offsets and position
  * increments.
  *
  * The asserted offsets match character positions rather than byte
  * positions ("Слово1" is 6 characters long and its end offset is 6).
  *
  * The test is marked as skipped when PCRE lacks Unicode support or when
  * the mbstring extension is unavailable.
  */
 public function testUtf8NumCaseInsensitive()
 {
     // The error suppression is deliberate: preg_match() emits a warning
     // for the /u modifier when PCRE was built without Unicode support.
     if (@preg_match('/\\pL/u', 'a') !== 1) {
         $this->markTestSkipped('PCRE unicode support is turned off');
         return;
     }
     if (!function_exists('mb_strtolower')) {
         $this->markTestSkipped('mbstring extension is disabled');
         return;
     }

     // Analyzer under test (named in the original comment as
     // Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive).
     $analyzer = new Utf8Num\CaseInsensitive();

     // UTF-8 text with Cyrillic symbols.
     $tokenList = $analyzer->tokenize('Слово1 Слово2 ДругоеСлово', 'UTF-8');

     // PHPUnit convention: expected value first, actual value second.
     $this->assertCount(3, $tokenList);

     $this->assertEquals('слово1', $tokenList[0]->getTermText());
     $this->assertEquals(0, $tokenList[0]->getStartOffset());
     $this->assertEquals(6, $tokenList[0]->getEndOffset());
     $this->assertEquals(1, $tokenList[0]->getPositionIncrement());

     $this->assertEquals('слово2', $tokenList[1]->getTermText());
     $this->assertEquals(7, $tokenList[1]->getStartOffset());
     $this->assertEquals(13, $tokenList[1]->getEndOffset());
     $this->assertEquals(1, $tokenList[1]->getPositionIncrement());

     $this->assertEquals('другоеслово', $tokenList[2]->getTermText());
     $this->assertEquals(14, $tokenList[2]->getStartOffset());
     $this->assertEquals(25, $tokenList[2]->getEndOffset());
     $this->assertEquals(1, $tokenList[2]->getPositionIncrement());
 }
 /**
  * Initializes the analyzer.
  *
  * Runs the parent constructor and then registers an AsciiFoldingFilter.
  */
 public function __construct()
 {
     parent::__construct();

     $foldingFilter = new AsciiFoldingFilter();
     $this->addFilter($foldingFilter);
 }