/**
 * Builds the analyzer returned for highlighting: a case-insensitive
 * analyzer with a morphological filter attached.
 *
 * @return CaseInsensitive
 */
private function getAnalyzerForHighlighter()
{
    // Case-insensitive analyzer for UTF-8 encoded text.
    $highlightAnalyzer = new CaseInsensitive();

    // Attach the morphological filter to the analyzer.
    $morphyFilter = new MorphyFilter();
    $highlightAnalyzer->addFilter($morphyFilter);

    return $highlightAnalyzer;
}
/**
 * Runs the parent constructor, registers an ASCII folding filter, and
 * sets the minimum n-gram size to 2. The minimum word size is set to the
 * same value as the minimum n-gram size.
 */
public function __construct()
{
    parent::__construct();

    // NOTE(review): presumably folds accented characters to their ASCII
    // equivalents; confirm against AsciiFoldingFilter.
    $foldingFilter = new AsciiFoldingFilter();
    $this->addFilter($foldingFilter);

    // Keep the minimum word size equal to the minimum n-gram size.
    $this->_minNGramSize = 2;
    $this->_minWordSize = $this->_minNGramSize;
}
/**
 * Checks that the Utf8Num case-insensitive analyzer splits a Cyrillic
 * alphanumeric string into three lower-cased tokens and reports the expected
 * offsets and position increments for each of them.
 *
 * The test is marked as skipped (rather than silently passing) when PCRE
 * was built without Unicode support or the mbstring extension is missing.
 */
public function testUtf8NumCaseInsensitive()
{
    // Feature probe: "@" hides the warning emitted when PCRE cannot
    // compile a Unicode pattern; in that case preg_match() does not return 1.
    if (@preg_match('/\\pL/u', 'a') !== 1) {
        $this->markTestSkipped('PCRE unicode support is turned off');
    }

    if (!function_exists('mb_strtolower')) {
        $this->markTestSkipped('mbstring extension is disabled');
    }

    $analyzer = new Utf8Num\CaseInsensitive();

    // UTF-8 text with Cyrillic symbols.
    $tokenList = $analyzer->tokenize('Слово1 Слово2 ДругоеСлово', 'UTF-8');

    // Expected tokens as array(term text, start offset, end offset).
    // The offsets match character positions in the input, not byte positions
    // (e.g. 'Слово1' is 6 characters long and ends at offset 6).
    $expectedTokens = array(
        array('слово1', 0, 6),
        array('слово2', 7, 13),
        array('другоеслово', 14, 25),
    );

    $this->assertEquals(count($expectedTokens), count($tokenList), 'number of tokens');

    foreach ($expectedTokens as $index => $expected) {
        list($termText, $startOffset, $endOffset) = $expected;
        $token = $tokenList[$index];

        $this->assertEquals($termText, $token->getTermText(), "term text of token #$index");
        $this->assertEquals($startOffset, $token->getStartOffset(), "start offset of token #$index");
        $this->assertEquals($endOffset, $token->getEndOffset(), "end offset of token #$index");
        $this->assertEquals(1, $token->getPositionIncrement(), "position increment of token #$index");
    }
}
/**
 * Runs the parent constructor and then registers an ASCII folding filter
 * on this analyzer.
 */
public function __construct()
{
    parent::__construct();

    // NOTE(review): presumably folds accented characters to their ASCII
    // equivalents; confirm against AsciiFoldingFilter.
    $foldingFilter = new AsciiFoldingFilter();
    $this->addFilter($foldingFilter);
}