예제 #1
0
 /**
  * Runs the parent constructor, then registers a UTF-8 lower-casing token
  * filter (TokenFilter\LowerCaseUtf8) on this instance via addFilter().
  */
 public function __construct()
 {
     parent::__construct();

     $lowerCaseFilter = new TokenFilter\LowerCaseUtf8();
     $this->addFilter($lowerCaseFilter);
 }
예제 #2
0
 /**
  * Checks tokenization of Cyrillic text that is supplied in a non-UTF-8 encoding.
  *
  * The sample string is converted from UTF-8 to Windows-1251 with iconv() and
  * passed to the analyzer together with the encoding name. Three tokens are
  * expected; the digits following the first two words are not part of the
  * expected term text, and every token must report a position increment of 1.
  *
  * The test is skipped on AIX.
  */
 public function testEncoding()
 {
     if (PHP_OS === 'AIX') {
         $this->markTestSkipped('Test not available on AIX');
     }

     $analyzer = new Utf8\Utf8();

     // Cyrillic text, re-encoded from UTF-8 to Windows-1251 before tokenizing.
     $encodedText = iconv('UTF-8', 'Windows-1251', 'Слово1 Слово2 ДругоеСлово');
     $tokenList   = $analyzer->tokenize($encodedText, 'Windows-1251');

     // Each row: expected term text, start offset, end offset.
     $expectedTokens = array(
         array('Слово', 0, 5),
         array('Слово', 7, 12),
         array('ДругоеСлово', 14, 25),
     );

     $this->assertCount(count($expectedTokens), $tokenList);

     foreach ($expectedTokens as $index => $expected) {
         list($termText, $startOffset, $endOffset) = $expected;
         $token = $tokenList[$index];

         // PHPUnit expects (expected, actual); the message pinpoints the failing token.
         $this->assertEquals($termText, $token->getTermText(), "Term text of token #$index");
         $this->assertEquals($startOffset, $token->getStartOffset(), "Start offset of token #$index");
         $this->assertEquals($endOffset, $token->getEndOffset(), "End offset of token #$index");
         $this->assertEquals(1, $token->getPositionIncrement(), "Position increment of token #$index");
     }
 }