/**
 * Checks that the UTF-8 analyzer tokenizes input supplied in a non-UTF-8 encoding.
 *
 * The Cyrillic sample text is converted from UTF-8 to Windows-1251 with iconv()
 * and handed to tokenize() together with the encoding name. Three tokens are
 * expected; the trailing digits of "Слово1" and "Слово2" are not part of the
 * expected term text.
 *
 * @return void
 */
public function testEncoding()
{
    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8();

    // Cyrillic sample text, re-encoded from UTF-8 to Windows-1251 before tokenizing
    $tokenList = $analyzer->tokenize(
        iconv('UTF-8', 'Windows-1251', 'Слово1 Слово2 ДругоеСлово'),
        'Windows-1251'
    );

    // assertEquals() takes the expected value first, then the actual one;
    // keeping that order makes failure messages report the values correctly.
    $this->assertEquals(3, count($tokenList));

    // First token: "Слово" (from "Слово1")
    $this->assertEquals('Слово', $tokenList[0]->getTermText());
    $this->assertEquals(0, $tokenList[0]->getStartOffset());
    $this->assertEquals(5, $tokenList[0]->getEndOffset());
    $this->assertEquals(1, $tokenList[0]->getPositionIncrement());

    // Second token: "Слово" (from "Слово2")
    $this->assertEquals('Слово', $tokenList[1]->getTermText());
    $this->assertEquals(7, $tokenList[1]->getStartOffset());
    $this->assertEquals(12, $tokenList[1]->getEndOffset());
    $this->assertEquals(1, $tokenList[1]->getPositionIncrement());

    // Third token: "ДругоеСлово"
    $this->assertEquals('ДругоеСлово', $tokenList[2]->getTermText());
    $this->assertEquals(14, $tokenList[2]->getStartOffset());
    $this->assertEquals(25, $tokenList[2]->getEndOffset());
    $this->assertEquals(1, $tokenList[2]->getPositionIncrement());
}
/**
 * Checks that the UTF-8 analyzer tokenizes input supplied in a non-UTF-8 encoding.
 *
 * The Cyrillic sample text is converted from UTF-8 to Windows-1251 with iconv()
 * and handed to tokenize() together with the encoding name. Three tokens are
 * expected; the trailing digits of "Слово1" and "Слово2" are not part of the
 * expected term text.
 *
 * The test is skipped when PHP reports AIX as the operating system.
 *
 * @return void
 */
public function testEncoding()
{
    // NOTE(review): presumably the Windows-1251 conversion is unavailable on
    // AIX; the original code gives no reason beyond the skip message. Confirm.
    if (PHP_OS === 'AIX') {
        $this->markTestSkipped('Test not available on AIX');
    }

    /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8 */
    require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php';

    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8();

    // Cyrillic sample text, re-encoded from UTF-8 to Windows-1251 before tokenizing
    $tokenList = $analyzer->tokenize(
        iconv('UTF-8', 'Windows-1251', 'Слово1 Слово2 ДругоеСлово'),
        'Windows-1251'
    );

    // assertEquals() takes the expected value first, then the actual one;
    // keeping that order makes failure messages report the values correctly.
    $this->assertEquals(3, count($tokenList));

    // First token: "Слово" (from "Слово1")
    $this->assertEquals('Слово', $tokenList[0]->getTermText());
    $this->assertEquals(0, $tokenList[0]->getStartOffset());
    $this->assertEquals(5, $tokenList[0]->getEndOffset());
    $this->assertEquals(1, $tokenList[0]->getPositionIncrement());

    // Second token: "Слово" (from "Слово2")
    $this->assertEquals('Слово', $tokenList[1]->getTermText());
    $this->assertEquals(7, $tokenList[1]->getStartOffset());
    $this->assertEquals(12, $tokenList[1]->getEndOffset());
    $this->assertEquals(1, $tokenList[1]->getPositionIncrement());

    // Third token: "ДругоеСлово"
    $this->assertEquals('ДругоеСлово', $tokenList[2]->getTermText());
    $this->assertEquals(14, $tokenList[2]->getStartOffset());
    $this->assertEquals(25, $tokenList[2]->getEndOffset());
    $this->assertEquals(1, $tokenList[2]->getPositionIncrement());
}