/**
 * Analyzes the given text and returns the collected search terms.
 *
 * The text is first passed to the parent analyzer; the term text of every
 * token produced by Zend's common UTF-8 Lucene analyzer is then appended to
 * that result.
 *
 * Side effect: each call installs a fresh
 * Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8 instance as the default
 * Zend_Search_Lucene analyzer.
 *
 * @param string $text Text to analyze; handed to Zend as UTF-8.
 *
 * @return array Parent analyzer result followed by the Zend token terms
 *               (assumes parent::analyze() returns an array -- TODO confirm).
 */
public function analyze($text)
{
    $terms = parent::analyze($text);

    // Zend classes must be registered before any Zend_* symbol is touched.
    sfOpenPNEApplicationConfiguration::registerZend();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8()
    );

    $tokenizer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $tokenizer->setInput($text, 'UTF-8');

    // nextToken() yields null once the input is exhausted.
    for ($token = $tokenizer->nextToken(); $token !== null; $token = $tokenizer->nextToken()) {
        $terms[] = $token->getTermText();
    }

    return $terms;
}
/**
 * Checks that the standard analyzer strips accents from French words:
 * the words at positions 1, 2 and 4 of the analyzed sentence must come back
 * as their unaccented forms.
 */
public function testStandardAnalyzerCanHandleAccentedCharactersGracefullyWorks()
{
    $standardAnalyzer = new Doctrine_Search_Analyzer_Standard();
    $words = $standardAnalyzer->analyze('un éléphant ça trompe énormément', 'utf-8');

    // Position in the analyzed word list => expected unaccented term.
    $expectedByPosition = array(
        1 => 'elephant',
        2 => 'ca',
        4 => 'enormement',
    );

    foreach ($expectedByPosition as $position => $expectedTerm) {
        $this->assertEqual($words[$position], $expectedTerm);
    }
}