Ejemplo n.º 1
0
 /**
  * Runs the parent constructor, then registers a
  * Zend_Search_Lucene_Analysis_TokenFilter_LowerCaseUtf8 filter on this instance.
  */
 public function __construct()
 {
     parent::__construct();

     $lowerCaseFilter = new Zend_Search_Lucene_Analysis_TokenFilter_LowerCaseUtf8();
     $this->addFilter($lowerCaseFilter);
 }
 /**
  * Zend Search Lucene makes it awfully hard to have multiple Lucene indexes
  * open at the same time. This method combats that by configuring all the
  * static variables for this instance.
  *
  * Sets the query parser's default encoding and the default analyzer from this
  * instance's parameters. The 'lucene.configure.pre' event is notified before
  * any static state is changed and 'lucene.configure.post' once it is all set.
  *
  * Parameters read: 'encoding', 'analyzer', 'case_sensitive', 'mb_string',
  * 'stop_words' and 'short_words'.
  *
  * @throws sfLuceneException if the 'analyzer' parameter does not name one of
  *                           the analyzers handled below
  */
 public function configure()
 {
     sfLuceneToolkit::loadZend();
     $this->getEventDispatcher()->notify(new sfEvent($this, 'lucene.configure.pre'));
     Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding($this->getParameter('encoding'));

     // Read the name once so the error message reports exactly what was matched.
     $analyzerName = $this->getParameter('analyzer');
     switch (strtolower($analyzerName)) {
         case 'text':
             $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Text();
             break;
         case 'textnum':
             $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum();
             break;
         case 'utf8':
         case 'utf-8':
             $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8();
             break;
         case 'utf8num':
         case 'utf-8num':
             $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num();
             break;
         default:
             throw new sfLuceneException('Unknown analyzer: ' . $analyzerName);
     }

     // Lower-casing is applied unless the index is explicitly case sensitive.
     if (!$this->getParameter('case_sensitive', false)) {
         $analyzer->addFilter(new sfLuceneLowerCaseFilter($this->getParameter('mb_string', false)));
     }

     // empty() instead of count(): an unset (null) parameter must simply mean
     // "no stop words" rather than raising an error from count().
     $stopWords = $this->getParameter('stop_words');
     if (!empty($stopWords)) {
         $analyzer->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_StopWords($stopWords));
     }

     $shortWords = $this->getParameter('short_words');
     if ($shortWords > 0) {
         $analyzer->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_ShortWords($shortWords));
     }

     Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
     $this->getEventDispatcher()->notify(new sfEvent($this, 'lucene.configure.post'));
 }
Ejemplo n.º 3
0
 /**
  * Checks that the Utf8Num analyzer splits a Cyrillic UTF-8 string containing
  * digits into three tokens with the expected term text, offsets and
  * position increments.
  *
  * The test is marked as skipped when the PCRE probe below does not match.
  */
 public function testUtf8Num()
 {
     // Errors are suppressed on purpose: the probe itself may emit a warning
     // when the /u modifier is not supported by the PCRE build.
     if (@preg_match('/\\pL/u', 'a') !== 1) {
         // Report the test as skipped instead of returning, which would be
         // counted as a pass even though nothing was verified.
         $this->markTestSkipped('PCRE unicode support is turned off');
         return;
     }

     $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num();

     // UTF-8 text with Cyrillic symbols
     $tokenList = $analyzer->tokenize('Слово1 Слово2 ДругоеСлово', 'UTF-8');

     // assertEquals() takes the expected value first, then the actual one.
     $this->assertEquals(3, count($tokenList));

     $this->assertEquals('Слово1', $tokenList[0]->getTermText());
     $this->assertEquals(0, $tokenList[0]->getStartOffset());
     $this->assertEquals(6, $tokenList[0]->getEndOffset());
     $this->assertEquals(1, $tokenList[0]->getPositionIncrement());

     $this->assertEquals('Слово2', $tokenList[1]->getTermText());
     $this->assertEquals(7, $tokenList[1]->getStartOffset());
     $this->assertEquals(13, $tokenList[1]->getEndOffset());
     $this->assertEquals(1, $tokenList[1]->getPositionIncrement());

     $this->assertEquals('ДругоеСлово', $tokenList[2]->getTermText());
     $this->assertEquals(14, $tokenList[2]->getStartOffset());
     $this->assertEquals(25, $tokenList[2]->getEndOffset());
     $this->assertEquals(1, $tokenList[2]->getPositionIncrement());
 }
Ejemplo n.º 4
0
 /**
  * Checks that the Utf8Num analyzer splits a Cyrillic UTF-8 string containing
  * digits into three tokens with the expected term text, offsets and
  * position increments.
  */
 public function testUtf8Num()
 {
     $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num();

     // UTF-8 text with Cyrillic symbols
     $tokenList = $analyzer->tokenize('Слово1 Слово2 ДругоеСлово');

     // Expected tokens in order: term text, start offset, end offset.
     // NOTE(review): the first two tokens end at start + length; by the same
     // arithmetic the last one would end at 25 (14 + 11 characters), not 24.
     // Confirm 24 is what this analyzer version really reports.
     $expectedTokens = array(
         array('Слово1', 0, 6),
         array('Слово2', 7, 13),
         array('ДругоеСлово', 14, 24),
     );

     // assertEquals() takes the expected value first, then the actual one.
     $this->assertEquals(count($expectedTokens), count($tokenList));

     foreach ($expectedTokens as $index => $expected) {
         list($termText, $startOffset, $endOffset) = $expected;
         $token = $tokenList[$index];

         $this->assertEquals($termText, $token->getTermText(), "term text of token $index");
         $this->assertEquals($startOffset, $token->getStartOffset(), "start offset of token $index");
         $this->assertEquals($endOffset, $token->getEndOffset(), "end offset of token $index");
         $this->assertEquals(1, $token->getPositionIncrement(), "position increment of token $index");
     }
 }