/**
 * Get the search index, building and caching it on first use.
 *
 * On the first call this configures the default analyzer (a case-insensitive
 * analyzer, optionally extended with a stop-words filter and a Morphy filter
 * when the matching `zend_search` config keys exist), applies the result-set
 * limit and the query parser encoding, and then opens the index found at the
 * configured location, or creates one there when the location does not
 * resolve to an existing path. The result is stored in self::$index and
 * returned as-is on later calls.
 *
 * @return \ZendSearch\Lucene\Index
 *
 * @throws \InvalidArgumentException When a configured stop-words file or
 *                                   morphy dictionaries path cannot be resolved.
 */
private function index()
{
    if (!isset(self::$index)) {
        $analyzer = new CaseInsensitive();

        // Optional stop-words filter, loaded from the configured file.
        if ($this->config()->exists('zend_search', 'stop_words')) {
            $stop_word_filter = new StopWords();
            $words = $this->getRealPath($this->config()->get('zend_search', 'stop_words'));

            if ($words !== false) {
                $stop_word_filter->loadFromFile($words);
            } else {
                throw new \InvalidArgumentException('Path not found');
            }

            $analyzer->addFilter($stop_word_filter);
        }

        // Optional Morphy filter, built from the configured dictionaries path.
        if ($this->config()->exists('zend_search', 'morphy_dicts')) {
            $morphy_dicts = $this->getRealPath($this->config()->get('zend_search', 'morphy_dicts'));

            if ($morphy_dicts !== false) {
                $analyzer->addFilter(new Morphy($morphy_dicts, $this->config()->getCharset()));
            } else {
                throw new \InvalidArgumentException('Path not found');
            }
        }

        // These setters are static: they apply library-wide, not only to this index.
        Analyzer::setDefault($analyzer);
        Lucene::setResultSetLimit($this->limit);
        QueryParser::setDefaultEncoding($this->config()->getCharset());

        // Read the index location via the object operator; the previous code used
        // a minus sign here, which called an undefined global get() function.
        $index = $this->config()->get('zend_search', 'index');
        $path = $this->getRealPath($index);

        // Open the index when its path resolves, otherwise create it at the
        // configured location.
        self::$index = $path ? Lucene::open($path) : Lucene::create($index);
    }

    return self::$index;
}
/**
 * Checks that the UTF-8 case-insensitive analyzer splits Cyrillic text into
 * lower-cased tokens with the expected offsets and position increments.
 *
 * The test is marked as skipped when PCRE unicode support or the mbstring
 * extension is unavailable.
 */
public function testUtf8CaseInsensitive()
{
    // Probe for PCRE unicode support; "@" silences the warning emitted when
    // the pattern cannot be compiled on builds without it.
    if (@preg_match('/\\pL/u', 'a') != 1) {
        $this->markTestSkipped('PCRE unicode support is turned off');
    }

    if (!function_exists('mb_strtolower')) {
        $this->markTestSkipped('mbstring extension is disabled');
    }

    /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive */
    $analyzer = new Utf8\CaseInsensitive();

    // UTF-8 text with Cyrillic symbols
    $tokenList = $analyzer->tokenize('Слово1 Слово2 ДругоеСлово', 'UTF-8');

    // Assertions use PHPUnit's (expected, actual) argument order so that
    // failure messages report the values the right way round.
    $this->assertEquals(3, count($tokenList));

    // First token: the trailing digit is not part of the term text.
    $this->assertEquals('слово', $tokenList[0]->getTermText());
    $this->assertEquals(0, $tokenList[0]->getStartOffset());
    $this->assertEquals(5, $tokenList[0]->getEndOffset());
    $this->assertEquals(1, $tokenList[0]->getPositionIncrement());

    // Second token
    $this->assertEquals('слово', $tokenList[1]->getTermText());
    $this->assertEquals(7, $tokenList[1]->getStartOffset());
    $this->assertEquals(12, $tokenList[1]->getEndOffset());
    $this->assertEquals(1, $tokenList[1]->getPositionIncrement());

    // Third token: mixed-case input is lower-cased.
    $this->assertEquals('другоеслово', $tokenList[2]->getTermText());
    $this->assertEquals(14, $tokenList[2]->getStartOffset());
    $this->assertEquals(25, $tokenList[2]->getEndOffset());
    $this->assertEquals(1, $tokenList[2]->getPositionIncrement());
}