/**
 * Get the shared Lucene index, configuring and opening it on first use.
 *
 * On the first call this method:
 *  - builds a CaseInsensitive analyzer, adding a stop-words filter and/or a
 *    Morphy filter when the matching 'zend_search' config keys exist;
 *  - installs it as the default analyzer and sets the result-set limit and
 *    the query parser's default encoding;
 *  - opens the index at the configured location, or creates it there when
 *    getRealPath() cannot resolve that location.
 *
 * The result is stored in self::$index and returned unchanged on later calls.
 *
 * @return \ZendSearch\Lucene\Index
 * @throws \InvalidArgumentException When getRealPath() returns false for a
 *                                   configured stop-words or morphy path.
 */
private function index()
{
    if (!isset(self::$index)) {
        $analyzer = new CaseInsensitive();

        if ($this->config()->exists('zend_search', 'stop_words')) {
            $words = $this->getRealPath($this->config()->get('zend_search', 'stop_words'));
            if ($words === false) {
                throw new \InvalidArgumentException('Path not found');
            }

            $stop_word_filter = new StopWords();
            $stop_word_filter->loadFromFile($words);
            $analyzer->addFilter($stop_word_filter);
        }

        if ($this->config()->exists('zend_search', 'morphy_dicts')) {
            $morphy_dicts = $this->getRealPath($this->config()->get('zend_search', 'morphy_dicts'));
            if ($morphy_dicts === false) {
                throw new \InvalidArgumentException('Path not found');
            }

            $analyzer->addFilter(new Morphy($morphy_dicts, $this->config()->getCharset()));
        }

        Analyzer::setDefault($analyzer);
        Lucene::setResultSetLimit($this->limit);
        QueryParser::setDefaultEncoding($this->config()->getCharset());

        // Fixed: this used to read `$this->config() - get(...)`, i.e. a subtraction
        // involving an undefined global get() rather than a method call.
        $index = $this->config()->get('zend_search', 'index');
        $path = $this->getRealPath($index);

        // Open the index when its location resolves; otherwise create it at
        // the configured (unresolved) location.
        self::$index = $path ? Lucene::open($path) : Lucene::create($index);
    }

    return self::$index;
}
/**
 * Build a stop-words filter loaded from the given file.
 *
 * @param string $path Path to the stop-words file.
 * @return StopWordsFilter
 * @throws \InvalidArgumentException If $path is not an existing regular file.
 */
public function newInstance($path)
{
    if (is_file($path)) {
        $filter = new StopWordsFilter();
        $filter->loadFromFile($path);

        return $filter;
    }

    throw new \InvalidArgumentException("File '{$path}' with stop words doesn't exist.");
}
/**
 * Assemble the default analyzer with its stop-word and morphology filters.
 *
 * @return CaseInsensitive
 */
private function getDefaultAnalyzer()
{
    // Case-insensitive analyzer for UTF-8 encoded text.
    $analyzer = new CaseInsensitive();

    // Attach the stop-word filters first (Russian, then English); the
    // morphological filter is added after them.
    $stopWordFiles = [
        __DIR__ . '/phpmorphy/stop-words/stop-words-russian.txt',
        __DIR__ . '/phpmorphy/stop-words/stop-words-english4.txt',
    ];

    foreach ($stopWordFiles as $file) {
        $filter = new StopWords();
        $filter->loadFromFile($file);
        $analyzer->addFilter($filter);
    }

    $analyzer->addFilter(new MorphyFilter());

    return $analyzer;
}