/**
 * Returns the Zend_Search_Lucene index belonging to the subroot of a component.
 *
 * Starting at the given component, the parent chain is walked upwards until a
 * component whose class has the 'subroot' flag is reached. If no such
 * component exists, the root component is used instead. Every subroot gets
 * its own index below cache/fulltext/<componentId>; opened indexes are kept
 * in a static local so each one is opened at most once per process.
 *
 * Side effects on first access per subroot: the default Lucene analyzer, the
 * default query parser encoding and the default file permissions for index
 * files are (re)configured globally.
 *
 * @param Kwf_Component_Data $subroot component to look up the index for
 * @return Zend_Search_Lucene_Interface
 */
public static function getInstance(Kwf_Component_Data $subroot)
{
    static $indexes = array();

    // Climb towards the root until a component flagged as subroot is found.
    while ($subroot && !Kwc_Abstract::getFlag($subroot->componentClass, 'subroot')) {
        $subroot = $subroot->parent;
    }
    if (!$subroot) {
        // Ran off the top of the tree: fall back to the root component.
        $subroot = Kwf_Component_Data_Root::getInstance();
    }

    $indexKey = $subroot->componentId;
    if (isset($indexes[$indexKey])) {
        return $indexes[$indexKey];
    }

    // Case-insensitive UTF-8 analyzer (letters and digits), dropping tokens
    // rejected by the ShortWords(2) filter.
    $defaultAnalyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
    $defaultAnalyzer->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_ShortWords(2));
    // Stop-word filtering is currently disabled:
    //$stopWords = explode(' ', 'der dir das einer eine ein und oder doch ist sind an in vor nicht wir ihr sie es ich');
    //$analyzer->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_StopWords($stopWords));
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($defaultAnalyzer);

    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('utf-8');
    Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0666);

    $indexDir = 'cache/fulltext' . '/' . $indexKey;
    try {
        $index = Zend_Search_Lucene::open($indexDir);
    } catch (Zend_Search_Lucene_Exception $e) {
        // Opening failed (presumably the index does not exist yet), so create it.
        $index = Zend_Search_Lucene::create($indexDir);
    }
    $indexes[$indexKey] = $index;

    return $indexes[$indexKey];
}
/**
 * Opens the search index and configures the global Lucene defaults.
 *
 * $params is consumed positionally:
 *  - first element:  index path; when missing or null, the path
 *                    <system path>/var/index/<site id>/<locale> is used
 *  - second element: query encoding; when missing, the current value of
 *                    $this->_encoding is used
 *
 * Side effects: sets the default query parser encoding and installs a
 * case-insensitive UTF-8 analyzer with an English stop-word filter as the
 * global default analyzer.
 *
 * @param array $params positional arguments as described above
 * @throws Axis_Exception when the index path is not readable
 */
public function __construct(array $params)
{
    $indexPath = array_shift($params);
    $encoding = count($params) ? array_shift($params) : $this->_encoding;

    if (null === $indexPath) {
        // No explicit path given: derive it from the current site and locale.
        $siteId = Axis::getSite()->id;
        $languageId = Axis_Locale::getLanguageId();
        $language = Axis::single('locale/language')->find($languageId)->current();
        $localeCode = $language->locale;
        $indexPath = Axis::config()->system->path
            . '/var/index/' . $siteId . '/' . $localeCode;
    }

    if (!is_readable($indexPath)) {
        throw new Axis_Exception(
            Axis::translate('search')->__(
                'Please, update search indexes, to enable search functionality'
            )
        );
    }

    /*
    $mySimilarity = new Axis_Similarity();
    Zend_Search_Lucene_Search_Similarity::setDefault($mySimilarity);
    */

    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding($encoding);

    // Ignore common English filler words when analysing text.
    $ignoredWords = array('a', 'an', 'at', 'the', 'and', 'or', 'is', 'am');
    $ignoredWordsFilter = new Zend_Search_Lucene_Analysis_TokenFilter_StopWords($ignoredWords);

    $defaultAnalyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
    $defaultAnalyzer->addFilter($ignoredWordsFilter);
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($defaultAnalyzer);

    $this->_index = Zend_Search_Lucene::open($indexPath);
    $this->_encoding = $encoding;
}
/**
 * Constructor: runs the parent constructor, then registers the token filters.
 *
 * Filters are added in this order: UTF-8 lower-casing, short-word removal
 * (minimum length from _getMinLength()), stop-word removal (list from
 * _getStopWords()) and the Drupal-specific filter.
 */
public function __construct()
{
    parent::__construct();

    $lowerCase = new Zend_Search_Lucene_Analysis_TokenFilter_LowerCaseUtf8();
    $this->addFilter($lowerCase);

    $shortWords = new LuceneAPI_Search_Lucene_Analysis_TokenFilter_ShortWords(
        $this->_getMinLength()
    );
    $this->addFilter($shortWords);

    $stopWords = new Zend_Search_Lucene_Analysis_TokenFilter_StopWords(
        $this->_getStopWords()
    );
    $this->addFilter($stopWords);

    $drupal = new LuceneAPI_Search_Lucene_Analysis_TokenFilter_Drupal();
    $this->addFilter($drupal);
}
/**
 * Verifies that the case-insensitive UTF-8 (letters + digits) analyzer splits
 * Cyrillic text into lower-cased tokens.
 *
 * The expected offsets are counted in characters, not bytes (e.g. the first
 * six-character Cyrillic word ends at offset 6), and every token must have a
 * position increment of 1.
 *
 * The test is reported as skipped when PCRE was built without Unicode
 * support or when the mbstring extension is missing, instead of silently
 * passing without checking anything.
 */
public function testUtf8NumCaseInsensitive()
{
    // '@' is deliberate: the probe itself emits a warning when /u is unsupported.
    if (@preg_match('/\\pL/u', 'a') != 1) {
        $this->markTestSkipped('PCRE unicode support is turned off');
    }
    if (!function_exists('mb_strtolower')) {
        $this->markTestSkipped('mbstring extension is disabled');
    }

    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();

    // UTF-8 text with Cyrillic symbols
    $tokenList = $analyzer->tokenize('Слово1 Слово2 ДругоеСлово', 'UTF-8');

    // Each entry: expected term text, start offset, end offset.
    $expectedTokens = array(
        array('слово1', 0, 6),
        array('слово2', 7, 13),
        array('другоеслово', 14, 25),
    );

    // PHPUnit convention: expected value first, actual value second.
    $this->assertEquals(count($expectedTokens), count($tokenList));

    foreach ($expectedTokens as $i => $expected) {
        $token = $tokenList[$i];
        $label = 'token #' . $i;

        $this->assertEquals($expected[0], $token->getTermText(), $label . ' term text');
        $this->assertEquals($expected[1], $token->getStartOffset(), $label . ' start offset');
        $this->assertEquals($expected[2], $token->getEndOffset(), $label . ' end offset');
        $this->assertEquals(1, $token->getPositionIncrement(), $label . ' position increment');
    }
}
/**
 * Installs the default analyzer used by Zend_Search_Lucene.
 *
 * The analyzer is the case-insensitive UTF-8 (letters + digits) one with a
 * ShortWords token filter (constructed with its default settings) attached.
 */
protected function _initZendSearch()
{
    $defaultAnalyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
    $defaultAnalyzer->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_ShortWords());

    Zend_Search_Lucene_Analysis_Analyzer::setDefault($defaultAnalyzer);
}