Пример #1
0
 /**
  * Returns the Zend_Search_Lucene index belonging to the subroot of a component.
  *
  * Starting at the given component, the parent chain is walked until a
  * component whose class carries the 'subroot' flag is found; if there is
  * none, the root component is used instead. Every subroot has its own
  * index, stored below cache/fulltext/<componentId>, and each opened index
  * is kept in a function-level static cache keyed by component id.
  *
  * Side effect: the first lookup for a subroot also replaces the global
  * default analyzer, the query parser default encoding and the default
  * file permissions used by the filesystem directory storage.
  *
  * @param Kwf_Component_Data $subroot component to resolve the index for
  * @return Zend_Search_Lucene_Interface
  */
 public static function getInstance(Kwf_Component_Data $subroot)
 {
     static $instance = array();

     // Climb towards the root until a component flagged as subroot is hit.
     for (; $subroot; $subroot = $subroot->parent) {
         if (Kwc_Abstract::getFlag($subroot->componentClass, 'subroot')) {
             break;
         }
     }
     if (!$subroot) {
         // Ran past the top of the tree without finding a subroot.
         $subroot = Kwf_Component_Data_Root::getInstance();
     }

     $id = $subroot->componentId;
     if (isset($instance[$id])) {
         return $instance[$id];
     }

     // Case-insensitive UTF-8 analyzer (letters and digits) with a short-words filter of length 2.
     $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
     $analyzer->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_ShortWords(2));
     // German stop-word filtering is currently disabled:
     //$stopWords = explode(' ', 'der dir das einer eine ein und oder doch ist sind an in vor nicht wir ihr sie es ich');
     //$analyzer->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_StopWords($stopWords));
     Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
     Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('utf-8');
     Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0666);

     $path = 'cache/fulltext/' . $id;
     try {
         $index = Zend_Search_Lucene::open($path);
     } catch (Zend_Search_Lucene_Exception $e) {
         // Opening failed, so start a fresh index at that location.
         $index = Zend_Search_Lucene::create($path);
     }
     $instance[$id] = $index;

     return $index;
 }
Пример #2
0
 /**
  * Opens the search index and configures the global Lucene defaults.
  *
  * $params is consumed positionally:
  *  - first element: path of the index directory; when missing or null the
  *    path <system path>/var/index/<site id>/<locale> is used
  *  - second element: query encoding; when missing the current value of
  *    $this->_encoding is kept
  *
  * Side effect: replaces the global default analyzer (case-insensitive
  * UTF-8 analyzer with an English stop-word filter) and the query parser
  * default encoding.
  *
  * @param array $params positional arguments as described above
  * @throws Axis_Exception when the index path is not readable
  */
 public function __construct(array $params)
 {
     $indexPath = array_shift($params);
     $encoding = count($params) ? array_shift($params) : $this->_encoding;

     if (null === $indexPath) {
         // No explicit path: derive it from the current site and language locale.
         $siteId = Axis::getSite()->id;
         $languageId = Axis_Locale::getLanguageId();
         $locale = Axis::single('locale/language')->find($languageId)->current()->locale;
         $indexPath = Axis::config()->system->path . '/var/index/' . $siteId . '/' . $locale;
     }

     if (!is_readable($indexPath)) {
         $message = Axis::translate('search')->__('Please, update search indexes, to enable search functionality');
         throw new Axis_Exception($message);
     }

     /*
     Custom similarity is currently disabled:
     $mySimilarity = new Axis_Similarity();
     Zend_Search_Lucene_Search_Similarity::setDefault($mySimilarity);
     */
     Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding($encoding);

     // Drop common English words before indexing/searching.
     $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
     $analyzer->addFilter(
         new Zend_Search_Lucene_Analysis_TokenFilter_StopWords(
             array('a', 'an', 'at', 'the', 'and', 'or', 'is', 'am')
         )
     );
     Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);

     $this->_index = Zend_Search_Lucene::open($indexPath);
     $this->_encoding = $encoding;
 }
Пример #3
0
 /**
  * Constructor; runs the parent constructor and then registers the token
  * filters in a fixed order: UTF-8 lower-casing, short-word removal,
  * stop-word removal and finally the Drupal filter.
  */
 public function __construct()
 {
     parent::__construct();

     $lowerCase = new Zend_Search_Lucene_Analysis_TokenFilter_LowerCaseUtf8();
     $this->addFilter($lowerCase);

     // Minimum token length comes from _getMinLength().
     $shortWords = new LuceneAPI_Search_Lucene_Analysis_TokenFilter_ShortWords($this->_getMinLength());
     $this->addFilter($shortWords);

     // Stop-word list comes from _getStopWords().
     $stopWords = new Zend_Search_Lucene_Analysis_TokenFilter_StopWords($this->_getStopWords());
     $this->addFilter($stopWords);

     $drupal = new LuceneAPI_Search_Lucene_Analysis_TokenFilter_Drupal();
     $this->addFilter($drupal);
 }
Пример #4
0
 /**
  * Verifies that the Utf8Num case-insensitive analyzer splits a Cyrillic
  * text with digits into three lower-cased tokens and reports the expected
  * start/end offsets and position increments for each of them.
  *
  * The test is marked as skipped (rather than silently passing) when PCRE
  * was built without Unicode support or the mbstring extension is missing.
  */
 public function testUtf8NumCaseInsensitive()
 {
     // "@" is deliberate: without PCRE Unicode support preg_match() emits a warning here.
     if (@preg_match('/\\pL/u', 'a') != 1) {
         $this->markTestSkipped('PCRE unicode support is turned off');
     }
     if (!function_exists('mb_strtolower')) {
         $this->markTestSkipped('mbstring extension is disabled');
     }

     $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();

     // UTF-8 text with Cyrillic symbols
     $tokenList = $analyzer->tokenize('Слово1 Слово2 ДругоеСлово', 'UTF-8');

     // assertEquals() takes the expected value first, then the actual one.
     $this->assertEquals(3, count($tokenList));

     $this->assertEquals('слово1', $tokenList[0]->getTermText());
     $this->assertEquals(0, $tokenList[0]->getStartOffset());
     $this->assertEquals(6, $tokenList[0]->getEndOffset());
     $this->assertEquals(1, $tokenList[0]->getPositionIncrement());

     $this->assertEquals('слово2', $tokenList[1]->getTermText());
     $this->assertEquals(7, $tokenList[1]->getStartOffset());
     $this->assertEquals(13, $tokenList[1]->getEndOffset());
     $this->assertEquals(1, $tokenList[1]->getPositionIncrement());

     $this->assertEquals('другоеслово', $tokenList[2]->getTermText());
     $this->assertEquals(14, $tokenList[2]->getStartOffset());
     $this->assertEquals(25, $tokenList[2]->getEndOffset());
     $this->assertEquals(1, $tokenList[2]->getPositionIncrement());
 }
 /**
  * Installs the global default Lucene analyzer: a case-insensitive UTF-8
  * analyzer (letters and digits) with a short-words token filter created
  * with its default settings.
  */
 protected function _initZendSearch()
 {
     $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
     $analyzer->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_ShortWords());

     Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
 }