/**
 * Rebuilds the places search index from scratch.
 *
 * Selects up to 7500 rows (id, name, placepic) from the table named by
 * $this->_name, adds one Lucene document per row to a newly created index
 * at $this->_indexPath, commits the index and prints the elapsed time in
 * whole seconds.
 *
 * @return void
 */
public function buildplaces()
{
    ini_set('memory_limit', '1000M');
    set_time_limit(0);
    $startedAt = time();

    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);

    // Create a new index at the configured path.
    $index = Zend_Search_Lucene::create($this->_indexPath);

    // Fetch the place rows that will be indexed.
    $select = $this->_db->select()->from($this->_name, array('id', 'name', 'placepic'));
    $select = $select->limit(7500);

    foreach ($this->_db->fetchAssoc($select) as $row) {
        $document = new Zend_Search_Lucene_Document();
        $document->addField(Zend_Search_Lucene_Field::keyword('placeid', $row['id']));
        $document->addField(Zend_Search_Lucene_Field::text('placename', $row['name']));
        $document->addField(Zend_Search_Lucene_Field::unStored('placepic', $row['placepic']));
        $index->addDocument($document);
    }

    $index->commit();

    print_r(time() - $startedAt);
}
/**
 * Configures the global Lucene defaults: UTF-8 query parsing and the CJK
 * analyzer as the default analyzer.
 */
public function __construct()
{
    // Query strings are parsed as UTF-8.
    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('utf-8');

    // Install the CJK analyzer as the process-wide default.
    $cjkAnalyzer = new Zend_Search_Lucene_Analysis_Analyzer_CJK();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($cjkAnalyzer);
}
/**
 * Adds a document to this segment.
 *
 * Every field of the document is registered with the segment. Indexed
 * fields are turned into terms (tokenized with the default analyzer, or
 * taken as one token when the field is not tokenized) and their positions
 * are recorded for the current document number. Norms are then extended by
 * one byte for every indexed field known to the segment, and the stored
 * fields are handed to addStoredFields().
 *
 * @param Zend_Search_Lucene_Document $document
 * @throws Zend_Search_Lucene_Exception when a field requests term vector storage
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    $similarity    = Zend_Search_Lucene_Search_Similarity::getDefault();
    $fieldsToStore = array();
    $normsForDoc   = array();

    foreach ($document->getFieldNames() as $name) {
        $field = $document->getField($name);
        $this->addField($field);

        if ($field->storeTermVector) {
            /**
             * @todo term vector storing support
             */
            throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
        }

        if ($field->isIndexed) {
            if ($field->isTokenized) {
                $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($field->stringValue);
            } else {
                // Untokenized fields contribute their whole value as a single token.
                $tokens = array(
                    new Zend_Search_Lucene_Analysis_Token($field->stringValue, 0, strlen($field->stringValue))
                );
            }

            $lengthNorm = $similarity->lengthNorm($field->name, count($tokens));
            $normsForDoc[$field->name] = chr($similarity->encodeNorm($lengthNorm));

            $position = 0;
            foreach ($tokens as $token) {
                $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
                $key  = $term->key();

                if (!isset($this->_termDictionary[$key])) {
                    // Term not seen before in this segment.
                    $this->_termDictionary[$key] = $term;
                    $this->_termDocs[$key] = array($this->_docCount => array());
                } elseif (!isset($this->_termDocs[$key][$this->_docCount])) {
                    // Known term, first occurrence in this document.
                    $this->_termDocs[$key][$this->_docCount] = array();
                }

                $position += $token->getPositionIncrement();
                $this->_termDocs[$key][$this->_docCount][] = $position;
            }
        }

        if ($field->isStored) {
            $fieldsToStore[] = $field;
        }
    }

    foreach ($this->_fields as $fieldName => $field) {
        if ($field->isIndexed) {
            if (!isset($this->_norms[$fieldName])) {
                // Back-fill the zero-length norm for all documents added before this field appeared.
                $this->_norms[$fieldName] = str_repeat(
                    chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0))),
                    $this->_docCount
                );
            }

            if (isset($normsForDoc[$fieldName])) {
                $this->_norms[$fieldName] .= $normsForDoc[$fieldName];
            } else {
                // The field is absent from this document: append the zero-length norm.
                $this->_norms[$fieldName] .= chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0)));
            }
        }
    }

    $this->addStoredFields($fieldsToStore);
}
/**
 * Builds the search query for this phrase entry.
 *
 * The phrase is tokenized with the default analyzer. No tokens produce an
 * insignificant query, one token produces a term query, and several tokens
 * produce a phrase query (with slop applied for proximity searches). The
 * entry's boost is applied to term and phrase queries.
 *
 * @param string $encoding encoding passed to the analyzer's tokenize() call
 * @return Zend_Search_Lucene_Search_Query
 * @throws Zend_Search_Lucene_Search_QueryParserException if the phrase contains '?' or '*'
 */
public function getQuery($encoding)
{
    $containsWildcard = strpos($this->_phrase, '?') !== false
                     || strpos($this->_phrase, '*') !== false;
    if ($containsWildcard) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('Wildcards are only allowed in a single terms.');
    }

    $tokens     = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $encoding);
    $tokenCount = count($tokens);

    if ($tokenCount == 0) {
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }

    if ($tokenCount == 1) {
        $singleTerm = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
        $termQuery  = new Zend_Search_Lucene_Search_Query_Term($singleTerm);
        $termQuery->setBoost($this->_boost);

        return $termQuery;
    }

    // Two or more tokens: assemble a phrase query, keeping the token positions.
    $phraseQuery = new Zend_Search_Lucene_Search_Query_Phrase();
    $position    = -1;
    foreach ($tokens as $token) {
        $position += $token->getPositionIncrement();
        $phraseQuery->addTerm(
            new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field),
            $position
        );
    }

    if ($this->_proximityQuery) {
        $phraseQuery->setSlop($this->_wordsDistance);
    }
    $phraseQuery->setBoost($this->_boost);

    return $phraseQuery;
}
/**
 * Creates a new ZendLucene handler connection
 *
 * Opens the index at $location, or creates it when opening fails. It also
 * sets UTF-8 as the default query parser encoding and installs the UTF-8
 * case-insensitive text/number analyzer as the default analyzer.
 *
 * @param string $location
 * @throws ezcSearchCanNotConnectException if no index could be opened or created
 */
public function __construct( $location )
{
    /**
     * We're using realpath here because Zend_Search_Lucene does not do
     * that itself. It can cause issues because their destructor uses the
     * same filename but the cwd could have been changed.
     *
     * realpath() returns false when the path does not exist yet. In that
     * case the caller's path is kept, so that the "create it" fallback below
     * targets the requested location instead of an empty one.
     */
    $resolvedLocation = realpath( $location );
    if ( $resolvedLocation !== false )
    {
        $location = $resolvedLocation;
    }

    /* If the $location doesn't exist, ZSL throws a *generic* exception. We
     * don't care here though and just always assume it is because the
     * index does not exist. If it doesn't exist, we create it.
     */
    try
    {
        $this->connection = Zend_Search_Lucene::open( $location );
    }
    catch ( Zend_Search_Lucene_Exception $e )
    {
        $this->connection = Zend_Search_Lucene::create( $location );
    }

    $this->inTransaction = 0;

    if ( !$this->connection )
    {
        throw new ezcSearchCanNotConnectException( 'zendlucene', $location );
    }

    // Set proper default encoding for query parser
    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding( 'UTF-8' );
    Zend_Search_Lucene_Analysis_Analyzer::setDefault( new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive() );
}
/**
 * Returns the Zend_Search_Lucene instance for the subroot of the given component.
 *
 * The component's ancestors are walked until one carrying the 'subroot'
 * flag is found; if there is none, the root component is used. Each subroot
 * has its own index below cache/fulltext/<componentId>, opened (or created
 * when opening fails) once per request and then reused.
 *
 * @param Kwf_Component_Data for this index
 * @return Zend_Search_Lucene_Interface
 */
public static function getInstance(Kwf_Component_Data $subroot)
{
    static $instances = array();

    // Climb to the nearest component flagged as subroot.
    while ($subroot && !Kwc_Abstract::getFlag($subroot->componentClass, 'subroot')) {
        $subroot = $subroot->parent;
    }
    if (!$subroot) {
        $subroot = Kwf_Component_Data_Root::getInstance();
    }

    $id = $subroot->componentId;
    if (isset($instances[$id])) {
        return $instances[$id];
    }

    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
    $analyzer->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_ShortWords(2));
    //$stopWords = explode(' ', 'der dir das einer eine ein und oder doch ist sind an in vor nicht wir ihr sie es ich');
    //$analyzer->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_StopWords($stopWords));
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);

    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('utf-8');
    Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0666);

    $path = 'cache/fulltext' . '/' . $id;
    try {
        $instances[$id] = Zend_Search_Lucene::open($path);
    } catch (Zend_Search_Lucene_Exception $e) {
        // Opening failed: create a fresh index at the same path.
        $instances[$id] = Zend_Search_Lucene::create($path);
    }

    return $instances[$id];
}
/**
 * Return the default Analyzer implementation used by indexing code.
 *
 * When no analyzer has been installed yet, a case-insensitive text analyzer
 * is created, remembered and returned.
 *
 * @return Zend_Search_Lucene_Analysis_Analyzer
 */
public static function getDefault()
{
    if (self::$_defaultImpl instanceof Zend_Search_Lucene_Analysis_Analyzer) {
        return self::$_defaultImpl;
    }

    self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive();

    return self::$_defaultImpl;
}
/**
 * Replaces the index entry for one record.
 *
 * Opens the index at '../application/searchindex', deletes every document
 * whose 'pk' keyword equals $data['pk'], then adds a new document built from
 * $data and commits.
 *
 * @param array $data reads the keys 'pk', 'code', 'type', 'id', 'title' and 'description'
 * @return void
 * @throws Zend_Search_Exception propagated unchanged from the index layer
 */
public static function update($data)
{
    // Errors while opening the index are left to propagate to the caller.
    $index = Zend_Search_Lucene::open('../application/searchindex');
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive());

    // Remove an existing entry. The lookup is an exact term query rather
    // than the string 'pk:<value>': a query string is run through the
    // default analyzer, which only keeps letters, so a numeric key would
    // yield an empty query and the stale document would never be deleted.
    $pkTerm = new Zend_Search_Lucene_Index_Term((string) $data['pk'], 'pk');
    $hits = $index->find(new Zend_Search_Lucene_Search_Query_Term($pkTerm));
    foreach ($hits as $hit) {
        $index->delete($hit->id);
    }

    $doc = new Zend_Search_Lucene_Document();
    $doc->addField(Zend_Search_Lucene_Field::Keyword('pk', $data['pk']));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('code', $data['code'], 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('u_code', strtolower($data['code']), 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::unIndexed('type', $data['type'], 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::unIndexed('id', $data['id'], 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::Text('title', $data['title'], 'UTF-8'));
    // The en_* fields hold the text after Default_Model_Functions::convert_vi_to_en().
    $doc->addField(Zend_Search_Lucene_Field::Text('en_title', Default_Model_Functions::convert_vi_to_en($data['title']), 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::Text('description', $data['description'], 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::Text('en_description', Default_Model_Functions::convert_vi_to_en($data['description']), 'UTF-8'));

    $index->addDocument($doc);
    $index->commit();
}
/**
 * Runs a fixed range query against the index in PUBLIC_PATH/tmp/lucene and
 * echoes the title and employee code of each hit.
 *
 * Layout and view rendering are disabled; output is written directly.
 *
 * @return void
 */
public function luceneSearchAction()
{
    $this->view->layout()->disableLayout();
    $this->_helper->viewRenderer->setNoRender(true);

    $index = Zend_Search_Lucene::open(PUBLIC_PATH . '/tmp/lucene');

    // An earlier, disabled variant built this search programmatically: a
    // term query on title 'ritesh' combined, in a boolean query, with a
    // range query on empcode from '0' to '53'.

    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum_CaseInsensitive();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
    Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength(1);

    foreach ($index->find("empcode:[000 TO 200]") as $hit) {
        echo "Title:" . $hit->title;
        echo "-------EmpCode:" . $hit->empcode;
        echo "<br>";
    }
}
/**
 * Searchengine::__construct()
 *
 * Runs the parent constructor, records the index location below APPPATH,
 * loads Zend_Search_Lucene through the 'zend' library and installs the
 * UTF-8 analyzer as the default analyzer.
 *
 * @return
 */
public function __construct()
{
    parent::__construct();

    // Make the Zend_Search_Lucene classes available.
    $this->load->library('zend');
    $this->zend->load('Zend/Search/Lucene');

    $this->search_index = APPPATH . 'cache/search_index/index';

    $utf8Analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($utf8Analyzer);
}
/**
 * Opens the search index in TEMP_PATH/Search, creating it when its
 * write.lock.file is not present, and installs the UTF-8 case-insensitive
 * text/number analyzer as the default analyzer.
 *
 * @return void
 */
public function init()
{
    $indexDir = TEMP_PATH . '/Search';

    // The presence of the lock file decides between opening and creating the index.
    $this->_indexHandle = is_file($indexDir . '/write.lock.file')
        ? Zend_Search_Lucene::open($indexDir)
        : Zend_Search_Lucene::create($indexDir);

    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
}
/**
 * Logs start-up, logs an error when PCRE cannot match Unicode letters, and
 * configures the Lucene defaults (query encoding from $this->_encoding,
 * UTF-8 case-insensitive text/number analyzer).
 */
public function __construct()
{
    $this->_log()->info('Starting up');

    // Probe for Unicode property support; any result other than 1 counts as missing.
    $pcreHasUnicode = (@preg_match('/\\pL/u', 'a') == 1);
    if (!$pcreHasUnicode) {
        $this->_log()->err("PCRE unicode support is turned off.\n");
    }

    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding($this->_encoding);

    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
}
/**
 * Checks that a default analyzer exists and that setDefault() makes
 * getDefault() return the very same instance.
 *
 * @return void
 */
public function testAnalyzer()
{
    $originalAnalyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $isAnalyzer = $originalAnalyzer instanceof Zend_Search_Lucene_Analysis_Analyzer;
    $this->assertTrue($isAnalyzer);

    $replacement = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($replacement);

    $installed = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $this->assertTrue($installed === $replacement);

    // Put the previous analyzer back, since other tests rely on it.
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($originalAnalyzer);
}
/**
 * Returns the Lucene index, opening it when the index file exists and
 * creating it otherwise.
 *
 * The Zend libraries are registered first and the UTF-8 analyzer is
 * installed as the default analyzer.
 *
 * @return Zend_Search_Lucene_Interface
 */
public static function getLuceneIndex()
{
    ProjectConfiguration::registerZend();

    $utf8Analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($utf8Analyzer);

    $index = self::getLuceneIndexFile();
    if (!file_exists($index)) {
        return Zend_Search_Lucene::create($index);
    }

    return Zend_Search_Lucene::open($index);
}
/**
 * Registers the bundled Zend Framework once per request.
 *
 * On the first call, the vendor directory is prepended to the include path,
 * the Zend autoloader is loaded and instantiated, and the UTF-8
 * case-insensitive analyzer becomes the default analyzer. Later calls do
 * nothing.
 *
 * @return void
 */
public static function registerZend()
{
    if (!self::$zendLoaded) {
        $vendorDir = sfConfig::get('sf_lib_dir') . '/vendor';

        set_include_path($vendorDir . PATH_SEPARATOR . get_include_path());
        require_once $vendorDir . '/Zend/Loader/Autoloader.php';
        Zend_Loader_Autoloader::getInstance();

        self::$zendLoaded = true;

        $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive();
        Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
    }
}
/**
 * Sets up the search wrapper for an index directory.
 *
 * @param string $directory index directory; its modification time is recorded (0 when it does not exist)
 * @param string $lang      language code; every value currently selects the English analyzer
 * @param bool   $highlight stored after being cast to bool
 */
function __construct($directory, $lang = 'en', $highlight = true)
{
    // 'en' and every other language code resolve to the same analyzer,
    // so no per-language branching is needed.
    $analyzer = new StandardAnalyzer_Analyzer_Standard_English();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);

    $this->directory = $directory;
    if (file_exists($directory)) {
        $this->lastModif = filemtime($directory);
    } else {
        $this->lastModif = 0;
    }
    $this->highlight = (bool) $highlight;
}
/**
 * Searches the Lucene index for the request parameter 'q' and renders the
 * 'search' view with the hits, the raw term and the parsed query.
 *
 * Nothing is rendered when 'q' is absent.
 *
 * @return void
 */
public function actionSearch()
{
    //working.
    $this->layout = 'column2';

    $term = Yii::app()->getRequest()->getParam('q', null);
    if ($term === null) {
        return;
    }

    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum_CaseInsensitive();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);

    $indexPath = Yii::getPathOfAlias('application.' . $this->_indexFiles);
    $index     = new Zend_Search_Lucene($indexPath);

    $results = $index->find($term);
    $query   = Zend_Search_Lucene_Search_QueryParser::parse($term);

    $this->render('search', array(
        'results' => $results,
        'term'    => $term,
        'query'   => $query,
    ));
}
/**
 * Rebuilds the page search index.
 *
 * Fetches the pages to index from the page mapper, removes the current
 * index, initialises a new one when the removal reports success, adds every
 * page and finally optimizes the index.
 *
 * @param bool $forceCreate not used by this method
 * @return false|null false when the mapper does not return an array; no value otherwise
 */
public static function renewIndex($forceCreate = false)
{
    $mapper = Application_Model_Mappers_PageMapper::getInstance();
    $pages  = $mapper->getPagesForSearchIndex();
    if (!is_array($pages)) {
        return false;
    }

    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('UTF-8');
    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);

    // The index is only re-initialised when removing the old one succeeded.
    if (self::removeIndex()) {
        self::initIndex();
    }

    array_walk($pages, array('Tools_Search_Tools', 'addPageToIndex'));

    self::$_index->optimize();
}
/**
 * Extends the parent's analysis result with the terms the Zend UTF-8
 * analyzer extracts from the text.
 *
 * As a side effect, the UTF-8 analyzer becomes the default analyzer.
 *
 * @param string $text text to analyze; handed to the analyzer as UTF-8
 * @return array the parent's result followed by one entry per token
 */
public function analyze($text)
{
    $terms = parent::analyze($text);

    sfOpenPNEApplicationConfiguration::registerZend();

    Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8());
    $tokenizer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $tokenizer->setInput($text, 'UTF-8');

    // Drain the token stream; nextToken() signals the end with null.
    $token = $tokenizer->nextToken();
    while ($token !== null) {
        $terms[] = $token->getTermText();
        $token = $tokenizer->nextToken();
    }

    return $terms;
}
/**
 * Constructor
 *
 * Terminates the script when DATA_PATH is not writable. Otherwise opens the
 * index in DATA_PATH/search, creating it if opening throws, and installs
 * the UTF-8 case-insensitive text/number analyzer as the default analyzer.
 */
public function __construct()
{
    if (!is_writable(DATA_PATH)) {
        die('The directory ' . DATA_PATH . ' is not writable');
    }

    $indexDir = DATA_PATH . '/search';
    try {
        $this->_data = Zend_Search_Lucene::open($indexDir);
    } catch (Exception $openFailure) {
        // Any failure to open is answered by creating a new index.
        $this->_data = Zend_Search_Lucene::create($indexDir);
    }

    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
}
/**
 * Sets up the search wrapper for an index directory.
 *
 * Installs the English standard analyzer, UTF-8 query parsing and 0660
 * default permissions for index files.
 *
 * @param string $directory index directory; its modification time is recorded (0 when it does not exist)
 * @param string $lang      language code; every value currently selects the English analyzer
 * @param bool   $highlight stored after being cast to bool
 */
function __construct($directory, $lang = 'en', $highlight = true)
{
    // 'en' and every other language code share this configuration.
    $analyzer = new StandardAnalyzer_Analyzer_Standard_English();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('UTF-8');

    Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0660);

    $this->directory = $directory;
    if (file_exists($directory)) {
        $this->lastModif = filemtime($directory);
    } else {
        $this->lastModif = 0;
    }
    $this->highlight = (bool) $highlight;
}
/**
 * Searches the zendsearch module's index for the query parameter 'q' and
 * renders the 'search' view with the hits, the raw term and the parsed query.
 *
 * The locale is switched to ru_RU.UTF-8 and the Lucene defaults are set
 * whether or not a term was supplied; nothing is rendered when 'q' is absent.
 *
 * @return void
 */
public function actionSearch()
{
    $indexFiles = Yii::app()->getModule('zendsearch')->indexFiles;

    setlocale(LC_ALL, 'ru_RU.UTF-8');
    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('UTF-8');

    $term = Yii::app()->getRequest()->getQuery('q', null);
    if ($term === null) {
        return;
    }

    $indexPath = Yii::getPathOfAlias('application.' . $indexFiles);
    $index     = new Zend_Search_Lucene($indexPath);

    $results = $index->find($term);
    $query   = Zend_Search_Lucene_Search_QueryParser::parse($term);

    $this->render('search', array(
        'results' => $results,
        'term'    => $term,
        'query'   => $query,
    ));
}
/**
 * Opens the index in KUTU_ROOT_DIR . $indexDir, creating it when opening
 * throws, and installs the case-insensitive text/number analyzer as the
 * default analyzer.
 *
 * @param string|null $indexDir index directory, appended to KUTU_ROOT_DIR
 * @throws Zend_Exception when $indexDir is empty
 */
public function __construct($indexDir = null)
{
    if (empty($indexDir)) {
        throw new Zend_Exception('Index Directory can not be empty!');
    }

    $indexDir = KUTU_ROOT_DIR . $indexDir;

    try {
        $this->_index = Zend_Search_Lucene::open($indexDir);
    } catch (Exception $openFailure) {
        // Any failure to open is answered by creating a new index.
        $this->_index = Zend_Search_Lucene::create($indexDir);
    }

    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum_CaseInsensitive();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
}
/**
 * Configures Zend_Search_Lucene for the luceneable behavior.
 *
 * Installs the UTF-8 case-insensitive text/number analyzer as the default,
 * attaches a stop-word filter (comma-separated list from
 * app_sf_propel_luceneable_behavior_stopWords, empty when unset) and a
 * short-word filter (app_sf_propel_luceneable_behavior_shortWords,
 * default 3), and sets the default index file permissions to 0777.
 *
 * @return void
 */
private static function prepareZendSearchLucene()
{
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive()
    );

    $configuredStopWords = sfConfig::get('app_sf_propel_luceneable_behavior_stopWords', false);
    if (false === $configuredStopWords) {
        $stopWordList = array();
    } else {
        $stopWordList = explode(',', $configuredStopWords);
    }
    Zend_Search_Lucene_Analysis_Analyzer::getDefault()->addFilter(
        new Zend_Search_Lucene_Analysis_TokenFilter_StopWords($stopWordList)
    );

    $shortWordSetting = sfConfig::get('app_sf_propel_luceneable_behavior_shortWords', 3);
    Zend_Search_Lucene_Analysis_Analyzer::getDefault()->addFilter(
        new Zend_Search_Lucene_Analysis_TokenFilter_ShortWords($shortWordSetting)
    );

    Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0777);
}
/**
 * Installs the UTF-8 case-insensitive text/number analyzer as the default
 * and opens the current module's search index, creating it when its path
 * does not exist yet.
 *
 * Index creation is logged, as is a creation result that is not a
 * Zend_Search_Lucene_Interface.
 */
public function __construct()
{
    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);

    $indexPath = APPLICATION_PATH . '/modules/' . MODULE_NAME . '/search/index';

    if (file_exists($indexPath)) {
        $this->_index = Zend_Search_Lucene::open($indexPath);
        return;
    }

    Bbx_Log::write('Creating index file', null, self::LOG);
    $this->_index = Zend_Search_Lucene::create($indexPath);

    $created = $this->_index instanceof Zend_Search_Lucene_Interface;
    if (!$created) {
        Bbx_Log::write('Unable to create index file', null, self::LOG);
    }
}
/**
 * Prepares a full rebuild of the search index.
 *
 * Disables layout and view rendering, lifts the execution time limit,
 * installs the UTF-8 case-insensitive text/number analyzer as the default,
 * deletes the files of the old index in TEMP_PATH/Search and creates a new,
 * empty index there.
 *
 * @return void
 */
public function init()
{
    $this->_helper->layout()->disableLayout();
    $this->_helper->viewRenderer->setNoRender(true);
    ini_set('max_execution_time', 0);

    Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive());

    $indexPath = TEMP_PATH . '/Search';

    // Clear the old index. glob() may return false on error, which would
    // make foreach emit a warning, so it is normalised to an empty list.
    $files = glob($indexPath . '/*.*');
    if ($files === false) {
        $files = array();
    }
    foreach ($files as $file) {
        // The pattern can also match directories; only regular files are removed.
        if (is_file($file)) {
            unlink($file);
        }
    }

    $this->_indexHandle = Zend_Search_Lucene::create($indexPath);
}
/**
 * Constructor method.
 *
 * Connects to the index in $this->path, passing the create flag when the
 * directory has no 'segments' file. It then installs the UTF-8 analyzer as
 * the default, sets the iconv encodings and the query parser encoding to
 * utf-8, and clears the umask.
 */
function SiteSearch()
{
    $indexExists = @file_exists($this->path . '/segments');
    if ($indexExists) {
        $this->client = new Zend_Search_Lucene($this->path);
    } else {
        // No segments file: ask the constructor to create the index.
        $this->client = new Zend_Search_Lucene($this->path, true);
    }

    $utf8Analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($utf8Analyzer);

    ini_set('iconv.input_encoding', 'utf-8');
    ini_set('iconv.internal_encoding', 'utf-8');
    ini_set('iconv.output_encoding', 'utf-8');

    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('utf-8');

    umask(00);
}
/**
 * Opens the index in $indexDir, creating it when opening throws, and
 * installs the case-insensitive text/number analyzer as the default.
 *
 * @param string|null $indexDir index directory; when empty, KUTU_ROOT_DIR plus
 *                              the registry's config->indexing->dir is used
 */
public function __construct($indexDir = null)
{
    if (empty($indexDir)) {
        // Fall back to the directory configured in the registry.
        $config   = Zend_Registry::getInstance()->get('config');
        $indexDir = KUTU_ROOT_DIR . $config->indexing->dir;
    }

    try {
        $this->_index = Zend_Search_Lucene::open($indexDir);
    } catch (Exception $openFailure) {
        // Any failure to open is answered by creating a new index.
        $this->_index = Zend_Search_Lucene::create($indexDir);
    }

    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum_CaseInsensitive();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
}
/**
 * Opens the Lucene index in $globals['lucene_dir'], creating it (and
 * attempting to chmod the directory to 0777) when the directory does not
 * exist.
 *
 * The default analyzer is replaced by the project's UTF-8/number analyzer,
 * constructed with the global $lucene_stopwords.
 *
 * @return Zend_Search_Lucene_Interface
 */
function lucene_open()
{
    global $globals, $lucene_stopwords;

    // Swap in the project's token analyzer.
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new Mnm_Lucene_Analysis_Analyzer_Common_Utf8Num($lucene_stopwords)
    );

    if (!file_exists($globals['lucene_dir'])) {
        print "Creando dir\n";
        $created = Zend_Search_Lucene::create($globals['lucene_dir']);
        @chmod($globals['lucene_dir'], 0777);

        return $created;
    }

    return Zend_Search_Lucene::open($globals['lucene_dir']);
}
/**
 * Configures the data source from $config and loads its index.
 *
 * @param array $config reads 'indexFile' and 'source'; optionally
 *                      'indexDirectory' (TMP is used when empty) and
 *                      'analyzer' (a class name that is instantiated and
 *                      installed as the default analyzer)
 */
public function __construct($config)
{
    $this->indexFile = $config['indexFile'];
    $this->__setSources($config['source']);

    if (!empty($config['indexDirectory'])) {
        $this->indexDirectory = $config['indexDirectory'];
    } else {
        $this->indexDirectory = TMP;
    }

    $this->__loadIndex($this->indexDirectory . $this->indexFile);

    $encoding = strtolower(Configure::read('App.encoding'));
    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding($encoding);

    if (!empty($config['analyzer'])) {
        $analyzerClass = $config['analyzer'];
        Zend_Search_Lucene_Analysis_Analyzer::setDefault(new $analyzerClass());
    }

    parent::__construct($config);
}