/**
 * Constructs a filesystem directory storage on a three-level nested path
 * below _tempFiles/_files, checks its type, closes it and removes the
 * three nested folders again (deepest first).
 */
public function testFilesystemSubfoldersAutoCreation()
{
    $here = dirname(__FILE__);

    $storage = new Zend_Search_Lucene_Storage_Directory_Filesystem($here . '/_tempFiles/_files/dir1/dir2/dir3');
    $this->assertTrue($storage instanceof Zend_Search_Lucene_Storage_Directory);
    $storage->close();

    // rmdir() only works on empty folders, so clean up from the innermost one outwards.
    $createdFolders = array(
        '/_tempFiles/_files/dir1/dir2/dir3',
        '/_tempFiles/_files/dir1/dir2',
        '/_tempFiles/_files/dir1',
    );
    foreach ($createdFolders as $relativeFolder) {
        rmdir($here . $relativeFolder);
    }
}
/**
 * Compares the dictionary index produced by the DictionaryLoader from the
 * '.tii' part of segment '_1' with the pre-serialized reference stored in
 * the '_1.sti' fixture file.
 */
public function testCreate()
{
    $sourceDir = new Zend_Search_Lucene_Storage_Directory_Filesystem(dirname(__FILE__) . '/_files/_source');

    // Reference data: element 0 is the term dictionary, element 1 the term infos.
    $serialized = $sourceDir->getFileObject('_1.sti')->readBytes($sourceDir->fileLength('_1.sti'));
    $expected   = unserialize($serialized);

    // Actual data: raw '.tii' bytes taken from the segment's compound file.
    $segment = new Zend_Search_Lucene_Index_SegmentInfo('_1', 2, $sourceDir);
    $rawTii  = $segment->openCompoundFile('.tii')->readBytes($segment->compoundFileLength('.tii'));
    $loaded  = Zend_Search_Lucene_Index_DictionaryLoader::load($rawTii);

    $this->assertTrue($expected[0] == $loaded[0]);
    $this->assertTrue($expected[1] == $loaded[1]);
}
/**
 * Returns the Zend_Search_Lucene index belonging to the subroot of a component.
 *
 * The component's ancestors are walked until one carries the 'subroot' flag;
 * if none does, the root component is used. One index instance is kept per
 * subroot component id for the lifetime of the request.
 *
 * @param Kwf_Component_Data $subroot component (or any descendant) the index is requested for
 * @return Zend_Search_Lucene_Interface
 */
public static function getInstance(Kwf_Component_Data $subroot)
{
    static $instance = array();

    // Climb towards the root until a component flagged as subroot is found.
    for (; $subroot; $subroot = $subroot->parent) {
        if (Kwc_Abstract::getFlag($subroot->componentClass, 'subroot')) {
            break;
        }
    }
    if (!$subroot) {
        $subroot = Kwf_Component_Data_Root::getInstance();
    }

    $cacheKey = $subroot->componentId;
    if (isset($instance[$cacheKey])) {
        return $instance[$cacheKey];
    }

    // Global Lucene setup: UTF-8 analyzer that drops tokens shorter than 2 characters.
    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
    $analyzer->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_ShortWords(2));
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('utf-8');
    Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0666);

    $path = 'cache/fulltext' . '/' . $cacheKey;
    try {
        $index = Zend_Search_Lucene::open($path);
    } catch (Zend_Search_Lucene_Exception $e) {
        // Opening failed, so start a fresh index at that location.
        $index = Zend_Search_Lucene::create($path);
    }

    $instance[$cacheKey] = $index;
    return $index;
}
/**
 * Persists the deletions bit vector to '<segment name>.del'.
 *
 * Does nothing unless deletions were changed since the last write
 * ($_deletedDirty). The file receives the document count, the number of
 * deleted documents and the packed bit vector, in that order.
 */
public function writeChanges()
{
    if (!$this->_deletedDirty) {
        return;
    }

    if (!extension_loaded('bitset')) {
        // $_deleted is indexed by document id here; pack it into bytes, 8 documents per byte.
        $totalBytes = floor($this->_docCount / 8) + 1;
        $delBytes   = str_repeat(chr(0), $totalBytes);

        for ($offset = 0; $offset < $totalBytes; $offset++) {
            $packed = 0;
            for ($shift = 0; $shift < 8; $shift++) {
                if (isset($this->_deleted[$offset * 8 + $shift])) {
                    $packed |= 1 << $shift;
                }
            }
            $delBytes[$offset] = chr($packed);
        }

        $bitCount = count($this->_deleted);
    } else {
        // With the bitset extension $_deleted is written out as-is.
        $delBytes = $this->_deleted;
        $bitCount = count(bitset_to_array($delBytes));
    }

    $delFile = $this->_directory->createFile($this->_name . '.del');
    $delFile->writeInt($this->_docCount);
    $delFile->writeInt($bitCount);
    $delFile->writeBytes($delBytes);

    $this->_deletedDirty = false;
}
/**
 * Stores the index location, prepares the analyzer and sets the default
 * permissions for index files.
 *
 * @param string $location The index location
 */
public function __construct($location)
{
    $this->location = $location;

    // Case-insensitive UTF-8 analyzer that also tokenizes digits.
    $this->analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();

    // The storage class is loaded explicitly before its static setter is used.
    require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';
    Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0666);
}
/**
 * Opens the Lucene index under BUGDAR_ROOT/cache/lucene_index, creating it
 * when that path does not exist yet. Index files default to mode 0777.
 */
public function __construct()
{
    $index_path = BUGDAR_ROOT . '/cache/lucene_index';
    Zend_Search_Lucene_Storage_Directory_Filesystem::SetDefaultFilePermissions(0777);

    $this->lucene = file_exists($index_path)
        ? Zend_Search_Lucene::Open($index_path)
        : Zend_Search_Lucene::Create($index_path);
}
/**
 * Opens an index file stored within the segment's compound (.cfs) file.
 *
 * The returned file object is the compound file itself, positioned at the
 * offset recorded for the requested sub-file.
 *
 * @param string $extension sub-file extension, appended to the segment name
 * @throws Zend_Search_Lucene_Exception if the compound file has no such entry
 * @return Zend_Search_Lucene_Storage_File
 */
public function openCompoundFile($extension)
{
    $filename = $this->_name . $extension;

    if (isset($this->_segFiles[$filename])) {
        $compound = $this->_directory->getFileObject($this->_name . ".cfs");
        $compound->seek($this->_segFiles[$filename]);

        return $compound;
    }

    throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain ' . $filename . ' file.');
}
/**
 * Creates a file through the parent implementation, tolerating failures
 * whose message mentions 'chmod'.
 *
 * Any other Zend_Search_Lucene_Exception is rethrown unchanged.
 *
 * @param string $filename
 * @return mixed the handler registered for $filename in $_fileHandlers
 * @throws Zend_Search_Lucene_Exception
 */
public function createFile($filename)
{
    try {
        parent::createFile($filename);
    } catch (Zend_Search_Lucene_Exception $failure) {
        $isChmodFailure = strpos($failure->getMessage(), 'chmod') !== false;
        if (!$isChmodFailure) {
            throw $failure;
        }
        // chmod problems are deliberately ignored.
    }

    return $this->_fileHandlers[$filename];
}
/**
 * Merges the fixture segments _0 to _4 into 'mergedSegment' and compares the
 * resulting compound file byte-wise with the stored sample, then deletes
 * the generated file.
 */
public function testMerge()
{
    $fixtureRoot = dirname(__FILE__);
    $sourceDir   = new Zend_Search_Lucene_Storage_Directory_Filesystem($fixtureRoot . '/_files/_source');
    $targetDir   = new Zend_Search_Lucene_Storage_Directory_Filesystem($fixtureRoot . '/_files');

    $merger = new Zend_Search_Lucene_Index_SegmentMerger($targetDir, 'mergedSegment');
    foreach (array('_0', '_1', '_2', '_3', '_4') as $name) {
        $merger->addSource(new Zend_Search_Lucene_Index_SegmentInfo($name, 2, $sourceDir));
    }

    $result = $merger->merge();
    $this->assertTrue($result instanceof Zend_Search_Lucene_Index_SegmentInfo);
    // Drop the segment object before the generated file is read back.
    unset($result);

    $actualBytes = $targetDir
        ->getFileObject('mergedSegment.cfs')
        ->readBytes($targetDir->fileLength('mergedSegment.cfs'));
    $sampleBytes = $targetDir
        ->getFileObject('mergedSegment.cfs.sample')
        ->readBytes($targetDir->fileLength('mergedSegment.cfs.sample'));

    $this->assertEquals($actualBytes, $sampleBytes);

    $targetDir->deleteFile('mergedSegment.cfs');
}
/**
 * Configures the global Lucene defaults and remembers the index directory.
 *
 * @param string $directory index directory; its modification time is recorded (0 if it does not exist)
 * @param string $lang      language code; every value currently selects the English analyzer
 * @param bool   $highlight stored as a boolean in $this->highlight
 */
function __construct($directory, $lang = 'en', $highlight = true)
{
    switch ($lang) {
        // 'en' and any other value share the same branch.
        case 'en':
        default:
            $englishAnalyzer = new StandardAnalyzer_Analyzer_Standard_English();
            Zend_Search_Lucene_Analysis_Analyzer::setDefault($englishAnalyzer);
            Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('UTF-8');
    }

    Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0660);

    $this->directory = $directory;
    if (file_exists($directory)) {
        $this->lastModif = filemtime($directory);
    } else {
        $this->lastModif = 0;
    }
    $this->highlight = (bool) $highlight;
}
/**
 * Applies the global Zend_Search_Lucene configuration: a case-insensitive
 * UTF-8 default analyzer with stop-word and short-word filters taken from
 * sfConfig, and default index file permissions of 0777.
 */
private static function prepareZendSearchLucene()
{
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive()
    );

    // Stop words are configured as a comma separated string; no setting means an empty list.
    $stopWords = sfConfig::get('app_sf_propel_luceneable_behavior_stopWords', false);
    if (false === $stopWords) {
        $stopWordList = array();
    } else {
        $stopWordList = explode(',', $stopWords);
    }
    $stopWordsFilter = new Zend_Search_Lucene_Analysis_TokenFilter_StopWords($stopWordList);
    Zend_Search_Lucene_Analysis_Analyzer::getDefault()->addFilter($stopWordsFilter);

    // Short-word filter threshold, 3 unless configured otherwise.
    $shortWords = sfConfig::get('app_sf_propel_luceneable_behavior_shortWords', 3);
    $shortWordsFilter = new Zend_Search_Lucene_Analysis_TokenFilter_ShortWords($shortWords);
    Zend_Search_Lucene_Analysis_Analyzer::getDefault()->addFilter($shortWordsFilter);

    Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0777);
}
/**
 * Shows the job search form and, on a valid POST, runs the query against
 * the Lucene index and hands the hits to the view as 'jobs'.
 *
 * On an invalid POST the submitted values are put back into the form.
 * The previous debug output of the default file permissions was removed:
 * it was echoed straight into the response.
 */
public function searchAction()
{
    $form = new Application_Form_Search();
    $this->view->form = $form;
    $form->submit->setLabel('search jobs');

    $request = $this->getRequest();
    if (!$request->isPost()) {
        return;
    }

    // Validate the data obtained from the request object rather than the raw superglobal.
    $formData = $request->getPost();
    if (!$form->isValid($formData)) {
        $form->populate($formData);
        return;
    }

    $query = $form->getValue('query');

    // NOTE(review): the index location is hard-coded to a Windows path; consider moving it to configuration.
    $index = Zend_Search_Lucene::open('C:\\indexed');
    $this->view->jobs = $index->find($query);
}
/**
 * Write changes if it's necessary.
 *
 * Packs the deletions bit vector and stores it in a new generation file
 * '<segment name>_<generation in base 36>.del'. The new generation is the
 * highest generation found in the directory plus one (a plain
 * '<segment name>.del' file counts as generation 0); without any existing
 * deletions file the generation is 1.
 *
 * The index write lock is held while the generation is determined and the
 * file is created, and is released again even if that step fails.
 *
 * @throws Exception whatever the directory listing or file creation throws
 */
public function writeChanges()
{
    if (!$this->_deletedDirty) {
        return;
    }

    if (extension_loaded('bitset')) {
        $delBytes = $this->_deleted;
        $bitCount = count(bitset_to_array($delBytes));
    } else {
        // Pack the deleted document ids into bytes, 8 documents per byte.
        $byteCount = floor($this->_docCount / 8) + 1;
        $delBytes = str_repeat(chr(0), $byteCount);
        for ($count = 0; $count < $byteCount; $count++) {
            $byte = 0;
            for ($bit = 0; $bit < 8; $bit++) {
                if (isset($this->_deleted[$count * 8 + $bit])) {
                    $byte |= 1 << $bit;
                }
            }
            $delBytes[$count] = chr($byte);
        }
        $bitCount = count($this->_deleted);
    }

    // Get new generation number
    $lock = Zend_Search_Lucene::obtainWriteLock($this->_directory);
    try {
        // The segment name is quoted so it can never be interpreted as regex syntax.
        $generationPattern = '/^' . preg_quote($this->_name, '/') . '_([a-zA-Z0-9]+)\\.del$/i';

        $delFileList = array();
        foreach ($this->_directory->fileList() as $file) {
            if ($file == $this->_name . '.del') {
                // Matches <segment_name>.del file name
                $delFileList[] = 0;
            } elseif (preg_match($generationPattern, $file, $matches)) {
                // Matches <segment_name>_NNN.del file names.
                // NNN is written in base 36 below, so it has to be decoded from base 36 as well;
                // a plain integer cast would misread every generation of 10 ('a') and above.
                $delFileList[] = (int) base_convert($matches[1], 36, 10);
            }
        }

        if (count($delFileList) == 0) {
            // There is no deletions file for current segment in the directory
            $this->_delGen = 1;
        } else {
            // Use the highest existing generation + 1
            $this->_delGen = max($delFileList) + 1;
        }

        $delFile = $this->_directory->createFile(
            $this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del'
        );
    } catch (Exception $e) {
        // Do not leave the index locked when the generation file could not be created.
        Zend_Search_Lucene::releaseWriteLock($this->_directory, $lock);
        throw $e;
    }
    Zend_Search_Lucene::releaseWriteLock($this->_directory, $lock);

    $delFile->writeInt($this->_docCount);
    $delFile->writeInt($bitCount);
    $delFile->writeBytes($delBytes);

    $this->_deletedDirty = false;
}
/**
 * Indexer Constructor.
 *
 * Does nothing when the 'enable_indexing' configuration flag is off.
 * Otherwise opens (or creates) the Lucene index below $webDir, applies the
 * global Lucene settings and makes sure the index folder contains an
 * .htaccess file and an index.php file.
 *
 * @global string $webDir base directory the index path is appended to (presumably the web root; confirm)
 */
public function __construct()
{
    global $webDir;

    if (!get_config('enable_indexing')) {
        return;
    }

    $index_path = $webDir . self::$_index_dir;

    // 0600: index files are readable and writable by their owner only
    Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0600);

    // Utilize UTF-8 compatible text analyzer
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive()
    );

    try {
        if (file_exists($index_path)) {
            $this->__index = Zend_Search_Lucene::open($index_path); // Open index
        } else {
            $this->__index = Zend_Search_Lucene::create($index_path); // Create index
        }
    } catch (Zend_Search_Lucene_Exception $e) {
        // NOTE(review): presumably fatal_error.php terminates the request; confirm,
        // because otherwise $this->__index is still unset below.
        require_once 'fatal_error.php';
    }

    $this->__index->setFormatVersion(Zend_Search_Lucene::FORMAT_2_3); // Set Index Format Version
    Zend_Search_Lucene::setResultSetLimit(self::$_resultSetLimit); // Set Result Set Limit

    // write an .htaccess to prevent raw access to index files
    $htaccess = $index_path . '/.htaccess';
    if (!file_exists($htaccess)) {
        $fd = fopen($htaccess, "w");
        // fopen() returns false on failure (and already reports a warning);
        // only write and close when a real handle was obtained.
        if ($fd !== false) {
            fwrite($fd, "deny from all\n");
            fclose($fd);
        }
    }

    if (!file_exists($index_path . '/index.php')) {
        touch($index_path . '/index.php');
    }
}
/**
 * Stores the deletions bit vector in a new generation '.del' file when
 * deletions have changed.
 *
 * Must only be called from the Writer's _updateSegments() method, i.e.
 * while the index write lock is already held.
 *
 * @internal
 */
public function writeChanges()
{
    if (!$this->_deletedDirty) {
        return;
    }

    if (!extension_loaded('bitset')) {
        // Pack the deleted document ids into bytes, 8 documents per byte.
        $totalBytes = floor($this->_docCount / 8) + 1;
        $delBytes   = str_repeat(chr(0), $totalBytes);

        for ($offset = 0; $offset < $totalBytes; $offset++) {
            $packed = 0;
            for ($shift = 0; $shift < 8; $shift++) {
                if (isset($this->_deleted[$offset * 8 + $shift])) {
                    $packed |= 1 << $shift;
                }
            }
            $delBytes[$offset] = chr($packed);
        }

        $bitCount = count($this->_deleted);
    } else {
        $delBytes = $this->_deleted;
        $bitCount = count(bitset_to_array($delBytes));
    }

    // Refresh $_delGen from the directory, then move on to the next generation.
    $this->_detectLatestDelGen();
    if ($this->_delGen == -1) {
        // -1 is replaced by the first generation
        $this->_delGen = 1;
    } else {
        $this->_delGen++;
    }

    // The generation is encoded in base 36 within the file name.
    $generationSuffix = base_convert($this->_delGen, 10, 36);
    $delFile = $this->_directory->createFile($this->_name . '_' . $generationSuffix . '.del');

    $delFile->writeInt($this->_docCount);
    $delFile->writeInt($bitCount);
    $delFile->writeBytes($delBytes);

    $this->_deletedDirty = false;
}
/**
 * Builds the storage through the parent constructor and then applies
 * mode 0777 to the given path via sfLuceneStorageFilesystem::chmod().
 *
 * @param string $path
 */
public function __construct($path)
{
    parent::__construct($path);

    sfLuceneStorageFilesystem::chmod($path, 0777);
}
/**
 * Checks that a deletion only becomes visible to a new SegmentInfo after
 * writeChanges() was called, and that removing the written deletions file
 * returns the segment to a state without deletions.
 */
public function testDelete()
{
    $fixtures = new Zend_Search_Lucene_Storage_Directory_Filesystem(dirname(__FILE__) . '/_source/_files');

    // 1) Delete a document but drop the object without writing anything.
    $unsaved = new Zend_Search_Lucene_Index_SegmentInfo($fixtures, '_1', 2, 0);
    $this->assertFalse($unsaved->hasDeletions());
    $unsaved->delete(0);
    $this->assertTrue($unsaved->hasDeletions());
    $delGen = $unsaved->getDelGen();
    unset($unsaved);

    // 2) A fresh object must not see the unwritten deletion; delete again and write it out.
    $saved = new Zend_Search_Lucene_Index_SegmentInfo($fixtures, '_1', 2, $delGen);
    $this->assertFalse($saved->hasDeletions());
    $saved->delete(0);
    $saved->writeChanges();
    $delGen = $saved->getDelGen();
    unset($saved);

    // 3) The written deletion is visible when the new generation is loaded.
    $reloaded = new Zend_Search_Lucene_Index_SegmentInfo($fixtures, '_1', 2, $delGen);
    $this->assertTrue($reloaded->hasDeletions());
    unset($reloaded);

    // 4) Remove the generated file; generation -1 must report no deletions.
    $fixtures->deleteFile('_1_' . base_convert($delGen, 10, 36) . '.del');

    $clean = new Zend_Search_Lucene_Index_SegmentInfo($fixtures, '_1', 2, -1);
    $this->assertFalse($clean->hasDeletions());
}
/**
 * Stores the given mode as the class-wide default file permissions.
 *
 * @param integer $mode permission bits, e.g. 0666
 */
public static function setDefaultFilePermissions($mode)
{
    self::$_defaultFilePermissions = $mode;
}
/**
 * Instantiate the Lucene index
 *
 * The index is opened on first use and created if opening fails; later
 * calls return the same instance.
 *
 * The global Lucene defaults (file permissions, analyzer, query encoding)
 * are applied before the index is opened or created, so that files written
 * while creating a new index already receive the intended 0664 mode.
 *
 * @return \Zend_Search_Lucene_Interface Lucene index instance
 * @throws Exception If the index cannot be created
 */
protected function _index()
{
    // One-time instantiation or creation of the lucene index
    if ($this->_index !== null) {
        return $this->_index;
    }

    // Global setup; has to precede open()/create() to affect the files they write
    \Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0664);
    \Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new \Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive()
    );
    \Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('UTF-8');

    try {
        // Try to instantiate an existing lucene index
        $this->_index = \Zend_Search_Lucene::open($this->_indexDirectory);
    } catch (\Zend_Search_Lucene_Exception $e) {
        // Opening failed: try to create a new lucene index
        try {
            $this->_index = \Zend_Search_Lucene::create($this->_indexDirectory);
        } catch (\Zend_Search_Lucene_Exception $e) {
            throw new Exception(sprintf('Error creating lucene index in "%1$s", reason: "%2$s"', $this->_indexDirectory, $e->getMessage()));
        }
    }

    // Minimize memory consumption
    $this->_index->setMaxBufferedDocs(1);

    // Set optimization frequency (never below 1)
    $this->_index->setMergeFactor(max(1, intval($GLOBALS['TYPO3_CONF_VARS']['EXT']['extParams']['tw_lucenesearch']['mergeFactor'])));

    // If applicable: Optimize index
    if ($this->_indexOptimize) {
        $this->_index->optimize();
    }

    $this->_index->commit();

    // The terms-per-query limit is only configured in frontend mode
    if (TYPO3_MODE == 'FE') {
        \Zend_Search_Lucene::setTermsPerQueryLimit(\Tollwerk\TwLucenesearch\Utility\Indexer::indexConfig($GLOBALS['TSFE'], 'search.limits.query'));
    }

    return $this->_index;
}
/**
 * Synchronizes the segment's deletions with the latest deletions file and
 * writes a new generation file when this process has deleted documents.
 *
 * Must only be called from the Writer's _updateSegments() method, i.e.
 * while the index write lock is already held.
 *
 * Without local deletions, the deletions are merely reloaded if a newer
 * generation exists. With local deletions, they are first merged with a
 * newer generation (if any) and then written as the next generation.
 *
 * @internal
 * @throws Zend_Search_Lucene_Exception if the latest generation found is older than the current one
 */
public function writeChanges()
{
    // Get new generation number
    $latestDelGen = $this->_detectLatestDelGen();

    if (!$this->_deletedDirty) {
        // No deletions were made by the current process
        if ($latestDelGen == $this->_delGen) {
            // Nothing changed on disk either
            return;
        }

        if ($latestDelGen > $this->_delGen) {
            // A newer deletions file exists: reload it
            $this->_delGen  = $latestDelGen;
            $this->_deleted = $this->_loadDelFile();
            return;
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Delete file processing workflow is corrupted for the segment \'' . $this->_name . '\'.');
    }

    $useBitset = extension_loaded('bitset');

    if ($latestDelGen > $this->_delGen) {
        // Merge current deletions with the latest deletions file
        $this->_delGen = $latestDelGen;
        $latestDelete  = $this->_loadDelFile();

        if ($useBitset) {
            $this->_deleted = bitset_union($this->_deleted, $latestDelete);
        } else {
            $this->_deleted += $latestDelete;
        }
    }

    if (!$useBitset) {
        // Pack the deleted document ids into bytes, 8 documents per byte.
        $totalBytes = floor($this->_docCount/8)+1;
        $delBytes   = str_repeat(chr(0), $totalBytes);

        for ($offset = 0; $offset < $totalBytes; $offset++) {
            $packed = 0;
            for ($shift = 0; $shift < 8; $shift++) {
                if (isset($this->_deleted[$offset*8 + $shift])) {
                    $packed |= (1<<$shift);
                }
            }
            $delBytes[$offset] = chr($packed);
        }

        $bitCount = count($this->_deleted);
    } else {
        $delBytes = $this->_deleted;
        $bitCount = count(bitset_to_array($delBytes));
    }

    if ($this->_delGen == -1) {
        // -1 is replaced by the first generation
        $this->_delGen = 1;
    } else {
        // Move on to the next generation
        $this->_delGen++;
    }

    // The generation is encoded in base 36 within the file name.
    $generationSuffix = base_convert($this->_delGen, 10, 36);
    $delFile = $this->_directory->createFile($this->_name . '_' . $generationSuffix . '.del');

    $delFile->writeInt($this->_docCount);
    $delFile->writeInt($bitCount);
    $delFile->writeBytes($delBytes);

    $this->_deletedDirty = false;
}
/**
 * Applies the global Lucene defaults and returns the index: the result of
 * rebuildIndex() when indexNeedsRebuilding() reports true, otherwise the
 * index opened from $this->file.
 *
 * @return mixed the rebuilt or opened index
 */
private function getIndex()
{
    global $prefs;

    Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0660);

    $englishAnalyzer = new StandardAnalyzer_Analyzer_Standard_English();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($englishAnalyzer);

    if (!$this->indexNeedsRebuilding()) {
        return Zend_Search_Lucene::open($this->file);
    }

    return $this->rebuildIndex();
}