/**
 * Generate compound index file
 *
 * Packs every file listed in $this->_files into a single '<name>.cfs' file
 * and deletes each packed file from the directory once it has been copied.
 *
 * Layout written: a VInt with the number of entries, then one
 * (data offset, file name) record per entry, then the file contents in the
 * same order.
 */
protected function _generateCFS()
{
    $compoundFile = $this->_directory->createFile($this->_name . '.cfs');
    $compoundFile->writeVInt(count($this->_files));

    // Pass 1: write the entry table. The real data offsets are not known yet,
    // so remember where each placeholder was written.
    $offsetSlots = array();
    foreach ($this->_files as $fileName) {
        $offsetSlots[$fileName] = $compoundFile->tell();
        $compoundFile->writeLong(0); // placeholder, patched in pass 2
        $compoundFile->writeString($fileName);
    }

    // Pass 2: append each file's data and patch its offset placeholder.
    foreach ($this->_files as $fileName) {
        // The write position is at the end of the file here, i.e. where this
        // entry's data is about to start.
        $dataStart = $compoundFile->tell();

        // Jump back, store the real offset, then return to the data position.
        $compoundFile->seek($offsetSlots[$fileName]);
        $compoundFile->writeLong($dataStart);
        $compoundFile->seek($dataStart);

        $sourceFile = $this->_directory->getFileObject($fileName);
        $remaining  = $this->_directory->fileLength($fileName);

        // Copy in chunks of at most 131072 bytes (128 KiB).
        while ($remaining > 0) {
            $chunk      = $sourceFile->readBytes(min($remaining, 131072));
            $remaining -= strlen($chunk);
            $compoundFile->writeBytes($chunk);
        }

        $this->_directory->deleteFile($fileName);
    }
}
/**
 * Create an empty index in the given directory.
 *
 * Generation 0 produces the pre-2.1 layout: stale index files ('deletable',
 * 'segments', files whose last four characters are a key of
 * self::$_indexExtensions, and '*.f<digits>' files) are removed, then fresh
 * 'segments' and 'deletable' files are written.
 *
 * Any other generation writes 'segments.gen' (generation stored twice) and
 * the generation-specific segments file.
 *
 * The index version is initialised from the current time and written as two
 * 32-bit words (high word first).
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param integer $generation
 * @param integer $nameCount  initial value of the segment name counter
 */
public static function createIndex(Zend_Search_Lucene_Storage_Directory $directory, $generation, $nameCount)
{
    if ($generation == 0) {
        // Create index in pre-2.1 mode
        foreach ($directory->fileList() as $file) {
            if ($file == 'deletable' ||
                $file == 'segments'  ||
                isset(self::$_indexExtensions[substr($file, strlen($file) - 4)]) ||
                preg_match('/\\.f\\d+$/i', $file) /* matches <segment_name>.f<decimal_number> file names */) {
                $directory->deleteFile($file);
            }
        }

        $segmentsFile = $directory->createFile('segments');
        $segmentsFile->writeInt((int) 0xffffffff);

        // write version (is initialized by current time)
        // Truncate to whole seconds explicitly: feeding a float that has a
        // fractional part to '&' is an implicit lossy conversion (deprecated
        // since PHP 8.1). The written words are the same as before.
        $version = (int) microtime(true);
        $segmentsFile->writeInt((int) ($version / ((double) 0xffffffff + 1)));
        $segmentsFile->writeInt((int) ($version & 0xffffffff));

        // write name counter
        $segmentsFile->writeInt($nameCount);
        // write segment counter
        $segmentsFile->writeInt(0);

        $deletableFile = $directory->createFile('deletable');
        // write counter
        $deletableFile->writeInt(0);
    } else {
        $genFile = $directory->createFile('segments.gen');

        $genFile->writeInt((int) 0xfffffffe);
        // Write generation two times
        $genFile->writeLong($generation);
        $genFile->writeLong($generation);

        $segmentsFile = $directory->createFile(Zend_Search_Lucene::getSegmentFileName($generation));
        $segmentsFile->writeInt((int) 0xfffffffd);

        // write version (is initialized by current time)
        // See the note in the branch above on the explicit integer cast.
        $version = (int) microtime(true);
        $segmentsFile->writeInt((int) ($version / ((double) 0xffffffff + 1)));
        $segmentsFile->writeInt((int) ($version & 0xffffffff));

        // write name counter
        $segmentsFile->writeInt($nameCount);
        // write segment counter
        $segmentsFile->writeInt(0);
    }
}
/**
 * Opens the index.
 *
 * IndexReader constructor needs Directory as a parameter. It should be
 * a string with a path to the index folder or a Directory object.
 *
 * With $create set, an exclusive lock on 'index.lock' is taken first
 * (the call fails instead of proceeding if it cannot be obtained), an index
 * writer is created for an empty segment list, and the lock is then reduced
 * to a shared one. Without $create only the shared lock is taken.
 * In both cases the 'segments' file is read afterwards to populate the
 * segment list and the document count.
 *
 * @param mixed   $directory  path to the index folder or a
 *                            Zend_Search_Lucene_Storage_Directory_Filesystem object
 * @param boolean $create     create a new index instead of opening an existing one
 * @throws Zend_Search_Exception         if no directory is given
 * @throws Zend_Search_Lucene_Exception  on lock failure or wrong segments file format
 */
public function __construct($directory = null, $create = false)
{
    if ($directory === null) {
        throw new Zend_Search_Exception('No index directory specified');
    }

    // A directory object supplied by the caller is not closed on exit;
    // one created here from a path is.
    $createdHere = !($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem);
    $this->_directory = $createdHere
        ? new Zend_Search_Lucene_Storage_Directory_Filesystem($directory)
        : $directory;
    $this->_closeDirOnExit = $createdHere;

    // File used for index locking
    $this->_lock = $this->_directory->createFile('index.lock');

    $this->_segmentInfos = array();

    if (!$create) {
        // Wait if index is under switching from one set of segments to another (Index_Writer::_updateSegments())
        if (!$this->_lock->lock(LOCK_SH)) {
            throw new Zend_Search_Lucene_Exception('Can\'t obtain shared index lock');
        }

        $this->_writer = null;
    } else {
        // Throw an exception if index is under processing now
        if (!$this->_lock->lock(LOCK_EX, true)) {
            throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
        }

        // Writer will create segments file for empty segments list
        $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos, true);

        if (!$this->_lock->lock(LOCK_SH)) {
            throw new Zend_Search_Lucene_Exception('Can\'t reduce lock level from Exclusive to Shared');
        }
    }

    $segmentsFile = $this->_directory->getFileObject('segments');

    $formatMarker = $segmentsFile->readInt();
    if ($formatMarker != (int) 0xffffffff) {
        throw new Zend_Search_Lucene_Exception('Wrong segments file format');
    }

    // Skip the version (stored as two 32-bit words) ...
    $segmentsFile->readInt();
    $segmentsFile->readInt();

    // ... and the segment name counter
    $segmentsFile->readInt();

    $segmentsTotal   = $segmentsFile->readInt();
    $this->_docCount = 0;

    // Read one (name, size) record per segment
    for ($remaining = $segmentsTotal; $remaining > 0; $remaining--) {
        $segName = $segmentsFile->readString();
        $segSize = $segmentsFile->readInt();

        $this->_docCount      += $segSize;
        $this->_segmentInfos[] = new Zend_Search_Lucene_Index_SegmentInfo($segName, $segSize, $this->_directory);
    }
}
/**
 * Update segments file by adding current segment to a list
 *
 * Builds 'segments.new' from a freshly written header, the existing segment
 * records copied verbatim from 'segments', and one (name, size) record per
 * entry of $this->_newSegments; then renames it over 'segments'.
 *
 * @todo !!!!!Finish the implementation
 *
 * @throws Zend_Search_Lucene_Exception
 */
private function _updateSegments()
{
    // Offset at which segment records start in the existing file.
    // NOTE(review): presumably the size of the four header fields written
    // below (int + long + int + int = 4 + 8 + 4 + 4 bytes) - confirm against
    // the storage file implementation.
    $headerLength = 20;

    $currentFile = $this->_directory->getFileObject('segments');
    $rebuiltFile = $this->_directory->createFile('segments.new');

    // Header: format marker, index version, segment name counter, segment count
    $rebuiltFile->writeInt((int) 0xffffffff);
    $rebuiltFile->writeLong($this->_version);
    $rebuiltFile->writeInt($this->_segmentNameCounter);
    $rebuiltFile->writeInt($this->_segments + count($this->_newSegments));

    // Carry over everything after the old header unchanged
    $currentFile->seek($headerLength);
    $existingRecords = $currentFile->readBytes($this->_directory->fileLength('segments') - $headerLength);
    $rebuiltFile->writeBytes($existingRecords);

    // Append the new segments
    foreach ($this->_newSegments as $newName => $newInfo) {
        $rebuiltFile->writeString($newName);
        $rebuiltFile->writeInt($newInfo->count());
    }

    $this->_directory->renameFile('segments.new', 'segments');
}
/**
 * Update segments file by adding current segment to a list
 *
 * Steps, all performed under the index write lock:
 *  1. flush pending changes of every loaded segment;
 *  2. write the next-generation segments file: header, every segment of the
 *     current file that is not scheduled for deletion, then the new segments;
 *  3. record the new generation in 'segments.gen' (written twice; the second
 *     copy only after the new segments file is complete);
 *  4. if the read lock can be escalated, delete files that are no longer
 *     referenced; otherwise only purge the deleted segments' files;
 *  5. drop segments that are no longer listed from $this->_segmentInfos.
 *
 * On failure while writing the new segments file the previous generation is
 * restored in 'segments.gen'. The write lock is released on every exit path
 * handled here.
 *
 * @throws Zend_Search_Lucene_Exception
 */
private function _updateSegments()
{
    // Get an exclusive index lock
    Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

    // Write down changes for the segments
    foreach ($this->_segmentInfos as $segInfo) {
        $segInfo->writeChanges();
    }

    $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
    $segmentsFile   = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
    $newSegmentFile = $this->_directory->createFile(Zend_Search_Lucene::getSegmentFileName(++$generation), false);

    try {
        $genFile = $this->_directory->getFileObject('segments.gen', false);
    } catch (Zend_Search_Lucene_Exception $e) {
        if (strpos($e->getMessage(), 'is not readable') !== false) {
            $genFile = $this->_directory->createFile('segments.gen');
        } else {
            // Do not leave the index write-locked when giving up
            Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

            throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
        }
    }

    $genFile->writeInt((int) 0xfffffffe);
    // Write generation (first copy)
    $genFile->writeLong($generation);

    try {
        // Write format marker
        if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_1) {
            $newSegmentFile->writeInt((int) 0xfffffffd);
        } elseif ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
            $newSegmentFile->writeInt((int) 0xfffffffc);
        }

        // Read src file format identifier
        $format = $segmentsFile->readInt();
        if ($format == (int) 0xffffffff) {
            $srcFormat = Zend_Search_Lucene::FORMAT_PRE_2_1;
        } elseif ($format == (int) 0xfffffffd) {
            $srcFormat = Zend_Search_Lucene::FORMAT_2_1;
        } elseif ($format == (int) 0xfffffffc) {
            $srcFormat = Zend_Search_Lucene::FORMAT_2_3;
        } else {
            throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
        }

        $version = $segmentsFile->readLong() + $this->_versionUpdate;
        $this->_versionUpdate = 0;
        $newSegmentFile->writeLong($version);

        // Write segment name counter
        $newSegmentFile->writeInt($segmentsFile->readInt());

        // Get number of segments offset
        $numOfSegmentsOffset = $newSegmentFile->tell();
        // Write dummy data (segment counter)
        $newSegmentFile->writeInt(0);

        // Read number of segments
        $segmentsCount = $segmentsFile->readInt();

        // Segments present in the new file: name => size
        $segments = array();
        for ($count = 0; $count < $segmentsCount; $count++) {
            $segName = $segmentsFile->readString();
            $segSize = $segmentsFile->readInt();

            if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
                // pre-2.1 index format
                $delGen            = 0;
                $hasSingleNormFile = false;
                $numField          = (int) 0xffffffff;
                $isCompoundByte    = 0;
                $docStoreOptions   = null;
            } else {
                $delGen = $segmentsFile->readLong();

                if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
                    $docStoreOffset = $segmentsFile->readInt();

                    if ($docStoreOffset != (int) 0xffffffff) {
                        $docStoreSegment        = $segmentsFile->readString();
                        $docStoreIsCompoundFile = $segmentsFile->readByte();

                        $docStoreOptions = array('offset'     => $docStoreOffset,
                                                 'segment'    => $docStoreSegment,
                                                 'isCompound' => ($docStoreIsCompoundFile == 1));
                    } else {
                        $docStoreOptions = null;
                    }
                } else {
                    $docStoreOptions = null;
                }

                $hasSingleNormFile = $segmentsFile->readByte();
                $numField          = $segmentsFile->readInt();

                $normGens = array();
                if ($numField != (int) 0xffffffff) {
                    for ($count1 = 0; $count1 < $numField; $count1++) {
                        $normGens[] = $segmentsFile->readLong();
                    }
                }
                $isCompoundByte = $segmentsFile->readByte();
            }

            if (!in_array($segName, $this->_segmentsToDelete)) {
                // Load segment if necessary
                if (!isset($this->_segmentInfos[$segName])) {
                    if ($isCompoundByte == 0xff) {
                        // The segment is not a compound file
                        $isCompound = false;
                    } elseif ($isCompoundByte == 0x1) {
                        // The segment is a compound file
                        $isCompound = true;
                    } else {
                        // 0x00 means the status is unknown. Any other value is
                        // treated the same way, so that $isCompound is never
                        // left undefined or carried over from a previous segment.
                        $isCompound = null;
                    }

                    /** Zend_Search_Lucene_Index_SegmentInfo */
                    require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
                    $this->_segmentInfos[$segName] =
                                    new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                             $segName,
                                                                             $segSize,
                                                                             $delGen,
                                                                             $docStoreOptions,
                                                                             $hasSingleNormFile,
                                                                             $isCompound);
                } else {
                    // Retrieve actual deletions file generation number
                    $delGen = $this->_segmentInfos[$segName]->getDelGen();
                }

                $newSegmentFile->writeString($segName);
                $newSegmentFile->writeInt($segSize);
                $newSegmentFile->writeLong($delGen);
                if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                    if ($docStoreOptions !== null) {
                        $newSegmentFile->writeInt($docStoreOffset);
                        $newSegmentFile->writeString($docStoreSegment);
                        $newSegmentFile->writeByte($docStoreIsCompoundFile);
                    } else {
                        // Set DocStoreOffset to -1
                        $newSegmentFile->writeInt((int) 0xffffffff);
                    }
                } elseif ($docStoreOptions !== null) {
                    // The enclosing catch block restores the generation and
                    // releases the write lock, so the lock must not be
                    // released here as well.
                    throw new Zend_Search_Lucene_Exception('Index conversion to lower format version is not supported.');
                }

                $newSegmentFile->writeByte($hasSingleNormFile);
                $newSegmentFile->writeInt($numField);
                if ($numField != (int) 0xffffffff) {
                    foreach ($normGens as $normGen) {
                        $newSegmentFile->writeLong($normGen);
                    }
                }
                $newSegmentFile->writeByte($isCompoundByte);

                $segments[$segName] = $segSize;
            }
        }
        $segmentsFile->close();

        $segmentsCount = count($segments) + count($this->_newSegments);

        foreach ($this->_newSegments as $segName => $segmentInfo) {
            $newSegmentFile->writeString($segName);
            $newSegmentFile->writeInt($segmentInfo->count());

            // delete file generation: -1 (there is no delete file yet)
            $newSegmentFile->writeInt((int) 0xffffffff);
            $newSegmentFile->writeInt((int) 0xffffffff);
            if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                // docStoreOffset: -1 (segment doesn't use shared doc store)
                $newSegmentFile->writeInt((int) 0xffffffff);
            }
            // HasSingleNormFile
            $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());
            // NumField
            $newSegmentFile->writeInt((int) 0xffffffff);
            // IsCompoundFile
            $newSegmentFile->writeByte($segmentInfo->isCompound() ? 1 : -1);

            $segments[$segmentInfo->getName()] = $segmentInfo->count();
            $this->_segmentInfos[$segName] = $segmentInfo;
        }
        $this->_newSegments = array();

        $newSegmentFile->seek($numOfSegmentsOffset);
        $newSegmentFile->writeInt($segmentsCount);  // Update segments count
        $newSegmentFile->close();
    } catch (Exception $e) {
        /** Restore previous index generation */
        $generation--;
        $genFile->seek(4, SEEK_SET);
        // Write generation number twice
        $genFile->writeLong($generation);
        $genFile->writeLong($generation);

        // Release index write lock
        Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

        // Throw the exception
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
    }

    // Write generation (second copy)
    $genFile->writeLong($generation);

    // Check if another update or read process is not running now
    // If yes, skip clean-up procedure
    if (Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory)) {
        /**
         * Clean-up directory
         */
        // Three parallel arrays, sorted together below:
        // file name, deletion group, ordering number within the group
        $filesToDelete = array();
        $filesTypes    = array();
        $filesNumbers  = array();

        // list of .del files of currently used segments
        // each segment can have several generations of .del files
        // only last should not be deleted
        $delFiles = array();

        foreach ($this->_directory->fileList() as $file) {
            if ($file == 'deletable') {
                // 'deletable' file
                $filesToDelete[] = $file;
                $filesTypes[]    = 0; // delete this file first, since it's not used starting from Lucene v2.1
                $filesNumbers[]  = 0;
            } elseif ($file == 'segments') {
                // 'segments' file
                $filesToDelete[] = $file;
                $filesTypes[]    = 1; // second file to be deleted "zero" version of segments file (Lucene pre-2.1)
                $filesNumbers[]  = 0;
            } elseif (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
                // 'segments_xxx' file
                // Check if it's not a just created generation file
                if ($file != Zend_Search_Lucene::getSegmentFileName($generation)) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 2; // first group of files for deletions
                    $filesNumbers[]  = (int) base_convert(substr($file, 9), 36, 10); // ordered by segment generation numbers
                }
            } elseif (preg_match('/(^_([a-zA-Z0-9]+))\\.f\\d+$/i', $file, $matches)) {
                // one of per segment files ('<segment_name>.f<decimal_number>')
                // Check if it's not one of the segments in the current segments set
                if (!isset($segments[$matches[1]])) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 3; // second group of files for deletions
                    $filesNumbers[]  = (int) base_convert($matches[2], 36, 10); // order by segment number
                }
            } elseif (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\\.del$/i', $file, $matches)) {
                // one of per segment files ('<segment_name>_<del_generation>.del' where <segment_name> is '_<segment_number>')
                // Check if it's not one of the segments in the current segments set
                if (!isset($segments[$matches[1]])) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 3; // second group of files for deletions
                    $filesNumbers[]  = (int) base_convert($matches[2], 36, 10); // order by segment number
                } else {
                    $segmentNumber = (int) base_convert($matches[2], 36, 10);
                    $delGeneration = (int) base_convert($matches[4], 36, 10);
                    if (!isset($delFiles[$segmentNumber])) {
                        $delFiles[$segmentNumber] = array();
                    }
                    $delFiles[$segmentNumber][$delGeneration] = $file;
                }
            } elseif (isset(self::$_indexExtensions[substr($file, strlen($file) - 4)])) {
                // one of per segment files ('<segment_name>.<ext>')
                $segmentName = substr($file, 0, strlen($file) - 4);
                // Check if it's not one of the segments in the current segments set
                if (!isset($segments[$segmentName]) &&
                    ($this->_currentSegment === null || $this->_currentSegment->getName() != $segmentName)) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 3; // second group of files for deletions
                    $filesNumbers[]  = (int) base_convert(substr($file, 1 /* skip '_' */, strlen($file) - 5), 36, 10); // order by segment number
                }
            }
        }

        $maxGenNumber = 0;
        // process .del files of currently used segments
        foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
            ksort($delFiles[$segmentNumber], SORT_NUMERIC);
            array_pop($delFiles[$segmentNumber]); // remove last delete file generation from candidates for deleting

            end($delFiles[$segmentNumber]);
            $lastGenNumber = key($delFiles[$segmentNumber]);
            if ($lastGenNumber > $maxGenNumber) {
                $maxGenNumber = $lastGenNumber;
            }
        }
        foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
            foreach ($segmentDelFiles as $delGeneration => $file) {
                $filesToDelete[] = $file;
                $filesTypes[]    = 4; // third group of files for deletions
                $filesNumbers[]  = $segmentNumber * $maxGenNumber + $delGeneration; // order by <segment_number>,<del_generation> pair
            }
        }

        // Reorder files for deleting
        array_multisort($filesTypes,    SORT_ASC, SORT_NUMERIC,
                        $filesNumbers,  SORT_ASC, SORT_NUMERIC,
                        $filesToDelete, SORT_ASC, SORT_STRING);

        foreach ($filesToDelete as $file) {
            try {
                /** Skip shared docstore segments deleting */
                /** @todo Process '.cfx' files to check if them are already unused */
                if (substr($file, strlen($file) - 4) != '.cfx') {
                    $this->_directory->deleteFile($file);
                }
            } catch (Zend_Search_Lucene_Exception $e) {
                if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
                    // That's not "file is under processing or already deleted" exception
                    // Give both locks back before passing it through
                    Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
                    Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

                    throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
                }
            }
        }

        // Return read lock into the previous state
        Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
    } else {
        // Only release resources if another index reader is running now
        foreach ($this->_segmentsToDelete as $segName) {
            foreach (self::$_indexExtensions as $ext) {
                $this->_directory->purgeFile($segName . $ext);
            }
        }
    }

    // Clean-up _segmentsToDelete container
    $this->_segmentsToDelete = array();

    // Release index write lock
    Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

    // Remove unused segments from segments list
    foreach ($this->_segmentInfos as $segName => $segmentInfo) {
        if (!isset($segments[$segName])) {
            unset($this->_segmentInfos[$segName]);
        }
    }
}
/**
 * Obtain exclusive optimization lock on the index
 *
 * Creates the optimization lock file in the given directory and tries to
 * lock it exclusively.
 *
 * Returns lock object on success and false otherwise (doesn't block execution)
 *
 * @param Zend_Search_Lucene_Storage_Directory $lockDirectory
 * @return mixed
 */
public static function obtainOptimizationLock(Zend_Search_Lucene_Storage_Directory $lockDirectory)
{
    $lockFile = $lockDirectory->createFile(self::OPTIMIZATION_LOCK_FILE);

    // NOTE(review): the second argument presumably requests a non-blocking
    // attempt, matching the "doesn't block execution" contract above -
    // confirm against the storage file's lock() implementation.
    $acquired = $lockFile->lock(LOCK_EX, true);

    return $acquired ? $lockFile : false;
}
/**
 * Update segments file by adding current segment to a list
 *
 * Under the index write lock this writes the next-generation segments file
 * (format marker 0xFFFFFFFD): every segment of the current file that is not
 * scheduled for deletion, followed by the new segments. The new generation
 * is recorded in 'segments.gen' (second copy only after the segments file is
 * complete), and on failure the previous generation is restored there.
 * Afterwards, if the read lock can be escalated, files that are no longer
 * referenced are deleted; otherwise only the deleted segments' files are
 * purged. Finally, segments no longer listed are dropped from
 * $this->_segmentInfos.
 *
 * 64-bit values (version, deletions generation) are read and written as two
 * 32-bit words so that the code also works on 32-bit platforms;
 * ((double)0xFFFFFFFF + 1) is 2^32.
 *
 * The write lock is released on every exit path handled here.
 *
 * @throws Zend_Search_Lucene_Exception
 */
private function _updateSegments()
{
    // Get an exclusive index lock
    Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

    $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
    $segmentsFile   = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
    $newSegmentFile = $this->_directory->createFile(Zend_Search_Lucene::getSegmentFileName(++$generation), false);

    try {
        $genFile = $this->_directory->getFileObject('segments.gen', false);
    } catch (Zend_Search_Lucene_Exception $e) {
        if (strpos($e->getMessage(), 'is not readable') !== false) {
            $genFile = $this->_directory->createFile('segments.gen');
        } else {
            // Do not leave the index write-locked when giving up
            Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

            throw $e;
        }
    }

    $genFile->writeInt((int)0xFFFFFFFE);
    // Write generation (first copy)
    $genFile->writeLong($generation);

    try {
        // Write format marker
        $newSegmentFile->writeInt((int)0xFFFFFFFD);

        // Skip format identifier
        $segmentsFile->seek(4, SEEK_CUR);
        // $version = $segmentsFile->readLong() + $this->_versionUpdate;
        // Process version on 32-bit platforms
        $versionHigh = $segmentsFile->readInt();
        $versionLow  = $segmentsFile->readInt();
        // A negative low word stands for an unsigned value above 2^31 - 1
        $version = $versionHigh * ((double)0xFFFFFFFF + 1) +
                   (($versionLow < 0)? (double)0xFFFFFFFF - (-1 - $versionLow) : $versionLow);
        $version += $this->_versionUpdate;
        $this->_versionUpdate = 0;
        $newSegmentFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
        $newSegmentFile->writeInt((int)($version & 0xFFFFFFFF));

        // Write segment name counter
        $newSegmentFile->writeInt($segmentsFile->readInt());

        // Get number of segments offset
        $numOfSegmentsOffset = $newSegmentFile->tell();
        // Write dummy data (segment counter)
        $newSegmentFile->writeInt(0);

        // Read number of segments
        $segmentsCount = $segmentsFile->readInt();

        // Segments present in the new file: name => size
        $segments = array();
        for ($count = 0; $count < $segmentsCount; $count++) {
            $segName = $segmentsFile->readString();
            $segSize = $segmentsFile->readInt();

            if ($generation == 1 /* retrieved generation is 0 */) {
                // pre-2.1 index format
                $delGenHigh = 0;
                $delGenLow  = 0;
                $hasSingleNormFile = false;
                $numField = (int)0xFFFFFFFF;
                $isCompound = 1;
            } else {
                //$delGen = $segmentsFile->readLong();
                $delGenHigh = $segmentsFile->readInt();
                $delGenLow  = $segmentsFile->readInt();
                $hasSingleNormFile = $segmentsFile->readByte();
                $numField = $segmentsFile->readInt();

                $normGens = array();
                if ($numField != (int)0xFFFFFFFF) {
                    for ($count1 = 0; $count1 < $numField; $count1++) {
                        $normGens[] = $segmentsFile->readLong();
                    }
                }
                $isCompound = $segmentsFile->readByte();
            }

            if (!in_array($segName, $this->_segmentsToDelete)) {
                // Load segment if necessary
                if (!isset($this->_segmentInfos[$segName])) {
                    $delGen = $delGenHigh * ((double)0xFFFFFFFF + 1) +
                              (($delGenLow < 0)? (double)0xFFFFFFFF - (-1 - $delGenLow) : $delGenLow);
                    $this->_segmentInfos[$segName] =
                                new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                         $segName,
                                                                         $segSize,
                                                                         $delGen,
                                                                         $hasSingleNormFile,
                                                                         $isCompound);
                } else {
                    // Retrieve actual deletions file generation number
                    $delGen = $this->_segmentInfos[$segName]->getDelGen();

                    if ($delGen >= 0) {
                        $delGenHigh = (int)($delGen/((double)0xFFFFFFFF + 1));
                        $delGenLow  = (int)($delGen & 0xFFFFFFFF);
                    } else {
                        // Negative generation is stored as -1 in both words
                        $delGenHigh = $delGenLow = (int)0xFFFFFFFF;
                    }
                }

                $newSegmentFile->writeString($segName);
                $newSegmentFile->writeInt($segSize);
                $newSegmentFile->writeInt($delGenHigh);
                $newSegmentFile->writeInt($delGenLow);
                $newSegmentFile->writeByte($hasSingleNormFile);
                $newSegmentFile->writeInt($numField);
                if ($numField != (int)0xFFFFFFFF) {
                    foreach ($normGens as $normGen) {
                        $newSegmentFile->writeLong($normGen);
                    }
                }
                $newSegmentFile->writeByte($isCompound);

                $segments[$segName] = $segSize;
            }
        }
        $segmentsFile->close();

        $segmentsCount = count($segments) + count($this->_newSegments);

        foreach ($this->_newSegments as $segName => $segmentInfo) {
            $newSegmentFile->writeString($segName);
            $newSegmentFile->writeInt($segmentInfo->count());

            // delete file generation: -1 (there is no delete file yet)
            $newSegmentFile->writeInt((int)0xFFFFFFFF);
            $newSegmentFile->writeInt((int)0xFFFFFFFF);
            // HasSingleNormFile
            $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());
            // NumField
            $newSegmentFile->writeInt((int)0xFFFFFFFF);
            // IsCompoundFile
            $newSegmentFile->writeByte($segmentInfo->isCompound());

            $segments[$segmentInfo->getName()] = $segmentInfo->count();
            $this->_segmentInfos[$segName] = $segmentInfo;
        }
        $this->_newSegments = array();

        $newSegmentFile->seek($numOfSegmentsOffset);
        $newSegmentFile->writeInt($segmentsCount);  // Update segments count
        $newSegmentFile->close();
    } catch (Exception $e) {
        /** Restore previous index generation */
        $generation--;
        $genFile->seek(4, SEEK_SET);
        // Write generation number twice
        $genFile->writeLong($generation);
        $genFile->writeLong($generation);

        // Release index write lock
        Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

        // Throw the exception
        throw $e;
    }

    // Write generation (second copy)
    $genFile->writeLong($generation);

    // Check if another update process is not running now
    // If yes, skip clean-up procedure
    if (Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory)) {
        /**
         * Clean-up directory
         */
        // Three parallel arrays, sorted together below:
        // file name, deletion group, ordering number within the group
        $filesToDelete = array();
        $filesTypes    = array();
        $filesNumbers  = array();

        // list of .del files of currently used segments
        // each segment can have several generations of .del files
        // only last should not be deleted
        $delFiles = array();

        foreach ($this->_directory->fileList() as $file) {
            if ($file == 'deletable') {
                // 'deletable' file
                $filesToDelete[] = $file;
                $filesTypes[]    = 0; // delete this file first, since it's not used starting from Lucene v2.1
                $filesNumbers[]  = 0;
            } else if ($file == 'segments') {
                // 'segments' file
                $filesToDelete[] = $file;
                $filesTypes[]    = 1; // second file to be deleted "zero" version of segments file (Lucene pre-2.1)
                $filesNumbers[]  = 0;
            } else if (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
                // 'segments_xxx' file
                // Check if it's not a just created generation file
                if ($file != Zend_Search_Lucene::getSegmentFileName($generation)) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 2; // first group of files for deletions
                    $filesNumbers[]  = (int)base_convert(substr($file, 9), 36, 10); // ordered by segment generation numbers
                }
            } else if (preg_match('/(^_([a-zA-Z0-9]+))\.f\d+$/i', $file, $matches)) {
                // one of per segment files ('<segment_name>.f<decimal_number>')
                // Check if it's not one of the segments in the current segments set
                if (!isset($segments[$matches[1]])) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 3; // second group of files for deletions
                    $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
                }
            } else if (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\.del$/i', $file, $matches)) {
                // one of per segment files ('<segment_name>_<del_generation>.del' where <segment_name> is '_<segment_number>')
                // Check if it's not one of the segments in the current segments set
                if (!isset($segments[$matches[1]])) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 3; // second group of files for deletions
                    $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
                } else {
                    $segmentNumber = (int)base_convert($matches[2], 36, 10);
                    $delGeneration = (int)base_convert($matches[4], 36, 10);
                    if (!isset($delFiles[$segmentNumber])) {
                        $delFiles[$segmentNumber] = array();
                    }
                    $delFiles[$segmentNumber][$delGeneration] = $file;
                }
            } else if (isset(self::$_indexExtensions[substr($file, strlen($file)-4)])) {
                // one of per segment files ('<segment_name>.<ext>')
                $segmentName = substr($file, 0, strlen($file) - 4);
                // Check if it's not one of the segments in the current segments set
                if (!isset($segments[$segmentName])  &&
                    ($this->_currentSegment === null  ||  $this->_currentSegment->getName() != $segmentName)) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 3; // second group of files for deletions
                    $filesNumbers[]  = (int)base_convert(substr($file, 1 /* skip '_' */, strlen($file)-5), 36, 10); // order by segment number
                }
            }
        }

        $maxGenNumber = 0;
        // process .del files of currently used segments
        foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
            ksort($delFiles[$segmentNumber], SORT_NUMERIC);
            array_pop($delFiles[$segmentNumber]); // remove last delete file generation from candidates for deleting

            end($delFiles[$segmentNumber]);
            $lastGenNumber = key($delFiles[$segmentNumber]);
            if ($lastGenNumber > $maxGenNumber) {
                $maxGenNumber = $lastGenNumber;
            }
        }
        foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
            foreach ($segmentDelFiles as $delGeneration => $file) {
                $filesToDelete[] = $file;
                $filesTypes[]    = 4; // third group of files for deletions
                $filesNumbers[]  = $segmentNumber*$maxGenNumber + $delGeneration; // order by <segment_number>,<del_generation> pair
            }
        }

        // Reorder files for deleting
        array_multisort($filesTypes,    SORT_ASC, SORT_NUMERIC,
                        $filesNumbers,  SORT_ASC, SORT_NUMERIC,
                        $filesToDelete, SORT_ASC, SORT_STRING);

        foreach ($filesToDelete as $file) {
            try {
                $this->_directory->deleteFile($file);
            } catch (Zend_Search_Lucene_Exception $e) {
                if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
                    // That's not "file is under processing or already deleted" exception
                    // Give both locks back before passing it through
                    Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
                    Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

                    throw $e;
                }
            }
        }

        // Return read lock into the previous state
        Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
    } else {
        // Only release resources if another index reader is running now
        foreach ($this->_segmentsToDelete as $segName) {
            foreach (self::$_indexExtensions as $ext) {
                $this->_directory->purgeFile($segName . $ext);
            }
        }
    }

    // Clean-up _segmentsToDelete container
    $this->_segmentsToDelete = array();

    // Release index write lock
    Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

    // Remove unused segments from segments list
    foreach ($this->_segmentInfos as $segName => $segmentInfo) {
        if (!isset($segments[$segName])) {
            unset($this->_segmentInfos[$segName]);
        }
    }
}
/**
 * Update segments file by adding current segment to a list
 *
 * While holding an exclusive lock on 'index.lock', rewrites 'segments' via
 * 'segments.new': segments scheduled for deletion are dropped and new
 * segments are appended. The lock is then switched back to shared mode, the
 * files of deleted segments are removed, and segments found in the file but
 * not yet known to this object are loaded.
 *
 * @throws Zend_Search_Lucene_Exception
 */
private function _updateSegments()
{
    // Get an exclusive index lock
    // Wait, until all parallel searchers or indexers won't stop
    // and stop all next searchers, while we are updating segments file
    $indexLock = $this->_directory->getFileObject('index.lock');
    if (!$indexLock->lock(LOCK_EX)) {
        throw new Zend_Search_Lucene_Exception('Can\'t obtain exclusive index lock');
    }

    // Do not share file handlers to get file updates from other sessions.
    $currentFile = $this->_directory->getFileObject('segments', false);
    $rebuiltFile = $this->_directory->createFile('segments.new', false);

    // Format marker
    $rebuiltFile->writeInt((int) 0xffffffff);

    // Index version: skip the old format marker, then bump the stored version
    $currentFile->seek(4, SEEK_CUR);
    $version = $currentFile->readLong() + $this->_versionUpdate;
    $this->_versionUpdate = 0;
    $rebuiltFile->writeLong($version);

    // Segment name counter is carried over as is
    $rebuiltFile->writeInt($currentFile->readInt());

    // The final segment count is only known at the end, so remember where
    // it goes and write a placeholder for now
    $countFieldOffset = $rebuiltFile->tell();
    $storedCount      = $currentFile->readInt();
    $rebuiltFile->writeInt(0);

    // Segments kept from the existing file: name => size
    $segments = array();
    for ($index = 0; $index < $storedCount; $index++) {
        $segName = $currentFile->readString();
        $segSize = $currentFile->readInt();

        if (in_array($segName, $this->_segmentsToDelete)) {
            continue;
        }

        $rebuiltFile->writeString($segName);
        $rebuiltFile->writeInt($segSize);
        $segments[$segName] = $segSize;
    }
    $currentFile->close();

    $finalCount = count($segments) + count($this->_newSegments);

    // Remove segments, not listed in $segments (deleted)
    // Load segments, not listed in $this->_segmentInfos
    foreach ($this->_segmentInfos as $segId => $segInfo) {
        if (!isset($segments[$segInfo->getName()])) {
            // remove deleted segment from a list
            unset($this->_segmentInfos[$segId]);
        } else {
            // Segment is already included into $this->_segmentInfos
            unset($segments[$segInfo->getName()]);
        }
    }
    // $segments contains a list of segments to load
    // do it later

    foreach ($this->_newSegments as $newName => $newInfo) {
        $rebuiltFile->writeString($newName);
        $rebuiltFile->writeInt($newInfo->count());

        $this->_segmentInfos[] = $newInfo;
    }
    $this->_newSegments = array();

    // Patch the real segment count into the placeholder
    $rebuiltFile->seek($countFieldOffset);
    $rebuiltFile->writeInt($finalCount);
    $rebuiltFile->close();
    $this->_directory->renameFile('segments.new', 'segments');

    // Segments file update is finished
    // Switch back to shared lock mode
    $indexLock->lock(LOCK_SH);

    $fileList = $this->_directory->fileList();
    foreach ($this->_segmentsToDelete as $nameToDelete) {
        // Files with a known index extension
        foreach (self::$_indexExtensions as $ext) {
            $candidate = $nameToDelete . $ext;
            if ($this->_directory->fileExists($candidate)) {
                $this->_directory->deleteFile($candidate);
            }
        }

        // '<segment_name>.f<decimal_number>' files
        $prefixLength = strlen($nameToDelete) + 2;
        foreach ($fileList as $file) {
            if (substr($file, 0, $prefixLength) == $nameToDelete . '.f' &&
                ctype_digit(substr($file, $prefixLength))) {
                $this->_directory->deleteFile($file);
            }
        }
    }
    $this->_segmentsToDelete = array();

    // Load segments, created by other process
    foreach ($segments as $segName => $segSize) {
        $this->_segmentInfos[] = new Zend_Search_Lucene_Index_SegmentInfo($segName, $segSize, $this->_directory);
    }
}
/**
 * Update segments file by adding current segment to a list
 *
 * Rewrites 'segments' via 'segments.new': records of segments scheduled for
 * deletion are skipped, new segments are appended, the files of deleted
 * segments are removed, and finally 'segments.new' is renamed to 'segments'.
 *
 * @todo !!!! locks should be processed to prevent concurrent access errors
 *
 * @throws Zend_Search_Lucene_Exception
 */
private function _updateSegments()
{
    $currentFile = $this->_directory->getFileObject('segments');
    $rebuiltFile = $this->_directory->createFile('segments.new');

    // Format marker
    $rebuiltFile->writeInt((int) 0xffffffff);

    // Index version: skip the old format marker, then bump the stored version
    $currentFile->seek(4, SEEK_CUR);
    $version = $currentFile->readLong() + $this->_versionUpdate;
    $this->_versionUpdate = 0;
    $rebuiltFile->writeLong($version);

    // Segment name counter is carried over as is
    $rebuiltFile->writeInt($currentFile->readInt());

    // Number of segments: stored + new - deleted
    $storedCount = $currentFile->readInt();
    $rebuiltFile->writeInt($storedCount + count($this->_newSegments) - count($this->_segmentsToDelete));

    for ($index = 0; $index < $storedCount; $index++) {
        $segName = $currentFile->readString();
        $segSize = $currentFile->readInt();

        // NOTE(review): names are looked up as array KEYS here, while the
        // loop further down iterates the array's VALUES as names - this only
        // agrees if _segmentsToDelete is keyed by segment name; confirm.
        if (array_key_exists($segName, $this->_segmentsToDelete)) {
            continue;
        }

        $rebuiltFile->writeString($segName);
        $rebuiltFile->writeInt($segSize);
    }

    foreach ($this->_newSegments as $newName => $newInfo) {
        $rebuiltFile->writeString($newName);
        $rebuiltFile->writeInt($newInfo->count());

        $this->_segmentInfos[] = $newInfo;
    }
    $this->_newSegments = array();

    $fileList = $this->_directory->fileList();
    foreach ($this->_segmentsToDelete as $nameToDelete) {
        // Forget the segment
        foreach ($this->_segmentInfos as $segId => $segInfo) {
            if ($segInfo->getName() == $nameToDelete) {
                unset($this->_segmentInfos[$segId]);
            }
        }

        // Files with a known index extension
        foreach (self::$_indexExtensions as $ext) {
            $candidate = $nameToDelete . $ext;
            if ($this->_directory->fileExists($candidate)) {
                $this->_directory->deleteFile($candidate);
            }
        }

        // '<segment_name>.f<decimal_number>' files
        $prefixLength = strlen($nameToDelete) + 2;
        foreach ($fileList as $file) {
            if (substr($file, 0, $prefixLength) == $nameToDelete . '.f' &&
                ctype_digit(substr($file, $prefixLength))) {
                $this->_directory->deleteFile($file);
            }
        }
    }
    $this->_segmentsToDelete = array();

    $this->_directory->renameFile('segments.new', 'segments');
}
/**
 * Obtain exclusive write lock on the index
 *
 * Creates 'write.lock' in the given directory and locks it exclusively.
 *
 * @param Zend_Search_Lucene_Storage_Directory $defaultLockDirectory
 * @return Zend_Search_Lucene_Storage_File  the locked lock file
 * @throws Zend_Search_Lucene_Exception     if the lock cannot be obtained
 */
public static function obtainWriteLock(Zend_Search_Lucene_Storage_Directory $defaultLockDirectory)
{
    $writeLock = $defaultLockDirectory->createFile('write.lock');

    if ($writeLock->lock(LOCK_EX)) {
        return $writeLock;
    }

    throw new Zend_Search_Lucene_Exception('Can\'t obtain exclusive index lock');
}
/**
 * Obtain exclusive optimization lock on the index
 *
 * The lock is represented by the mere existence of the optimization lock
 * file: if the file is already there the lock is considered taken.
 *
 * Returns lock object on success and false otherwise (doesn't block execution)
 *
 * @param Zend_Search_Lucene_Storage_Directory $lockDirectory
 * @return mixed
 */
public static function obtainOptimizationLock(Zend_Search_Lucene_Storage_Directory $lockDirectory)
{
    // NOTE(review): the existence check and the creation below are two
    // separate steps, so this looks non-atomic between concurrent callers -
    // confirm whether that matters for the callers of this method.
    $alreadyTaken = $lockDirectory->fileExists(self::OPTIMIZATION_LOCK_FILE);

    if (!$alreadyTaken) {
        return $lockDirectory->createFile(self::OPTIMIZATION_LOCK_FILE);
    }

    return false;
}
/**
 * Create an empty index in the given directory.
 *
 * With $generation equal to 0 the index is created in pre-2.1 mode:
 * existing index files are removed and plain 'segments' and 'deletable'
 * files are written. For any other generation a 'segments.gen' file and
 * the segments file of that generation are written instead.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param integer $generation
 * @param integer $nameCount  initial value of the segment name counter
 */
public static function createIndex(Zend_Search_Lucene_Storage_Directory $directory, $generation, $nameCount)
{
    $isLegacyLayout = ($generation == 0);

    if ($isLegacyLayout) {
        // Remove files left over from a previous index
        foreach ($directory->fileList() as $file) {
            $isIndexFile = $file == 'deletable'
                || $file == 'segments'
                || isset(self::$_indexExtensions[substr($file, strlen($file) - 4)])
                || preg_match('/\\.f\\d+$/i', $file);

            if ($isIndexFile) {
                $directory->deleteFile($file);
            }
        }

        $segmentsFileName = 'segments';
        $formatMarker     = (int) 0xffffffff;
    } else {
        $genFile = $directory->createFile('segments.gen');
        $genFile->writeInt((int) 0xfffffffe);
        // The generation is stored twice
        $genFile->writeLong($generation);
        $genFile->writeLong($generation);

        $segmentsFileName = Zend_Search_Lucene::getSegmentFileName($generation);
        $formatMarker     = (int) 0xfffffffd;
    }

    // Segments file header: format marker, version, name counter, segment count
    $segmentsFile = $directory->createFile($segmentsFileName);
    $segmentsFile->writeInt($formatMarker);
    // Version is initialized from the current time
    $segmentsFile->writeLong(round(microtime(true)));
    $segmentsFile->writeInt($nameCount);
    $segmentsFile->writeInt(0);

    if ($isLegacyLayout) {
        // 'deletable' file holds just a zero counter
        $deletableFile = $directory->createFile('deletable');
        $deletableFile->writeInt(0);
    }
}
/**
 * Update segments file by adding current segment to a list
 *
 * Runs under the exclusive index write lock. Writes a segments file for
 * the next generation: entries of the current segments file are copied
 * (except the segments scheduled for deletion), new segments are appended,
 * and files no longer referenced by the new generation are removed from
 * the directory. 'segments.gen' receives the new generation number.
 *
 * The write lock is released on every exit path, including failures that
 * happen before the new segments file starts being filled.
 *
 * @throws Zend_Search_Lucene_Exception
 */
private function _updateSegments()
{
    // Get an exclusive index lock
    $lock = Zend_Search_Lucene::obtainWriteLock($this->_directory);

    // Setup phase. Any failure here must not leave the index write-locked,
    // so the lock is released before the exception is passed on.
    try {
        $generation     = Zend_Search_Lucene::getActualGeneration($this->_directory);
        $segmentsFile   = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
        $newSegmentFile = $this->_directory->createFile(Zend_Search_Lucene::getSegmentFileName(++$generation), false);

        try {
            $genFile = $this->_directory->getFileObject('segments.gen', false);
        } catch (Zend_Search_Lucene_Exception $e) {
            if (strpos($e->getMessage(), 'is not readable') !== false) {
                // No usable 'segments.gen' yet - create it
                $genFile = $this->_directory->createFile('segments.gen');
            } else {
                throw $e;
            }
        }

        $genFile->writeInt((int) 0xfffffffe);
        // Write generation (first copy)
        $genFile->writeLong($generation);
    } catch (Exception $e) {
        // Release index write lock
        Zend_Search_Lucene::releaseWriteLock($this->_directory, $lock);

        // Throw the exception
        throw $e;
    }

    try {
        // Write format marker
        $newSegmentFile->writeInt((int) 0xfffffffd);

        // Skip format identifier
        $segmentsFile->seek(4, SEEK_CUR);

        // The 64-bit version is read and written as two 32-bit halves
        // so that it can be processed on 32-bit platforms
        $versionHigh = $segmentsFile->readInt();
        $versionLow  = $segmentsFile->readInt();
        $version = $versionHigh * ((double) 0xffffffff + 1) +
                   ($versionLow < 0 ? (double) 0xffffffff - (-1 - $versionLow) : $versionLow);
        $version += $this->_versionUpdate;
        $this->_versionUpdate = 0;
        $newSegmentFile->writeInt((int) ($version / ((double) 0xffffffff + 1)));
        $newSegmentFile->writeInt((int) ($version & 0xffffffff));

        // Write segment name counter
        $newSegmentFile->writeInt($segmentsFile->readInt());

        // Get number of segments offset
        $numOfSegmentsOffset = $newSegmentFile->tell();
        // Write dummy data (segment counter); the real value is patched in
        // once the number of written entries is known
        $newSegmentFile->writeInt(0);

        // Read number of segments
        $segmentsCount = $segmentsFile->readInt();

        $segments = array();
        for ($count = 0; $count < $segmentsCount; $count++) {
            $segName = $segmentsFile->readString();
            $segSize = $segmentsFile->readInt();

            if ($generation == 1) {
                // pre-2.1 index format: entries carry only name and size,
                // so the remaining fields get fixed values
                $delGenHigh        = 0;
                $delGenLow         = 0;
                $hasSingleNormFile = false;
                $numField          = (int) 0xffffffff;
                $isCompound        = 1;
            } else {
                // Deletions file generation (64-bit value as two 32-bit halves)
                $delGenHigh        = $segmentsFile->readInt();
                $delGenLow         = $segmentsFile->readInt();
                $hasSingleNormFile = $segmentsFile->readByte();
                $numField          = $segmentsFile->readInt();

                $normGens = array();
                if ($numField != (int) 0xffffffff) {
                    for ($count1 = 0; $count1 < $numField; $count1++) {
                        $normGens[] = $segmentsFile->readLong();
                    }
                }
                $isCompound = $segmentsFile->readByte();
            }

            if (!in_array($segName, $this->_segmentsToDelete)) {
                // Load segment if necessary
                if (!isset($this->_segmentInfos[$segName])) {
                    $delGen = $delGenHigh * ((double) 0xffffffff + 1) +
                              ($delGenLow < 0 ? (double) 0xffffffff - (-1 - $delGenLow) : $delGenLow);
                    $this->_segmentInfos[$segName] =
                        new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                 $segName,
                                                                 $segSize,
                                                                 $delGen,
                                                                 $hasSingleNormFile,
                                                                 $isCompound);
                } else {
                    // Retrieve actual deletions file generation number
                    $delGen = $this->_segmentInfos[$segName]->getDelGen();

                    if ($delGen >= 0) {
                        $delGenHigh = (int) ($delGen / ((double) 0xffffffff + 1));
                        $delGenLow  = (int) ($delGen & 0xffffffff);
                    } else {
                        $delGenHigh = $delGenLow = (int) 0xffffffff;
                    }
                }

                $newSegmentFile->writeString($segName);
                $newSegmentFile->writeInt($segSize);
                $newSegmentFile->writeInt($delGenHigh);
                $newSegmentFile->writeInt($delGenLow);
                $newSegmentFile->writeByte($hasSingleNormFile);
                $newSegmentFile->writeInt($numField);
                if ($numField != (int) 0xffffffff) {
                    foreach ($normGens as $normGen) {
                        $newSegmentFile->writeLong($normGen);
                    }
                }
                $newSegmentFile->writeByte($isCompound);

                $segments[$segName] = $segSize;
            }
        }
        $segmentsFile->close();

        $segmentsCount = count($segments) + count($this->_newSegments);

        foreach ($this->_newSegments as $segName => $segmentInfo) {
            $newSegmentFile->writeString($segName);
            $newSegmentFile->writeInt($segmentInfo->count());

            // delete file generation: -1 (there is no delete file yet)
            $newSegmentFile->writeInt((int) 0xffffffff);
            $newSegmentFile->writeInt((int) 0xffffffff);

            // HasSingleNormFile
            $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());

            // NumField
            $newSegmentFile->writeInt((int) 0xffffffff);

            // IsCompoundFile
            $newSegmentFile->writeByte($segmentInfo->isCompound());

            $segments[$segmentInfo->getName()] = $segmentInfo->count();
            $this->_segmentInfos[$segName] = $segmentInfo;
        }
        $this->_newSegments = array();

        $newSegmentFile->seek($numOfSegmentsOffset);
        $newSegmentFile->writeInt($segmentsCount);  // Update segments count
        $newSegmentFile->close();

        // Clean-up directory
        foreach ($this->_directory->fileList() as $file) {
            if ($file == 'deletable' ||
                $file == 'segments'  ||
                isset(self::$_indexExtensions[substr($file, strlen($file) - 4)]) ||
                preg_match('/^segments_[a-zA-Z0-9]+$/i', $file) ||
                preg_match('/\\.f\\d+$/i', $file)) {
                // check, that file is not used by current index generation
                if ($file == Zend_Search_Lucene::getSegmentFileName($generation) ||
                    isset($segments[substr($file, 0, strlen($file) - 4)]) ||
                    isset($segments[substr($file, 0, strpos($file, '.f'))]) ||
                    substr($file, strlen($file) - 4) == '.del') {
                    continue;
                }

                try {
                    $this->_directory->deleteFile($file);
                } catch (Zend_Search_Lucene_Exception $e) {
                    if (strpos($e->getMessage(), 'Can\'t delete file') === 0) {
                        // File is under processing
                        // Stop clean-up process
                        break;
                    } else {
                        throw $e;
                    }
                }
            }
        }
    } catch (Exception $e) {
        /** Restore previous index generation */
        $generation--;
        $genFile->seek(4, SEEK_SET);
        // Write generation number twice
        $genFile->writeLong($generation);
        $genFile->writeLong($generation);

        // Release index write lock
        Zend_Search_Lucene::releaseWriteLock($this->_directory, $lock);

        // Throw the exception
        throw $e;
    }

    // Write generation (second copy)
    $genFile->writeLong($generation);

    // Release index write lock
    Zend_Search_Lucene::releaseWriteLock($this->_directory, $lock);

    // Remove unused segments from segments list
    foreach ($this->_segmentInfos as $segName => $segmentInfo) {
        if (!isset($segments[$segName])) {
            unset($this->_segmentInfos[$segName]);
        }
    }
}