/**
 * Generate compound index file
 *
 * Packs every file listed in $this->_files into a single
 * '<segment name>.cfs' file created in the segment directory:
 *
 *  1. the number of packed files is written as a VInt;
 *  2. a table of entries follows, each being a long data offset
 *     (written as 0 at first and patched later) plus the file name;
 *  3. the content of each file is appended in chunks of at most
 *     131072 bytes, its table entry is patched with the real offset,
 *     and the source file is deleted from the directory.
 */
protected function _generateCFS()
{
    $compound = $this->_directory->createFile($this->_name . '.cfs');
    $compound->writeVInt(count($this->_files));

    // Pass 1: emit the entry table, remembering where each offset slot lives
    $offsetSlots = array();
    foreach ($this->_files as $memberName) {
        $offsetSlots[$memberName] = $compound->tell();
        $compound->writeLong(0); // placeholder, patched in pass 2
        $compound->writeString($memberName);
    }

    // Pass 2: append file contents and patch the matching offset slot
    $chunkLimit = 131072;
    foreach ($this->_files as $memberName) {
        // Current position is where this member's data is going to start
        $payloadStart = $compound->tell();

        // Jump back to the slot, store the real offset, return to the end
        $compound->seek($offsetSlots[$memberName]);
        $compound->writeLong($payloadStart);
        $compound->seek($payloadStart);

        $source = $this->_directory->getFileObject($memberName);

        for ($remaining = $this->_directory->fileLength($memberName); $remaining > 0; $remaining -= strlen($chunk)) {
            $chunk = $source->readBytes(min($remaining, $chunkLimit));
            $compound->writeBytes($chunk);
        }

        // The member now lives inside the compound file only
        $this->_directory->deleteFile($memberName);
    }
}
/**
 * Update segments file by adding current segment to a list
 *
 * Steps performed while holding the index write lock:
 *
 *  1. pending changes of all loaded segments are written down;
 *  2. a new 'segments_N' file for the next generation is created and the
 *     new generation number is written to 'segments.gen' (first copy);
 *  3. the previous segments file is read entry by entry; entries of segments
 *     listed in $this->_segmentsToDelete are dropped, the others are copied
 *     (with the actual deletions generation of already loaded segments);
 *  4. entries for $this->_newSegments are appended and the segment counter
 *     is patched;
 *  5. the second copy of the generation number is written to 'segments.gen';
 *  6. if the read lock can be escalated, files that no longer belong to the
 *     current segments set are deleted; otherwise the files of removed
 *     segments are only purged from the directory object.
 *
 * If anything fails during steps 3-4, the previous generation number is
 * written back to 'segments.gen', the write lock is released and the failure
 * is rethrown as a RuntimeException.
 *
 * @throws \ZendSearch\Lucene\Exception\RuntimeException
 * @throws \ZendSearch\Lucene\Exception\InvalidFileFormatException
 */
private function _updateSegments()
{
    // Get an exclusive index lock
    Lucene\LockManager::obtainWriteLock($this->_directory);

    // Write down changes for the segments
    foreach ($this->_segmentInfos as $segInfo) {
        $segInfo->writeChanges();
    }

    $generation     = Lucene\Index::getActualGeneration($this->_directory);
    $segmentsFile   = $this->_directory->getFileObject(Lucene\Index::getSegmentFileName($generation), false);
    $newSegmentFile = $this->_directory->createFile(Lucene\Index::getSegmentFileName(++$generation), false);

    try {
        $genFile = $this->_directory->getFileObject('segments.gen', false);
    } catch (ExceptionInterface $e) {
        if (strpos($e->getMessage(), 'is not readable') !== false) {
            // 'segments.gen' does not exist yet, so create it
            $genFile = $this->_directory->createFile('segments.gen');
        } else {
            throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
        }
    }

    // 4-byte header of 'segments.gen'; the generation copies start at offset 4
    $genFile->writeInt((int) 0xfffffffe);
    // Write generation (first copy)
    $genFile->writeLong($generation);

    try {
        // Write format marker
        if ($this->_targetFormatVersion == Lucene\Index::FORMAT_2_1) {
            $newSegmentFile->writeInt((int) 0xfffffffd);
        } elseif ($this->_targetFormatVersion == Lucene\Index::FORMAT_2_3) {
            $newSegmentFile->writeInt((int) 0xfffffffc);
        }

        // Read src file format identifier
        $format = $segmentsFile->readInt();
        if ($format == (int) 0xffffffff) {
            $srcFormat = Lucene\Index::FORMAT_PRE_2_1;
        } elseif ($format == (int) 0xfffffffd) {
            $srcFormat = Lucene\Index::FORMAT_2_1;
        } elseif ($format == (int) 0xfffffffc) {
            $srcFormat = Lucene\Index::FORMAT_2_3;
        } else {
            throw new InvalidFileFormatException('Unsupported segments file format');
        }

        $version = $segmentsFile->readLong() + $this->_versionUpdate;
        $this->_versionUpdate = 0;
        $newSegmentFile->writeLong($version);

        // Write segment name counter
        $newSegmentFile->writeInt($segmentsFile->readInt());

        // Get number of segments offset
        $numOfSegmentsOffset = $newSegmentFile->tell();
        // Write dummy data (segment counter)
        $newSegmentFile->writeInt(0);

        // Read number of segments
        $segmentsCount = $segmentsFile->readInt();

        // Map <segment name> => <segment size> of segments kept in the new file
        $segments = array();
        for ($count = 0; $count < $segmentsCount; $count++) {
            $segName = $segmentsFile->readString();
            $segSize = $segmentsFile->readInt();

            if ($srcFormat == Lucene\Index::FORMAT_PRE_2_1) {
                // pre-2.1 index format
                $delGen            = 0;
                $hasSingleNormFile = false;
                $numField          = (int) 0xffffffff;
                $isCompoundByte    = 0;
                $docStoreOptions   = null;
            } else {
                $delGen = $segmentsFile->readLong();

                if ($srcFormat == Lucene\Index::FORMAT_2_3) {
                    $docStoreOffset = $segmentsFile->readInt();

                    if ($docStoreOffset != (int) 0xffffffff) {
                        $docStoreSegment        = $segmentsFile->readString();
                        $docStoreIsCompoundFile = $segmentsFile->readByte();

                        $docStoreOptions = array('offset'     => $docStoreOffset,
                                                 'segment'    => $docStoreSegment,
                                                 'isCompound' => $docStoreIsCompoundFile == 1);
                    } else {
                        $docStoreOptions = null;
                    }
                } else {
                    $docStoreOptions = null;
                }

                $hasSingleNormFile = $segmentsFile->readByte();
                $numField          = $segmentsFile->readInt();

                $normGens = array();
                if ($numField != (int) 0xffffffff) {
                    for ($count1 = 0; $count1 < $numField; $count1++) {
                        $normGens[] = $segmentsFile->readLong();
                    }
                }

                $isCompoundByte = $segmentsFile->readByte();
            }

            if (!in_array($segName, $this->_segmentsToDelete)) {
                // Load segment if necessary
                if (!isset($this->_segmentInfos[$segName])) {
                    if ($isCompoundByte == 0xff) {
                        // The segment is not a compound file
                        $isCompound = false;
                    } elseif ($isCompoundByte == 0x1) {
                        // The segment is a compound file
                        $isCompound = true;
                    } else {
                        // 0x0 means the status is unknown. Any other value is
                        // treated the same way, so that $isCompound is always
                        // assigned and never leaks from a previous iteration.
                        $isCompound = null;
                    }

                    $this->_segmentInfos[$segName] = new SegmentInfo(
                        $this->_directory,
                        $segName,
                        $segSize,
                        $delGen,
                        $docStoreOptions,
                        $hasSingleNormFile,
                        $isCompound
                    );
                } else {
                    // Retrieve actual deletions file generation number
                    $delGen = $this->_segmentInfos[$segName]->getDelGen();
                }

                $newSegmentFile->writeString($segName);
                $newSegmentFile->writeInt($segSize);
                $newSegmentFile->writeLong($delGen);
                if ($this->_targetFormatVersion == Lucene\Index::FORMAT_2_3) {
                    if ($docStoreOptions !== null) {
                        $newSegmentFile->writeInt($docStoreOffset);
                        $newSegmentFile->writeString($docStoreSegment);
                        $newSegmentFile->writeByte($docStoreIsCompoundFile);
                    } else {
                        // Set DocStoreOffset to -1
                        $newSegmentFile->writeInt((int) 0xffffffff);
                    }
                } elseif ($docStoreOptions !== null) {
                    // The write lock is intentionally NOT released here: the
                    // catch block of this try statement first restores the
                    // previous generation in 'segments.gen' and only then
                    // releases the lock (exactly once).
                    throw new RuntimeException('Index conversion to lower format version is not supported.');
                }

                $newSegmentFile->writeByte($hasSingleNormFile);
                $newSegmentFile->writeInt($numField);
                if ($numField != (int) 0xffffffff) {
                    foreach ($normGens as $normGen) {
                        $newSegmentFile->writeLong($normGen);
                    }
                }
                $newSegmentFile->writeByte($isCompoundByte);

                $segments[$segName] = $segSize;
            }
        }
        $segmentsFile->close();

        $segmentsCount = count($segments) + count($this->_newSegments);

        foreach ($this->_newSegments as $segName => $segmentInfo) {
            $newSegmentFile->writeString($segName);
            $newSegmentFile->writeInt($segmentInfo->count());

            // delete file generation: -1 (there is no delete file yet)
            // (written as two 32-bit halves)
            $newSegmentFile->writeInt((int) 0xffffffff);
            $newSegmentFile->writeInt((int) 0xffffffff);
            if ($this->_targetFormatVersion == Lucene\Index::FORMAT_2_3) {
                // docStoreOffset: -1 (segment doesn't use shared doc store)
                $newSegmentFile->writeInt((int) 0xffffffff);
            }
            // HasSingleNormFile
            $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());
            // NumField
            $newSegmentFile->writeInt((int) 0xffffffff);
            // IsCompoundFile
            $newSegmentFile->writeByte($segmentInfo->isCompound() ? 1 : -1);

            $segments[$segmentInfo->getName()] = $segmentInfo->count();
            $this->_segmentInfos[$segName] = $segmentInfo;
        }
        $this->_newSegments = array();

        $newSegmentFile->seek($numOfSegmentsOffset);
        $newSegmentFile->writeInt($segmentsCount); // Update segments count
        $newSegmentFile->close();
    } catch (\Exception $e) {
        /** Restore previous index generation */
        $generation--;
        $genFile->seek(4, SEEK_SET);
        // Write generation number twice
        $genFile->writeLong($generation);
        $genFile->writeLong($generation);

        // Release index write lock
        Lucene\LockManager::releaseWriteLock($this->_directory);

        // Throw the exception
        throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
    }

    // Write generation (second copy)
    $genFile->writeLong($generation);

    // Check if another update or read process is not running now
    // If yes, skip clean-up procedure
    if (Lucene\LockManager::escalateReadLock($this->_directory)) {
        /**
         * Clean-up directory
         */
        // Three parallel lists, sorted together below: file name, deletion
        // group (lower groups are deleted first) and order inside the group
        $filesToDelete = array();
        $filesTypes    = array();
        $filesNumbers  = array();

        // list of .del files of currently used segments
        // each segment can have several generations of .del files
        // only last should not be deleted
        $delFiles = array();

        foreach ($this->_directory->fileList() as $file) {
            if ($file == 'deletable') {
                // 'deletable' file
                $filesToDelete[] = $file;
                $filesTypes[]    = 0; // delete this file first, since it's not used starting from Lucene v2.1
                $filesNumbers[]  = 0;
            } elseif ($file == 'segments') {
                // 'segments' file
                $filesToDelete[] = $file;
                $filesTypes[]    = 1; // second file to be deleted "zero" version of segments file (Lucene pre-2.1)
                $filesNumbers[]  = 0;
            } elseif (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
                // 'segments_xxx' file
                // Check if it's not a just created generation file
                if ($file != Lucene\Index::getSegmentFileName($generation)) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 2; // first group of files for deletions
                    $filesNumbers[]  = (int) base_convert(substr($file, 9), 36, 10); // ordered by segment generation numbers
                }
            } elseif (preg_match('/(^_([a-zA-Z0-9]+))\\.f\\d+$/i', $file, $matches)) {
                // one of per segment files ('<segment_name>.f<decimal_number>')
                // Check if it's not one of the segments in the current segments set
                if (!isset($segments[$matches[1]])) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 3; // second group of files for deletions
                    $filesNumbers[]  = (int) base_convert($matches[2], 36, 10); // order by segment number
                }
            } elseif (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\\.del$/i', $file, $matches)) {
                // one of per segment files ('<segment_name>_<del_generation>.del' where <segment_name> is '_<segment_number>')
                // Check if it's not one of the segments in the current segments set
                if (!isset($segments[$matches[1]])) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 3; // second group of files for deletions
                    $filesNumbers[]  = (int) base_convert($matches[2], 36, 10); // order by segment number
                } else {
                    $segmentNumber = (int) base_convert($matches[2], 36, 10);
                    $delGeneration = (int) base_convert($matches[4], 36, 10);
                    if (!isset($delFiles[$segmentNumber])) {
                        $delFiles[$segmentNumber] = array();
                    }
                    $delFiles[$segmentNumber][$delGeneration] = $file;
                }
            } elseif (isset(self::$_indexExtensions[substr($file, strlen($file) - 4)])) {
                // one of per segment files ('<segment_name>.<ext>')
                $segmentName = substr($file, 0, strlen($file) - 4);
                // Check if it's not one of the segments in the current segments set
                if (!isset($segments[$segmentName])
                    && ($this->_currentSegment === null || $this->_currentSegment->getName() != $segmentName)
                ) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 3; // second group of files for deletions
                    $filesNumbers[]  = (int) base_convert(substr($file, 1, strlen($file) - 5), 36, 10); // order by segment number
                }
            }
        }

        $maxGenNumber = 0;
        // process .del files of currently used segments
        foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
            ksort($delFiles[$segmentNumber], SORT_NUMERIC);
            array_pop($delFiles[$segmentNumber]); // remove last delete file generation from candidates for deleting

            // Highest generation still left among the candidates of this segment
            // (key() yields null for an empty list, which never exceeds the maximum)
            end($delFiles[$segmentNumber]);
            $lastGenNumber = key($delFiles[$segmentNumber]);
            if ($lastGenNumber > $maxGenNumber) {
                $maxGenNumber = $lastGenNumber;
            }
        }
        foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
            foreach ($segmentDelFiles as $delGeneration => $file) {
                $filesToDelete[] = $file;
                $filesTypes[]    = 4; // third group of files for deletions
                // order by <segment_number>,<del_generation> pair;
                // generations range over 0..$maxGenNumber, so the multiplier has to
                // be $maxGenNumber + 1 for every pair to get a distinct, ordered key
                $filesNumbers[]  = $segmentNumber * ($maxGenNumber + 1) + $delGeneration;
            }
        }

        // Reorder files for deleting
        array_multisort(
            $filesTypes, SORT_ASC, SORT_NUMERIC,
            $filesNumbers, SORT_ASC, SORT_NUMERIC,
            $filesToDelete, SORT_ASC, SORT_STRING
        );

        foreach ($filesToDelete as $file) {
            try {
                /** Skip shared docstore segments deleting */
                /** @todo Process '.cfx' files to check if them are already unused */
                if (substr($file, strlen($file) - 4) != '.cfx') {
                    $this->_directory->deleteFile($file);
                }
            } catch (ExceptionInterface $e) {
                if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
                    // That's not "file is under processing or already deleted" exception
                    // Pass it through
                    throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
                }
            }
        }

        // Return read lock into the previous state
        Lucene\LockManager::deEscalateReadLock($this->_directory);
    } else {
        // Only release resources if another index reader is running now
        foreach ($this->_segmentsToDelete as $segName) {
            foreach (self::$_indexExtensions as $ext) {
                $this->_directory->purgeFile($segName . $ext);
            }
        }
    }

    // Clean-up _segmentsToDelete container
    $this->_segmentsToDelete = array();

    // Release index write lock
    Lucene\LockManager::releaseWriteLock($this->_directory);

    // Remove unused segments from segments list
    foreach ($this->_segmentInfos as $segName => $segmentInfo) {
        if (!isset($segments[$segName])) {
            unset($this->_segmentInfos[$segName]);
        }
    }
}