Exemple #1
0
 /**
  * Update segments file by adding current segment to a list
  *
  * @throws \Zend\Search\Lucene\Exception\RuntimeException
  * @throws \Zend\Search\Lucene\Exception\InvalidFileFormatException
  */
 private function _updateSegments()
 {
     // Overview: while holding the index write lock, copy the current
     // segments file into a new generation, skipping the segments listed in
     // $this->_segmentsToDelete and appending $this->_newSegments, record the
     // new generation number in 'segments.gen' and finally remove files that
     // are no longer referenced (only if no other reader/updater is active).
     //
     // Note on constants: the (int) 42949672xx.0 literals below are the
     // 32-bit markers 0xFFFFFFFC..0xFFFFFFFF (-4..-1 as signed 32-bit ints).

     // Get an exclusive index lock
     Lucene\LockManager::obtainWriteLock($this->_directory);
     // Write down changes for the segments
     foreach ($this->_segmentInfos as $segInfo) {
         $segInfo->writeChanges();
     }
     $generation = Lucene\Index::getActualGeneration($this->_directory);
     $segmentsFile = $this->_directory->getFileObject(Lucene\Index::getSegmentFileName($generation), false);
     $newSegmentFile = $this->_directory->createFile(Lucene\Index::getSegmentFileName(++$generation), false);
     try {
         $genFile = $this->_directory->getFileObject('segments.gen', false);
     } catch (Lucene\Exception $e) {
         if (strpos($e->getMessage(), 'is not readable') !== false) {
             // 'segments.gen' does not exist yet: create it
             $genFile = $this->_directory->createFile('segments.gen');
         } else {
             // Do not leave the index write-locked when giving up
             Lucene\LockManager::releaseWriteLock($this->_directory);
             throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
         }
     }
     // 'segments.gen' header: format marker 0xFFFFFFFE (-2 as a 32-bit int),
     // as required by the Lucene file format; it is followed by two copies
     // of the generation number
     $genFile->writeInt((int) 4294967294.0);
     // Write generation (first copy)
     $genFile->writeLong($generation);
     try {
         // Write format marker
         if ($this->_targetFormatVersion == Lucene\Index::FORMAT_2_1) {
             $newSegmentFile->writeInt((int) 4294967293.0);
         } elseif ($this->_targetFormatVersion == Lucene\Index::FORMAT_2_3) {
             $newSegmentFile->writeInt((int) 4294967292.0);
         }
         // Read src file format identifier
         $format = $segmentsFile->readInt();
         if ($format == (int) 4294967295.0) {
             $srcFormat = Lucene\Index::FORMAT_PRE_2_1;
         } elseif ($format == (int) 4294967293.0) {
             $srcFormat = Lucene\Index::FORMAT_2_1;
         } elseif ($format == (int) 4294967292.0) {
             $srcFormat = Lucene\Index::FORMAT_2_3;
         } else {
             throw new InvalidFileFormatException('Unsupported segments file format');
         }
         // Copy the index version, applying (and then resetting) the pending update
         $version = $segmentsFile->readLong() + $this->_versionUpdate;
         $this->_versionUpdate = 0;
         $newSegmentFile->writeLong($version);
         // Write segment name counter
         $newSegmentFile->writeInt($segmentsFile->readInt());
         // Get number of segments offset
         $numOfSegmentsOffset = $newSegmentFile->tell();
         // Write dummy data (segment counter); the real value is patched in below
         $newSegmentFile->writeInt(0);
         // Read number of segments
         $segmentsCount = $segmentsFile->readInt();
         // Segments kept in the new generation: segment name => segment size
         $segments = array();
         for ($count = 0; $count < $segmentsCount; $count++) {
             $segName = $segmentsFile->readString();
             $segSize = $segmentsFile->readInt();
             if ($srcFormat == Lucene\Index::FORMAT_PRE_2_1) {
                 // pre-2.1 index format: the extended fields are not stored,
                 // so use defaults
                 $delGen = 0;
                 $hasSingleNormFile = false;
                 $numField = (int) 4294967295.0;
                 $isCompoundByte = 0;
                 $docStoreOptions = null;
             } else {
                 $delGen = $segmentsFile->readLong();
                 if ($srcFormat == Lucene\Index::FORMAT_2_3) {
                     $docStoreOffset = $segmentsFile->readInt();
                     if ($docStoreOffset != (int) 4294967295.0) {
                         $docStoreSegment = $segmentsFile->readString();
                         $docStoreIsCompoundFile = $segmentsFile->readByte();
                         $docStoreOptions = array('offset' => $docStoreOffset, 'segment' => $docStoreSegment, 'isCompound' => $docStoreIsCompoundFile == 1);
                     } else {
                         $docStoreOptions = null;
                     }
                 } else {
                     $docStoreOptions = null;
                 }
                 $hasSingleNormFile = $segmentsFile->readByte();
                 $numField = $segmentsFile->readInt();
                 $normGens = array();
                 if ($numField != (int) 4294967295.0) {
                     for ($count1 = 0; $count1 < $numField; $count1++) {
                         $normGens[] = $segmentsFile->readLong();
                     }
                 }
                 $isCompoundByte = $segmentsFile->readByte();
             }
             if (!in_array($segName, $this->_segmentsToDelete)) {
                 // Load segment if necessary
                 if (!isset($this->_segmentInfos[$segName])) {
                     // Default to "status unknown" (also the meaning of 0x0), so an
                     // unexpected flag byte can never leave $isCompound undefined
                     // or reuse the value computed for a previous segment
                     $isCompound = null;
                     if ($isCompoundByte == 0xff) {
                         // The segment is not a compound file
                         $isCompound = false;
                     } elseif ($isCompoundByte == 0x1) {
                         // The segment is a compound file
                         $isCompound = true;
                     }
                     $this->_segmentInfos[$segName] = new SegmentInfo($this->_directory, $segName, $segSize, $delGen, $docStoreOptions, $hasSingleNormFile, $isCompound);
                 } else {
                     // Retrieve actual deletions file generation number
                     $delGen = $this->_segmentInfos[$segName]->getDelGen();
                 }
                 $newSegmentFile->writeString($segName);
                 $newSegmentFile->writeInt($segSize);
                 $newSegmentFile->writeLong($delGen);
                 if ($this->_targetFormatVersion == Lucene\Index::FORMAT_2_3) {
                     if ($docStoreOptions !== null) {
                         $newSegmentFile->writeInt($docStoreOffset);
                         $newSegmentFile->writeString($docStoreSegment);
                         $newSegmentFile->writeByte($docStoreIsCompoundFile);
                     } else {
                         // Set DocStoreOffset to -1
                         $newSegmentFile->writeInt((int) 4294967295.0);
                     }
                 } elseif ($docStoreOptions !== null) {
                     // The enclosing catch block restores the generation and
                     // releases the index write lock
                     throw new RuntimeException('Index conversion to lower format version is not supported.');
                 }
                 $newSegmentFile->writeByte($hasSingleNormFile);
                 $newSegmentFile->writeInt($numField);
                 if ($numField != (int) 4294967295.0) {
                     foreach ($normGens as $normGen) {
                         $newSegmentFile->writeLong($normGen);
                     }
                 }
                 $newSegmentFile->writeByte($isCompoundByte);
                 $segments[$segName] = $segSize;
             }
         }
         $segmentsFile->close();
         $segmentsCount = count($segments) + count($this->_newSegments);
         // Append the newly created segments
         foreach ($this->_newSegments as $segName => $segmentInfo) {
             $newSegmentFile->writeString($segName);
             $newSegmentFile->writeInt($segmentInfo->count());
             // delete file generation: -1 (there is no delete file yet),
             // written as two 32-bit halves
             $newSegmentFile->writeInt((int) 4294967295.0);
             $newSegmentFile->writeInt((int) 4294967295.0);
             if ($this->_targetFormatVersion == Lucene\Index::FORMAT_2_3) {
                 // docStoreOffset: -1 (segment doesn't use shared doc store)
                 $newSegmentFile->writeInt((int) 4294967295.0);
             }
             // HasSingleNormFile
             $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());
             // NumField
             $newSegmentFile->writeInt((int) 4294967295.0);
             // IsCompoundFile
             $newSegmentFile->writeByte($segmentInfo->isCompound() ? 1 : -1);
             $segments[$segmentInfo->getName()] = $segmentInfo->count();
             $this->_segmentInfos[$segName] = $segmentInfo;
         }
         $this->_newSegments = array();
         // Update segments count (overwrite the dummy value written earlier)
         $newSegmentFile->seek($numOfSegmentsOffset);
         $newSegmentFile->writeInt($segmentsCount);
         $newSegmentFile->close();
     } catch (\Exception $e) {
         /** Restore previous index generation */
         $generation--;
         // Skip the 4-byte format marker at the start of 'segments.gen'
         $genFile->seek(4, SEEK_SET);
         // Write generation number twice
         $genFile->writeLong($generation);
         $genFile->writeLong($generation);
         // Release index write lock
         Lucene\LockManager::releaseWriteLock($this->_directory);
         // Throw the exception
         throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
     }
     // Write generation (second copy)
     $genFile->writeLong($generation);
     // Check if another update or read process is not running now
     // If yes, skip clean-up procedure
     if (Lucene\LockManager::escalateReadLock($this->_directory)) {
         /**
          * Clean-up directory
          */
         // Three parallel lists: file name, deletion group, order inside the group
         $filesToDelete = array();
         $filesTypes = array();
         $filesNumbers = array();
         // list of .del files of currently used segments
         // each segment can have several generations of .del files
         // only last should not be deleted
         $delFiles = array();
         foreach ($this->_directory->fileList() as $file) {
             if ($file == 'deletable') {
                 // 'deletable' file
                 $filesToDelete[] = $file;
                 // delete this file first, since it's not used starting from Lucene v2.1
                 $filesTypes[] = 0;
                 $filesNumbers[] = 0;
             } elseif ($file == 'segments') {
                 // 'segments' file
                 $filesToDelete[] = $file;
                 // second file to be deleted "zero" version of segments file (Lucene pre-2.1)
                 $filesTypes[] = 1;
                 $filesNumbers[] = 0;
             } elseif (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
                 // 'segments_xxx' file
                 // Check if it's not a just created generation file
                 if ($file != Lucene\Index::getSegmentFileName($generation)) {
                     $filesToDelete[] = $file;
                     // first group of files for deletions
                     $filesTypes[] = 2;
                     // ordered by segment generation numbers
                     $filesNumbers[] = (int) base_convert(substr($file, 9), 36, 10);
                 }
             } elseif (preg_match('/(^_([a-zA-Z0-9]+))\\.f\\d+$/i', $file, $matches)) {
                 // one of per segment files ('<segment_name>.f<decimal_number>')
                 // Check if it's not one of the segments in the current segments set
                 if (!isset($segments[$matches[1]])) {
                     $filesToDelete[] = $file;
                     // second group of files for deletions
                     $filesTypes[] = 3;
                     // order by segment number
                     $filesNumbers[] = (int) base_convert($matches[2], 36, 10);
                 }
             } elseif (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\\.del$/i', $file, $matches)) {
                 // one of per segment files ('<segment_name>_<del_generation>.del' where <segment_name> is '_<segment_number>')
                 // Check if it's not one of the segments in the current segments set
                 if (!isset($segments[$matches[1]])) {
                     $filesToDelete[] = $file;
                     // second group of files for deletions
                     $filesTypes[] = 3;
                     // order by segment number
                     $filesNumbers[] = (int) base_convert($matches[2], 36, 10);
                 } else {
                     // Segment is still in use: collect its .del generations,
                     // they are filtered below
                     $segmentNumber = (int) base_convert($matches[2], 36, 10);
                     $delGeneration = (int) base_convert($matches[4], 36, 10);
                     if (!isset($delFiles[$segmentNumber])) {
                         $delFiles[$segmentNumber] = array();
                     }
                     $delFiles[$segmentNumber][$delGeneration] = $file;
                 }
             } elseif (isset(self::$_indexExtensions[substr($file, strlen($file) - 4)])) {
                 // one of per segment files ('<segment_name>.<ext>')
                 $segmentName = substr($file, 0, strlen($file) - 4);
                 // Check if it's not one of the segments in the current segments set
                 if (!isset($segments[$segmentName]) && ($this->_currentSegment === null || $this->_currentSegment->getName() != $segmentName)) {
                     $filesToDelete[] = $file;
                     // second group of files for deletions
                     $filesTypes[] = 3;
                     // order by segment number
                     $filesNumbers[] = (int) base_convert(substr($file, 1, strlen($file) - 5), 36, 10);
                 }
             }
         }
         $maxGenNumber = 0;
         // process .del files of currently used segments
         foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
             ksort($delFiles[$segmentNumber], SORT_NUMERIC);
             // remove last delete file generation from candidates for deleting
             array_pop($delFiles[$segmentNumber]);
             // Track the highest generation that remains a deletion candidate
             end($delFiles[$segmentNumber]);
             $lastGenNumber = key($delFiles[$segmentNumber]);
             if ($lastGenNumber > $maxGenNumber) {
                 $maxGenNumber = $lastGenNumber;
             }
         }
         foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
             foreach ($segmentDelFiles as $delGeneration => $file) {
                 $filesToDelete[] = $file;
                 // third group of files for deletions
                 $filesTypes[] = 4;
                 // order by <segment_number>,<del_generation> pair
                 $filesNumbers[] = $segmentNumber * $maxGenNumber + $delGeneration;
             }
         }
         // Reorder files for deleting
         array_multisort($filesTypes, SORT_ASC, SORT_NUMERIC, $filesNumbers, SORT_ASC, SORT_NUMERIC, $filesToDelete, SORT_ASC, SORT_STRING);
         foreach ($filesToDelete as $file) {
             try {
                 /** Skip shared docstore segments deleting */
                 /** @todo Process '.cfx' files to check if them are already unused */
                 if (substr($file, strlen($file) - 4) != '.cfx') {
                     $this->_directory->deleteFile($file);
                 }
             } catch (Lucene\Exception $e) {
                 if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
                     // That's not "file is under processing or already deleted" exception
                     // Pass it through, but do not leave the index locked
                     Lucene\LockManager::deEscalateReadLock($this->_directory);
                     Lucene\LockManager::releaseWriteLock($this->_directory);
                     throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
                 }
             }
         }
         // Return read lock into the previous state
         Lucene\LockManager::deEscalateReadLock($this->_directory);
     } else {
         // Only release resources if another index reader is running now
         foreach ($this->_segmentsToDelete as $segName) {
             foreach (self::$_indexExtensions as $ext) {
                 $this->_directory->purgeFile($segName . $ext);
             }
         }
     }
     // Clean-up _segmentsToDelete container
     $this->_segmentsToDelete = array();
     // Release index write lock
     Lucene\LockManager::releaseWriteLock($this->_directory);
     // Remove unused segments from segments list
     foreach ($this->_segmentInfos as $segName => $segmentInfo) {
         if (!isset($segments[$segName])) {
             unset($this->_segmentInfos[$segName]);
         }
     }
 }
 /**
  * Generate compound index file
  */
 protected function _generateCFS()
 {
     // Packs every file listed in $this->_files into a single
     // "$this->_name . '.cfs'" file and deletes each source file once copied.
     $directory = $this->_directory;
     $compoundFile = $directory->createFile($this->_name . '.cfs');

     // Header: entry count, then one (data offset, file name) record per file.
     $compoundFile->writeVInt(count($this->_files));
     $offsetSlots = array();
     foreach ($this->_files as $name) {
         // Remember where this entry's offset is stored; a placeholder is
         // written now and patched in the second pass.
         $offsetSlots[$name] = $compoundFile->tell();
         $compoundFile->writeLong(0);
         $compoundFile->writeString($name);
     }

     // Second pass: append each file's data and patch its offset record.
     foreach ($this->_files as $name) {
         // The data starts at the current write position
         $dataStart = $compoundFile->tell();
         // Overwrite the placeholder with the real offset ...
         $compoundFile->seek($offsetSlots[$name]);
         $compoundFile->writeLong($dataStart);
         // ... and return to where the data has to go
         $compoundFile->seek($dataStart);

         // Copy the source file in chunks of at most 131072 bytes (128 KiB)
         $source = $directory->getFileObject($name);
         $remaining = $directory->fileLength($name);
         while ($remaining > 0) {
             $chunk = $source->readBytes(min($remaining, 131072));
             $compoundFile->writeBytes($chunk);
             $remaining -= strlen($chunk);
         }

         $directory->deleteFile($name);
     }
 }