Ejemplo n.º 1
0
 /**
  * Update segments file by adding current segment to a list
  *
  * Under the exclusive index write lock this method:
  *  - writes down pending changes of every loaded segment (writeChanges());
  *  - writes a segments file for the next generation: every segment of the
  *    current file that is not listed in $this->_segmentsToDelete is copied,
  *    then the segments queued in $this->_newSegments are appended;
  *  - stores the new generation number in 'segments.gen' (two copies);
  *  - if the read lock can be escalated, deletes index files that are no
  *    longer referenced; otherwise only purges the files of the segments
  *    that were deleted.
  *
  * Afterwards $this->_newSegments and $this->_segmentsToDelete are emptied and
  * $this->_segmentInfos is reduced to the segments listed in the new file.
  *
  * Error handling: if writing the new segments file fails, the previous
  * generation number is written back to 'segments.gen' while the write lock
  * is still held, and only then the lock is released. Failures in any other
  * phase release the lock(s) taken here and re-throw the original exception.
  *
  * @throws Zend_Search_Lucene_Exception
  */
 private function _updateSegments()
 {
     // Get an exclusive index lock
     Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

     try {
         // Write down changes for the segments
         foreach ($this->_segmentInfos as $segInfo) {
             $segInfo->writeChanges();
         }
         $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
         $segmentsFile = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
         $newSegmentFile = $this->_directory->createFile(Zend_Search_Lucene::getSegmentFileName(++$generation), false);
         try {
             $genFile = $this->_directory->getFileObject('segments.gen', false);
         } catch (Zend_Search_Lucene_Exception $e) {
             if (strpos($e->getMessage(), 'is not readable') !== false) {
                 // 'segments.gen' does not exist yet, create it
                 $genFile = $this->_directory->createFile('segments.gen');
             } else {
                 throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
             }
         }
         $genFile->writeInt((int) 0xfffffffe);
         // Write generation (first copy)
         $genFile->writeLong($generation);
     } catch (Exception $e) {
         // Nothing has been committed yet; just make sure the write lock does not stay held
         Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
         throw $e;
     }

     try {
         // Write format marker
         if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_1) {
             $newSegmentFile->writeInt((int) 0xfffffffd);
         } elseif ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
             $newSegmentFile->writeInt((int) 0xfffffffc);
         }
         // Read src file format identifier
         $format = $segmentsFile->readInt();
         if ($format == (int) 0xffffffff) {
             $srcFormat = Zend_Search_Lucene::FORMAT_PRE_2_1;
         } elseif ($format == (int) 0xfffffffd) {
             $srcFormat = Zend_Search_Lucene::FORMAT_2_1;
         } elseif ($format == (int) 0xfffffffc) {
             $srcFormat = Zend_Search_Lucene::FORMAT_2_3;
         } else {
             require_once 'Zend/Search/Lucene/Exception.php';
             throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
         }
         // Index version: source version plus the number of pending version updates
         $version = $segmentsFile->readLong() + $this->_versionUpdate;
         $this->_versionUpdate = 0;
         $newSegmentFile->writeLong($version);
         // Write segment name counter
         $newSegmentFile->writeInt($segmentsFile->readInt());
         // Get number of segments offset
         $numOfSegmentsOffset = $newSegmentFile->tell();
         // Write dummy data (segment counter), patched once the real count is known
         $newSegmentFile->writeInt(0);
         // Read number of segments
         $segmentsCount = $segmentsFile->readInt();
         // Segments listed in the new segments file: segment name => segment size
         $segments = array();
         for ($count = 0; $count < $segmentsCount; $count++) {
             // The whole record has to be read even for segments which are dropped,
             // otherwise the read position of the next record would be wrong
             $segName = $segmentsFile->readString();
             $segSize = $segmentsFile->readInt();
             if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
                 // pre-2.1 index format
                 $delGen = 0;
                 $hasSingleNormFile = false;
                 $numField = (int) 0xffffffff;
                 $isCompoundByte = 0;
                 $docStoreOptions = null;
             } else {
                 $delGen = $segmentsFile->readLong();
                 $docStoreOptions = null;
                 if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
                     $docStoreOffset = $segmentsFile->readInt();
                     if ($docStoreOffset != (int) 0xffffffff) {
                         $docStoreSegment = $segmentsFile->readString();
                         $docStoreIsCompoundFile = $segmentsFile->readByte();
                         $docStoreOptions = array('offset' => $docStoreOffset, 'segment' => $docStoreSegment, 'isCompound' => $docStoreIsCompoundFile == 1);
                     }
                 }
                 $hasSingleNormFile = $segmentsFile->readByte();
                 $numField = $segmentsFile->readInt();
                 $normGens = array();
                 if ($numField != (int) 0xffffffff) {
                     for ($count1 = 0; $count1 < $numField; $count1++) {
                         $normGens[] = $segmentsFile->readLong();
                     }
                 }
                 $isCompoundByte = $segmentsFile->readByte();
             }

             if (in_array($segName, $this->_segmentsToDelete)) {
                 // Segment is scheduled for deletion, do not copy it into the new file
                 continue;
             }

             // Load segment if necessary
             if (!isset($this->_segmentInfos[$segName])) {
                 if ($isCompoundByte == 0xff) {
                     // The segment is not a compound file
                     $isCompound = false;
                 } elseif ($isCompoundByte == 0x1) {
                     // The segment is a compound file
                     $isCompound = true;
                 } else {
                     // 0x00: the status is unknown. Any unexpected marker is treated the
                     // same way, so a value from a previous iteration is never reused
                     $isCompound = null;
                 }
                 /** Zend_Search_Lucene_Index_SegmentInfo */
                 require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
                 $this->_segmentInfos[$segName] = new Zend_Search_Lucene_Index_SegmentInfo($this->_directory, $segName, $segSize, $delGen, $docStoreOptions, $hasSingleNormFile, $isCompound);
             } else {
                 // Retrieve actual deletions file generation number
                 $delGen = $this->_segmentInfos[$segName]->getDelGen();
             }

             $newSegmentFile->writeString($segName);
             $newSegmentFile->writeInt($segSize);
             $newSegmentFile->writeLong($delGen);
             if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                 if ($docStoreOptions !== null) {
                     $newSegmentFile->writeInt($docStoreOffset);
                     $newSegmentFile->writeString($docStoreSegment);
                     $newSegmentFile->writeByte($docStoreIsCompoundFile);
                 } else {
                     // Set DocStoreOffset to -1
                     $newSegmentFile->writeInt((int) 0xffffffff);
                 }
             } elseif ($docStoreOptions !== null) {
                 // The write lock is intentionally NOT released here: the catch block below
                 // restores 'segments.gen' first and releases the lock afterwards
                 require_once 'Zend/Search/Lucene/Exception.php';
                 throw new Zend_Search_Lucene_Exception('Index conversion to lower format version is not supported.');
             }
             $newSegmentFile->writeByte($hasSingleNormFile);
             $newSegmentFile->writeInt($numField);
             if ($numField != (int) 0xffffffff) {
                 foreach ($normGens as $normGen) {
                     $newSegmentFile->writeLong($normGen);
                 }
             }
             $newSegmentFile->writeByte($isCompoundByte);
             $segments[$segName] = $segSize;
         }
         $segmentsFile->close();

         $segmentsCount = count($segments) + count($this->_newSegments);
         foreach ($this->_newSegments as $segName => $segmentInfo) {
             $newSegmentFile->writeString($segName);
             $newSegmentFile->writeInt($segmentInfo->count());
             // delete file generation: -1 (there is no delete file yet),
             // written as two ints (high and low part of the long)
             $newSegmentFile->writeInt((int) 0xffffffff);
             $newSegmentFile->writeInt((int) 0xffffffff);
             if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                 // docStoreOffset: -1 (segment doesn't use shared doc store)
                 $newSegmentFile->writeInt((int) 0xffffffff);
             }
             // HasSingleNormFile
             $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());
             // NumField
             $newSegmentFile->writeInt((int) 0xffffffff);
             // IsCompoundFile
             $newSegmentFile->writeByte($segmentInfo->isCompound() ? 1 : -1);
             $segments[$segmentInfo->getName()] = $segmentInfo->count();
             $this->_segmentInfos[$segName] = $segmentInfo;
         }
         $this->_newSegments = array();

         // Update segments count
         $newSegmentFile->seek($numOfSegmentsOffset);
         $newSegmentFile->writeInt($segmentsCount);
         $newSegmentFile->close();
     } catch (Exception $e) {
         /** Restore previous index generation */
         $generation--;
         // Skip the 4-byte marker written at the start of 'segments.gen'
         $genFile->seek(4, SEEK_SET);
         // Write generation number twice
         $genFile->writeLong($generation);
         $genFile->writeLong($generation);
         // Release index write lock
         Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
         // Throw the exception
         require_once 'Zend/Search/Lucene/Exception.php';
         throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
     }

     $readLockEscalated = false;
     try {
         // Write generation (second copy)
         $genFile->writeLong($generation);
         // Check if another update or read process is not running now
         // If yes, skip clean-up procedure
         $readLockEscalated = Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory);
         if ($readLockEscalated) {
             $this->_deleteObsoleteFiles($segments, $generation);
         } else {
             // Only release resources if another index reader is running now
             foreach ($this->_segmentsToDelete as $segName) {
                 foreach (self::$_indexExtensions as $ext) {
                     $this->_directory->purgeFile($segName . $ext);
                 }
             }
         }
     } catch (Exception $e) {
         // Do not leave the locks taken by this method behind
         if ($readLockEscalated) {
             Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
         }
         Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
         throw $e;
     }
     if ($readLockEscalated) {
         // Return read lock into the previous state
         Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
     }

     // Clean-up _segmentsToDelete container
     $this->_segmentsToDelete = array();
     // Release index write lock
     Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
     // Remove unused segments from segments list
     foreach ($this->_segmentInfos as $segName => $segmentInfo) {
         if (!isset($segments[$segName])) {
             unset($this->_segmentInfos[$segName]);
         }
     }
 }

 /**
  * Delete index files which are not referenced by the current segments file
  *
  * Helper of _updateSegments(); the caller holds the write lock and the
  * escalated read lock while this method runs.
  *
  * Files are deleted in the following order: 'deletable', 'segments',
  * outdated 'segments_<generation>' files, files of segments which are not in
  * $segments, and finally all but the latest '.del' generation of segments
  * which are still in use. '.cfx' files are never deleted here.
  *
  * @param array   $segments   segments of the current segments file, keyed by segment name
  * @param integer $generation generation of the segments file which has just been written
  * @throws Zend_Search_Lucene_Exception
  */
 private function _deleteObsoleteFiles($segments, $generation)
 {
     $filesToDelete = array();
     $filesTypes = array();
     $filesNumbers = array();
     // list of .del files of currently used segments
     // each segment can have several generations of .del files
     // only last should not be deleted
     $delFiles = array();
     // The just created generation file must survive the clean-up
     $currentSegmentsFile = Zend_Search_Lucene::getSegmentFileName($generation);

     foreach ($this->_directory->fileList() as $file) {
         if ($file == 'deletable') {
             // 'deletable' file
             // delete this file first, since it's not used starting from Lucene v2.1
             $filesToDelete[] = $file;
             $filesTypes[] = 0;
             $filesNumbers[] = 0;
         } elseif ($file == 'segments') {
             // 'segments' file
             // second file to be deleted "zero" version of segments file (Lucene pre-2.1)
             $filesToDelete[] = $file;
             $filesTypes[] = 1;
             $filesNumbers[] = 0;
         } elseif (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
             // 'segments_xxx' file
             // Check if it's not a just created generation file
             if ($file != $currentSegmentsFile) {
                 // first group of files for deletions, ordered by segment generation numbers
                 $filesToDelete[] = $file;
                 $filesTypes[] = 2;
                 $filesNumbers[] = (int) base_convert(substr($file, 9), 36, 10);
             }
         } elseif (preg_match('/(^_([a-zA-Z0-9]+))\\.f\\d+$/i', $file, $matches)) {
             // one of per segment files ('<segment_name>.f<decimal_number>')
             // Check if it's not one of the segments in the current segments set
             if (!isset($segments[$matches[1]])) {
                 // second group of files for deletions, order by segment number
                 $filesToDelete[] = $file;
                 $filesTypes[] = 3;
                 $filesNumbers[] = (int) base_convert($matches[2], 36, 10);
             }
         } elseif (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\\.del$/i', $file, $matches)) {
             // one of per segment files ('<segment_name>_<del_generation>.del' where <segment_name> is '_<segment_number>')
             // Check if it's not one of the segments in the current segments set
             if (!isset($segments[$matches[1]])) {
                 // second group of files for deletions, order by segment number
                 $filesToDelete[] = $file;
                 $filesTypes[] = 3;
                 $filesNumbers[] = (int) base_convert($matches[2], 36, 10);
             } else {
                 // Segment is in use: collect its .del generations, they are handled below
                 $segmentNumber = (int) base_convert($matches[2], 36, 10);
                 $delGeneration = (int) base_convert($matches[4], 36, 10);
                 if (!isset($delFiles[$segmentNumber])) {
                     $delFiles[$segmentNumber] = array();
                 }
                 $delFiles[$segmentNumber][$delGeneration] = $file;
             }
         } elseif (isset(self::$_indexExtensions[substr($file, strlen($file) - 4)])) {
             // one of per segment files ('<segment_name>.<ext>')
             $segmentName = substr($file, 0, strlen($file) - 4);
             // Check if it's not one of the segments in the current segments set
             // and not the segment which is currently being built
             if (!isset($segments[$segmentName]) && ($this->_currentSegment === null || $this->_currentSegment->getName() != $segmentName)) {
                 // second group of files for deletions, order by segment number
                 $filesToDelete[] = $file;
                 $filesTypes[] = 3;
                 $filesNumbers[] = (int) base_convert(substr($file, 1, strlen($file) - 5), 36, 10);
             }
         }
     }

     $maxGenNumber = 0;
     // process .del files of currently used segments
     foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
         ksort($delFiles[$segmentNumber], SORT_NUMERIC);
         // remove last delete file generation from candidates for deleting
         array_pop($delFiles[$segmentNumber]);
         end($delFiles[$segmentNumber]);
         $lastGenNumber = key($delFiles[$segmentNumber]);
         if ($lastGenNumber > $maxGenNumber) {
             $maxGenNumber = $lastGenNumber;
         }
     }
     foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
         foreach ($segmentDelFiles as $delGeneration => $file) {
             // third group of files for deletions,
             // order by <segment_number>,<del_generation> pair
             $filesToDelete[] = $file;
             $filesTypes[] = 4;
             $filesNumbers[] = $segmentNumber * $maxGenNumber + $delGeneration;
         }
     }

     // Reorder files for deleting
     array_multisort($filesTypes, SORT_ASC, SORT_NUMERIC, $filesNumbers, SORT_ASC, SORT_NUMERIC, $filesToDelete, SORT_ASC, SORT_STRING);

     foreach ($filesToDelete as $file) {
         try {
             /** Skip shared docstore segments deleting */
             /** @todo Process '.cfx' files to check if them are already unused */
             if (substr($file, strlen($file) - 4) != '.cfx') {
                 $this->_directory->deleteFile($file);
             }
         } catch (Zend_Search_Lucene_Exception $e) {
             if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
                 // That's not "file is under processing or already deleted" exception
                 // Pass it through
                 throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
             }
         }
     }
 }
Ejemplo n.º 2
0
 /**
  * Opens the index.
  *
  * IndexReader constructor needs Directory as a parameter. It should be
  * a string with a path to the index folder or a Directory object.
  * A Zend_Search_Lucene_Storage_Directory_Filesystem object is used as is;
  * any other value is passed to the constructor of that class.
  *
  * The read lock is obtained and escalated while the segments file is being
  * read and is de-escalated again before the constructor returns, also when
  * opening fails.
  * NOTE(review): on failure the (de-escalated) read lock itself stays
  * obtained, as before; releasing it completely would need a LockManager
  * call which is not visible from this method - confirm.
  *
  * @param mixed   $directory
  * @param boolean $create    if true, a new index generation is created in the
  *                           directory (under the write lock) before it is opened
  * @throws Zend_Search_Exception        if no directory is specified
  * @throws Zend_Search_Lucene_Exception if the write lock can't be obtained for index
  *                                      creation or the directory contains no index
  */
 public function __construct($directory = null, $create = false)
 {
     if ($directory === null) {
         throw new Zend_Search_Exception('No index directory specified');
     }
     if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
         $this->_directory = $directory;
         $this->_closeDirOnExit = false;
     } else {
         $this->_directory = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
         $this->_closeDirOnExit = true;
     }
     $this->_segmentInfos = array();

     // Mark index as "under processing" to prevent other processes from premature index cleaning
     Zend_Search_Lucene_LockManager::obtainReadLock($this->_directory);
     // Escalate read lock to prevent current generation index files to be deleted while opening process is not done
     Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory);

     try {
         $this->_generation = self::getActualGeneration($this->_directory);

         if ($create) {
             try {
                 Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);
             } catch (Zend_Search_Lucene_Exception $e) {
                 if (strpos($e->getMessage(), 'Can\'t obtain exclusive index lock') === false) {
                     throw $e;
                 } else {
                     throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
                 }
             }

             try {
                 if ($this->_generation == -1) {
                     // Directory doesn't contain existing index, start from 1
                     $this->_generation = 1;
                     $nameCounter = 0;
                 } else {
                     // Directory contains existing index, continue its segment name counter
                     $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
                     // 12 = 4 (int, file format marker) + 8 (long, index version)
                     $segmentsFile->seek(12);
                     $nameCounter = $segmentsFile->readInt();
                     $this->_generation++;
                 }
                 Zend_Search_Lucene_Index_Writer::createIndex($this->_directory, $this->_generation, $nameCounter);
             } catch (Exception $e) {
                 // Index creation failed, do not keep the index write-locked
                 Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
                 throw $e;
             }
             Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
         }

         if ($this->_generation == -1) {
             throw new Zend_Search_Lucene_Exception('Index doesn\'t exists in the specified directory.');
         } elseif ($this->_generation == 0) {
             // Generation 0 is read with the pre-2.1 segments file reader
             $this->_readPre21SegmentsFile();
         } else {
             $this->_readSegmentsFile();
         }
     } catch (Exception $e) {
         // Opening failed: return the read lock into the non-escalated state
         // before the exception leaves the constructor
         Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
         throw $e;
     }

     // Opening process is done, return read lock into the non-escalated state
     Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
 }
Ejemplo n.º 3
0
    /**
     * Update segments file by adding current segment to a list
     *
     * @throws Zend_Search_Lucene_Exception
     */
    private function _updateSegments()
    {
        // Get an exclusive index lock
        Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

        $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
        $segmentsFile   = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
        $newSegmentFile = $this->_directory->createFile(Zend_Search_Lucene::getSegmentFileName(++$generation), false);

        try {
            $genFile = $this->_directory->getFileObject('segments.gen', false);
        } catch (Zend_Search_Lucene_Exception $e) {
            if (strpos($e->getMessage(), 'is not readable') !== false) {
                $genFile = $this->_directory->createFile('segments.gen');
            } else {
                throw $e;
            }
        }
        
        $genFile->writeInt((int)0xFFFFFFFE);
        // Write generation (first copy)
        $genFile->writeLong($generation);

        try {
            // Write format marker
            $newSegmentFile->writeInt((int)0xFFFFFFFD);
    
            // Skip format identifier
            $segmentsFile->seek(4, SEEK_CUR);
            // $version = $segmentsFile->readLong() + $this->_versionUpdate;
            // Process version on 32-bit platforms
            $versionHigh = $segmentsFile->readInt();
            $versionLow  = $segmentsFile->readInt();
            $version = $versionHigh * ((double)0xFFFFFFFF + 1) +
                       (($versionLow < 0)? (double)0xFFFFFFFF - (-1 - $versionLow) : $versionLow);
            $version += $this->_versionUpdate;
            $this->_versionUpdate = 0;
            $newSegmentFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
            $newSegmentFile->writeInt((int)($version & 0xFFFFFFFF));
    
            // Write segment name counter
            $newSegmentFile->writeInt($segmentsFile->readInt());
    
            // Get number of segments offset
            $numOfSegmentsOffset = $newSegmentFile->tell();
            // Write dummy data (segment counter)
            $newSegmentFile->writeInt(0);
    
            // Read number of segemnts
            $segmentsCount = $segmentsFile->readInt();

            $segments = array();
            for ($count = 0; $count < $segmentsCount; $count++) {
                $segName = $segmentsFile->readString();
                $segSize = $segmentsFile->readInt();
    
                if ($generation == 1 /* retrieved generation is 0 */) {
                    // pre-2.1 index format
                    $delGenHigh = 0;
                    $delGenLow  = 0;
                    $hasSingleNormFile = false;
                    $numField = (int)0xFFFFFFFF;
                    $isCompound = 1;
                } else {
                    //$delGen          = $segmentsFile->readLong();
                    $delGenHigh        = $segmentsFile->readInt();
                    $delGenLow         = $segmentsFile->readInt();
                    $hasSingleNormFile = $segmentsFile->readByte();
                    $numField          = $segmentsFile->readInt();
    
                    $normGens = array();
                    if ($numField != (int)0xFFFFFFFF) {
                        for ($count1 = 0; $count1 < $numField; $count1++) {
                            $normGens[] = $segmentsFile->readLong();
                        }
                    }
                    $isCompound        = $segmentsFile->readByte();
                }
    
                if (!in_array($segName, $this->_segmentsToDelete)) {
                    // Load segment if necessary
                    if (!isset($this->_segmentInfos[$segName])) {
                        $delGen = $delGenHigh * ((double)0xFFFFFFFF + 1) +
                                     (($delGenLow < 0)? (double)0xFFFFFFFF - (-1 - $delGenLow) : $delGenLow);
                        $this->_segmentInfos[$segName] = 
                                    new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                             $segName,
                                                                             $segSize,
                                                                             $delGen,
                                                                             $hasSingleNormFile,
                                                                             $isCompound);
                    } else {
                        // Retrieve actual detetions file generation number
                        $delGen = $this->_segmentInfos[$segName]->getDelGen();
                        
                        if ($delGen >= 0) {
                            $delGenHigh = (int)($delGen/((double)0xFFFFFFFF + 1));
                            $delGenLow  =(int)($delGen & 0xFFFFFFFF);
                        } else {
                            $delGenHigh = $delGenLow = (int)0xFFFFFFFF;
                        }
                    }
                    
                    $newSegmentFile->writeString($segName);
                    $newSegmentFile->writeInt($segSize);
                    $newSegmentFile->writeInt($delGenHigh);
                    $newSegmentFile->writeInt($delGenLow);
                    $newSegmentFile->writeByte($hasSingleNormFile);
                    $newSegmentFile->writeInt($numField);
                    if ($numField != (int)0xFFFFFFFF) {
                        foreach ($normGens as $normGen) {
                            $newSegmentFile->writeLong($normGen);
                        }
                    }
                    $newSegmentFile->writeByte($isCompound);
    
                    $segments[$segName] = $segSize;
                }
            }
            $segmentsFile->close();
    
            $segmentsCount = count($segments) + count($this->_newSegments);
    
            foreach ($this->_newSegments as $segName => $segmentInfo) {
                $newSegmentFile->writeString($segName);
                $newSegmentFile->writeInt($segmentInfo->count());
    
                // delete file generation: -1 (there is no delete file yet)
                $newSegmentFile->writeInt((int)0xFFFFFFFF);$newSegmentFile->writeInt((int)0xFFFFFFFF);
                // HasSingleNormFile
                $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());
                // NumField
                $newSegmentFile->writeInt((int)0xFFFFFFFF);
                // IsCompoundFile
                $newSegmentFile->writeByte($segmentInfo->isCompound());
    
                $segments[$segmentInfo->getName()] = $segmentInfo->count();
                $this->_segmentInfos[$segName] = $segmentInfo;
            }
            $this->_newSegments = array();
    
            $newSegmentFile->seek($numOfSegmentsOffset);
            $newSegmentFile->writeInt($segmentsCount);  // Update segments count
            $newSegmentFile->close();
        } catch (Exception $e) {
            /** Restore previous index generation */
            $generation--;
            $genFile->seek(4, SEEK_SET);
            // Write generation number twice
            $genFile->writeLong($generation); $genFile->writeLong($generation);

            // Release index write lock
            Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
            
            // Throw the exception
            throw $e;
        }

        // Write generation (second copy)
        $genFile->writeLong($generation);

        
        // Check if another update process is not running now
        // If yes, skip clean-up procedure
        if (Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory)) {
            /**
             * Clean-up directory
             */
            $filesToDelete = array();
            $filesTypes    = array();
            $filesNumbers  = array();
            
            // list of .del files of currently used segments
            // each segment can have several generations of .del files
            // only last should not be deleted
            $delFiles = array();
            
            foreach ($this->_directory->fileList() as $file) {
                if ($file == 'deletable') {
                    // 'deletable' file
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 0; // delete this file first, since it's not used starting from Lucene v2.1
                    $filesNumbers[]  = 0;
                } else if ($file == 'segments') {
                    // 'segments' file
    
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 1; // second file to be deleted "zero" version of segments file (Lucene pre-2.1)
                    $filesNumbers[]  = 0;
                } else if (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
                    // 'segments_xxx' file
                    // Check if it's not a just created generation file
                    if ($file != Zend_Search_Lucene::getSegmentFileName($generation)) {
                        $filesToDelete[] = $file;
                        $filesTypes[]    = 2; // first group of files for deletions
                        $filesNumbers[]  = (int)base_convert(substr($file, 9), 36, 10); // ordered by segment generation numbers 
                    }
                } else if (preg_match('/(^_([a-zA-Z0-9]+))\.f\d+$/i', $file, $matches)) {
                    // one of per segment files ('<segment_name>.f<decimal_number>')
                    // Check if it's not one of the segments in the current segments set
                    if (!isset($segments[$matches[1]])) {
                        $filesToDelete[] = $file;
                        $filesTypes[]    = 3; // second group of files for deletions
                        $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number 
                    }
                } else if (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\.del$/i', $file, $matches)) {
                    // one of per segment files ('<segment_name>_<del_generation>.del' where <segment_name> is '_<segment_number>')
                    // Check if it's not one of the segments in the current segments set
                    if (!isset($segments[$matches[1]])) {
                        $filesToDelete[] = $file;
                        $filesTypes[]    = 3; // second group of files for deletions
                        $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number 
                    } else {
                        $segmentNumber = (int)base_convert($matches[2], 36, 10);
                        $delGeneration = (int)base_convert($matches[4], 36, 10);
                        if (!isset($delFiles[$segmentNumber])) {
                            $delFiles[$segmentNumber] = array();
                        }
                        $delFiles[$segmentNumber][$delGeneration] = $file;
                    }
                } else if (isset(self::$_indexExtensions[substr($file, strlen($file)-4)])) {
                    // one of per segment files ('<segment_name>.<ext>')
                    $segmentName = substr($file, 0, strlen($file) - 4);
                    // Check if it's not one of the segments in the current segments set
                    if (!isset($segments[$segmentName])  &&
                        ($this->_currentSegment === null  ||  $this->_currentSegment->getName() != $segmentName)) {
                        $filesToDelete[] = $file;
                        $filesTypes[]    = 3; // second group of files for deletions
                        $filesNumbers[]  = (int)base_convert(substr($file, 1 /* skip '_' */, strlen($file)-5), 36, 10); // order by segment number 
                    }
                }
            }

            $maxGenNumber = 0;
            // process .del files of currently used segments
            foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
                ksort($delFiles[$segmentNumber], SORT_NUMERIC);
                array_pop($delFiles[$segmentNumber]); // remove last delete file generation from candidates for deleting
                
                end($delFiles[$segmentNumber]);
                $lastGenNumber = key($delFiles[$segmentNumber]);
                if ($lastGenNumber > $maxGenNumber) {
                    $maxGenNumber = $lastGenNumber; 
                }
            }
            foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
                foreach ($segmentDelFiles as $delGeneration => $file) {
                        $filesToDelete[] = $file;
                        $filesTypes[]    = 4; // third group of files for deletions
                        $filesNumbers[]  = $segmentNumber*$maxGenNumber + $delGeneration; // order by <segment_number>,<del_generation> pair 
                }
            }
            
            // Reorder files for deleting
            array_multisort($filesTypes,    SORT_ASC, SORT_NUMERIC,
                            $filesNumbers,  SORT_ASC, SORT_NUMERIC,
                            $filesToDelete, SORT_ASC, SORT_STRING);
            
            foreach ($filesToDelete as $file) {
                try {
                    $this->_directory->deleteFile($file);
                } catch (Zend_Search_Lucene_Exception $e) {
                    if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
                        // That's not "file is under processing or already deleted" exception
                        // Pass it through
                        throw $e;
                    }
                }
            }
            
            // Return read lock into the previous state
            Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
        } else {
            // Only release resources if another index reader is running now
            foreach ($this->_segmentsToDelete as $segName) {
                foreach (self::$_indexExtensions as $ext) {
                    $this->_directory->purgeFile($segName . $ext);
                }
            }
        }

        // Clean-up _segmentsToDelete container
        $this->_segmentsToDelete = array();
        

        // Release index write lock
        Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

        // Remove unused segments from segments list
        foreach ($this->_segmentInfos as $segName => $segmentInfo) {
            if (!isset($segments[$segName])) {
                unset($this->_segmentInfos[$segName]);
            }
        }
    }
Ejemplo n.º 4
0
 /**
  * Update segments file by adding current segment to a list
  *
  * Writes the next generation of the segments file: entries of the previous
  * generation that are not listed in $this->_segmentsToDelete are copied over,
  * then the entries of $this->_newSegments are appended. The generation number
  * is recorded in 'segments.gen'. Afterwards index files that are no longer
  * referenced are deleted if the read lock could be escalated; otherwise only
  * the files of the segments scheduled for deletion are purged.
  *
  * On every explicit error path the index write lock is released before the
  * exception is thrown.
  *
  * @throws Zend_Search_Lucene_Exception
  */
 private function _updateSegments()
 {
     // Get an exclusive index lock
     Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

     // Write down changes for the segments
     foreach ($this->_segmentInfos as $segInfo) {
         $segInfo->writeChanges();
     }

     $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
     $segmentsFile = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
     $newSegmentFile = $this->_directory->createFile(Zend_Search_Lucene::getSegmentFileName(++$generation), false);

     try {
         $genFile = $this->_directory->getFileObject('segments.gen', false);
     } catch (Zend_Search_Lucene_Exception $e) {
         if (strpos($e->getMessage(), 'is not readable') !== false) {
             $genFile = $this->_directory->createFile('segments.gen');
         } else {
             // Release the index write lock before propagating the failure,
             // like the other error paths of this method do
             Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
             throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
         }
     }

     $genFile->writeInt((int) 0xfffffffe);
     // Write generation (first copy)
     $genFile->writeLong($generation);

     try {
         // Write format marker
         if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_1) {
             $newSegmentFile->writeInt((int) 0xfffffffd);
         } elseif ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
             $newSegmentFile->writeInt((int) 0xfffffffc);
         }

         // Read src file format identifier
         $format = $segmentsFile->readInt();
         if ($format == (int) 0xffffffff) {
             $srcFormat = Zend_Search_Lucene::FORMAT_PRE_2_1;
         } elseif ($format == (int) 0xfffffffd) {
             $srcFormat = Zend_Search_Lucene::FORMAT_2_1;
         } elseif ($format == (int) 0xfffffffc) {
             $srcFormat = Zend_Search_Lucene::FORMAT_2_3;
         } else {
             throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
         }

         // Index version: source value plus the pending update, which is then consumed
         $version = $segmentsFile->readLong() + $this->_versionUpdate;
         $this->_versionUpdate = 0;
         $newSegmentFile->writeLong($version);

         // Copy the next header field through unchanged
         $newSegmentFile->writeInt($segmentsFile->readInt());

         // Reserve the segments-count slot; the real value is patched in at the end
         $numOfSegmentsOffset = $newSegmentFile->tell();
         $newSegmentFile->writeInt(0);

         $segmentsCount = $segmentsFile->readInt();
         $segments = array();
         for ($count = 0; $count < $segmentsCount; $count++) {
             $segName = $segmentsFile->readString();
             $segSize = $segmentsFile->readInt();

             if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
                 // Pre-2.1 entries carry only name and size; use defaults for the rest
                 $delGen = 0;
                 $hasSingleNormFile = false;
                 $numField = (int) 0xffffffff;
                 $normGens = array();
                 $isCompoundByte = 0;
                 $docStoreOptions = null;
             } else {
                 $delGen = $segmentsFile->readLong();

                 $docStoreOptions = null;
                 if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
                     $docStoreOffset = $segmentsFile->readInt();
                     if ($docStoreOffset != (int) 0xffffffff) {
                         $docStoreSegment = $segmentsFile->readString();
                         $docStoreIsCompoundFile = $segmentsFile->readByte();
                         $docStoreOptions = array(
                             'offset' => $docStoreOffset,
                             'segment' => $docStoreSegment,
                             'isCompound' => $docStoreIsCompoundFile == 1
                         );
                     }
                 }

                 $hasSingleNormFile = $segmentsFile->readByte();
                 $numField = $segmentsFile->readInt();

                 $normGens = array();
                 if ($numField != (int) 0xffffffff) {
                     for ($count1 = 0; $count1 < $numField; $count1++) {
                         $normGens[] = $segmentsFile->readLong();
                     }
                 }

                 $isCompoundByte = $segmentsFile->readByte();
             }

             if (in_array($segName, $this->_segmentsToDelete)) {
                 // Segment is scheduled for deletion: do not carry it over
                 continue;
             }

             if (!isset($this->_segmentInfos[$segName])) {
                 // Reset for every segment so that an unexpected marker byte can
                 // neither leave the variable undefined nor reuse the value of
                 // the previously processed segment. 0x0 maps to null as before.
                 $isCompound = null;
                 if ($isCompoundByte == 0xff) {
                     $isCompound = false;
                 } elseif ($isCompoundByte == 0x1) {
                     $isCompound = true;
                 }

                 $this->_segmentInfos[$segName] = new Zend_Search_Lucene_Index_SegmentInfo(
                     $this->_directory,
                     $segName,
                     $segSize,
                     $delGen,
                     $docStoreOptions,
                     $hasSingleNormFile,
                     $isCompound
                 );
             } else {
                 // Segment is already loaded: its in-memory delete generation wins
                 $delGen = $this->_segmentInfos[$segName]->getDelGen();
             }

             $newSegmentFile->writeString($segName);
             $newSegmentFile->writeInt($segSize);
             $newSegmentFile->writeLong($delGen);

             if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                 if ($docStoreOptions !== null) {
                     $newSegmentFile->writeInt($docStoreOffset);
                     $newSegmentFile->writeString($docStoreSegment);
                     $newSegmentFile->writeByte($docStoreIsCompoundFile);
                 } else {
                     $newSegmentFile->writeInt((int) 0xffffffff);
                 }
             } elseif ($docStoreOptions !== null) {
                 // The catch block below restores the generation and releases the
                 // write lock, so the lock must not be released here as well
                 throw new Zend_Search_Lucene_Exception('Index conversion to lower format version is not supported.');
             }

             $newSegmentFile->writeByte($hasSingleNormFile);
             $newSegmentFile->writeInt($numField);
             if ($numField != (int) 0xffffffff) {
                 foreach ($normGens as $normGen) {
                     $newSegmentFile->writeLong($normGen);
                 }
             }
             $newSegmentFile->writeByte($isCompoundByte);

             $segments[$segName] = $segSize;
         }
         $segmentsFile->close();

         $segmentsCount = count($segments) + count($this->_newSegments);

         // Append entries for the newly created segments
         foreach ($this->_newSegments as $segName => $segmentInfo) {
             $newSegmentFile->writeString($segName);
             $newSegmentFile->writeInt($segmentInfo->count());

             // Two 0xffffffff ints are written here; this occupies the same slot
             // that copied entries fill with writeLong($delGen)
             $newSegmentFile->writeInt((int) 0xffffffff);
             $newSegmentFile->writeInt((int) 0xffffffff);
             if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                 // Same marker as written above for entries without doc store options
                 $newSegmentFile->writeInt((int) 0xffffffff);
             }
             $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());
             // Same marker as the "no norm generations" value of $numField above
             $newSegmentFile->writeInt((int) 0xffffffff);
             $newSegmentFile->writeByte($segmentInfo->isCompound() ? 1 : -1);

             $segments[$segmentInfo->getName()] = $segmentInfo->count();
             $this->_segmentInfos[$segName] = $segmentInfo;
         }
         $this->_newSegments = array();

         // Patch the real number of segments into the reserved slot
         $newSegmentFile->seek($numOfSegmentsOffset);
         $newSegmentFile->writeInt($segmentsCount);
         $newSegmentFile->close();
     } catch (Exception $e) {
         // Restore previous index generation: skip the 4-byte marker and
         // write the generation number twice
         $generation--;
         $genFile->seek(4, SEEK_SET);
         $genFile->writeLong($generation);
         $genFile->writeLong($generation);

         // Release index write lock
         Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

         throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
     }

     // Write generation (second copy)
     $genFile->writeLong($generation);

     // Check if another update or read process is not running now
     if (Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory)) {
         // Parallel arrays: file name, deletion group and order number within the group
         $filesToDelete = array();
         $filesTypes = array();
         $filesNumbers = array();

         // '.del' files of segments that are still in use, as [segment number][del generation] => file
         $delFiles = array();

         foreach ($this->_directory->fileList() as $file) {
             if ($file == 'deletable') {
                 $filesToDelete[] = $file;
                 $filesTypes[] = 0;
                 $filesNumbers[] = 0;
             } elseif ($file == 'segments') {
                 $filesToDelete[] = $file;
                 $filesTypes[] = 1;
                 $filesNumbers[] = 0;
             } elseif (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
                 // 'segments_<generation>' file; keep only the one just written
                 if ($file != Zend_Search_Lucene::getSegmentFileName($generation)) {
                     $filesToDelete[] = $file;
                     $filesTypes[] = 2;
                     // order by generation number (base 36 suffix after 'segments_')
                     $filesNumbers[] = (int) base_convert(substr($file, 9), 36, 10);
                 }
             } elseif (preg_match('/(^_([a-zA-Z0-9]+))\\.f\\d+$/i', $file, $matches)) {
                 // one of per segment files ('<segment_name>.f<decimal_number>')
                 if (!isset($segments[$matches[1]])) {
                     $filesToDelete[] = $file;
                     $filesTypes[] = 3; // second group of files for deletions
                     $filesNumbers[] = (int) base_convert($matches[2], 36, 10); // order by segment number
                 }
             } elseif (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\\.del$/i', $file, $matches)) {
                 // one of per segment files ('<segment_name>_<del_generation>.del')
                 if (!isset($segments[$matches[1]])) {
                     $filesToDelete[] = $file;
                     $filesTypes[] = 3; // second group of files for deletions
                     $filesNumbers[] = (int) base_convert($matches[2], 36, 10); // order by segment number
                 } else {
                     // Segment is in use: decide later which delete generations are obsolete
                     $segmentNumber = (int) base_convert($matches[2], 36, 10);
                     $delGeneration = (int) base_convert($matches[4], 36, 10);
                     if (!isset($delFiles[$segmentNumber])) {
                         $delFiles[$segmentNumber] = array();
                     }
                     $delFiles[$segmentNumber][$delGeneration] = $file;
                 }
             } elseif (isset(self::$_indexExtensions[substr($file, strlen($file) - 4)])) {
                 // one of per segment files ('<segment_name>.<ext>')
                 $segmentName = substr($file, 0, strlen($file) - 4);
                 // Skip files of segments in the current set and of the segment being built
                 if (!isset($segments[$segmentName]) && ($this->_currentSegment === null || $this->_currentSegment->getName() != $segmentName)) {
                     $filesToDelete[] = $file;
                     $filesTypes[] = 3; // second group of files for deletions
                     $filesNumbers[] = (int) base_convert(substr($file, 1, strlen($file) - 5), 36, 10); // order by segment number
                 }
             }
         }

         $maxGenNumber = 0;
         // process .del files of currently used segments
         foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
             ksort($delFiles[$segmentNumber], SORT_NUMERIC);
             // remove last delete file generation from candidates for deleting
             array_pop($delFiles[$segmentNumber]);

             end($delFiles[$segmentNumber]);
             $lastGenNumber = key($delFiles[$segmentNumber]);
             if ($lastGenNumber > $maxGenNumber) {
                 $maxGenNumber = $lastGenNumber;
             }
         }

         // Generations are in 0..$maxGenNumber, so the multiplier has to be
         // $maxGenNumber + 1 for the <segment_number>,<del_generation> pairs to
         // map to distinct, correctly ordered numbers
         $genRange = $maxGenNumber + 1;
         foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
             foreach ($segmentDelFiles as $delGeneration => $file) {
                 $filesToDelete[] = $file;
                 $filesTypes[] = 4; // third group of files for deletions
                 $filesNumbers[] = $segmentNumber * $genRange + $delGeneration;
             }
         }

         // Reorder files for deleting
         array_multisort($filesTypes, SORT_ASC, SORT_NUMERIC, $filesNumbers, SORT_ASC, SORT_NUMERIC, $filesToDelete, SORT_ASC, SORT_STRING);

         foreach ($filesToDelete as $file) {
             try {
                 // '.cfx' files are never deleted here
                 if (substr($file, strlen($file) - 4) != '.cfx') {
                     $this->_directory->deleteFile($file);
                 }
             } catch (Zend_Search_Lucene_Exception $e) {
                 if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
                     // That's not "file is under processing or already deleted" exception.
                     // Give both locks back before passing it through.
                     Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
                     Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
                     throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
                 }
             }
         }

         // Return read lock into the previous state
         Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
     } else {
         // Only release resources if another index reader is running now
         foreach ($this->_segmentsToDelete as $segName) {
             foreach (self::$_indexExtensions as $ext) {
                 $this->_directory->purgeFile($segName . $ext);
             }
         }
     }

     // Clean-up _segmentsToDelete container
     $this->_segmentsToDelete = array();

     // Release index write lock
     Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

     // Remove unused segments from segments list
     foreach ($this->_segmentInfos as $segName => $segmentInfo) {
         if (!isset($segments[$segName])) {
             unset($this->_segmentInfos[$segName]);
         }
     }
 }