/**
 * Zend_Search_Lucene_Index_SegmentInfo constructor
 *
 * Binds the segment to its directory, reads the shared doc store directory
 * ('.cfx') when requested, resolves whether the segment is stored as a
 * compound file ('.cfs'), loads the field infos ('.fnm') and finally loads
 * the deletions through _loadDelFile().
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param string     $name
 * @param integer    $docCount
 * @param integer    $delGen             -2 makes the constructor detect the latest delete file generation
 * @param array|null $docStoreOptions    when given, the 'isCompound' and 'segment' keys are read
 * @param boolean    $hasSingleNormFile
 * @param boolean    $isCompound         null means "unknown": the '.cfs' file is probed
 * @throws Zend_Search_Lucene_Exception
 */
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $docStoreOptions = null, $hasSingleNormFile = false, $isCompound = null)
{
    $this->_directory = $directory;
    $this->_name      = $name;
    $this->_docCount  = $docCount;

    if (null !== $docStoreOptions) {
        $this->_usesSharedDocStore    = true;
        $this->_sharedDocStoreOptions = $docStoreOptions;

        if ($docStoreOptions['isCompound']) {
            // Read the table of contents of the shared doc store compound file
            $storeFile    = $this->_directory->getFileObject($docStoreOptions['segment'] . '.cfx');
            $storeEntries = $storeFile->readVInt();
            $storeOffsets = array();
            $storeSizes   = array();

            $entry = 0;
            while ($entry < $storeEntries) {
                $entryOffset = $storeFile->readLong();
                if ($entry > 0) {
                    // Size of the previous entry = distance between the two offsets
                    $storeSizes[$entryName] = $entryOffset - end($storeOffsets);
                }
                $entryName                = $storeFile->readString();
                $storeOffsets[$entryName] = $entryOffset;
                $entry++;
            }
            if ($entry > 0) {
                // The last entry runs up to the end of the compound file
                $storeSizes[$entryName] = $this->_directory->fileLength($docStoreOptions['segment'] . '.cfx') - $entryOffset;
            }

            $this->_sharedDocStoreOptions['files']     = $storeOffsets;
            $this->_sharedDocStoreOptions['fileSizes'] = $storeSizes;
        }
    }

    $this->_hasSingleNormFile = $hasSingleNormFile;
    $this->_delGen            = $delGen;
    $this->_termDictionary    = null;

    if (null === $isCompound) {
        // It's a pre-2.1 segment or isCompound is set to 'unknown'.
        // Probe the directory for the compound file.
        require_once 'Zend/Search/Lucene/Exception.php';
        try {
            $this->_directory->getFileObject($name . '.cfs');

            // Compound file is found
            $this->_isCompound = true;
        } catch (Zend_Search_Lucene_Exception $e) {
            if (strpos($e->getMessage(), 'is not readable') === false) {
                // Any other failure is not ours to interpret
                throw $e;
            }

            // Compound file is not found or is not readable
            $this->_isCompound = false;
        }
    } else {
        $this->_isCompound = $isCompound;
    }

    $this->_segFiles = array();
    if ($this->_isCompound) {
        // Read the table of contents of the segment compound file
        $compoundFile    = $this->_directory->getFileObject($name . '.cfs');
        $compoundEntries = $compoundFile->readVInt();

        $entry = 0;
        while ($entry < $compoundEntries) {
            $entryOffset = $compoundFile->readLong();
            if ($entry > 0) {
                $this->_segFileSizes[$entryName] = $entryOffset - end($this->_segFiles);
            }
            $entryName                   = $compoundFile->readString();
            $this->_segFiles[$entryName] = $entryOffset;
            $entry++;
        }
        if ($entry > 0) {
            $this->_segFileSizes[$entryName] = $this->_directory->fileLength($name . '.cfs') - $entryOffset;
        }
    }

    // Load field infos
    $fnmFile     = $this->openCompoundFile('.fnm');
    $fieldsTotal = $fnmFile->readVInt();
    $sortedNames = array();
    $sortedNums  = array();

    $this->_fields = array();
    for ($fieldNum = 0; $fieldNum < $fieldsTotal; $fieldNum++) {
        $fieldName    = $fnmFile->readString();
        $flags        = $fnmFile->readByte();
        $normsOmitted = $flags & 0x10;

        $this->_fields[$fieldNum] = new Zend_Search_Lucene_Index_FieldInfo(
            $fieldName,
            $flags & 0x01, /* field is indexed */
            $fieldNum,
            $flags & 0x02, /* termvectors are stored */
            $normsOmitted, /* norms are omitted */
            $flags & 0x20  /* payloads are stored */
        );

        if ($normsOmitted) {
            // norms are omitted for the indexed field:
            // use the encoded norm of 1.0 for every document
            $this->_norms[$fieldNum] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
        }

        $sortedNames[$fieldNum] = $fieldName;
        $sortedNums[$fieldNum]  = $fieldNum;
    }

    // Sort field numbers by field name, then map field number => sorted position
    array_multisort($sortedNames, SORT_ASC, SORT_REGULAR, $sortedNums);
    $this->_fieldsDicPositions = array_flip($sortedNums);

    if (-2 == $this->_delGen) {
        // SegmentInfo constructor is invoked from index writer
        // Autodetect current delete file generation number
        $this->_delGen = $this->_detectLatestDelGen();
    }

    // Load deletions
    $this->_deleted = $this->_loadDelFile();
}
/**
 * Zend_Search_Lucene_Index_SegmentInfo constructor needs Segmentname,
 * Documents count and Directory as a parameter.
 *
 * Reads the compound file directory (only when "<name>.cfs" exists), the
 * field infos ('.fnm') and the deletions bit vector ('.del') of the segment.
 * $this->_deleted is set to null when the '.del' file cannot be found in
 * the compound file.
 *
 * @param string $name
 * @param integer $docCount
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @throws Zend_Search_Exception
 */
public function __construct($name, $docCount, $directory)
{
    $this->_name           = $name;
    $this->_docCount       = $docCount;
    $this->_directory      = $directory;
    $this->_termDictionary = null;

    $this->_segFiles = array();
    if ($this->_directory->fileExists($name . '.cfs')) {
        // Read the table of contents of the compound file: each entry is
        // a data offset followed by a file name
        $cfsFile       = $this->_directory->getFileObject($name . '.cfs');
        $segFilesCount = $cfsFile->readVInt();

        for ($count = 0; $count < $segFilesCount; $count++) {
            $dataOffset = $cfsFile->readLong();
            if ($count != 0) {
                // Size of the previous entry = distance between the two offsets
                $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
            }
            $fileName                   = $cfsFile->readString();
            $this->_segFiles[$fileName] = $dataOffset;
        }
        if ($count != 0) {
            // The last entry runs up to the end of the compound file
            $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
        }
    }

    // Load field infos
    $fnmFile     = $this->openCompoundFile('.fnm');
    $fieldsCount = $fnmFile->readVInt();
    $fieldNames  = array();
    $fieldNums   = array();

    $this->_fields = array();
    for ($count = 0; $count < $fieldsCount; $count++) {
        $fieldName = $fnmFile->readString();
        $fieldBits = $fnmFile->readByte();

        $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName, $fieldBits & 1, $count, $fieldBits & 2);

        if ($fieldBits & 0x10) {
            // norms are omitted for the indexed field
            $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
        }

        $fieldNums[$count]  = $count;
        $fieldNames[$count] = $fieldName;
    }

    // Sort field numbers by field name, then map field number => sorted position
    array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
    $this->_fieldsDicPositions = array_flip($fieldNums);

    // Load deletions
    try {
        $delFile = $this->openCompoundFile('.del');

        // The first integer is divided by 8 (rounded up) to get the number
        // of bytes to read; nothing is read when the second integer is zero
        $byteCount = $delFile->readInt();
        $byteCount = ceil($byteCount / 8);
        $bitCount  = $delFile->readInt();

        if ($bitCount == 0) {
            $delBytes = '';
        } else {
            $delBytes = $delFile->readBytes($byteCount);
        }

        if (extension_loaded('bitset')) {
            $this->_deleted = $delBytes;
        } else {
            $this->_deleted = array();

            // Walk only the bytes that were actually loaded. Iterating up to
            // $byteCount would index into an empty string when $bitCount is
            // zero and raise "Uninitialized string offset" errors.
            $loadedBytes = strlen($delBytes);
            for ($byteNum = 0; $byteNum < $loadedBytes; $byteNum++) {
                $byte = ord($delBytes[$byteNum]);
                for ($bit = 0; $bit < 8; $bit++) {
                    if ($byte & (1 << $bit)) {
                        $this->_deleted[$byteNum * 8 + $bit] = 1;
                    }
                }
            }
        }
    } catch (Zend_Search_Exception $e) {
        if (strpos($e->getMessage(), 'compound file doesn\'t contain') !== false) {
            // There is no delete file for this segment
            $this->_deleted = null;
        } else {
            throw $e;
        }
    }
}
/**
 * Zend_Search_Lucene_Index_SegmentInfo constructor
 *
 * Binds the segment to its directory, reads the shared doc store directory
 * ('.cfx') when requested, resolves whether the segment is stored as a
 * compound file ('.cfs'), loads the field infos ('.fnm') and decodes the
 * delete file selected by the delete generation number:
 *   -1     - the segment has no delete file, nothing is loaded;
 *    0     - pre-2.1 format, "<name>.del" is read if it is readable;
 *    other - 2.1+ format, "<name>_<generation in base 36>.del" is read
 *            either as a d-gaps list or as a plain bit vector.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param string     $name
 * @param integer    $docCount
 * @param integer    $delGen             -2 triggers _detectLatestDelGen()
 * @param array|null $docStoreOptions    when given, the 'isCompound' and 'segment' keys are read
 * @param boolean    $hasSingleNormFile
 * @param boolean    $isCompound         null means "unknown": the '.cfs' file is probed
 * @throws Zend_Search_Lucene_Exception
 * @throws Zend_Search_Exception
 */
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $docStoreOptions = null, $hasSingleNormFile = false, $isCompound = null)
{
    $this->_directory = $directory;
    $this->_name      = $name;
    $this->_docCount  = $docCount;

    if ($docStoreOptions !== null) {
        $this->_usesSharedDocStore    = true;
        $this->_sharedDocStoreOptions = $docStoreOptions;

        if ($docStoreOptions['isCompound']) {
            // Read the table of contents of the shared doc store compound
            // file: each entry is a data offset followed by a file name
            $cfxFile       = $this->_directory->getFileObject($docStoreOptions['segment'] . '.cfx');
            $cfxFilesCount = $cfxFile->readVInt();

            $cfxFiles     = array();
            $cfxFileSizes = array();

            for ($count = 0; $count < $cfxFilesCount; $count++) {
                $dataOffset = $cfxFile->readLong();
                if ($count != 0) {
                    // Size of the previous entry = distance between the two offsets
                    $cfxFileSizes[$fileName] = $dataOffset - end($cfxFiles);
                }
                $fileName            = $cfxFile->readString();
                $cfxFiles[$fileName] = $dataOffset;
            }
            if ($count != 0) {
                // The last entry runs up to the end of the compound file
                $cfxFileSizes[$fileName] = $this->_directory->fileLength($docStoreOptions['segment'] . '.cfx') - $dataOffset;
            }

            $this->_sharedDocStoreOptions['files']     = $cfxFiles;
            $this->_sharedDocStoreOptions['fileSizes'] = $cfxFileSizes;
        }
    }

    $this->_hasSingleNormFile = $hasSingleNormFile;
    $this->_delGen            = $delGen;
    $this->_termDictionary    = null;

    if ($isCompound !== null) {
        $this->_isCompound = $isCompound;
    } else {
        // It's a pre-2.1 segment or isCompound is set to 'unknown'
        // Detect if segment uses compound file
        try {
            // Try to open compound file
            $this->_directory->getFileObject($name . '.cfs');

            // Compound file is found
            $this->_isCompound = true;
        } catch (Zend_Search_Lucene_Exception $e) {
            if (strpos($e->getMessage(), 'is not readable') !== false) {
                // Compound file is not found or is not readable
                $this->_isCompound = false;
            } else {
                throw $e;
            }
        }
    }

    $this->_segFiles = array();
    if ($this->_isCompound) {
        // Read the table of contents of the segment compound file
        $cfsFile       = $this->_directory->getFileObject($name . '.cfs');
        $segFilesCount = $cfsFile->readVInt();

        for ($count = 0; $count < $segFilesCount; $count++) {
            $dataOffset = $cfsFile->readLong();
            if ($count != 0) {
                $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
            }
            $fileName                   = $cfsFile->readString();
            $this->_segFiles[$fileName] = $dataOffset;
        }
        if ($count != 0) {
            $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
        }
    }

    // Load field infos
    $fnmFile     = $this->openCompoundFile('.fnm');
    $fieldsCount = $fnmFile->readVInt();
    $fieldNames  = array();
    $fieldNums   = array();

    $this->_fields = array();
    for ($count = 0; $count < $fieldsCount; $count++) {
        $fieldName = $fnmFile->readString();
        $fieldBits = $fnmFile->readByte();

        $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName, $fieldBits & 0x1, $count, $fieldBits & 0x2, $fieldBits & 0x10, $fieldBits & 0x20);

        if ($fieldBits & 0x10) {
            // norms are omitted for the indexed field
            $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
        }

        $fieldNums[$count]  = $count;
        $fieldNames[$count] = $fieldName;
    }

    // Sort field numbers by field name, then map field number => sorted position
    array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
    $this->_fieldsDicPositions = array_flip($fieldNums);

    if ($this->_delGen == -2) {
        // NOTE(review): the return value is not used here, so the helper is
        // presumably expected to update $this->_delGen itself - confirm
        // against its definition.
        $this->_detectLatestDelGen();
    }

    // The extension cannot appear or disappear while the constructor runs,
    // so the check is done once instead of once per decoded bit
    $useBitset = extension_loaded('bitset');

    if ($this->_delGen == -1) {
        // There is no delete file for this segment
        // Do nothing
    } elseif ($this->_delGen == 0) {
        // It's a segment with pre-2.1 format delete file
        // Try to find delete file
        try {
            // '.del' files always stored in a separate file
            // Segment compound is not used
            $delFile = $this->_directory->getFileObject($this->_name . '.del');

            // The first integer is divided by 8 (rounded up) to get the
            // number of bytes to read; nothing is read when the second
            // integer is zero
            $byteCount = $delFile->readInt();
            $byteCount = ceil($byteCount / 8);
            $bitCount  = $delFile->readInt();

            if ($bitCount == 0) {
                $delBytes = '';
            } else {
                $delBytes = $delFile->readBytes($byteCount);
            }

            if ($useBitset) {
                $this->_deleted = $delBytes;
            } else {
                $this->_deleted = array();

                // Walk only the bytes that were actually loaded. Iterating
                // up to $byteCount would index into an empty string when
                // $bitCount is zero and raise "Uninitialized string offset"
                // errors.
                $loadedBytes = strlen($delBytes);
                for ($byteNum = 0; $byteNum < $loadedBytes; $byteNum++) {
                    $byte = ord($delBytes[$byteNum]);
                    for ($bit = 0; $bit < 8; $bit++) {
                        if ($byte & (1 << $bit)) {
                            $this->_deleted[$byteNum * 8 + $bit] = 1;
                        }
                    }
                }
            }
        } catch (Zend_Search_Exception $e) {
            if (strpos($e->getMessage(), 'is not readable') === false) {
                throw $e;
            }
            // There is no delete file
            // Do nothing
        }
    } else {
        // It's 2.1+ format delete file
        $delFileName = $this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del';
        $delFile     = $this->_directory->getFileObject($delFileName);

        $format = $delFile->readInt();

        if ($format == (int) 0xffffffff) {
            // D-gaps format: a list of (gap to the next non-zero byte,
            // non-zero byte) pairs that runs up to the end of the file
            if ($useBitset) {
                $this->_deleted = bitset_empty();
            } else {
                $this->_deleted = array();
            }

            // Two header integers follow the format marker; their values
            // are not needed to decode the list, but they must be consumed
            $delFile->readInt();
            $delFile->readInt();

            $delFileSize = $this->_directory->fileLength($delFileName);
            $byteNum     = 0;

            // A pre-checked loop: a file that ends right after its header
            // contains no pairs, so nothing must be read past the end
            while ($delFile->tell() < $delFileSize) {
                $dgap        = $delFile->readVInt();
                $nonZeroByte = $delFile->readByte();

                $byteNum += $dgap;

                for ($bit = 0; $bit < 8; $bit++) {
                    if ($nonZeroByte & (1 << $bit)) {
                        if ($useBitset) {
                            bitset_incl($this->_deleted, $byteNum * 8 + $bit);
                        } else {
                            $this->_deleted[$byteNum * 8 + $bit] = 1;
                        }
                    }
                }
            }
        } else {
            // $format is actually byte count
            $byteCount = ceil($format / 8);
            $bitCount  = $delFile->readInt();

            if ($bitCount == 0) {
                $delBytes = '';
            } else {
                $delBytes = $delFile->readBytes($byteCount);
            }

            if ($useBitset) {
                $this->_deleted = $delBytes;
            } else {
                $this->_deleted = array();

                // Bounded by the loaded bytes for the same reason as in the
                // pre-2.1 branch: $delBytes is empty when $bitCount is zero
                $loadedBytes = strlen($delBytes);
                for ($byteNum = 0; $byteNum < $loadedBytes; $byteNum++) {
                    $byte = ord($delBytes[$byteNum]);
                    for ($bit = 0; $bit < 8; $bit++) {
                        if ($byte & (1 << $bit)) {
                            $this->_deleted[$byteNum * 8 + $bit] = 1;
                        }
                    }
                }
            }
        }
    }
}
/**
 * Zend_Search_Lucene_Index_SegmentInfo constructor
 *
 * Binds the segment to its directory, resolves whether the segment is
 * stored as a compound file ('.cfs'), loads the field infos ('.fnm') and
 * decodes the delete file selected by the delete generation number:
 *   -1     - the segment has no delete file, nothing is loaded;
 *    0     - pre-2.1 format, "<name>.del" is read if it is readable;
 *    other - 2.1+ format, "<name>_<generation in base 36>.del" is read as
 *            a plain bit vector (the d-gaps variant is rejected).
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param string  $name
 * @param integer $docCount
 * @param integer $delGen
 * @param boolean $hasSingleNormFile
 * @param boolean $isCompound         null means "unknown": the '.cfs' file is probed
 * @throws Zend_Search_Lucene_Exception
 * @throws Zend_Search_Exception
 */
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $hasSingleNormFile = false, $isCompound = null)
{
    $this->_directory         = $directory;
    $this->_name              = $name;
    $this->_docCount          = $docCount;
    $this->_hasSingleNormFile = $hasSingleNormFile;
    $this->_delGen            = $delGen;
    $this->_termDictionary    = null;

    if (!is_null($isCompound)) {
        $this->_isCompound = $isCompound;
    } else {
        // It's a pre-2.1 segment
        // detect if it uses compound file
        $this->_isCompound = true;

        try {
            // Try to open compound file
            $this->_directory->getFileObject($name . '.cfs');
        } catch (Zend_Search_Lucene_Exception $e) {
            if (strpos($e->getMessage(), 'is not readable') !== false) {
                // Compound file is not found or is not readable
                $this->_isCompound = false;
            } else {
                throw $e;
            }
        }
    }

    $this->_segFiles = array();
    if ($this->_isCompound) {
        // Read the table of contents of the compound file: each entry is
        // a data offset followed by a file name
        $cfsFile       = $this->_directory->getFileObject($name . '.cfs');
        $segFilesCount = $cfsFile->readVInt();

        for ($count = 0; $count < $segFilesCount; $count++) {
            $dataOffset = $cfsFile->readLong();
            if ($count != 0) {
                // Size of the previous entry = distance between the two offsets
                $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
            }
            $fileName                   = $cfsFile->readString();
            $this->_segFiles[$fileName] = $dataOffset;
        }
        if ($count != 0) {
            // The last entry runs up to the end of the compound file
            $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
        }
    }

    // Load field infos
    $fnmFile     = $this->openCompoundFile('.fnm');
    $fieldsCount = $fnmFile->readVInt();
    $fieldNames  = array();
    $fieldNums   = array();

    $this->_fields = array();
    for ($count = 0; $count < $fieldsCount; $count++) {
        $fieldName = $fnmFile->readString();
        $fieldBits = $fnmFile->readByte();

        $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName, $fieldBits & 1, $count, $fieldBits & 2);

        if ($fieldBits & 0x10) {
            // norms are omitted for the indexed field
            $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
        }

        $fieldNums[$count]  = $count;
        $fieldNames[$count] = $fieldName;
    }

    // Sort field numbers by field name, then map field number => sorted position
    array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
    $this->_fieldsDicPositions = array_flip($fieldNums);

    if ($this->_delGen == -1) {
        // There is no delete file for this segment
        // Do nothing
    } elseif ($this->_delGen == 0) {
        // It's a segment with pre-2.1 format delete file
        // Try to find delete file
        try {
            // '.del' files always stored in a separate file
            // Segment compound is not used
            $delFile = $this->_directory->getFileObject($this->_name . '.del');

            // The first integer is divided by 8 (rounded up) to get the
            // number of bytes to read; nothing is read when the second
            // integer is zero
            $byteCount = $delFile->readInt();
            $byteCount = ceil($byteCount / 8);
            $bitCount  = $delFile->readInt();

            if ($bitCount == 0) {
                $delBytes = '';
            } else {
                $delBytes = $delFile->readBytes($byteCount);
            }

            if (extension_loaded('bitset')) {
                $this->_deleted = $delBytes;
            } else {
                $this->_deleted = array();

                // Walk only the bytes that were actually loaded. Iterating
                // up to $byteCount would index into an empty string when
                // $bitCount is zero and raise "Uninitialized string offset"
                // errors.
                $loadedBytes = strlen($delBytes);
                for ($byteNum = 0; $byteNum < $loadedBytes; $byteNum++) {
                    $byte = ord($delBytes[$byteNum]);
                    for ($bit = 0; $bit < 8; $bit++) {
                        if ($byte & (1 << $bit)) {
                            $this->_deleted[$byteNum * 8 + $bit] = 1;
                        }
                    }
                }
            }
        } catch (Zend_Search_Exception $e) {
            if (strpos($e->getMessage(), 'is not readable') === false) {
                throw $e;
            }
            // There is no delete file
            // Do nothing
        }
    } else {
        // It's 2.1+ format delete file
        $delFile = $this->_directory->getFileObject($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');

        $format = $delFile->readInt();

        if ($format == (int) 0xffffffff) {
            /**
             * @todo Implement support of DGaps delete file format.
             * See Lucene file format for details - http://lucene.apache.org/java/docs/fileformats.html#Deleted%20Documents
             */
            throw new Zend_Search_Lucene_Exception('DGaps delete file format is not supported. Optimize index to use it with Zend_Search_Lucene');
        } else {
            // $format is actually byte count
            $byteCount = ceil($format / 8);
            $bitCount  = $delFile->readInt();

            if ($bitCount == 0) {
                $delBytes = '';
            } else {
                $delBytes = $delFile->readBytes($byteCount);
            }

            if (extension_loaded('bitset')) {
                $this->_deleted = $delBytes;
            } else {
                $this->_deleted = array();

                // Bounded by the loaded bytes for the same reason as in the
                // pre-2.1 branch: $delBytes is empty when $bitCount is zero
                $loadedBytes = strlen($delBytes);
                for ($byteNum = 0; $byteNum < $loadedBytes; $byteNum++) {
                    $byte = ord($delBytes[$byteNum]);
                    for ($bit = 0; $bit < 8; $bit++) {
                        if ($byte & (1 << $bit)) {
                            $this->_deleted[$byteNum * 8 + $bit] = 1;
                        }
                    }
                }
            }
        }
    }
}
/**
 * Zend_Search_Lucene_Index_SegmentInfo constructor needs Segmentname,
 * Documents count and Directory as a parameter.
 *
 * Reads the table of contents of "<name>.cfs" into the segment file map
 * and loads the field infos from the '.fnm' file.
 *
 * @param string $name
 * @param integer $docCount
 * @param Zend_Search_Lucene_Storage_Directory $directory
 */
public function __construct($name, $docCount, $directory)
{
    $this->_name           = $name;
    $this->_docCount       = $docCount;
    $this->_directory      = $directory;
    $this->_termDictionary = null;
    $this->_segFiles       = array();

    // Compound file directory: every entry is a data offset followed by a file name
    $compoundFile = $this->_directory->getFileObject($name . '.cfs');
    for ($remaining = $compoundFile->readVInt(); $remaining > 0; $remaining--) {
        $entryOffset                 = $compoundFile->readLong();
        $entryName                   = $compoundFile->readString();
        $this->_segFiles[$entryName] = $entryOffset;
    }

    // Field infos
    $fnmFile     = $this->openCompoundFile('.fnm');
    $fieldsTotal = $fnmFile->readVInt();
    $sortedNames = array();
    $sortedNums  = array();

    $this->_fields = array();
    $fieldNum      = 0;
    while ($fieldNum < $fieldsTotal) {
        $fieldName = $fnmFile->readString();
        $flags     = $fnmFile->readByte();

        $this->_fields[$fieldNum] = new Zend_Search_Lucene_Index_FieldInfo(
            $fieldName,
            $flags & 1,
            $fieldNum,
            $flags & 2
        );

        if ($flags & 0x10) {
            // norms are omitted for the indexed field:
            // use the encoded norm of 1.0 for every document
            $defaultNorm             = chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0));
            $this->_norms[$fieldNum] = str_repeat($defaultNorm, $docCount);
        }

        $sortedNames[$fieldNum] = $fieldName;
        $sortedNums[$fieldNum]  = $fieldNum;
        $fieldNum++;
    }

    // Sort field numbers by field name, then map field number => sorted position
    array_multisort($sortedNames, SORT_ASC, SORT_REGULAR, $sortedNums);
    $this->_fieldsDicPositions = array_flip($sortedNums);
}
/**
 * Zend_Search_Lucene_Index_SegmentInfo constructor
 *
 * Binds the segment to its directory, reads the shared doc store directory
 * ('.cfx') when requested, resolves whether the segment is stored as a
 * compound file ('.cfs'), loads the field infos ('.fnm') and finally loads
 * the deletions through _loadDelFile().
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param string     $name
 * @param integer    $docCount
 * @param integer    $delGen             -2 makes the constructor detect the latest delete file generation
 * @param array|null $docStoreOptions    when given, the 'isCompound' and 'segment' keys are read
 * @param boolean    $hasSingleNormFile
 * @param boolean    $isCompound         null means "unknown": the '.cfs' file is probed
 * @throws Zend_Search_Lucene_Exception
 */
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $docStoreOptions = null, $hasSingleNormFile = false, $isCompound = null)
{
    $this->_directory = $directory;
    $this->_name      = $name;
    $this->_docCount  = $docCount;

    if (null !== $docStoreOptions) {
        $this->_usesSharedDocStore    = true;
        $this->_sharedDocStoreOptions = $docStoreOptions;

        if ($docStoreOptions['isCompound']) {
            // Table of contents of the shared doc store compound file
            $sharedStore  = $this->_directory->getFileObject($docStoreOptions['segment'] . '.cfx');
            $sharedTotal  = $sharedStore->readVInt();
            $offsetByName = array();
            $sizeByName   = array();

            for ($idx = 0; $idx < $sharedTotal; ++$idx) {
                $offset = $sharedStore->readLong();
                if ($idx > 0) {
                    // Size of the previous entry = distance between the two offsets
                    $sizeByName[$entryName] = $offset - end($offsetByName);
                }
                $entryName                = $sharedStore->readString();
                $offsetByName[$entryName] = $offset;
            }
            if ($idx > 0) {
                // The last entry runs up to the end of the compound file
                $sizeByName[$entryName] = $this->_directory->fileLength($docStoreOptions['segment'] . '.cfx') - $offset;
            }

            $this->_sharedDocStoreOptions['files']     = $offsetByName;
            $this->_sharedDocStoreOptions['fileSizes'] = $sizeByName;
        }
    }

    $this->_hasSingleNormFile = $hasSingleNormFile;
    $this->_delGen            = $delGen;
    $this->_termDictionary    = null;

    if (null === $isCompound) {
        // Compound state is unknown: probe the directory for the '.cfs' file
        try {
            $this->_directory->getFileObject($name . '.cfs');
            $this->_isCompound = true;
        } catch (Zend_Search_Lucene_Exception $e) {
            if (strpos($e->getMessage(), 'is not readable') === false) {
                // Unrelated failure: re-raise it, keeping the original as the previous exception
                throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
            }
            $this->_isCompound = false;
        }
    } else {
        $this->_isCompound = $isCompound;
    }

    $this->_segFiles = array();
    if ($this->_isCompound) {
        // Table of contents of the segment compound file
        $compound      = $this->_directory->getFileObject($name . '.cfs');
        $compoundTotal = $compound->readVInt();

        for ($idx = 0; $idx < $compoundTotal; ++$idx) {
            $offset = $compound->readLong();
            if ($idx > 0) {
                $this->_segFileSizes[$entryName] = $offset - end($this->_segFiles);
            }
            $entryName                   = $compound->readString();
            $this->_segFiles[$entryName] = $offset;
        }
        if ($idx > 0) {
            $this->_segFileSizes[$entryName] = $this->_directory->fileLength($name . '.cfs') - $offset;
        }
    }

    // Field infos
    $fnmFile     = $this->openCompoundFile('.fnm');
    $fieldsTotal = $fnmFile->readVInt();
    $namesToSort = array();
    $numsToSort  = array();

    $this->_fields = array();
    for ($fieldNum = 0; $fieldNum < $fieldsTotal; ++$fieldNum) {
        $fieldName = $fnmFile->readString();
        $flags     = $fnmFile->readByte();
        $omitNorms = $flags & 0x10;

        $this->_fields[$fieldNum] = new Zend_Search_Lucene_Index_FieldInfo(
            $fieldName,
            $flags & 0x1,
            $fieldNum,
            $flags & 0x2,
            $omitNorms,
            $flags & 0x20
        );

        if ($omitNorms) {
            // No stored norms for this field: use the encoded norm of 1.0 for every document
            $this->_norms[$fieldNum] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
        }

        $namesToSort[$fieldNum] = $fieldName;
        $numsToSort[$fieldNum]  = $fieldNum;
    }

    // Sort field numbers by field name, then map field number => sorted position
    array_multisort($namesToSort, SORT_ASC, SORT_REGULAR, $numsToSort);
    $this->_fieldsDicPositions = array_flip($numsToSort);

    if (-2 == $this->_delGen) {
        // Resolve the delete file generation before loading deletions
        $this->_delGen = $this->_detectLatestDelGen();
    }

    $this->_deleted = $this->_loadDelFile();
}