/**
 * Merges the sample source segments '_0' .. '_4' into a single segment named
 * 'mergedSegment' and checks that the produced compound file is equal to the
 * stored 'mergedSegment.cfs.sample' reference file.
 */
public function testMerge()
{
    $filesPath = dirname(__FILE__) . '/_files';

    $sourceDir = new Zend_Search_Lucene_Storage_Directory_Filesystem($filesPath . '/_source');
    $targetDir = new Zend_Search_Lucene_Storage_Directory_Filesystem($filesPath);

    $merger = new Zend_Search_Lucene_Index_SegmentMerger($targetDir, 'mergedSegment');

    // Register the source segments '_0' .. '_4'
    for ($segmentNum = 0; $segmentNum < 5; $segmentNum++) {
        $source = new Zend_Search_Lucene_Index_SegmentInfo('_' . $segmentNum, 2, $sourceDir);
        $merger->addSource($source);
    }

    $result = $merger->merge();
    $this->assertTrue($result instanceof Zend_Search_Lucene_Index_SegmentInfo);

    // Drop the merged segment object before the output file is read back
    unset($result);

    // Freshly generated compound file
    $actualFile = $targetDir->getFileObject('mergedSegment.cfs');
    $actualData = $actualFile->readBytes($targetDir->fileLength('mergedSegment.cfs'));

    // Reference compound file
    $expectedFile = $targetDir->getFileObject('mergedSegment.cfs.sample');
    $expectedData = $expectedFile->readBytes($targetDir->fileLength('mergedSegment.cfs.sample'));

    $this->assertEquals($actualData, $expectedData);

    // Remove the generated file
    $targetDir->deleteFile('mergedSegment.cfs');
}
/**
 * Exercises the filesystem directory implementation: file creation, length,
 * listing, deletion, renaming, touching (modification time), reading, and
 * truncation through re-creation.
 */
public function testFilesystem()
{
    $workPath = dirname(__FILE__) . '/_tempFiles/_files';

    if (is_dir($workPath)) {
        // Remove files from the temporary directory
        $handle = opendir($workPath);
        while (($entry = readdir($handle)) !== false) {
            $entryPath = $workPath . '/' . $entry;
            if (is_dir($entryPath)) {
                continue;
            }
            @unlink($entryPath);
        }
        closedir($handle);
    }

    $storage = new Zend_Search_Lucene_Storage_Directory_Filesystem($workPath);
    $this->assertTrue($storage instanceof Zend_Search_Lucene_Storage_Directory);
    $this->assertEquals(count($storage->fileList()), 0);

    // Create an empty file, check it is listed, then delete it
    $file = $storage->createFile('file1');
    $this->assertTrue($file instanceof Zend_Search_Lucene_Storage_File);
    unset($file);
    $this->assertEquals($storage->fileLength('file1'), 0);

    $unexpected = array_diff($storage->fileList(), array('file1'));
    $this->assertEquals(count($unexpected), 0);

    $storage->deleteFile('file1');
    $this->assertEquals(count($storage->fileList()), 0);

    // Write ten bytes into a new file and check the reported length
    $this->assertFalse($storage->fileExists('file2'));
    $file = $storage->createFile('file2');
    $this->assertEquals($storage->fileLength('file2'), 0);
    $file->writeBytes('0123456789');
    unset($file);
    $this->assertEquals($storage->fileLength('file2'), 10);

    // Rename
    $storage->renameFile('file2', 'file3');
    $unexpected = array_diff($storage->fileList(), array('file3'));
    $this->assertEquals(count($unexpected), 0);

    // Touching must never move the modification time backwards, and must
    // move it forward once at least a second has passed
    $mtimeInitial = $storage->fileModified('file3');

    clearstatcache();
    $storage->touchFile('file3');
    $mtimeTouched = $storage->fileModified('file3');

    sleep(1);

    clearstatcache();
    $storage->touchFile('file3');
    $mtimeLater = $storage->fileModified('file3');

    $this->assertTrue($mtimeTouched >= $mtimeInitial);
    $this->assertTrue($mtimeLater > $mtimeTouched);

    // The renamed file still holds the written bytes
    $file     = $storage->getFileObject('file3');
    $contents = $file->readBytes($storage->fileLength('file3'));
    $this->assertEquals($contents, '0123456789');
    unset($file);

    // Creating a file over an existing name is expected to leave it empty
    $file     = $storage->createFile('file3');
    $contents = $file->readBytes($storage->fileLength('file3'));
    $this->assertEquals($contents, '');
    unset($file);

    $storage->deleteFile('file3');
    $this->assertEquals(count($storage->fileList()), 0);

    $storage->close();
}
/**
 * Checks that the dictionary index loaded from the '.tii' data of segment '_1'
 * equals the pre-serialized copy stored in the '_1.sti' sample file.
 */
public function testCreate()
{
    $sourceDir = new Zend_Search_Lucene_Storage_Directory_Filesystem(dirname(__FILE__) . '/_files/_source');

    // Expected values: the serialized dictionary index
    $serializedFile = $sourceDir->getFileObject('_1.sti');
    $serializedData = $serializedFile->readBytes($sourceDir->fileLength('_1.sti'));
    list($expectedDictionary, $expectedInfos) = unserialize($serializedData);

    // Actual values: the dictionary index produced by the loader from '.tii' data
    $segment   = new Zend_Search_Lucene_Index_SegmentInfo('_1', 2, $sourceDir);
    $indexFile = $segment->openCompoundFile('.tii');
    $indexData = $indexFile->readBytes($segment->compoundFileLength('.tii'));
    list($actualDictionary, $actualInfos) = Zend_Search_Lucene_Index_DictionaryLoader::load($indexData);

    $this->assertTrue($expectedDictionary == $actualDictionary);
    $this->assertTrue($expectedInfos == $actualInfos);
}
/**
 * Load terms dictionary index
 *
 * Reads the serialized dictionary index from the '<segment name>.sti' file when
 * that file exists and contains a usable payload. In every other case the index
 * is built from the segment's '.tii' data and a fresh '.sti' file is written.
 *
 * @throws Zend_Search_Lucene_Exception
 */
private function _loadDictionaryIndex()
{
    $stiFileName = $this->_name . '.sti';

    // Check, if index is already serialized
    if ($this->_directory->fileExists($stiFileName)) {
        // Load serialized dictionary index data
        $stiFile     = $this->_directory->getFileObject($stiFileName);
        $stiFileData = $stiFile->readBytes($this->_directory->fileLength($stiFileName));

        // NOTE(review): unserialize() is only safe on trusted input; this relies
        // on the index directory not being writable by untrusted parties.
        $unserializedData = @unserialize($stiFileData);

        // Accept the payload only if it really is the two-element array written
        // below. A damaged file may unserialize to something else (a scalar, a
        // shorter array), which would otherwise be destructured into nulls.
        if (is_array($unserializedData) && isset($unserializedData[0], $unserializedData[1])) {
            list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
            return;
        }
    }

    // Load data from .tii file and generate .sti file

    // Prefetch dictionary index data
    $tiiFile     = $this->openCompoundFile('.tii');
    $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));

    // Load dictionary index data
    list($this->_termDictionary, $this->_termDictionaryInfos) =
        Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);

    // Store the loaded index so the next call can take the serialized path
    $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
    $stiFile     = $this->_directory->createFile($stiFileName);
    $stiFile->writeBytes($stiFileData);
}
/**
 * Scans terms dictionary and returns term info
 *
 * Lookup order: the term info cache, then the in-memory dictionary index
 * (binary search), then a forward scan of the '.tis' file starting from the
 * closest preceding dictionary index entry.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return Zend_Search_Lucene_Index_TermInfo|null  null if the field or the term is not found
 * @throws Zend_Search_Lucene_Exception
 */
public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
{
    $termKey = $term->key();

    if (isset($this->_termInfoCache[$termKey])) {
        $termInfo = $this->_termInfoCache[$termKey];

        // Move termInfo to the end of cache
        unset($this->_termInfoCache[$termKey]);
        $this->_termInfoCache[$termKey] = $termInfo;

        return $termInfo;
    }

    if ($this->_termDictionary === null) {
        // Shared loader: also recovers from an unreadable '.sti' file by
        // rebuilding the dictionary index from the '.tii' data.
        $this->_loadDictionaryIndex();
    }

    $searchField = $this->getFieldNum($term->field);

    if ($searchField == -1) {
        return null;
    }
    $searchDicField = $this->_getFieldPosition($searchField);

    // search for appropriate value in dictionary
    $lowIndex  = 0;
    $highIndex = count($this->_termDictionary) - 1;
    while ($highIndex >= $lowIndex) {
        $mid     = ($highIndex + $lowIndex) >> 1;
        $midTerm = $this->_termDictionary[$mid];

        // Order by field position first, then by term text
        $fieldNum = $this->_getFieldPosition($midTerm[0]);
        $delta    = $searchDicField - $fieldNum;
        if ($delta == 0) {
            $delta = strcmp($term->text, $midTerm[1]);
        }

        if ($delta < 0) {
            $highIndex = $mid - 1;
        } elseif ($delta > 0) {
            $lowIndex = $mid + 1;
        } else {
            // We got it!
            $a = $this->_termDictionaryInfos[$mid];
            $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);

            // Put loaded termInfo into cache
            $this->_termInfoCache[$termKey] = $termInfo;

            // Keep the cache bounded on this path as well
            if (count($this->_termInfoCache) >= 1024) {
                $this->_cleanUpTermInfoCache();
            }

            return $termInfo;
        }
    }

    if ($highIndex == -1) {
        // Term is out of the dictionary range
        return null;
    }

    // Closest dictionary index entry that sorts before the searched term
    $prevPosition = $highIndex;
    $prevTerm     = $this->_termDictionary[$prevPosition];
    $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

    $tisFile = $this->openCompoundFile('.tis');

    // NOTE(review): the expected version is written as "(int) 0.0", i.e. 0.
    // That looks unusual for a format marker - confirm the intended constant.
    $tiVersion = $tisFile->readInt();
    if ($tiVersion != (int) 0.0) {
        throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
    }

    $termCount     = $tisFile->readLong();
    $indexInterval = $tisFile->readInt();
    $skipInterval  = $tisFile->readInt();

    // The seek is relative to the current position; 20 is presumably the size
    // of the header read above (4 + 8 + 4 + 4 bytes) - confirm against the file class.
    $tisFile->seek($prevTermInfo[4] - 20, SEEK_CUR);

    // Scan state starts from the index entry
    $termValue    = $prevTerm[1];
    $termFieldNum = $prevTerm[0];
    $docFreq      = $prevTermInfo[0];
    $freqPointer  = $prevTermInfo[1];
    $proxPointer  = $prevTermInfo[2];
    $skipOffset   = $prevTermInfo[3];

    // Read term records until the current one is no longer before the searched term
    for ($count = $prevPosition * $indexInterval + 1;
         $count <= $termCount
         && ($this->_getFieldPosition($termFieldNum) < $searchDicField
             || $this->_getFieldPosition($termFieldNum) == $searchDicField
                && strcmp($termValue, $term->text) < 0);
         $count++) {
        $termPrefixLength = $tisFile->readVInt();
        $termSuffix       = $tisFile->readString();
        $termFieldNum     = $tisFile->readVInt();
        $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;

        $docFreq      = $tisFile->readVInt();
        $freqPointer += $tisFile->readVInt();
        $proxPointer += $tisFile->readVInt();
        if ($docFreq >= $skipInterval) {
            $skipOffset = $tisFile->readVInt();
        } else {
            $skipOffset = 0;
        }
    }

    // strcmp() instead of "==": loose comparison treats numeric strings such as
    // "10" and "1e1" as equal, which could report a different term as found.
    if ($termFieldNum == $searchField && strcmp($termValue, $term->text) == 0) {
        $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
    } else {
        $termInfo = null;
    }

    // Put loaded termInfo into cache
    $this->_termInfoCache[$termKey] = $termInfo;

    // ">=" rather than "==": if the size ever passes the limit, an equality
    // test would never trigger the cleanup again.
    if (count($this->_termInfoCache) >= 1024) {
        $this->_cleanUpTermInfoCache();
    }

    return $termInfo;
}
/**
 * Get compound file length
 *
 * A file named '<segment name><extension>' that exists in the directory takes
 * precedence; otherwise the size recorded in the segment file sizes table is used.
 *
 * @param string $extension
 * @return integer
 * @throws Zend_Search_Lucene_Exception  if the file is neither in the directory nor in the sizes table
 */
public function compoundFileLength($extension)
{
    $segmentFile = $this->_name . $extension;

    if (!$this->_directory->fileExists($segmentFile)) {
        // No common file: fall back to the recorded size
        if (isset($this->_segFileSizes[$segmentFile])) {
            return $this->_segFileSizes[$segmentFile];
        }

        throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain ' . $segmentFile . ' file.');
    }

    // Common file is present in the directory
    return $this->_directory->fileLength($segmentFile);
}