/**
 * Merges the five fixture segments (_0 … _4) into 'mergedSegment' and checks
 * that the produced compound file is byte-identical to the stored sample.
 */
public function testMerge()
    {
        $segmentsDirectory = new Zend_Search_Lucene_Storage_Directory_Filesystem(dirname(__FILE__) . '/_files/_source');
        $outputDirectory   = new Zend_Search_Lucene_Storage_Directory_Filesystem(dirname(__FILE__) . '/_files');
        $segmentsList = array('_0', '_1', '_2', '_3', '_4');

        $segmentMerger = new Zend_Search_Lucene_Index_SegmentMerger($outputDirectory, 'mergedSegment');

        foreach ($segmentsList as $segmentName) {
            $segmentMerger->addSource(new Zend_Search_Lucene_Index_SegmentInfo($segmentName, 2, $segmentsDirectory));
        }

        $mergedSegment = $segmentMerger->merge();
        $isSegmentInfo = $mergedSegment instanceof Zend_Search_Lucene_Index_SegmentInfo;
        unset($mergedSegment);

        $mergedFile = $outputDirectory->getFileObject('mergedSegment.cfs');
        $mergedFileData = $mergedFile->readBytes($outputDirectory->fileLength('mergedSegment.cfs'));
        unset($mergedFile);

        $sampleFile = $outputDirectory->getFileObject('mergedSegment.cfs.sample');
        $sampleFileData = $sampleFile->readBytes($outputDirectory->fileLength('mergedSegment.cfs.sample'));
        unset($sampleFile);

        // Remove the generated file BEFORE asserting: a failing assertion aborts
        // the test, and the merged file must not be left behind in the fixture
        // directory when that happens.
        $outputDirectory->deleteFile('mergedSegment.cfs');

        $this->assertTrue($isSegmentInfo);
        // Expected value first, actual second, so failure messages read correctly.
        $this->assertEquals($sampleFileData, $mergedFileData);
    }
Beispiel #2
0
 /**
  * Exercises the filesystem directory implementation end to end: create,
  * length, list, delete, write, rename, touch/modification time, read back,
  * and truncation of an existing file through createFile().
  */
 public function testFilesystem()
 {
     $tempPath = dirname(__FILE__) . '/_tempFiles/_files';
     if (is_dir($tempPath)) {
         // Remove files left in the temporary directory by earlier runs
         $dir = opendir($tempPath);
         if ($dir !== false) {
             while (($file = readdir($dir)) !== false) {
                 if (!is_dir($tempPath . '/' . $file)) {
                     @unlink($tempPath . '/' . $file);
                 }
             }
             closedir($dir);
         }
     }

     $directory = new Zend_Search_Lucene_Storage_Directory_Filesystem($tempPath);
     $this->assertTrue($directory instanceof Zend_Search_Lucene_Storage_Directory);
     $this->assertEquals(0, count($directory->fileList()));

     // Create / list / delete an empty file
     $fileObject = $directory->createFile('file1');
     $this->assertTrue($fileObject instanceof Zend_Search_Lucene_Storage_File);
     unset($fileObject);
     $this->assertEquals(0, $directory->fileLength('file1'));
     // The array_diff() check only proves that nothing except 'file1' is listed
     // (it also passes for an empty list), so existence is asserted separately.
     $this->assertTrue($directory->fileExists('file1'));
     $this->assertEquals(0, count(array_diff($directory->fileList(), array('file1'))));
     $directory->deleteFile('file1');
     $this->assertEquals(0, count($directory->fileList()));

     // Write data and rename
     $this->assertFalse($directory->fileExists('file2'));
     $fileObject = $directory->createFile('file2');
     $this->assertEquals(0, $directory->fileLength('file2'));
     $fileObject->writeBytes('0123456789');
     unset($fileObject);
     $this->assertEquals(10, $directory->fileLength('file2'));
     $directory->renameFile('file2', 'file3');
     $this->assertTrue($directory->fileExists('file3'));
     $this->assertEquals(0, count(array_diff($directory->fileList(), array('file3'))));

     // Modification time must not go backwards on touch and must advance
     // after waiting one second.
     $modifiedAt1 = $directory->fileModified('file3');
     clearstatcache();
     $directory->touchFile('file3');
     $modifiedAt2 = $directory->fileModified('file3');
     sleep(1);
     clearstatcache();
     $directory->touchFile('file3');
     $modifiedAt3 = $directory->fileModified('file3');
     $this->assertTrue($modifiedAt2 >= $modifiedAt1);
     $this->assertTrue($modifiedAt3 > $modifiedAt2);

     // Content survives the rename
     $fileObject = $directory->getFileObject('file3');
     $this->assertEquals('0123456789', $fileObject->readBytes($directory->fileLength('file3')));
     unset($fileObject);

     // createFile() on an existing name is expected to leave an empty file
     $fileObject = $directory->createFile('file3');
     $this->assertEquals('', $fileObject->readBytes($directory->fileLength('file3')));
     unset($fileObject);

     $directory->deleteFile('file3');
     $this->assertEquals(0, count($directory->fileList()));
     $directory->close();
 }
 /**
  * Compares the dictionary index stored in serialized form ('_1.sti') with
  * the one Zend_Search_Lucene_Index_DictionaryLoader builds from the '.tii'
  * data of segment '_1'.
  */
 public function testCreate()
 {
     $sourceDir = new Zend_Search_Lucene_Storage_Directory_Filesystem(dirname(__FILE__) . '/_files/_source');

     // Reference data: the pre-serialized dictionary index
     $stiHandle  = $sourceDir->getFileObject('_1.sti');
     $stiLength  = $sourceDir->fileLength('_1.sti');
     $serialized = $stiHandle->readBytes($stiLength);
     list($expectedDictionary, $expectedInfos) = unserialize($serialized);

     // Data under test: the index parsed from the segment's .tii data
     $segment   = new Zend_Search_Lucene_Index_SegmentInfo('_1', 2, $sourceDir);
     $tiiHandle = $segment->openCompoundFile('.tii');
     $tiiLength = $segment->compoundFileLength('.tii');
     $rawIndex  = $tiiHandle->readBytes($tiiLength);
     list($actualDictionary, $actualInfos) = Zend_Search_Lucene_Index_DictionaryLoader::load($rawIndex);

     $this->assertTrue($expectedDictionary == $actualDictionary);
     $this->assertTrue($expectedInfos == $actualInfos);
 }
Beispiel #4
0
    /**
     * Load terms dictionary index
     *
     * Fills $this->_termDictionary and $this->_termDictionaryInfos. The
     * serialized '<segment name>.sti' file is used when it exists and contains
     * a usable two-element array; otherwise the index is parsed from the
     * segment's '.tii' data and the '.sti' file is (re)written.
     *
     * @throws Zend_Search_Lucene_Exception
     */
    private function _loadDictionaryIndex()
    {
        $stiFileName = $this->_name . '.sti';

        // Check, if index is already serialized
        if ($this->_directory->fileExists($stiFileName)) {
            // Load serialized dictionary index data
            $stiFile = $this->_directory->getFileObject($stiFileName);
            $stiFileData = $stiFile->readBytes($this->_directory->fileLength($stiFileName));

            // unserialize() returns false for unreadable data, but a damaged or
            // foreign file can also yield a scalar or a short array. Feeding
            // that to list() would silently leave both properties null, so the
            // shape is checked and anything unusable falls through to the
            // rebuild below.
            $unserializedData = @unserialize($stiFileData);
            if (is_array($unserializedData) && isset($unserializedData[0], $unserializedData[1])) {
                list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
                return;
            }
        }

        // Load data from .tii file and generate .sti file

        // Prefetch dictionary index data
        $tiiFile = $this->openCompoundFile('.tii');
        $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));

        // Load dictionary index data
        list($this->_termDictionary, $this->_termDictionaryInfos) =
                    Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);

        // Store the parsed index in serialized form for the next load
        $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
        $stiFile = $this->_directory->createFile($stiFileName);
        $stiFile->writeBytes($stiFileData);
    }
 /**
  * Scans terms dictionary and returns term info
  *
  * Lookup order: the in-memory term info cache, then an exact hit in the
  * loaded dictionary index, and finally a sequential scan of the '.tis' file
  * starting at the closest preceding dictionary entry.
  *
  * @param Zend_Search_Lucene_Index_Term $term
  * @return Zend_Search_Lucene_Index_TermInfo|null  null if the field or the term is not present
  * @throws Zend_Search_Lucene_Exception  if the '.tis' file does not start with the expected version marker
  */
 public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
 {
     $termKey = $term->key();
     // Note: isset() is false for a cached null, so "term not found" results
     // stored below are recomputed on the next call rather than served from here.
     if (isset($this->_termInfoCache[$termKey])) {
         $termInfo = $this->_termInfoCache[$termKey];
         // Move termInfo to the end of cache
         unset($this->_termInfoCache[$termKey]);
         $this->_termInfoCache[$termKey] = $termInfo;
         return $termInfo;
     }

     if ($this->_termDictionary === null) {
         $stiFileName = $this->_name . '.sti';
         $dictionaryLoaded = false;

         // Check, if index is already serialized
         if ($this->_directory->fileExists($stiFileName)) {
             // Prefetch dictionary index data
             $stiFile = $this->_directory->getFileObject($stiFileName);
             $stiFileData = $stiFile->readBytes($this->_directory->fileLength($stiFileName));

             // A damaged .sti file must not leave the dictionary null: accept
             // the data only if it has the expected two-element shape,
             // otherwise rebuild it from the .tii data below.
             $unserializedData = @unserialize($stiFileData);
             if (is_array($unserializedData) && isset($unserializedData[0], $unserializedData[1])) {
                 list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
                 $dictionaryLoaded = true;
             }
         }

         if (!$dictionaryLoaded) {
             // Prefetch dictionary index data
             $tiiFile = $this->openCompoundFile('.tii');
             $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));
             // Load dictionary index data
             list($this->_termDictionary, $this->_termDictionaryInfos) = Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);
             $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
             $stiFile = $this->_directory->createFile($stiFileName);
             $stiFile->writeBytes($stiFileData);
         }
     }

     $searchField = $this->getFieldNum($term->field);
     if ($searchField == -1) {
         return null;
     }
     $searchDicField = $this->_getFieldPosition($searchField);

     // Binary search for the term in the dictionary index. Entries are compared
     // by field position first, then by term text.
     $lowIndex = 0;
     $highIndex = count($this->_termDictionary) - 1;
     while ($highIndex >= $lowIndex) {
         $mid = ($highIndex + $lowIndex) >> 1;
         $midTerm = $this->_termDictionary[$mid];
         $fieldNum = $this->_getFieldPosition($midTerm[0]);
         $delta = $searchDicField - $fieldNum;
         if ($delta == 0) {
             $delta = strcmp($term->text, $midTerm[1]);
         }
         if ($delta < 0) {
             $highIndex = $mid - 1;
         } elseif ($delta > 0) {
             $lowIndex = $mid + 1;
         } else {
             // We got it!
             $a = $this->_termDictionaryInfos[$mid];
             $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);
             // Put loaded termInfo into cache. The size bound is enforced here
             // as well; otherwise inserts on this path could step the count
             // past the limit and the cache would never be trimmed again.
             $this->_termInfoCache[$termKey] = $termInfo;
             if (count($this->_termInfoCache) >= 1024) {
                 $this->_cleanUpTermInfoCache();
             }
             return $termInfo;
         }
     }

     if ($highIndex == -1) {
         // Term is out of the dictionary range
         return null;
     }

     // $highIndex now points at the last dictionary entry that sorts before
     // the searched term; the sequential scan starts from there.
     $prevPosition = $highIndex;
     $prevTerm = $this->_termDictionary[$prevPosition];
     $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

     $tisFile = $this->openCompoundFile('.tis');
     $tiVersion = $tisFile->readInt();
     // The Lucene index file format documents the .tis version marker as -2
     // (0xFFFFFFFE as a 32-bit value) for the header layout read here
     // (version, term count, index interval, skip interval = 20 bytes).
     // The previous literal "(int) 0.0" compared against 0.
     // NOTE(review): assumes readInt() yields the 32-bit value in the same
     // representation as this cast - confirm on the target platform.
     if ($tiVersion != (int) 0xFFFFFFFE) {
         throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
     }
     $termCount = $tisFile->readLong();
     $indexInterval = $tisFile->readInt();
     $skipInterval = $tisFile->readInt();

     // 20 bytes of header have been consumed above, hence the correction of
     // the stored offset when seeking relative to the current position.
     $tisFile->seek($prevTermInfo[4] - 20, SEEK_CUR);

     $termValue = $prevTerm[1];
     $termFieldNum = $prevTerm[0];
     $freqPointer = $prevTermInfo[1];
     $proxPointer = $prevTermInfo[2];
     // Defined up front so they exist even if the scan loop does not run.
     $docFreq = 0;
     $skipOffset = 0;

     // Read term entries until one is reached that is not smaller than the
     // searched term (same ordering as the binary search above) or the file
     // runs out of terms.
     for ($count = $prevPosition * $indexInterval + 1;
          $count <= $termCount
              && ($this->_getFieldPosition($termFieldNum) < $searchDicField
                  || $this->_getFieldPosition($termFieldNum) == $searchDicField
                      && strcmp($termValue, $term->text) < 0);
          $count++) {
         $termPrefixLength = $tisFile->readVInt();
         $termSuffix = $tisFile->readString();
         $termFieldNum = $tisFile->readVInt();
         // Term text is stored as a shared prefix with the previous term plus a suffix
         $termValue = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;
         $docFreq = $tisFile->readVInt();
         // Pointers are stored as deltas from the previous entry
         $freqPointer += $tisFile->readVInt();
         $proxPointer += $tisFile->readVInt();
         if ($docFreq >= $skipInterval) {
             $skipOffset = $tisFile->readVInt();
         } else {
             $skipOffset = 0;
         }
     }

     // strcmp() instead of "==": a loose comparison treats numeric-looking
     // strings such as "1e2" and "100" as equal and would report a hit for a
     // different term.
     if ($termFieldNum == $searchField && strcmp($termValue, $term->text) == 0) {
         $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
     } else {
         $termInfo = null;
     }

     // Put loaded termInfo into cache
     $this->_termInfoCache[$termKey] = $termInfo;
     if (count($this->_termInfoCache) >= 1024) {
         $this->_cleanUpTermInfoCache();
     }
     return $termInfo;
 }
 /**
  * Get compound file length
  *
  * A file stored separately in the directory under the segment's name takes
  * precedence; otherwise the size recorded in $this->_segFileSizes is used.
  *
  * @param string $extension
  * @return integer
  * @throws Zend_Search_Lucene_Exception  if neither source knows the file
  */
 public function compoundFileLength($extension)
 {
     $entryName = $this->_name . $extension;

     // Try to get common file first
     if ($this->_directory->fileExists($entryName)) {
         return $this->_directory->fileLength($entryName);
     }

     // Fall back to the size table of the compound file
     if (isset($this->_segFileSizes[$entryName])) {
         return $this->_segFileSizes[$entryName];
     }

     throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain ' . $entryName . ' file.');
 }