/**
 * Checks that the dictionary index built by
 * Zend_Search_Lucene_Index_DictionaryLoader::load() from the '.tii' data of
 * segment '_1' equals the pre-serialized copy stored in the '_1.sti' fixture.
 */
public function testCreate()
{
    $fixtureDir = new Zend_Search_Lucene_Storage_Directory_Filesystem(dirname(__FILE__) . '/_files/_source');

    // Expected values: the serialized dictionary index shipped with the fixtures.
    $serializedIndex = $fixtureDir->getFileObject('_1.sti')
                                  ->readBytes($fixtureDir->fileLength('_1.sti'));
    list($expectedDictionary, $expectedInfos) = unserialize($serializedIndex);

    // Actual values: the same index parsed from the segment's raw '.tii' data.
    $segment  = new Zend_Search_Lucene_Index_SegmentInfo('_1', 2, $fixtureDir);
    $rawIndex = $segment->openCompoundFile('.tii')
                        ->readBytes($segment->compoundFileLength('.tii'));
    list($actualDictionary, $actualInfos) = Zend_Search_Lucene_Index_DictionaryLoader::load($rawIndex);

    // Loose (==) array comparison, as the two structures come from different sources.
    $this->assertTrue($expectedDictionary == $actualDictionary);
    $this->assertTrue($expectedInfos == $actualInfos);
}
/**
 * Load terms dictionary index
 *
 * Fills $this->_termDictionary and $this->_termDictionaryInfos.
 *
 * A previously serialized copy ('<segment name>.sti') is used when it exists
 * and contains a usable payload. Otherwise the index is parsed from the
 * segment's '.tii' data and a fresh '.sti' file is written for later calls.
 *
 * @throws Zend_Search_Lucene_Exception
 */
private function _loadDictionaryIndex()
{
    $stiFileName = $this->_name . '.sti';

    // Check, if index is already serialized
    if ($this->_directory->fileExists($stiFileName)) {
        // Load serialized dictionary index data
        $stiFile     = $this->_directory->getFileObject($stiFileName);
        $stiFileData = $stiFile->readBytes($this->_directory->fileLength($stiFileName));

        // '@' is deliberate: a damaged cache file must not raise a notice,
        // it is simply regenerated below.
        // NOTE(review): unserialize() is applied to file contents; this
        // assumes the index directory is trusted - confirm.
        $unserializedData = @unserialize($stiFileData);

        // Accept the payload only if it really is the two-element structure
        // written at the end of this method. A well-formed but unexpected
        // value (scalar, short array, null entries) would otherwise be
        // destructured into nulls and leave the dictionary unusable.
        if (is_array($unserializedData)
            && isset($unserializedData[0], $unserializedData[1])
        ) {
            list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
            return;
        }
    }

    // Load data from .tii file and generate .sti file

    // Prefetch dictionary index data
    $tiiFile     = $this->openCompoundFile('.tii');
    $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));

    // Load dictionary index data
    list($this->_termDictionary, $this->_termDictionaryInfos) =
        Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);

    // Persist the parsed index so the next load can skip the .tii parsing
    $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
    $stiFile     = $this->_directory->createFile($stiFileName);
    $stiFile->writeBytes($stiFileData);
}
/**
 * Scans terms dictionary and returns term info
 *
 * Lookup order:
 *  1. the in-memory term info cache (a hit is moved to the end of the cache);
 *  2. a binary search over the in-memory dictionary index;
 *  3. a sequential scan of the '.tis' file, starting from the closest
 *     preceding dictionary index entry.
 *
 * Returns null when the term's field is unknown to the segment, when the
 * term sorts before the first dictionary index entry, or when the scan does
 * not find the term.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return Zend_Search_Lucene_Index_TermInfo|null
 * @throws Zend_Search_Lucene_Exception  if the '.tis' version check fails
 */
public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
{
    $termKey = $term->key();
    if (isset($this->_termInfoCache[$termKey])) {
        $termInfo = $this->_termInfoCache[$termKey];

        // Move termInfo to the end of cache
        unset($this->_termInfoCache[$termKey]);
        $this->_termInfoCache[$termKey] = $termInfo;

        return $termInfo;
    }

    if ($this->_termDictionary === null) {
        // Shared loader: reads the serialized '.sti' copy when usable,
        // otherwise parses '.tii' and (re)generates the '.sti' file.
        $this->_loadDictionaryIndex();
    }

    $searchField = $this->getFieldNum($term->field);

    if ($searchField == -1) {
        return null;
    }
    $searchDicField = $this->_getFieldPosition($searchField);

    // search for appropriate value in dictionary
    $lowIndex  = 0;
    $highIndex = count($this->_termDictionary) - 1;
    while ($highIndex >= $lowIndex) {
        $mid     = ($highIndex + $lowIndex) >> 1;
        $midTerm = $this->_termDictionary[$mid];

        // Order by field position first, then by term text
        $fieldNum = $this->_getFieldPosition($midTerm[0]);
        $delta    = $searchDicField - $fieldNum;
        if ($delta == 0) {
            $delta = strcmp($term->text, $midTerm[1]);
        }

        if ($delta < 0) {
            $highIndex = $mid - 1;
        } elseif ($delta > 0) {
            $lowIndex = $mid + 1;
        } else {
            // We got it!
            $a = $this->_termDictionaryInfos[$mid];
            $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);

            // Put loaded termInfo into cache; trim here as well so that
            // exact-match hits cannot grow the cache without bound.
            $this->_termInfoCache[$termKey] = $termInfo;
            if (count($this->_termInfoCache) >= 1024) {
                $this->_cleanUpTermInfoCache();
            }

            return $termInfo;
        }
    }

    if ($highIndex == -1) {
        // Term is out of the dictionary range
        return null;
    }

    // $highIndex now points at the last index entry that sorts before the
    // requested term; the sequential scan starts from there.
    $prevPosition = $highIndex;
    $prevTerm     = $this->_termDictionary[$prevPosition];
    $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

    $tisFile   = $this->openCompoundFile('.tis');
    $tiVersion = $tisFile->readInt();
    // NOTE(review): the expected version is written as "(int) 0.0" (i.e. 0),
    // which looks like a mangled constant - confirm against the file format.
    if ($tiVersion != (int) 0.0) {
        throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
    }

    $termCount     = $tisFile->readLong();
    $indexInterval = $tisFile->readInt();
    $skipInterval  = $tisFile->readInt();

    // Element 4 of the index entry is used as the seek target. The seek is
    // relative (SEEK_CUR); the "- 20" presumably compensates for the header
    // fields already consumed above - TODO confirm field sizes.
    $tisFile->seek($prevTermInfo[4] - 20, SEEK_CUR);

    $termValue    = $prevTerm[1];
    $termFieldNum = $prevTerm[0];
    $freqPointer  = $prevTermInfo[1];
    $proxPointer  = $prevTermInfo[2];

    // Read entries while the current one still sorts before the requested term
    for ($count = $prevPosition * $indexInterval + 1;
         $count <= $termCount
             && ($this->_getFieldPosition($termFieldNum) < $searchDicField
                 || ($this->_getFieldPosition($termFieldNum) == $searchDicField
                     && strcmp($termValue, $term->text) < 0));
         $count++
    ) {
        $termPrefixLength = $tisFile->readVInt();
        $termSuffix       = $tisFile->readString();
        $termFieldNum     = $tisFile->readVInt();
        // Term text is stored as a shared prefix length plus a suffix
        $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;

        $docFreq      = $tisFile->readVInt();
        // Pointers are accumulated from the values read for each entry
        $freqPointer += $tisFile->readVInt();
        $proxPointer += $tisFile->readVInt();
        if ($docFreq >= $skipInterval) {
            $skipOffset = $tisFile->readVInt();
        } else {
            $skipOffset = 0;
        }
    }

    // Compare term text with strcmp(), as the binary search above does.
    // A loose "==" treats numeric-looking strings such as "1000" and "1000.0"
    // as equal and would return the info of a different term.
    if ($termFieldNum == $searchField && strcmp($termValue, $term->text) == 0) {
        $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
    } else {
        $termInfo = null;
    }

    // Put loaded termInfo into cache
    $this->_termInfoCache[$termKey] = $termInfo;

    // ">=" rather than "==": the threshold must still trigger if the cache
    // has already grown past it.
    if (count($this->_termInfoCache) >= 1024) {
        $this->_cleanUpTermInfoCache();
    }

    return $termInfo;
}
/**
 * Loads the terms dictionary index into $this->_termDictionary and
 * $this->_termDictionaryInfos.
 *
 * The serialized '<segment name>.sti' file is preferred when it exists and
 * holds a usable payload. In every other case the index is parsed from the
 * segment's '.tii' data and the '.sti' file is (re)written.
 */
private function _loadDictionaryIndex()
{
    $cacheFileName = $this->_name . '.sti';

    if ($this->_directory->fileExists($cacheFileName)) {
        $cacheFile = $this->_directory->getFileObject($cacheFileName);
        $cacheData = $cacheFile->readBytes($this->_directory->fileLength($cacheFileName));

        // '@' is deliberate: an unreadable cache is not an error, the index
        // is rebuilt from the .tii data below.
        // NOTE(review): unserialize() is applied to file contents; this
        // assumes the index directory is trusted - confirm.
        $cached = @unserialize($cacheData);

        // Only a two-element array matches what this method serializes.
        // Anything else (scalar, short array, null entries) would be
        // destructured into nulls, so it is treated as a cache miss.
        if (is_array($cached) && isset($cached[0], $cached[1])) {
            list($this->_termDictionary, $this->_termDictionaryInfos) = $cached;
            return;
        }
    }

    // Cache missing or unusable: parse the raw dictionary index
    $tiiFile     = $this->openCompoundFile('.tii');
    $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));

    list($this->_termDictionary, $this->_termDictionaryInfos) =
        Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);

    // Store the parsed result for subsequent loads
    $serialized = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
    $cacheFile  = $this->_directory->createFile($cacheFileName);
    $cacheFile->writeBytes($serialized);
}