/**
 * Reads the next entry of the terms dictionary and returns its term.
 *
 * Dictionary entries are stored relative to the previous entry: the term text
 * is the previous term's prefix plus a suffix, and the frequency/proximity
 * pointers are deltas added to the previous term info. In SM_FULL_INFO and
 * SM_MERGE_INFO scan modes the term positions are loaded as well, for every
 * document that has an entry in the document map.
 *
 * @return Zend_Search_Lucene_Index_Term|null  null when there is nothing left to read
 */
public function nextTerm()
{
    if ($this->_tisFile === null || $this->_termCount == 0) {
        // Nothing (more) to scan: reset the whole stream state.
        $this->_lastTerm          = null;
        $this->_lastTermInfo      = null;
        $this->_lastTermPositions = null;
        $this->_docMap            = null;

        // Dropping the file handles here may be necessary for an "empty" segment
        $this->_tisFile = null;
        $this->_frqFile = null;
        $this->_prxFile = null;

        return null;
    }

    $dictionary = $this->_tisFile;

    // Term text = prefix taken from the previous term + stored suffix
    $sharedPrefixLength = $dictionary->readVInt();
    $suffix             = $dictionary->readString();
    $fieldNumber        = $dictionary->readVInt();
    $text               = Zend_Search_Lucene_Index_Term::getPrefix($this->_lastTerm->text, $sharedPrefixLength)
                        . $suffix;

    $this->_lastTerm = new Zend_Search_Lucene_Index_Term($text, $this->_fields[$fieldNumber]->name);

    // Term info: pointers are stored as deltas against the previous term info
    $docFreq     = $dictionary->readVInt();
    $freqPointer = $this->_lastTermInfo->freqPointer + $dictionary->readVInt();
    $proxPointer = $this->_lastTermInfo->proxPointer + $dictionary->readVInt();

    // The skip offset is present in the file only for sufficiently frequent terms
    $skipOffset = ($docFreq >= $this->_skipInterval) ? $dictionary->readVInt() : 0;

    $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);

    $loadPositions = $this->_termsScanMode == self::SM_FULL_INFO
                  || $this->_termsScanMode == self::SM_MERGE_INFO;

    if ($loadPositions) {
        $this->_lastTermPositions = array();

        // Collect per-document frequencies from the frequency file
        $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);

        $termFreqs  = array();
        $currentDoc = 0;
        for ($i = 0; $i < $this->_lastTermInfo->docFreq; $i++) {
            $delta = $this->_frqFile->readVInt();

            if ($delta % 2 == 1) {
                // Odd value: the frequency is 1 and is not stored separately
                $currentDoc += ($delta - 1) / 2;
                $termFreqs[$currentDoc] = 1;
            } else {
                // Even value: the frequency follows as its own VInt
                $currentDoc += $delta / 2;
                $termFreqs[$currentDoc] = $this->_frqFile->readVInt();
            }
        }

        // Read the positions of every occurrence from the proximity file
        $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);

        foreach ($termFreqs as $docNumber => $occurrences) {
            $position     = 0;
            $docPositions = array();

            // Positions are stored as deltas; they must be consumed even when
            // the document turns out not to be in the document map.
            for ($i = 0; $i < $occurrences; $i++) {
                $position      += $this->_prxFile->readVInt();
                $docPositions[] = $position;
            }

            if (!isset($this->_docMap[$docNumber])) {
                continue;
            }

            $this->_lastTermPositions[$this->_docMap[$docNumber]] = $docPositions;
        }
    }

    if (--$this->_termCount == 0) {
        // Last term has been read: the files are not needed any more
        $this->_tisFile = null;
        $this->_frqFile = null;
        $this->_prxFile = null;
    }

    return $this->_lastTerm;
}
/**
 * Commits pending changes and releases everything the index holds.
 *
 * Does nothing when the index has already been closed.
 */
private function _close()
{
    if (!$this->_closed) {
        $this->commit();

        // Release the shared lock
        $this->_lock->unlock();

        if ($this->_closeDirOnExit) {
            $this->_directory->close();
        }

        $this->_directory    = null;
        $this->_writer       = null;
        $this->_segmentInfos = null;

        // Mark the index closed so that further calls are no-ops
        $this->_closed = true;
    }
}
/**
 * Writes one Term Dictionary entry (.tis or .tii file).
 *
 * The term text is written as "shared prefix length + suffix" relative to the
 * previous term when both belong to the same field; the prefix length is
 * counted in whole UTF-8 characters. Frequency and proximity pointers are
 * written as deltas against the previous term info when there is one.
 * On return $prevTerm and $prevTermInfo refer to the entry just written.
 *
 * @param Zend_Search_Lucene_Storage_File   $dicFile
 * @param Zend_Search_Lucene_Index_Term     $prevTerm
 * @param Zend_Search_Lucene_Index_Term     $term
 * @param Zend_Search_Lucene_Index_TermInfo $prevTermInfo
 * @param Zend_Search_Lucene_Index_TermInfo $termInfo
 */
protected function _dumpTermDictEntry(Zend_Search_Lucene_Storage_File $dicFile, &$prevTerm, Zend_Search_Lucene_Index_Term $term, &$prevTermInfo, Zend_Search_Lucene_Index_TermInfo $termInfo)
{
    if (isset($prevTerm) && $prevTerm->field == $term->field) {
        $previousText = $prevTerm->text;
        $currentText  = $term->text;

        // Number of leading bytes both terms have in common
        $byteLimit   = min(strlen($previousText), strlen($currentText));
        $commonBytes = 0;
        for (; $commonBytes < $byteLimit; $commonBytes++) {
            if ($previousText[$commonBytes] != $currentText[$commonBytes]) {
                break;
            }
        }

        // Shrink the byte match to whole UTF-8 characters
        $sharedBytes = 0;
        $sharedChars = 0;
        while ($sharedBytes < $commonBytes) {
            $leadByte = ord($currentText[$sharedBytes]);

            if (($leadByte & 0xc0) != 0xc0) {
                $charLength = 1;
            } elseif (($leadByte & 0x20) == 0) {
                $charLength = 2;
            } elseif (($leadByte & 0x10) == 0) {
                $charLength = 3;
            } else {
                $charLength = 4;
            }

            $nextOffset = $sharedBytes + $charLength;
            if ($nextOffset > $commonBytes) {
                // The character crosses the matched bytes boundary - leave it in the suffix
                break;
            }

            $sharedChars++;
            $sharedBytes = $nextOffset;
        }

        // Prefix length, then suffix
        $dicFile->writeVInt($sharedChars);
        $dicFile->writeString(substr($currentText, $sharedBytes));
    } else {
        // Nothing is shared: zero prefix length, whole text as suffix
        $dicFile->writeVInt(0);
        $dicFile->writeString($term->text);
    }

    // Field number
    $dicFile->writeVInt($term->field);

    // DocFreq (the count of documents which contain the term)
    $dicFile->writeVInt($termInfo->docFreq);

    $prevTerm = $term;

    if (isset($prevTermInfo)) {
        // FreqDelta and ProxDelta relative to the previous entry
        $dicFile->writeVInt($termInfo->freqPointer - $prevTermInfo->freqPointer);
        $dicFile->writeVInt($termInfo->proxPointer - $prevTermInfo->proxPointer);
    } else {
        // No previous entry: the pointers themselves are written
        $dicFile->writeVInt($termInfo->freqPointer);
        $dicFile->writeVInt($termInfo->proxPointer);
    }

    // SkipOffset is written only when it is not 0
    if ($termInfo->skipOffset != 0) {
        $dicFile->writeVInt($termInfo->skipOffset);
    }

    $prevTermInfo = $termInfo;
}
/**
 * Object destructor.
 *
 * Commits pending changes, releases the shared lock and, when the index is
 * flagged to do so, closes the directory.
 */
public function __destruct()
{
    $this->commit();

    // Release the shared lock
    $this->_lock->unlock();

    if (!$this->_closeDirOnExit) {
        return;
    }

    $this->_directory->close();
}
/**
 * Dump Term Dictionary segment file entry.
 * Used to write entry to .tis or .tii files
 *
 * The term text is stored as "shared prefix length + suffix" relative to the
 * previous term of the same field. The prefix length is written as a number
 * of whole UTF-8 characters, not bytes, and the shared prefix never ends in
 * the middle of a multi-byte sequence, so the suffix always starts on a
 * character boundary. For single-byte (ASCII) text the output is the same
 * as a plain byte count.
 *
 * On return $prevTerm and $prevTermInfo refer to the entry just written.
 *
 * @param Zend_Search_Lucene_Storage_File   $dicFile
 * @param Zend_Search_Lucene_Index_Term     $prevTerm
 * @param Zend_Search_Lucene_Index_Term     $term
 * @param Zend_Search_Lucene_Index_TermInfo $prevTermInfo
 * @param Zend_Search_Lucene_Index_TermInfo $termInfo
 */
protected function _dumpTermDictEntry(Zend_Search_Lucene_Storage_File $dicFile, &$prevTerm, Zend_Search_Lucene_Index_Term $term, &$prevTermInfo, Zend_Search_Lucene_Index_TermInfo $termInfo)
{
    if (isset($prevTerm) && $prevTerm->field == $term->field) {
        // Count the leading bytes shared by both terms
        $matchedBytes = 0;
        $maxBytes     = min(strlen($prevTerm->text), strlen($term->text));
        while ($matchedBytes < $maxBytes &&
               $prevTerm->text[$matchedBytes] == $term->text[$matchedBytes]) {
            $matchedBytes++;
        }

        // Reduce the byte match to complete UTF-8 characters
        $prefixBytes = 0;
        $prefixChars = 0;
        while ($prefixBytes < $matchedBytes) {
            // The lead byte tells how many bytes the character occupies
            $leadByte  = ord($term->text[$prefixBytes]);
            $charBytes = 1;
            if (($leadByte & 0xc0) == 0xc0) {
                $charBytes++;
                if ($leadByte & 0x20) {
                    $charBytes++;
                    if ($leadByte & 0x10) {
                        $charBytes++;
                    }
                }
            }

            if ($prefixBytes + $charBytes > $matchedBytes) {
                // Only part of this character matched: it belongs to the suffix
                break;
            }

            $prefixChars++;
            $prefixBytes += $charBytes;
        }

        // Write prefix length (in characters)
        $dicFile->writeVInt($prefixChars);
        // Write suffix
        $dicFile->writeString(substr($term->text, $prefixBytes));
    } else {
        // Write prefix length
        $dicFile->writeVInt(0);
        // Write suffix
        $dicFile->writeString($term->text);
    }

    // Write field number
    $dicFile->writeVInt($term->field);

    // DocFreq (the count of documents which contain the term)
    $dicFile->writeVInt($termInfo->docFreq);

    $prevTerm = $term;

    if (!isset($prevTermInfo)) {
        // No previous entry: the pointers themselves are written
        // Write FreqDelta
        $dicFile->writeVInt($termInfo->freqPointer);
        // Write ProxDelta
        $dicFile->writeVInt($termInfo->proxPointer);
    } else {
        // Write FreqDelta
        $dicFile->writeVInt($termInfo->freqPointer - $prevTermInfo->freqPointer);
        // Write ProxDelta
        $dicFile->writeVInt($termInfo->proxPointer - $prevTermInfo->proxPointer);
    }

    // Write SkipOffset - only present in the entry when it is not 0
    if ($termInfo->skipOffset != 0) {
        $dicFile->writeVInt($termInfo->skipOffset);
    }

    $prevTermInfo = $termInfo;
}
/**
 * Releases the exclusive write lock on the index.
 *
 * The lock is unlocked, the local handle to it is dropped, and then the
 * 'write.lock' file is deleted from the given directory.
 *
 * @param Zend_Search_Lucene_Storage_Directory $defaultLockDirectory
 * @param Zend_Search_Lucene_Storage_File $lock
 */
public static function releaseWriteLock(Zend_Search_Lucene_Storage_Directory $defaultLockDirectory, Zend_Search_Lucene_Storage_File $lock)
{
    $lockFileName = 'write.lock';

    // Unlock and let go of the handle before the file itself is removed
    $lock->unlock();
    unset($lock);

    $defaultLockDirectory->deleteFile($lockFileName);
}
/**
 * Close current index and free resources
 *
 * Commits pending changes, releases the shared lock, closes the directory
 * when the index is flagged to do so, and drops the references to the
 * directory, the writer and the segment infos.
 *
 * The method may be called more than once: once the directory reference has
 * been dropped the index is treated as closed and further calls return
 * immediately instead of committing and unlocking again or calling close()
 * on a null directory.
 *
 * @internal
 */
public function close()
{
    if ($this->_directory === null) {
        // Index is already closed and resources are cleaned up
        return;
    }

    $this->commit();

    // Free shared lock
    $this->_lock->unlock();

    if ($this->_closeDirOnExit) {
        $this->_directory->close();
    }

    // A null directory doubles as the "already closed" marker checked above
    $this->_directory    = null;
    $this->_writer       = null;
    $this->_segmentInfos = null;
}
/**
 * Writes a single Term Dictionary entry to the given dictionary file.
 *
 * When the previous term belongs to the same field, only the part of the
 * text that differs is written, preceded by the length (in whole UTF-8
 * characters) of the prefix shared with the previous term. Pointers are
 * written as deltas against the previous term info when one is set.
 * $prevTerm and $prevTermInfo are updated to the entry just written.
 *
 * @param Zend_Search_Lucene_Storage_File   $dicFile
 * @param Zend_Search_Lucene_Index_Term     $prevTerm
 * @param Zend_Search_Lucene_Index_Term     $term
 * @param Zend_Search_Lucene_Index_TermInfo $prevTermInfo
 * @param Zend_Search_Lucene_Index_TermInfo $termInfo
 */
protected function _dumpTermDictEntry(Zend_Search_Lucene_Storage_File $dicFile, &$prevTerm, Zend_Search_Lucene_Index_Term $term, &$prevTermInfo, Zend_Search_Lucene_Index_TermInfo $termInfo)
{
    if (isset($prevTerm) && $prevTerm->field == $term->field) {
        $oldText = $prevTerm->text;
        $newText = $term->text;
        $limit   = min(strlen($oldText), strlen($newText));

        // Byte-wise common prefix of the two texts
        $matched = 0;
        while ($matched < $limit) {
            if ($oldText[$matched] != $newText[$matched]) {
                break;
            }
            $matched++;
        }

        // Walk the matched bytes character by character
        $offset = 0;
        $chars  = 0;
        while ($offset < $matched) {
            $lead  = ord($newText[$offset]);
            $width = 1;
            if (($lead & 0xc0) == 0xc0) {
                // Multi-byte character: 2, 3 or 4 bytes depending on the lead byte
                $width = 2;
                if ($lead & 0x20) {
                    $width = ($lead & 0x10) ? 4 : 3;
                }
            }

            if ($offset + $width > $matched) {
                // Character is only partially matched, so it stays in the suffix
                break;
            }

            $offset += $width;
            $chars++;
        }

        $dicFile->writeVInt($chars);
        $dicFile->writeString(substr($newText, $offset));
    } else {
        // First term, or a different field: no shared prefix
        $dicFile->writeVInt(0);
        $dicFile->writeString($term->text);
    }

    $dicFile->writeVInt($term->field);
    $dicFile->writeVInt($termInfo->docFreq);

    $prevTerm = $term;

    if (isset($prevTermInfo)) {
        // Deltas against the previously written term info
        $dicFile->writeVInt($termInfo->freqPointer - $prevTermInfo->freqPointer);
        $dicFile->writeVInt($termInfo->proxPointer - $prevTermInfo->proxPointer);
    } else {
        $dicFile->writeVInt($termInfo->freqPointer);
        $dicFile->writeVInt($termInfo->proxPointer);
    }

    // The skip offset is part of the entry only when it is not 0
    if ($termInfo->skipOffset != 0) {
        $dicFile->writeVInt($termInfo->skipOffset);
    }

    $prevTermInfo = $termInfo;
}