/**
 * Dump Term Dictionary segment file entry.
 * Used to write entry to .tis or .tii files
 *
 * The term text is prefix-compressed against the previous term when both
 * terms belong to the same field. The shared prefix is measured in whole
 * UTF-8 characters (not bytes), and the suffix always starts on a character
 * boundary, so a multi-byte character is never split between the prefix
 * and the suffix.
 *
 * Both $prevTerm and $prevTermInfo are taken by reference and are replaced
 * with $term / $termInfo, so that the next call can delta-encode against
 * the entry written here.
 *
 * @param Zend_Search_Lucene_Storage_File $dicFile
 * @param Zend_Search_Lucene_Index_Term $prevTerm
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_TermInfo $prevTermInfo
 * @param Zend_Search_Lucene_Index_TermInfo $termInfo
 */
protected function _dumpTermDictEntry(Zend_Search_Lucene_Storage_File $dicFile,
                                      &$prevTerm,     Zend_Search_Lucene_Index_Term     $term,
                                      &$prevTermInfo, Zend_Search_Lucene_Index_TermInfo $termInfo)
{
    if (isset($prevTerm) && $prevTerm->field == $term->field) {
        // Length (in bytes) of the run of identical leading bytes
        $matchedBytes = 0;
        $maxBytes     = min(strlen($prevTerm->text), strlen($term->text));
        while ($matchedBytes < $maxBytes  &&
               $prevTerm->text[$matchedBytes] === $term->text[$matchedBytes]) {
            $matchedBytes++;
        }

        // Convert the matched byte run into a count of complete UTF-8
        // characters. The width of each character is derived from its
        // lead byte: 11xxxxxx starts a multi-byte sequence, and the
        // 0x20 / 0x10 bits extend it to three / four bytes.
        $prefixBytes = 0;
        $prefixChars = 0;
        while ($prefixBytes < $matchedBytes) {
            $leadByte  = ord($term->text[$prefixBytes]);
            $charBytes = 1;
            if (($leadByte & 0xC0) == 0xC0) {
                $charBytes++;
                if ($leadByte & 0x20) {
                    $charBytes++;
                    if ($leadByte & 0x10) {
                        $charBytes++;
                    }
                }
            }

            if ($prefixBytes + $charBytes > $matchedBytes) {
                // The character is only partially matched (the terms differ
                // in one of its trailing bytes), so it belongs to the suffix
                break;
            }

            $prefixChars++;
            $prefixBytes += $charBytes;
        }

        // Write prefix length (in characters)
        $dicFile->writeVInt($prefixChars);
        // Write suffix (starting on a character boundary)
        $dicFile->writeString(substr($term->text, $prefixBytes));
    } else {
        // No previous term for this field: nothing to share
        // Write prefix length
        $dicFile->writeVInt(0);
        // Write suffix
        $dicFile->writeString($term->text);
    }

    // Write field number
    $dicFile->writeVInt($term->field);
    // DocFreq (the count of documents which contain the term)
    $dicFile->writeVInt($termInfo->docFreq);

    $prevTerm = $term;

    if (!isset($prevTermInfo)) {
        // First entry: pointers are written as absolute values
        // Write FreqDelta
        $dicFile->writeVInt($termInfo->freqPointer);
        // Write ProxDelta
        $dicFile->writeVInt($termInfo->proxPointer);
    } else {
        // Write FreqDelta
        $dicFile->writeVInt($termInfo->freqPointer - $prevTermInfo->freqPointer);
        // Write ProxDelta
        $dicFile->writeVInt($termInfo->proxPointer - $prevTermInfo->proxPointer);
    }

    // Write SkipOffset - it's not 0 when $termInfo->docFreq > self::$skipInterval
    if ($termInfo->skipOffset != 0) {
        $dicFile->writeVInt($termInfo->skipOffset);
    }

    $prevTermInfo = $termInfo;
}
/**
 * Write one term dictionary record to a segment dictionary file.
 * Used to write entry to .tis or .tii files
 *
 * The record consists of: shared-prefix length (in UTF-8 characters),
 * term suffix, field number, document frequency, freq/prox pointer deltas
 * and, when it is non-zero, the skip offset.
 *
 * $prevTerm and $prevTermInfo are passed by reference and are overwritten
 * with $term and $termInfo respectively.
 *
 * @param Zend_Search_Lucene_Storage_File $dicFile
 * @param Zend_Search_Lucene_Index_Term $prevTerm
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_TermInfo $prevTermInfo
 * @param Zend_Search_Lucene_Index_TermInfo $termInfo
 */
protected function _dumpTermDictEntry(Zend_Search_Lucene_Storage_File $dicFile,
                                      &$prevTerm,     Zend_Search_Lucene_Index_Term     $term,
                                      &$prevTermInfo, Zend_Search_Lucene_Index_TermInfo $termInfo)
{
    // Defaults describe the "nothing shared" case: empty prefix, full text as suffix
    $currentText = $term->text;
    $sharedChars = 0;
    $suffix      = $currentText;

    if (isset($prevTerm) && $prevTerm->field == $term->field) {
        $previousText = $prevTerm->text;

        // Count identical leading bytes of the two term texts
        $limit = min(strlen($previousText), strlen($currentText));
        for ($matchedBytes = 0; $matchedBytes < $limit; $matchedBytes++) {
            if ($previousText[$matchedBytes] != $currentText[$matchedBytes]) {
                break;
            }
        }

        // Walk the matched bytes character by character (UTF-8), keeping
        // only characters that lie completely inside the matched run
        $prefixBytes = 0;
        while ($prefixBytes < $matchedBytes) {
            $lead = ord($currentText[$prefixBytes]);

            if (($lead & 0xF0) == 0xF0) {
                $charBytes = 4;
            } elseif (($lead & 0xE0) == 0xE0) {
                $charBytes = 3;
            } elseif (($lead & 0xC0) == 0xC0) {
                $charBytes = 2;
            } else {
                $charBytes = 1;
            }

            if ($prefixBytes + $charBytes > $matchedBytes) {
                // Character straddles the end of the matched bytes - leave it in the suffix
                break;
            }

            $sharedChars++;
            $prefixBytes += $charBytes;
        }

        $suffix = substr($currentText, $prefixBytes);
    }

    // Prefix length, then suffix
    $dicFile->writeVInt($sharedChars);
    $dicFile->writeString($suffix);

    // Field number
    $dicFile->writeVInt($term->field);
    // DocFreq (the count of documents which contain the term)
    $dicFile->writeVInt($termInfo->docFreq);

    $prevTerm = $term;

    // Pointers are stored relative to the previous entry when there is one,
    // otherwise as they are
    if (isset($prevTermInfo)) {
        $freqDelta = $termInfo->freqPointer - $prevTermInfo->freqPointer;
        $proxDelta = $termInfo->proxPointer - $prevTermInfo->proxPointer;
    } else {
        $freqDelta = $termInfo->freqPointer;
        $proxDelta = $termInfo->proxPointer;
    }
    $dicFile->writeVInt($freqDelta);
    $dicFile->writeVInt($proxDelta);

    // SkipOffset - it's not 0 when $termInfo->docFreq > self::$skipInterval
    if ($termInfo->skipOffset != 0) {
        $dicFile->writeVInt($termInfo->skipOffset);
    }

    $prevTermInfo = $termInfo;
}
/**
 * Dump a term dictionary entry (used for .tis or .tii files).
 *
 * Writes, in order: the number of UTF-8 characters shared with the previous
 * term of the same field, the remaining term text, the field number, the
 * document frequency, the freq and prox pointer deltas and, only when it is
 * non-zero, the skip offset.
 *
 * On return $prevTerm holds $term and $prevTermInfo holds $termInfo
 * (both are reference parameters).
 *
 * @param Zend_Search_Lucene_Storage_File $dicFile
 * @param Zend_Search_Lucene_Index_Term $prevTerm
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_TermInfo $prevTermInfo
 * @param Zend_Search_Lucene_Index_TermInfo $termInfo
 */
protected function _dumpTermDictEntry(Zend_Search_Lucene_Storage_File $dicFile,
                                      &$prevTerm,     Zend_Search_Lucene_Index_Term     $term,
                                      &$prevTermInfo, Zend_Search_Lucene_Index_TermInfo $termInfo)
{
    $chars = 0;            // shared prefix length, in characters
    $tail  = $term->text;  // part of the term text that is actually written

    if (isset($prevTerm) && $prevTerm->field == $term->field) {
        $old = $prevTerm->text;
        $new = $term->text;

        // Number of leading bytes both texts have in common
        $cap    = min(strlen($old), strlen($new));
        $common = 0;
        while ($common < $cap && $old[$common] == $new[$common]) {
            ++$common;
        }

        // Trim the common byte run down to complete UTF-8 characters
        $bytes = 0;
        while ($bytes < $common) {
            $lead  = ord($new[$bytes]);
            $width = 1;
            if (($lead & 0xc0) == 0xc0) {
                // Multi-byte lead byte: 2 bytes, or 3 / 4 depending on the next bits
                $width = ($lead & 0x20) ? (($lead & 0x10) ? 4 : 3) : 2;
            }

            $end = $bytes + $width;
            if ($end > $common) {
                // Only part of this character matched - it stays in the tail
                break;
            }

            $bytes = $end;
            ++$chars;
        }

        $tail = substr($new, $bytes);
    }

    $dicFile->writeVInt($chars);
    $dicFile->writeString($tail);
    $dicFile->writeVInt($term->field);
    $dicFile->writeVInt($termInfo->docFreq);

    $prevTerm = $term;

    // Without a previous entry the pointers are written unchanged
    $freqBase = 0;
    $proxBase = 0;
    $hasPrevious = isset($prevTermInfo);
    if ($hasPrevious) {
        $freqBase = $prevTermInfo->freqPointer;
        $proxBase = $prevTermInfo->proxPointer;
    }
    $dicFile->writeVInt($hasPrevious ? $termInfo->freqPointer - $freqBase : $termInfo->freqPointer);
    $dicFile->writeVInt($hasPrevious ? $termInfo->proxPointer - $proxBase : $termInfo->proxPointer);

    // The skip offset is emitted only when it is non-zero
    if ($termInfo->skipOffset != 0) {
        $dicFile->writeVInt($termInfo->skipOffset);
    }

    $prevTermInfo = $termInfo;
}