/**
 * Write one Term Dictionary entry to a segment dictionary file.
 *
 * Used when writing entries to .tis or .tii files. The entry is emitted as,
 * in order: the number of leading UTF-8 characters shared with the previous
 * term (0 when there is no previous term or it belongs to another field),
 * the remaining suffix of the term text, the field number, the document
 * frequency, the frequency/proximity pointers (as deltas against the previous
 * term info when one is set), and the skip offset when it is non-zero.
 *
 * Both $prevTerm and $prevTermInfo are passed by reference and are replaced
 * with $term and $termInfo respectively, so the next call can delta-encode
 * against this entry.
 *
 * @param \Zend\Search\Lucene\Storage\File            $dicFile      File the entry is written to
 * @param \Zend\Search\Lucene\Index\Term|null         $prevTerm     Previously written term; may be unset for the first entry
 * @param \Zend\Search\Lucene\Index\Term              $term         Term to write
 * @param \Zend\Search\Lucene\Index\TermInfo|null     $prevTermInfo Previously written term info; may be unset for the first entry
 * @param \Zend\Search\Lucene\Index\TermInfo          $termInfo     Term info to write
 */
protected function _dumpTermDictEntry(File $dicFile, &$prevTerm, Index\Term $term, &$prevTermInfo, Index\TermInfo $termInfo)
{
    $termText = $term->text;

    // Defaults describe an entry that shares nothing with its predecessor.
    $sharedCharCount = 0;
    $suffix          = $termText;

    if (isset($prevTerm) && $prevTerm->field == $term->field) {
        $prevText = $prevTerm->text;
        $limit    = min(strlen($prevText), strlen($termText));

        // Count the leading bytes that are identical in both term texts.
        for ($commonBytes = 0; $commonBytes < $limit; $commonBytes++) {
            if ($prevText[$commonBytes] != $termText[$commonBytes]) {
                break;
            }
        }

        // Shrink the byte match to whole UTF-8 characters and count them.
        $sharedByteCount = 0;
        while ($sharedByteCount < $commonBytes) {
            $leadByte = ord($termText[$sharedByteCount]);

            // Sequence length is derived from the high bits of the lead byte.
            if (($leadByte & 0xc0) != 0xc0) {
                $sequenceLength = 1;
            } elseif (!($leadByte & 0x20)) {
                $sequenceLength = 2;
            } elseif (!($leadByte & 0x10)) {
                $sequenceLength = 3;
            } else {
                $sequenceLength = 4;
            }

            $nextBoundary = $sharedByteCount + $sequenceLength;
            if ($nextBoundary > $commonBytes) {
                // The character straddles the end of the matched bytes,
                // so it is not part of the shared prefix.
                break;
            }

            $sharedByteCount = $nextBoundary;
            $sharedCharCount++;
        }

        $suffix = substr($termText, $sharedByteCount);
    }

    // Write prefix length
    $dicFile->writeVInt($sharedCharCount);
    // Write suffix
    $dicFile->writeString($suffix);

    // Write field number
    $dicFile->writeVInt($term->field);
    // DocFreq (the count of documents which contain the term)
    $dicFile->writeVInt($termInfo->docFreq);

    $prevTerm = $term;

    if (isset($prevTermInfo)) {
        // Pointers are stored relative to the previous entry.
        $freqDelta = $termInfo->freqPointer - $prevTermInfo->freqPointer;
        $proxDelta = $termInfo->proxPointer - $prevTermInfo->proxPointer;
    } else {
        // No previous entry: the pointers are written as they are.
        $freqDelta = $termInfo->freqPointer;
        $proxDelta = $termInfo->proxPointer;
    }

    // Write FreqDelta
    $dicFile->writeVInt($freqDelta);
    // Write ProxDelta
    $dicFile->writeVInt($proxDelta);

    // Write SkipOffset - it's not 0 when $termInfo->docFreq > self::$skipInterval
    $skipOffset = $termInfo->skipOffset;
    if ($skipOffset != 0) {
        $dicFile->writeVInt($skipOffset);
    }

    $prevTermInfo = $termInfo;
}