/**
 * Add term
 *
 * Writes the postings of a single term to the frequency and position files,
 * then appends the matching entry to the term dictionary (and, every
 * self::$indexInterval-th term, to the term index as well).
 *
 * $termDocs is keyed by document id and holds the term positions per document:
 * array( docId => array(pos1, pos2, pos3, ...), ... )
 *
 * @param \ZendSearch\Lucene\Index\Term $termEntry
 * @param array $termDocs
 */
public function addTerm($termEntry, $termDocs)
{
    // Start offsets of this term's data in both postings files.
    $frqStart = $this->_frqFile->tell();
    $prxStart = $this->_prxFile->tell();

    $docTotal = count($termDocs);

    $lastDocId = 0;
    foreach ($termDocs as $docId => $positions) {
        // Document ids are stored as doubled deltas from the previous id.
        $doubledDelta = ($docId - $lastDocId) * 2;
        $lastDocId    = $docId;

        $occurrences = count($positions);
        if ($occurrences > 1) {
            $this->_frqFile->writeVInt($doubledDelta);
            $this->_frqFile->writeVInt($occurrences);
        } else {
            // At most one position: add 1 to the doubled delta and write no
            // separate occurrence count.
            $this->_frqFile->writeVInt($doubledDelta + 1);
        }

        // Positions are stored as deltas as well, restarting for each document.
        $lastPosition = 0;
        foreach ($positions as $currentPosition) {
            $this->_prxFile->writeVInt($currentPosition - $lastPosition);
            $lastPosition = $currentPosition;
        }
    }

    /**
     * @todo Write Skip Data to a freq file.
     * It's not used now, but make index more optimal
     */
    $skipOffset = ($docTotal >= self::$skipInterval)
        ? $this->_frqFile->tell() - $frqStart
        : 0;

    // The dictionary entry refers to the field by its number, not its name.
    $fieldNumber = $this->_fields[$termEntry->field]->number;
    $term        = new Index\Term($termEntry->text, $fieldNumber);
    $termInfo    = new Index\TermInfo($docTotal, $frqStart, $prxStart, $skipOffset);

    $this->_dumpTermDictEntry($this->_tisFile, $this->_prevTerm, $term, $this->_prevTermInfo, $termInfo);

    $termOrdinal = $this->_termCount + 1;
    if ($termOrdinal % self::$indexInterval == 0) {
        $this->_dumpTermDictEntry($this->_tiiFile, $this->_prevIndexTerm, $term, $this->_prevIndexTermInfo, $termInfo);

        // The index entry is followed by the distance, within the dictionary
        // file, from the previously indexed position.
        $tisPosition = $this->_tisFile->tell();
        $this->_tiiFile->writeVInt($tisPosition - $this->_lastIndexPosition);
        $this->_lastIndexPosition = $tisPosition;
    }

    $this->_termCount++;
}
/**
 * Reset terms stream
 *
 * Reopens the '.tis' file, validates and reads its header, clears the
 * per-scan state and moves the stream to the first term via nextTerm().
 *
 * The method declares no formal parameters; up to two optional arguments
 * are picked up through func_get_args():
 *   $startId - id for the first document (default value is 0)
 *   $mode    - terms scanning mode, one of the self::SM_* constants
 *              (default value is self::SM_TERMS_ONLY)
 *
 * Returns start document id for the next segment
 *
 * @param integer $startId
 * @param integer $mode
 * @throws \ZendSearch\Lucene\Exception\InvalidArgumentException
 * @throws \ZendSearch\Lucene\Exception\InvalidFileFormatException
 * @return integer
 */
public function resetTermsStream()
{
    $args     = func_get_args();
    $argCount = count($args);
    if ($argCount > 2) {
        throw new InvalidArgumentException('Wrong number of arguments');
    }
    $startId = ($argCount >= 1) ? $args[0] : 0;
    $mode    = ($argCount == 2) ? $args[1] : self::SM_TERMS_ONLY;

    // Let go of the previous dictionary handle before opening a new one.
    $this->_tisFile       = null;
    $this->_tisFile       = $this->openCompoundFile('.tis', false);
    $this->_tisFileOffset = $this->_tisFile->tell();

    // Two header versions are accepted; the 0xfffffffd one carries one extra
    // integer field after the skip interval.
    $tiVersion       = $this->_tisFile->readInt();
    $hasExtraIntField = ($tiVersion == (int) 0xfffffffd);
    if (!$hasExtraIntField && $tiVersion != (int) 0xfffffffe) {
        throw new InvalidFileFormatException('Wrong TermInfoFile file format');
    }

    $termTotal            = $this->_tisFile->readLong(); // Terms count
    $this->_termNum       = $termTotal;
    $this->_termCount     = $termTotal;
    $this->_indexInterval = $this->_tisFile->readInt();  // Index interval
    $this->_skipInterval  = $this->_tisFile->readInt();  // Skip interval
    if ($hasExtraIntField) {
        // Max skip levels: the value is not used in this method, but the
        // read is kept so the same header bytes are consumed.
        $this->_tisFile->readInt();
    }

    // Clear everything left over from a previous scan.
    $this->_frqFile           = null;
    $this->_prxFile           = null;
    $this->_docMap            = array();
    $this->_lastTerm          = new Term('', -1);
    $this->_lastTermInfo      = new TermInfo(0, 0, 0, 0);
    $this->_lastTermPositions = null;

    $this->_termsScanMode = $mode;

    $isMergeMode = ($mode == self::SM_MERGE_INFO);

    if ($mode == self::SM_TERMS_ONLY) {
        // Do nothing
    } elseif ($mode == self::SM_FULL_INFO || $isMergeMode) {
        $this->_frqFile       = $this->openCompoundFile('.frq', false);
        $this->_frqFileOffset = $this->_frqFile->tell();

        $this->_prxFile       = $this->openCompoundFile('.prx', false);
        $this->_prxFileOffset = $this->_prxFile->tell();

        // Map every non-deleted document to its id in the stream. In merge
        // mode ids are handed out consecutively (number of documents mapped
        // so far); otherwise the original document number is kept as offset.
        for ($docNum = 0; $docNum < $this->_docCount; $docNum++) {
            if ($this->isDeleted($docNum)) {
                continue;
            }
            $offset                 = $isMergeMode ? count($this->_docMap) : $docNum;
            $this->_docMap[$docNum] = $startId + $offset;
        }
    } else {
        throw new InvalidArgumentException('Wrong terms scaning mode specified.');
    }

    // Calculate next segment start id before calling nextTerm()
    // (since $this->_docMap structure may be cleaned by that call)
    $nextSegmentStartId = $startId + ($isMergeMode ? count($this->_docMap) : $this->_docCount);
    $this->nextTerm();

    return $nextSegmentStartId;
}