/**
  * Add term
  *
  * $termDocs holds the term positions per document: array( docId => array(pos1, pos2, pos3, ...), ... )
  *
  * @param \Zend\Search\Lucene\Index\Term $termEntry
  * @param array $termDocs
  */
 public function addTerm($termEntry, $termDocs)
 {
     // Remember where this term's postings begin in the frequency and proximity files.
     $frqStart = $this->_frqFile->tell();
     $prxStart = $this->_prxFile->tell();

     $lastDocId = 0;
     foreach ($termDocs as $docId => $positions) {
         // The doc id is written as a delta from the previous doc id, doubled.
         // The low bit is set when the position count is not greater than one,
         // in which case no explicit count follows.
         $shiftedDelta = ($docId - $lastDocId) * 2;
         $lastDocId = $docId;

         $positionCount = count($positions);
         if ($positionCount > 1) {
             $this->_frqFile->writeVInt($shiftedDelta);
             $this->_frqFile->writeVInt($positionCount);
         } else {
             $this->_frqFile->writeVInt($shiftedDelta + 1);
         }

         // Positions are written as deltas from the previous position within the document.
         $lastPosition = 0;
         foreach ($positions as $currentPosition) {
             $this->_prxFile->writeVInt($currentPosition - $lastPosition);
             $lastPosition = $currentPosition;
         }
     }

     $docTotal = count($termDocs);

     /**
      * @todo Write Skip Data to a freq file.
      * It's not used now, but make index more optimal
      */
     // No skip data is written here; for terms with at least $skipInterval documents
     // the skip offset is the number of bytes just written to the frequency file.
     $skipOffset = ($docTotal >= self::$skipInterval)
         ? $this->_frqFile->tell() - $frqStart
         : 0;

     // Build the dictionary entry using the field number instead of the field name.
     $fieldNumber = $this->_fields[$termEntry->field]->number;
     $term = new Index\Term($termEntry->text, $fieldNumber);
     $termInfo = new Index\TermInfo($docTotal, $frqStart, $prxStart, $skipOffset);

     $this->_dumpTermDictEntry($this->_tisFile, $this->_prevTerm, $term, $this->_prevTermInfo, $termInfo);

     // Every $indexInterval-th term is also written to the .tii file, followed by
     // the distance in the .tis file since the previously indexed entry.
     $termOrdinal = $this->_termCount + 1;
     if ($termOrdinal % self::$indexInterval == 0) {
         $this->_dumpTermDictEntry($this->_tiiFile, $this->_prevIndexTerm, $term, $this->_prevIndexTermInfo, $termInfo);

         $tisPosition = $this->_tisFile->tell();
         $this->_tiiFile->writeVInt($tisPosition - $this->_lastIndexPosition);
         $this->_lastIndexPosition = $tisPosition;
     }

     ++$this->_termCount;
 }
Пример #2
0
 /**
  * Reset terms stream
  *
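  * $startId - id for the first document
  * $mode    - terms scan mode (self::SM_TERMS_ONLY, self::SM_FULL_INFO or self::SM_MERGE_INFO);
  *            in SM_MERGE_INFO mode deleted documents are skipped when document ids are assigned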
  *
  * Returns start document id for the next segment
  *
  * @param integer $startId
  * @param integer $mode
  * @throws \Zend\Search\Lucene\Exception\InvalidArgumentException
  * @throws \Zend\Search\Lucene\Exception\InvalidFileFormatException
  * @return integer
  */
 public function resetTermsStream()
 {
     /**
      * The declared signature is empty, but up to two optional positional
      * arguments are accepted:
      *   $startId (default value is 0)
      *   $mode (default value is self::SM_TERMS_ONLY)
      */
     $args = func_get_args();
     $argTotal = count($args);
     if ($argTotal > 2) {
         throw new InvalidArgumentException('Wrong number of arguments');
     }
     $startId = ($argTotal >= 1) ? $args[0] : 0;
     $mode = ($argTotal == 2) ? $args[1] : self::SM_TERMS_ONLY;

     // Drop any previous dictionary handle before opening the file again.
     $this->_tisFile = null;
     $this->_tisFile = $this->openCompoundFile('.tis', false);
     $this->_tisFileOffset = $this->_tisFile->tell();

     // Header: format version, terms count, index interval, skip interval.
     $formatVersion = $this->_tisFile->readInt();
     $knownFormat = ($formatVersion == (int) 0xfffffffe || $formatVersion == (int) 0xfffffffd);
     if (!$knownFormat) {
         throw new InvalidFileFormatException('Wrong TermInfoFile file format');
     }

     $termTotal = $this->_tisFile->readLong();
     $this->_termNum = $termTotal;
     $this->_termCount = $termTotal;
     $this->_indexInterval = $this->_tisFile->readInt();
     $this->_skipInterval = $this->_tisFile->readInt();
     if ($formatVersion == (int) 0xfffffffd) {
         // This format version has one more header integer (max skip levels);
         // it is read to advance the file position and is not used in this method.
         $this->_tisFile->readInt();
     }

     // Release postings handles; they are reopened below only for modes that need them.
     $this->_frqFile = null;
     $this->_prxFile = null;

     // Reset the scan state.
     $this->_docMap = array();
     $this->_lastTerm = new Term('', -1);
     $this->_lastTermInfo = new TermInfo(0, 0, 0, 0);
     $this->_lastTermPositions = null;
     $this->_termsScanMode = $mode;

     $renumberDocs = ($mode == self::SM_MERGE_INFO);

     if ($mode == self::SM_TERMS_ONLY) {
         // Nothing else to prepare.
     } elseif ($mode == self::SM_FULL_INFO || $mode == self::SM_MERGE_INFO) {
         $this->_frqFile = $this->openCompoundFile('.frq', false);
         $this->_frqFileOffset = $this->_frqFile->tell();
         $this->_prxFile = $this->openCompoundFile('.prx', false);
         $this->_prxFileOffset = $this->_prxFile->tell();

         // Map every non-deleted local doc id to its id in the stream. In merge mode
         // ids are assigned consecutively; otherwise the local id is kept.
         for ($localId = 0; $localId < $this->_docCount; ++$localId) {
             if ($this->isDeleted($localId)) {
                 continue;
             }
             $idOffset = $renumberDocs ? count($this->_docMap) : $localId;
             $this->_docMap[$localId] = $startId + $idOffset;
         }
     } else {
         throw new InvalidArgumentException('Wrong terms scaning mode specified.');
     }

     // Compute the next segment's start id before calling nextTerm(),
     // since $this->_docMap may be cleaned by that call.
     $docSpan = $renumberDocs ? count($this->_docMap) : $this->_docCount;
     $nextSegmentStartId = $startId + $docSpan;

     $this->nextTerm();

     return $nextSegmentStartId;
 }