Exemplo n.º 1
0
 /**
  * Create the dictionary (.tis / .tii), frequency (.frq) and positions (.prx)
  * files of this segment, write the headers the dictionary files start with,
  * and reset the state used while terms are dumped.
  */
 public function initializeDictionaryFiles()
 {
     // NOTE(review): presumably the format/version marker of the term
     // dictionary files - confirm against the index format specification.
     $headerMarker = (int) 0xfffffffd;

     // Both dictionary files open with the same five header values. Each file
     // is created, stored on the writer and filled before the next is created.
     foreach (array('_tisFile' => '.tis', '_tiiFile' => '.tii') as $property => $extension) {
         $file = $this->_directory->createFile($this->_name . $extension);
         $this->{$property} = $file;

         $file->writeInt($headerMarker);
         $file->writeLong(0); // NOTE(review): looks like a placeholder filled in later - confirm
         $file->writeInt(self::$indexInterval);
         $file->writeInt(self::$skipInterval);
         $file->writeInt(self::$maxSkipLevels);
     }

     /** Dump dictionary header */
     $index = $this->_tiiFile;
     $index->writeVInt(0);                // prefix length
     $index->writeString('');             // suffix
     $index->writeInt((int) 0xffffffff);  // field number
     $index->writeByte((int) 0xf);        // NOTE(review): presumably the tail of the field number - confirm
     $index->writeVInt(0);                // DocFreq
     $index->writeVInt(0);                // FreqDelta
     $index->writeVInt(0);                // ProxDelta
     $index->writeVInt(24);               // IndexDelta (same value as _lastIndexPosition below)

     $this->_frqFile = $this->_directory->createFile($this->_name . '.frq');
     $this->_prxFile = $this->_directory->createFile($this->_name . '.prx');

     // Register the four files as belonging to this segment.
     foreach (array('.tis', '.tii', '.frq', '.prx') as $extension) {
         $this->_files[] = $this->_name . $extension;
     }

     // Nothing has been dumped yet: no previous term / index term to diff against.
     $this->_termCount = 0;
     $this->_lastIndexPosition = 24;
     $this->_prevTerm = null;
     $this->_prevTermInfo = null;
     $this->_prevIndexTerm = null;
     $this->_prevIndexTermInfo = null;
 }
Exemplo n.º 2
0
 /**
  * Adds a document to this segment.
  *
  * Indexed fields are tokenized (or treated as a single token when they are
  * not tokenized) and their term positions are collected in memory under the
  * current document number. Stored fields are appended to the .fdt file, and
  * a pointer to the document's record is appended to the .fdx file.
  *
  * All fields are validated before any segment state is modified, so a
  * rejected document leaves the segment untouched.
  *
  * @param Zend_Search_Lucene_Document $document
  * @throws Zend_Search_Lucene_Exception if any field requests term vector storage
  */
 public function addDocument(Zend_Search_Lucene_Document $document)
 {
     // Resolve and validate every field up front. Throwing in the middle of
     // indexing would leave postings of the rejected document registered
     // under the current document number, and the next document added would
     // silently inherit them because _docCount is not advanced on failure.
     $fields = array();
     foreach ($document->getFieldNames() as $fieldName) {
         $field = $document->getField($fieldName);
         if ($field->storeTermVector) {
             /**
              * @todo term vector storing support
              */
             throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
         }
         $fields[] = $field;
     }

     $docNumber = $this->_docCount;
     $storedFields = array();

     foreach ($fields as $field) {
         $this->_addFieldInfo($field);

         if ($field->isIndexed) {
             if ($field->isTokenized) {
                 $tokenList = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($field->stringValue);
             } else {
                 // An untokenized field is indexed as one token covering the whole value.
                 $tokenList = array(
                     new Zend_Search_Lucene_Analysis_Token($field->stringValue, 0, strlen($field->stringValue))
                 );
             }

             $this->_fieldLengths[$field->name][$docNumber] = count($tokenList);

             $position = 0;
             foreach ($tokenList as $token) {
                 $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
                 $termKey = $term->key();

                 if (!isset($this->_termDictionary[$termKey])) {
                     // New term
                     $this->_termDictionary[$termKey] = $term;
                     $this->_termDocs[$termKey] = array();
                 }
                 if (!isset($this->_termDocs[$termKey][$docNumber])) {
                     // First occurrence of this term in the current document
                     $this->_termDocs[$termKey][$docNumber] = array();
                 }

                 $position += $token->getPositionIncrement();
                 $this->_termDocs[$termKey][$docNumber][] = $position;
             }
         }

         if ($field->isStored) {
             $storedFields[] = $field;
         }
     }

     if (!isset($this->_fdxFile)) {
         // Stored-field files are created lazily, on the first document added.
         $this->_fdxFile = $this->_directory->createFile($this->_name . '.fdx');
         $this->_fdtFile = $this->_directory->createFile($this->_name . '.fdt');
         $this->_files[] = $this->_name . '.fdx';
         $this->_files[] = $this->_name . '.fdt';
     }

     // Always write one .fdx pointer and one .fdt record per document, even
     // when the document has no stored fields (field count 0). Skipping the
     // entry would shift every later pointer by one document, so lookups by
     // document number would return another document's stored fields.
     $this->_fdxFile->writeLong($this->_fdtFile->tell());
     $this->_fdtFile->writeVInt(count($storedFields));

     foreach ($storedFields as $field) {
         $this->_fdtFile->writeVInt($this->_fields[$field->name]->number);

         // Bit 0x1: tokenized, bit 0x2: binary. The third bit (0x04,
         // compressed with ZLIB) is never set by this writer.
         $fieldBits = ($field->isTokenized ? 0x1 : 0x0) | ($field->isBinary ? 0x2 : 0x0) | 0x0;
         $this->_fdtFile->writeByte($fieldBits);

         if ($field->isBinary) {
             // Binary values are written as a byte count followed by the raw bytes.
             $this->_fdtFile->writeVInt(strlen($field->stringValue));
             $this->_fdtFile->writeBytes($field->stringValue);
         } else {
             $this->_fdtFile->writeString($field->stringValue);
         }
     }

     $this->_docCount++;
 }