/**
 * Creates the dictionary (.tis), dictionary index (.tii), frequency (.frq)
 * and positions (.prx) files of this segment and writes the dictionary headers.
 *
 * The four file names are appended to $this->_files, and the state used while
 * dumping terms (previous term/term info, index position, term count) is reset.
 */
public function initializeDictionaryFiles()
{
    $segmentName = $this->_name;

    // Term dictionary (.tis) header.
    // NOTE(review): 0xfffffffd looks like a format marker and the long 0 like a
    // placeholder that is filled in later — confirm against the file format spec.
    $dictionary = $this->_directory->createFile($segmentName . '.tis');
    $this->_tisFile = $dictionary;
    $dictionary->writeInt((int) 0xfffffffd);
    $dictionary->writeLong(0);
    foreach (array(self::$indexInterval, self::$skipInterval, self::$maxSkipLevels) as $setting) {
        $dictionary->writeInt($setting);
    }

    // Term dictionary index (.tii) starts with the very same header fields.
    $dictionaryIndex = $this->_directory->createFile($segmentName . '.tii');
    $this->_tiiFile = $dictionaryIndex;
    $dictionaryIndex->writeInt((int) 0xfffffffd);
    $dictionaryIndex->writeLong(0);
    foreach (array(self::$indexInterval, self::$skipInterval, self::$maxSkipLevels) as $setting) {
        $dictionaryIndex->writeInt($setting);
    }

    /** Dump dictionary header: an initial entry with an empty term */
    $dictionaryIndex->writeVInt(0);                   // prefix length
    $dictionaryIndex->writeString('');                // suffix
    $dictionaryIndex->writeInt((int) 0xffffffff);     // field number
    $dictionaryIndex->writeByte((int) 0xf);
    $dictionaryIndex->writeVInt(0);                   // DocFreq
    $dictionaryIndex->writeVInt(0);                   // FreqDelta
    $dictionaryIndex->writeVInt(0);                   // ProxDelta
    // IndexDelta. NOTE(review): 24 presumably equals the byte size of the .tis
    // header written above (int + long + 3 ints) — confirm.
    $dictionaryIndex->writeVInt(24);

    // Postings files are created empty; nothing is written to them here.
    $this->_frqFile = $this->_directory->createFile($segmentName . '.frq');
    $this->_prxFile = $this->_directory->createFile($segmentName . '.prx');

    // Register every file created above as belonging to this segment.
    foreach (array('.tis', '.tii', '.frq', '.prx') as $extension) {
        $this->_files[] = $segmentName . $extension;
    }

    // Reset the term-dumping state.
    $this->_prevTerm          = null;
    $this->_prevTermInfo      = null;
    $this->_prevIndexTerm     = null;
    $this->_prevIndexTermInfo = null;

    // Same value as the IndexDelta written into the .tii header entry above.
    $this->_lastIndexPosition = 24;

    $this->_termCount = 0;
}
/**
 * Adds a document to this segment.
 *
 * Every field is registered through _addFieldInfo(). Indexed fields are
 * tokenized (or treated as one single token when not tokenized) and their term
 * positions are collected in memory under the current document number. Stored
 * fields are appended to the .fdt file, with the record offset written to the
 * .fdx file; both files are created on first use.
 *
 * All fields are validated before any segment state is modified, so a rejected
 * document leaves no partial postings behind under a document number that the
 * next added document would reuse.
 *
 * @param Zend_Search_Lucene_Document $document
 * @throws Zend_Search_Lucene_Exception if any field requests term vector storing
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    // Pass 1: collect and validate the fields without touching segment state.
    $fields = array();
    foreach ($document->getFieldNames() as $fieldName) {
        $field = $document->getField($fieldName);

        if ($field->storeTermVector) {
            /**
             * @todo term vector storing support
             */
            throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
        }

        $fields[] = $field;
    }

    // Pass 2: the document is acceptable, index it.
    $docNumber    = $this->_docCount;
    $storedFields = array();

    foreach ($fields as $field) {
        $this->_addFieldInfo($field);

        if ($field->isIndexed) {
            if ($field->isTokenized) {
                $tokenList = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($field->stringValue);
            } else {
                // An untokenized field is indexed as one token spanning the whole value.
                $tokenList = array(
                    new Zend_Search_Lucene_Analysis_Token($field->stringValue, 0, strlen($field->stringValue))
                );
            }

            // Field length is measured in tokens.
            $this->_fieldLengths[$field->name][$docNumber] = count($tokenList);

            $position = 0;
            foreach ($tokenList as $token) {
                $term    = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
                $termKey = $term->key();

                if (!isset($this->_termDictionary[$termKey])) {
                    // New term
                    $this->_termDictionary[$termKey] = $term;
                    $this->_termDocs[$termKey]       = array();
                }
                if (!isset($this->_termDocs[$termKey][$docNumber])) {
                    // First occurrence of this term in the current document
                    $this->_termDocs[$termKey][$docNumber] = array();
                }

                // The increment is applied before recording, so the recorded
                // position already includes this token's own increment.
                $position += $token->getPositionIncrement();
                $this->_termDocs[$termKey][$docNumber][] = $position;
            }
        }

        if ($field->isStored) {
            $storedFields[] = $field;
        }
    }

    if (count($storedFields) != 0) {
        if (!isset($this->_fdxFile)) {
            // Stored-field files are created lazily, on the first document that needs them.
            $this->_fdxFile = $this->_directory->createFile($this->_name . '.fdx');
            $this->_fdtFile = $this->_directory->createFile($this->_name . '.fdt');

            $this->_files[] = $this->_name . '.fdx';
            $this->_files[] = $this->_name . '.fdt';
        }

        // .fdx entry: offset in .fdt where this document's stored fields start.
        $this->_fdxFile->writeLong($this->_fdtFile->tell());
        $this->_fdtFile->writeVInt(count($storedFields));

        foreach ($storedFields as $field) {
            $this->_fdtFile->writeVInt($this->_fields[$field->name]->number);

            $fieldBits = ($field->isTokenized ? 0x1 : 0x0) |
                         ($field->isBinary    ? 0x2 : 0x0) |
                         0x0; /* 0x04 - third bit, compressed (ZLIB) */
            $this->_fdtFile->writeByte($fieldBits);

            if ($field->isBinary) {
                // Binary values are written as a length followed by the raw bytes.
                $this->_fdtFile->writeVInt(strlen($field->stringValue));
                $this->_fdtFile->writeBytes($field->stringValue);
            } else {
                $this->_fdtFile->writeString($field->stringValue);
            }
        }
    }

    $this->_docCount++;
}