/**
 * Adds a document to this index.
 *
 * The document is stored under the current document ID. Each of its fields
 * is then turned into tokens (via the default analyzer when the field is
 * marked as tokenized, otherwise as one single token holding the whole
 * value), and for every token the per-field term dictionary, the term
 * positions and the per-document term frequency are updated. Finally the
 * field/document membership and the field norm are recorded and the
 * document ID counter is incremented.
 *
 * The per-field term dictionary ($this->_terms[$fieldName]) is keyed by term
 * text and kept sorted by key using byte-wise string comparison.
 *
 * @param Zend_Search_Lucene_Document $document
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    $this->_documents[$this->_docID] = $document;

    // parse document
    $analyzer   = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $fieldNames = $document->getFieldnames();

    foreach ($fieldNames as $fieldName) {
        $field = $document->getField($fieldName);
        $value = $field->getUtf8Value();

        // tokenize if requested
        if ($field->isTokenized) {
            $tokens = $analyzer->tokenize($value, 'UTF-8');
        } else {
            // The whole value becomes one token; strlen(utf8_decode()) yields
            // the length in characters rather than bytes.
            // NOTE(review): utf8_decode() is deprecated as of PHP 8.2 - consider
            // mb_strlen($value, 'UTF-8') if mbstring is guaranteed to be available.
            $tokens = array(
                new Zend_Search_Lucene_Analysis_Token($value, 0, strlen(utf8_decode($value)))
            );
        }

        // store tokens in "index"
        $position   = -1;
        $termsAdded = false;

        foreach ($tokens as $token) {
            $text      = $token->getTermText();
            $position += $token->getPositionIncrement();

            // Register the term if this field has not seen it yet. The entry is
            // inserted by key and the dictionary is re-sorted once after the
            // loop. Shifting/merging the array instead would renumber integer
            // keys, and PHP stores numeric term texts such as "2008" as integer
            // keys, so those terms would lose their key.
            if (!isset($this->_terms[$fieldName][$text])) {
                $this->_terms[$fieldName][$text] = new Zend_Search_Lucene_Index_Term($text, $fieldName);
                $termsAdded = true;
            }

            // store termPosition for this term
            $this->_termPositions[$fieldName][$text][$this->_docID][] = $position;

            // store or increase term freq for this document
            if (!isset($this->_termDocs[$fieldName][$text][$this->_docID])) {
                $this->_termDocs[$fieldName][$text][$this->_docID] = 1;
            } else {
                $this->_termDocs[$fieldName][$text][$this->_docID]++;
            }
        }

        // Keep the term dictionary ordered by term text. SORT_STRING forces a
        // byte-wise string comparison (numeric-looking terms are not compared
        // as numbers) and ksort() preserves the keys.
        if ($termsAdded) {
            ksort($this->_terms[$fieldName], SORT_STRING);
        }

        // remember fieldname and document
        $this->_fields[$fieldName][$this->_docID] = 1;

        // calculate and store normalisation vector
        $this->_norms[$fieldName][$this->_docID] =
            $this->getSimilarity()->lengthNorm($fieldName, count($tokens))
            * $document->boost
            * $field->boost;
    }

    // increase docID
    $this->_docID++;
}