public function idf($input, Zend_Search_Lucene_Interface $reader)
{
    // Scalar case: $input is a single term, so its idfFreq() value is the result.
    if (!is_array($input)) {
        return $this->idfFreq($reader->docFreq($input), $reader->count());
    }

    // Array case: accumulate the idfFreq() value of every term in the list.
    $total = 0.0;
    foreach ($input as $term) {
        $total += $this->idfFreq($reader->docFreq($term), $reader->count());
    }

    return $total;
}
/**
 * Calculate result vector for Conjunction query
 * (like '+something +another')
 *
 * Stores the matching documents in $this->_resVector (keyed by the values
 * returned from termDocs()) and the per-term frequencies in
 * $this->_termsFreqs. For a query without terms the result vector is an
 * empty array and nothing else is touched.
 *
 * @param Zend_Search_Lucene_Interface $reader
 */
private function _calculateConjunctionResult(Zend_Search_Lucene_Interface $reader)
{
    // Nothing to intersect: return an empty result right away. Falling
    // through would call array_flip() on a docs vector that was never
    // retrieved and overwrite this empty result.
    if (count($this->_terms) === 0) {
        $this->_resVector = array();
        return;
    }

    $this->_resVector = null;

    // Order terms by selectivity
    $docFreqs = array();
    $ids      = array();
    foreach ($this->_terms as $id => $term) {
        $docFreqs[] = $reader->docFreq($term);
        // Used to keep original order for terms with the same selectivity
        // and omit terms comparison
        $ids[]      = $id;
    }
    array_multisort($docFreqs, SORT_ASC, SORT_NUMERIC,
                    $ids,      SORT_ASC, SORT_NUMERIC,
                    $this->_terms);

    // Every termDocs() call goes through the same filter object.
    $docsFilter = new Zend_Search_Lucene_Index_DocsFilter();
    $termDocs   = array();
    foreach ($this->_terms as $term) {
        $termDocs = $reader->termDocs($term, $docsFilter);
    }

    // Treat last retrieved docs vector as a result set
    // (filter collects data for other terms)
    $this->_resVector = array_flip($termDocs);

    foreach ($this->_terms as $termId => $term) {
        $this->_termsFreqs[$termId] = $reader->termFreqs($term, $docsFilter);
    }

    // No ksort() of the result vector is needed: docs are returned ordered
    // and the algorithms used here don't change element order.
}
/**
 * Reports how many documents in this index contain the given term.
 *
 * The call is forwarded unchanged to the wrapped index object.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return integer
 */
public function docFreq(Zend_Search_Lucene_Index_Term $term)
{
    $documentCount = $this->_index->docFreq($term);

    return $documentCount;
}