/**
 * The sum of squared weights of contained query clauses.
 *
 * As a side effect this stores the inverse document frequency in
 * $this->_idf and the boosted query weight in $this->_queryWeight.
 *
 * @return float the query weight multiplied by itself
 */
public function sumOfSquaredWeights()
{
    // idf of the query's terms, as reported by the reader's similarity
    $similarity = $this->_reader->getSimilarity();
    $queryTerms = $this->_query->getTerms();
    $this->_idf = $similarity->idf($queryTerms, $this->_reader);

    // query weight = idf scaled by the query boost
    $boost = $this->_query->getBoost();
    $this->_queryWeight = $this->_idf * $boost;

    // squared query weight
    $weight = $this->_queryWeight;

    return $weight * $weight;
}
/**
 * Score the specified document for this query.
 *
 * A document with no entry in $this->_docVector scores 0. Otherwise the
 * score is the product of the similarity's tf() for the stored term
 * frequency, the weight value, the reader's norm for the term's field
 * and the query boost.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function score($docId, Zend_Search_Lucene_Interface $reader)
{
    if (!isset($this->_docVector[$docId])) {
        return 0;
    }

    $termFrequency = $reader->getSimilarity()->tf($this->_termFreqs[$docId]);
    $weightValue   = $this->_weight->getValue();
    $fieldNorm     = $reader->norm($docId, $this->_term->field);

    return $termFrequency * $weightValue * $fieldNorm * $this->getBoost();
}
/**
 * Score calculator for non conjunction queries (not all terms are required)
 *
 * On first use a coord lookup table is built and cached in $this->_coord:
 * entry k holds coord(k, maxCoord), where maxCoord is the number of signs
 * that are not false.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function _nonConjunctionScore($docId, $reader)
{
    if ($this->_coord === null) {
        $this->_coord = array();

        // Count every sign that is not false.
        $maxCoord = 0;
        foreach ($this->_signs as $sign) {
            if ($sign === false) {
                continue;
            }
            ++$maxCoord;
        }

        // One coord factor per possible number of matched terms (0..maxCoord).
        $overlap = 0;
        while ($overlap <= $maxCoord) {
            $this->_coord[$overlap] = $reader->getSimilarity()->coord($overlap, $maxCoord);
            ++$overlap;
        }
    }

    $total = 0.0;
    $matchedTerms = 0;

    foreach ($this->_terms as $termId => $term) {
        // Terms whose sign is false never contribute to the score.
        if ($this->_signs[$termId] === false) {
            continue;
        }

        // Only terms with a recorded frequency for this document count.
        if (!isset($this->_termsFreqs[$termId][$docId])) {
            continue;
        }

        ++$matchedTerms;

        // No zero-frequency check: an entry exists only for matched docs.
        $termFrequency = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
        $weightValue   = $this->_weights[$termId]->getValue();
        $fieldNorm     = $reader->norm($docId, $term->field);

        $total += $termFrequency * $weightValue * $fieldNorm;
    }

    return $total * $this->_coord[$matchedTerms] * $this->getBoost();
}
/**
 * Score calculator for non conjunction queries (not all subqueries are required)
 *
 * Returns 0 as soon as a prohibited subquery (sign === false) matches or a
 * required subquery (sign === true) does not match. Otherwise the scores of
 * all matching subqueries are summed and scaled by the cached coord factor
 * for the number of matches and by the query boost.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function _nonConjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
{
    if ($this->_coord === null) {
        $this->_coord = array();

        // Count every sign that is not false (i.e. not prohibited).
        $maxCoord = 0;
        foreach ($this->_signs as $sign) {
            if ($sign === false) {
                continue;
            }
            ++$maxCoord;
        }

        // One coord factor per possible number of matched subqueries.
        $overlap = 0;
        while ($overlap <= $maxCoord) {
            $this->_coord[$overlap] = $reader->getSimilarity()->coord($overlap, $maxCoord);
            ++$overlap;
        }
    }

    $total = 0;
    $matchedSubqueries = 0;

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $subscore = $subquery->score($docId, $reader);

        if ($subscore != 0) {
            // A prohibited subquery matched: the document is rejected.
            if ($this->_signs[$subqueryId] === false) {
                return 0;
            }

            ++$matchedSubqueries;
            $total += $subscore;
        } elseif ($this->_signs[$subqueryId] === true) {
            // A required subquery did not match: the document is rejected.
            return 0;
        }
    }

    return $total * $this->_coord[$matchedSubqueries] * $this->getBoost();
}
/**
 * Retrieve the similarity used by the index reader.
 *
 * Delegates to the getSimilarity() method of $this->_index.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 */
public function getSimilarity()
{
    $similarity = $this->_index->getSimilarity();

    return $similarity;
}
/**
 * Score calculator for sloppy phrase queries (terms sequence is fixed)
 *
 * Works in two stages:
 *  1. Build a queue of candidate phrases. Each candidate maps a term id to
 *     one position of that term in the document.
 *  2. For every candidate, compute the smallest total displacement from the
 *     expected term offsets over all start shifts in [-slop, +slop]. Each
 *     candidate whose minimum displacement is within the slop adds
 *     sloppyFreq(minDistance) to the result.
 *
 * NOTE(review): assumes every term has at least one position recorded for
 * $docId; otherwise stage 2 would read a missing $phrasePos[$termId] entry.
 * Confirm against the callers.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float accumulated sloppyFreq() contributions (0 if no candidate qualifies)
 */
public function _sloppyPhraseFreq($docId, Zend_Search_Lucene_Interface $reader)
{
    $freq = 0;

    $phraseQueue = array();
    $phraseQueue[0] = array(); // empty phrase
    $lastTerm = null;

    // Walk through the terms to create phrases.
    foreach ($this->_terms as $termId => $term) {
        // Only candidates that existed before this term are extended or forked;
        // candidates appended while processing this term are not revisited.
        $queueSize = count($phraseQueue);
        $firstPass = true;

        // Walk through the term positions.
        // Each term position produces a set of phrases.
        foreach ($this->_termsPositions[$termId][$docId] as $termPosition) {
            if ($firstPass) {
                // First position of this term: written in place into every
                // pre-existing candidate, without any slop check.
                for ($count = 0; $count < $queueSize; $count++) {
                    $phraseQueue[$count][$termId] = $termPosition;
                }
            } else {
                // Further positions: fork each pre-existing candidate, but only
                // if the gap to the previous term's position deviates from the
                // expected offset gap by no more than the slop.
                for ($count = 0; $count < $queueSize; $count++) {
                    if ($lastTerm !== null && abs($termPosition - $phraseQueue[$count][$lastTerm] - ($this->_offsets[$termId] - $this->_offsets[$lastTerm])) > $this->_slop) {
                        continue;
                    }

                    // Copy the candidate (it already holds the first-pass
                    // position for this term) and overwrite that position.
                    $newPhraseId = count($phraseQueue);
                    $phraseQueue[$newPhraseId] = $phraseQueue[$count];
                    $phraseQueue[$newPhraseId][$termId] = $termPosition;
                }
            }

            $firstPass = false;
        }
        $lastTerm = $termId;
    }

    // Evaluate every candidate phrase.
    foreach ($phraseQueue as $phrasePos) {
        $minDistance = null;

        // Try each start shift within the slop and keep the smallest distance.
        for ($shift = -$this->_slop; $shift <= $this->_slop; $shift++) {
            $distance = 0;
            // reset() yields the first element of each array: the candidate's
            // first stored position and the first configured offset.
            $start = reset($phrasePos) - reset($this->_offsets) + $shift;

            foreach ($this->_terms as $termId => $term) {
                // Displacement of this term from where it would sit if the
                // phrase began at $start.
                $distance += abs($phrasePos[$termId] - $this->_offsets[$termId] - $start);

                // Already beyond the slop; no need to add the remaining terms.
                if ($distance > $this->_slop) {
                    break;
                }
            }

            if ($minDistance === null || $distance < $minDistance) {
                $minDistance = $distance;
            }
        }

        // Only candidates within the allowed slop contribute to the frequency.
        if ($minDistance <= $this->_slop) {
            $freq += $reader->getSimilarity()->sloppyFreq($minDistance);
        }
    }

    return $freq;
}
/**
 * Score calculator for the case where every term's frequency is read for
 * the document (no per-term sign or match check is made).
 *
 * The coord factor, coord(n, n) with n = count($this->_terms), is computed
 * once and cached in $this->_coord. The score is the sum over all terms of
 * tf * weight value * field norm, scaled by that coord factor and the
 * query boost.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
{
    if ($this->_coord === null) {
        $similarity = $reader->getSimilarity();
        $termCount  = count($this->_terms);

        $this->_coord = $similarity->coord($termCount, $termCount);
    }

    $total = 0.0;

    foreach ($this->_terms as $termId => $term) {
        $termFrequency = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
        $weightValue   = $this->_weights[$termId]->getValue();
        $fieldNorm     = $reader->norm($docId, $term->field);

        $total += $termFrequency * $weightValue * $fieldNorm;
    }

    return $total * $this->_coord * $this->getBoost();
}