Exemple #1
0
 /**
  * Run the query against the given index reader.
  *
  * Stores the flipped termDocs() result (keyed by the values termDocs()
  * returns, presumably document ids) and the term frequencies for this
  * query's term, then makes sure the weight is initialized.
  *
  * @param Zend_Search_Lucene_Interface $reader
  * @param mixed $docsFilter  Optional filter, passed through unchanged to the reader
  */
 public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
 {
     // Flip the list so that membership checks become key lookups.
     $matchedDocs = $reader->termDocs($this->_term, $docsFilter);
     $this->_docVector = array_flip($matchedDocs);

     $this->_termFreqs = $reader->termFreqs($this->_term, $docsFilter);

     // The weight may not have been set up yet.
     $this->_initWeight($reader);
 }
 /**
  * Build the result vector for a query that is not a pure conjunction
  * (for example '+something -another').
  *
  * Vectors of required terms are intersected, documents of prohibited terms
  * are removed from the outcome, and the union of optional terms is used as
  * the result only when the query has no required term at all. Term
  * frequencies are recorded for every term, whatever its sign.
  *
  * @param Zend_Search_Lucene_Interface $reader
  */
 private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
 {
     $mandatorySets  = array();
     $mandatorySizes = array();
     // Term ids serve as a secondary sort key, so array_multisort() never
     // has to compare the document vectors themselves.
     $mandatoryIds   = array();
     $optionalDocs   = array();
     $excludedDocs   = array();

     foreach ($this->_terms as $termId => $term) {
         $docSet = array_flip($reader->termDocs($term));

         if ($this->_signs[$termId] === true) {
             // Required term: keep its vector for the intersection step.
             $mandatorySets[]  = $docSet;
             $mandatorySizes[] = count($docSet);
             $mandatoryIds[]   = $termId;
         } elseif ($this->_signs[$termId] === false) {
             // Prohibited term: union into the exclusion set.
             $excludedDocs += $docSet;
         } else {
             // Optional term: union into the fallback set.
             $optionalDocs += $docSet;
         }

         $this->_termsFreqs[$termId] = $reader->termFreqs($term);
     }

     // Smallest vectors first, so the running intersection starts small.
     array_multisort(
         $mandatorySizes, SORT_ASC, SORT_NUMERIC,
         $mandatoryIds, SORT_ASC, SORT_NUMERIC,
         $mandatorySets
     );

     $matched = null;
     foreach ($mandatorySets as $candidateSet) {
         if ($matched !== null) {
             // Hand-written key intersection, kept as a workaround for the
             // array_intersect_key() slowness problem.
             $survivors = array();
             foreach ($matched as $docId => $payload) {
                 if (isset($candidateSet[$docId])) {
                     $survivors[$docId] = $payload;
                 }
             }
             $matched = $survivors;
         } else {
             $matched = $candidateSet;
         }

         if (count($matched) == 0) {
             // Nothing can match any more; the remaining terms are irrelevant.
             break;
         }
     }

     // Optional documents only matter when there is no required term.
     $this->_resVector = ($matched !== null) ? $matched : $optionalDocs;

     if (count($excludedDocs) != 0) {
         // Hand-written key difference, kept as a workaround for the
         // array_diff_key() slowness problem. Walk whichever set is smaller.
         $remaining = $this->_resVector;
         if (count($this->_resVector) < count($excludedDocs)) {
             foreach ($this->_resVector as $docId => $payload) {
                 if (isset($excludedDocs[$docId])) {
                     unset($remaining[$docId]);
                 }
             }
         } else {
             foreach ($excludedDocs as $docId => $payload) {
                 unset($remaining[$docId]);
             }
         }
         $this->_resVector = $remaining;
     }

     ksort($this->_resVector, SORT_NUMERIC);
 }
 /**
  * Returns an array of all term freqs.
  * Return array structure: array( docId => freq, ...)
  *
  * Delegates directly to the wrapped index object; both arguments are
  * passed through unchanged.
  *
  * @param Zend_Search_Lucene_Index_Term $term
  * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
  * @return array
  */
 public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
 {
     return $this->_index->termFreqs($term, $docsFilter);
 }
Exemple #4
0
 /**
  * Returns an array of all term freqs.
  * Return array structure: array( docId => freq, ...)
  *
  * Delegates directly to the wrapped index object.
  *
  * @param Zend_Search_Lucene_Index_Term $term
  * @return array
  */
 public function termFreqs(Zend_Search_Lucene_Index_Term $term)
 {
     return $this->_index->termFreqs($term);
 }
Exemple #5
0
 /**
  * Build the result vector for a query that is not a pure conjunction
  * (for example '+something -another').
  *
  * Required term vectors are intersected, optional term vectors are united
  * and used only when no required term exists, and every document of a
  * prohibited term is removed from the outcome. Term frequencies are
  * recorded for every term.
  *
  * @param Zend_Search_Lucene_Interface $reader
  */
 private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
 {
     $mustMatch    = null;
     $mayMatch     = array();
     $mustNotMatch = array();

     foreach ($this->_terms as $termId => $term) {
         $docSet = array_flip($reader->termDocs($term));

         if ($this->_signs[$termId] === true) {
             // Required: the first vector seeds the set, later ones narrow it.
             $mustMatch = ($mustMatch === null)
                 ? $docSet
                 : array_intersect_key($mustMatch, $docSet);
         } elseif ($this->_signs[$termId] === false) {
             // Prohibited: array union.
             $mustNotMatch += $docSet;
         } else {
             // Neither required nor prohibited: array union.
             $mayMatch += $docSet;
         }

         $this->_termsFreqs[$termId] = $reader->termFreqs($term);
     }

     // Optional documents form the result only without any required term.
     $candidates = ($mustMatch !== null) ? $mustMatch : $mayMatch;

     if (count($mustNotMatch) > 0) {
         $candidates = array_diff_key($candidates, $mustNotMatch);
     }

     $this->_resVector = $candidates;
     ksort($this->_resVector, SORT_NUMERIC);
 }
Exemple #6
0
 /**
  * Compute the result vector for a query mixing required, prohibited and
  * optional terms.
  *
  * The result is the intersection of all required term vectors, or the
  * union of the optional ones when there is no required term, minus every
  * document belonging to a prohibited term. The result is sorted by key,
  * and term frequencies are stored for each term.
  *
  * @param Zend_Search_Lucene_Interface $reader
  */
 private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
 {
     $mustVectors = array();
     $mustSizes   = array();
     // Secondary sort key: keeps array_multisort() from comparing vectors.
     $mustTermIds = array();
     $mayDocs     = array();
     $bannedDocs  = array();

     foreach ($this->_terms as $termId => $term) {
         $docSet = array_flip($reader->termDocs($term));

         if ($this->_signs[$termId] === true) {
             $mustVectors[] = $docSet;
             $mustSizes[]   = count($docSet);
             $mustTermIds[] = $termId;
         } elseif ($this->_signs[$termId] === false) {
             $bannedDocs += $docSet;
         } else {
             $mayDocs += $docSet;
         }

         $this->_termsFreqs[$termId] = $reader->termFreqs($term);
     }

     // Order required vectors by ascending size.
     array_multisort($mustSizes, SORT_ASC, SORT_NUMERIC, $mustTermIds, SORT_ASC, SORT_NUMERIC, $mustVectors);

     if (count($mustVectors) > 0) {
         // Seed with the smallest vector, then narrow it with each further one.
         $result = array_shift($mustVectors);
         foreach ($mustVectors as $nextVector) {
             if (count($result) == 0) {
                 // Already empty: no later term can add documents back.
                 break;
             }
             $kept = array();
             foreach ($result as $docId => $value) {
                 if (isset($nextVector[$docId])) {
                     $kept[$docId] = $value;
                 }
             }
             $result = $kept;
         }
     } else {
         // No required terms: fall back to the optional documents.
         $result = $mayDocs;
     }

     if (count($bannedDocs) != 0) {
         // Remove prohibited documents, walking whichever set is smaller.
         $remaining = $result;
         if (count($result) < count($bannedDocs)) {
             foreach ($result as $docId => $value) {
                 if (isset($bannedDocs[$docId])) {
                     unset($remaining[$docId]);
                 }
             }
         } else {
             foreach ($bannedDocs as $docId => $value) {
                 unset($remaining[$docId]);
             }
         }
         $result = $remaining;
     }

     $this->_resVector = $result;
     ksort($this->_resVector, SORT_NUMERIC);
 }