Пример #1
0
 /**
  * Run this single-term query against the given index reader.
  *
  * Stores the document IDs reported by the reader, flipped so that the IDs
  * become array keys, in $this->_docVector, stores the reader's term
  * frequencies in $this->_termFreqs, and finally calls $this->_initWeight().
  *
  * @param Zend_Search_Lucene_Interface $reader     index reader to query
  * @param mixed                        $docsFilter optional filter, handed to the reader unchanged
  */
 public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
 {
     $matchingDocIds = $reader->termDocs($this->_term, $docsFilter);
     $this->_docVector = array_flip($matchingDocIds);

     $this->_termFreqs = $reader->termFreqs($this->_term, $docsFilter);

     // Set up the query weight in case that has not happened yet.
     $this->_initWeight($reader);
 }
 /**
  * Remove a post from the index.
  *
  * Looks up every document whose 'postid' term equals the post's id and
  * deletes each of them from $this->_index.
  *
  * @param Post $post the post being deleted
  */
 public function delete_post($post)
 {
     $postTerm = new Zend_Search_Lucene_Index_Term($post->id, 'postid');

     foreach ($this->_index->termDocs($postTerm) as $documentId) {
         $this->_index->delete($documentId);
     }
 }
Пример #3
0
 /**
  * Get the Lucene document IDs by searching the specified field for a value.
  * When no (or an empty) search field is given, $this->_idKey is used instead.
  *
  * @param string $value       term text to look up
  * @param string $searchField name of the field to search; optional
  * @return array whatever $this->_searchIndex->termDocs() returns for the term
  */
 public function getDocumentIds($value, $searchField = null)
 {
     $field = $searchField ? $searchField : $this->_idKey;
     $lookupTerm = new Zend_Search_Lucene_Index_Term($value, $field);

     return $this->_searchIndex->termDocs($lookupTerm);
 }
Пример #4
0
 /**
  * Calculate the result vector for a non-conjunction query
  * (like '+something -another').
  *
  * Terms whose sign is true are required: their document sets are intersected,
  * smallest set first. Terms whose sign is false are prohibited: their
  * documents are removed from the result. All other terms are optional, and
  * their documents form the result only when there is no required term at all.
  *
  * Term frequencies are stored in $this->_termsFreqs for every term, and the
  * final vector is stored in $this->_resVector with its keys sorted numerically.
  *
  * @param Zend_Search_Lucene_Interface $reader
  */
 private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
 {
     $mustSets = array();
     $mustSizes = array();
     // The term ids serve as the secondary sort key, which keeps
     // array_multisort() from ever comparing the document arrays themselves.
     $mustTermIds = array();
     $mayDocs = array();
     $bannedDocs = array();

     foreach ($this->_terms as $termId => $term) {
         // Flip so that document IDs become keys (cheap isset() / union).
         $docSet = array_flip($reader->termDocs($term));

         if ($this->_signs[$termId] === true) {
             // Required term: remember its set together with the sort keys.
             $mustSets[] = $docSet;
             $mustSizes[] = count($docSet);
             $mustTermIds[] = $termId;
         } elseif ($this->_signs[$termId] === false) {
             // Prohibited term: union by key.
             $bannedDocs += $docSet;
         } else {
             // Optional term: union by key.
             $mayDocs += $docSet;
         }

         $this->_termsFreqs[$termId] = $reader->termFreqs($term);
     }

     // Process the required sets in order of increasing size.
     array_multisort($mustSizes, SORT_ASC, SORT_NUMERIC, $mustTermIds, SORT_ASC, SORT_NUMERIC, $mustSets);

     $mustDocs = null;
     foreach ($mustSets as $candidateSet) {
         if ($mustDocs === null) {
             $mustDocs = $candidateSet;
         } else {
             // Hand-written key intersection; the original author avoided
             // array_intersect_key() here because of its slowness.
             $kept = array();
             foreach ($mustDocs as $docId => $flag) {
                 if (isset($candidateSet[$docId])) {
                     $kept[$docId] = $flag;
                 }
             }
             $mustDocs = $kept;
         }

         if (count($mustDocs) == 0) {
             // Nothing can match any more, the remaining sets are irrelevant.
             break;
         }
     }

     $this->_resVector = ($mustDocs !== null) ? $mustDocs : $mayDocs;

     if (count($bannedDocs) != 0) {
         // Hand-written key difference (array_diff_key() was avoided for speed);
         // always iterate over the smaller of the two arrays.
         $remaining = $this->_resVector;
         if (count($this->_resVector) >= count($bannedDocs)) {
             foreach ($bannedDocs as $docId => $flag) {
                 unset($remaining[$docId]);
             }
         } else {
             foreach ($this->_resVector as $docId => $flag) {
                 if (isset($bannedDocs[$docId])) {
                     unset($remaining[$docId]);
                 }
             }
         }
         $this->_resVector = $remaining;
     }

     ksort($this->_resVector, SORT_NUMERIC);
 }
Пример #5
0
 /**
  * Returns the IDs of all documents containing the term.
  *
  * Pure proxy: both arguments are passed straight to $this->_index->termDocs()
  * and its result is returned untouched.
  *
  * @param Zend_Search_Lucene_Index_Term $term
  * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
  * @return array
  */
 public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
 {
     $documentIds = $this->_index->termDocs($term, $docsFilter);

     return $documentIds;
 }
Пример #6
0
 /**
  * Execute the query in the context of an index reader.
  * It also initializes the necessary internal structures.
  *
  * For every term the reader's document IDs are collected (flipped so the IDs
  * are keys) and the term positions are stored in $this->_termsPositions. The
  * document sets are then intersected, smallest first, into $this->_resVector.
  *
  * Note: $docsFilter is accepted but is not referenced anywhere in this body.
  *
  * @param Zend_Search_Lucene_Interface $reader
  * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
  */
 public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
 {
     $this->_resVector = null;
     if (!count($this->_terms)) {
         // No terms at all: the result is simply empty.
         $this->_resVector = array();
     }

     $docSets = array();
     $docSetSizes = array();
     // Term ids are the secondary sort key, so array_multisort() never has
     // to compare the document arrays themselves.
     $termIds = array();

     foreach ($this->_terms as $termId => $term) {
         $docSet = array_flip($reader->termDocs($term));

         $docSets[] = $docSet;
         $docSetSizes[] = count($docSet);
         $termIds[] = $termId;

         $this->_termsPositions[$termId] = $reader->termPositions($term);
     }

     // Intersect starting with the smallest document set.
     array_multisort($docSetSizes, SORT_ASC, SORT_NUMERIC, $termIds, SORT_ASC, SORT_NUMERIC, $docSets);

     foreach ($docSets as $candidateSet) {
         if ($this->_resVector === null) {
             $this->_resVector = $candidateSet;
         } else {
             // Hand-written key intersection; the original author avoided
             // array_intersect_key() here because of its slowness.
             $kept = array();
             foreach ($this->_resVector as $docId => $flag) {
                 if (isset($candidateSet[$docId])) {
                     $kept[$docId] = $flag;
                 }
             }
             $this->_resVector = $kept;
         }

         if (count($this->_resVector) == 0) {
             // Already empty, the remaining terms cannot add anything.
             break;
         }
     }

     // No final ksort(): the intersection keeps the key order of the vector it
     // starts from, and (per the original author's note) the reader returns
     // the documents already ordered.

     // Set up the query weight in case that has not happened yet.
     $this->_initWeight($reader);
 }
Пример #7
0
 /**
  * Returns the IDs of all documents containing the term.
  *
  * Pure proxy: the term is passed straight to $this->_index->termDocs() and
  * its result is returned untouched.
  *
  * @param Zend_Search_Lucene_Index_Term $term
  * @return array
  */
 public function termDocs(Zend_Search_Lucene_Index_Term $term)
 {
     $documentIds = $this->_index->termDocs($term);

     return $documentIds;
 }
 /**
  * Look up the indexed document that belongs to a node.
  *
  * Searches the "node_url" field for the node's URL (converted with
  * SystemTextEncoding::toUTF8()) and returns the first entry of the result.
  *
  * @param  Zend_Search_Lucene_Interface $index
  * @param AJXP_Node $ajxpNode
  * @return mixed|null first element of $index->termDocs() (presumably a numeric
  *                    document id), or null when nothing matched
  */
 public function getIndexedDocumentId($index, $ajxpNode)
 {
     $nodeUrl = SystemTextEncoding::toUTF8($ajxpNode->getUrl());
     $matches = $index->termDocs(new Zend_Search_Lucene_Index_Term($nodeUrl, "node_url"));

     return count($matches) ? $matches[0] : null;
 }
Пример #9
0
 /**
  * Calculate the result vector for a non-conjunction query
  * (like '+something -another').
  *
  * Terms whose sign is true are required (their document sets are
  * intersected), terms whose sign is false are prohibited (their documents are
  * removed), all other terms are optional and only count when no required term
  * exists. Term frequencies are stored in $this->_termsFreqs for every term;
  * the result goes to $this->_resVector with its keys sorted numerically.
  *
  * @param Zend_Search_Lucene_Interface $reader
  */
 private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
 {
     $mustDocs = null;
     $mayDocs = array();
     $bannedDocs = array();

     foreach ($this->_terms as $termId => $term) {
         // Flip so that document IDs become keys.
         $docSet = array_flip($reader->termDocs($term));

         if ($this->_signs[$termId] === true) {
             // Required term: start with its set, afterwards intersect by key.
             $mustDocs = ($mustDocs === null) ? $docSet : array_intersect_key($mustDocs, $docSet);
         } elseif ($this->_signs[$termId] === false) {
             // Prohibited term: union by key.
             $bannedDocs += $docSet;
         } else {
             // Optional term: union by key.
             $mayDocs += $docSet;
         }

         $this->_termsFreqs[$termId] = $reader->termFreqs($term);
     }

     // Required documents win over optional ones whenever a required term exists.
     $candidates = ($mustDocs !== null) ? $mustDocs : $mayDocs;

     $this->_resVector = (count($bannedDocs) > 0) ? array_diff_key($candidates, $bannedDocs) : $candidates;

     ksort($this->_resVector, SORT_NUMERIC);
 }
Пример #10
0
 /**
  * Execute the query in the context of an index reader.
  * It also initializes the necessary internal structures.
  *
  * The document sets of the terms are intersected one by one, in the order of
  * $this->_terms, into $this->_resVector. Term positions are stored in
  * $this->_termsPositions for each term processed before the result runs empty.
  *
  * @param Zend_Search_Lucene_Interface $reader
  */
 public function execute(Zend_Search_Lucene_Interface $reader)
 {
     $this->_resVector = null;
     if (!count($this->_terms)) {
         // No terms at all: the result is simply empty.
         $this->_resVector = array();
     }

     foreach ($this->_terms as $termId => $term) {
         // Flip so that document IDs become keys.
         $docSet = array_flip($reader->termDocs($term));

         $this->_resVector = ($this->_resVector === null)
             ? $docSet
             : array_intersect_key($this->_resVector, $docSet);

         if (count($this->_resVector) == 0) {
             // Already empty, the remaining terms cannot add anything.
             break;
         }

         $this->_termsPositions[$termId] = $reader->termPositions($term);
     }

     ksort($this->_resVector, SORT_NUMERIC);

     // Set up the query weight in case that has not happened yet.
     $this->_initWeight($reader);
 }
Пример #11
0
 /**
  * Execute the query in the context of an index reader.
  *
  * Collects, for every term, the reader's document IDs (flipped so the IDs are
  * keys) and stores the term positions in $this->_termsPositions. The document
  * sets are then intersected, smallest first, into $this->_resVector, and
  * $this->_initWeight() is called at the end.
  *
  * Note: $docsFilter is accepted but is not referenced anywhere in this body.
  *
  * @param Zend_Search_Lucene_Interface $reader
  * @param mixed                        $docsFilter unused here
  */
 public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
 {
     $this->_resVector = null;
     if (!count($this->_terms)) {
         // No terms at all: the result is simply empty.
         $this->_resVector = array();
     }

     $termDocSets = array();
     $setSizes = array();
     // Secondary sort key; keeps array_multisort() from comparing the sets.
     $setTermIds = array();

     foreach ($this->_terms as $termId => $term) {
         $currentSet = array_flip($reader->termDocs($term));

         $termDocSets[] = $currentSet;
         $setSizes[] = count($currentSet);
         $setTermIds[] = $termId;

         $this->_termsPositions[$termId] = $reader->termPositions($term);
     }

     // Smallest document set first.
     array_multisort($setSizes, SORT_ASC, SORT_NUMERIC, $setTermIds, SORT_ASC, SORT_NUMERIC, $termDocSets);

     foreach ($termDocSets as $nextSet) {
         if ($this->_resVector === null) {
             $this->_resVector = $nextSet;
         } else {
             // Keep only the documents that also occur in the next set.
             $survivors = array();
             foreach ($this->_resVector as $docId => $flag) {
                 if (isset($nextSet[$docId])) {
                     $survivors[$docId] = $flag;
                 }
             }
             $this->_resVector = $survivors;
         }

         if (count($this->_resVector) == 0) {
             // Already empty, the remaining sets cannot add anything.
             break;
         }
     }

     $this->_initWeight($reader);
 }