/**
 * Execute the query in the context of an index reader.
 *
 * Stores the matching documents (as array keys) and the term frequencies
 * for the query term, then makes sure the query weight is initialized.
 *
 * @param Zend_Search_Lucene_Interface $reader      index reader to query
 * @param mixed                        $docsFilter  optional filter, forwarded as-is to the reader
 */
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
    $matchingDocIds = $reader->termDocs($this->_term, $docsFilter);

    // Flip the list so that document IDs become the keys of the vector.
    $this->_docVector = array_flip($matchingDocIds);
    $this->_termFreqs = $reader->termFreqs($this->_term, $docsFilter);

    // Initialize weight if it's not done yet
    $this->_initWeight($reader);
}
/**
 * Remove a post from the index.
 *
 * Looks up every indexed document whose 'postid' field matches the post's
 * id and deletes each of them from the index.
 *
 * @param Post $post the post being deleted
 */
public function delete_post($post)
{
    $postTerm = new Zend_Search_Lucene_Index_Term($post->id, 'postid');

    foreach ($this->_index->termDocs($postTerm) as $docId) {
        $this->_index->delete($docId);
    }
}
/**
 * Get the Lucene document IDs by searching the specified field.
 *
 * When no (or an empty/falsy) search field is given, the default ID field
 * ($this->_idKey) is used instead.
 *
 * @param string $value        value to look for
 * @param string $searchField  name of the field to search; optional
 * @return array               whatever the search index's termDocs() returns for the term
 */
public function getDocumentIds($value, $searchField = null)
{
    $field      = $searchField ? $searchField : $this->_idKey;
    $lookupTerm = new Zend_Search_Lucene_Index_Term($value, $field);

    return $this->_searchIndex->termDocs($lookupTerm);
}
/**
 * Calculate the result vector for a non-conjunction query
 * (like '+something -another').
 *
 * Terms are split by sign into required (true), prohibited (false) and
 * optional (anything else). The result is the intersection of all required
 * term document sets (or the union of the optional ones when there is no
 * required term), minus every prohibited document, sorted by document ID.
 * Term frequencies are collected for every term along the way.
 *
 * @param Zend_Search_Lucene_Interface $reader
 */
private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
{
    $requiredSets       = array();
    $requiredSetSizes   = array();
    $requiredSetTermIds = array(); // is used to prevent arrays comparison
    $optional           = array();
    $prohibited         = array();

    foreach ($this->_terms as $termId => $term) {
        // Document IDs become keys, so unions/intersections can work on keys.
        $docSet = array_flip($reader->termDocs($term));

        if ($this->_signs[$termId] === true) {
            // required
            $requiredSets[]       = $docSet;
            $requiredSetSizes[]   = count($docSet);
            $requiredSetTermIds[] = $termId;
        } elseif ($this->_signs[$termId] === false) {
            // prohibited: array union
            $prohibited += $docSet;
        } else {
            // neither required, nor prohibited: array union
            $optional += $docSet;
        }

        $this->_termsFreqs[$termId] = $reader->termFreqs($term);
    }

    // Order the required sets by increasing cardinality; the term IDs act as
    // a secondary sort column so the document sets themselves are never compared.
    array_multisort($requiredSetSizes,   SORT_ASC, SORT_NUMERIC,
                    $requiredSetTermIds, SORT_ASC, SORT_NUMERIC,
                    $requiredSets);

    $required = null;
    foreach ($requiredSets as $candidate) {
        if ($required !== null) {
            // Manual key intersection (workaround for array_intersect_key() slowness).
            $survivors = array();
            foreach ($required as $docId => $payload) {
                if (isset($candidate[$docId])) {
                    $survivors[$docId] = $payload;
                }
            }
            $required = $survivors;
        } else {
            $required = $candidate;
        }

        if (count($required) == 0) {
            // Empty result set, we don't need to check other terms
            break;
        }
    }

    // Without any required term the optional matches form the result.
    $result = ($required !== null) ? $required : $optional;

    if (count($prohibited) != 0) {
        // Manual key difference (workaround for array_diff_key() slowness):
        // walk whichever of the two sets is smaller.
        $pruned = $result;

        if (count($result) < count($prohibited)) {
            foreach ($result as $docId => $payload) {
                if (isset($prohibited[$docId])) {
                    unset($pruned[$docId]);
                }
            }
        } else {
            foreach ($prohibited as $docId => $payload) {
                unset($pruned[$docId]);
            }
        }

        $result = $pruned;
    }

    ksort($result, SORT_NUMERIC);
    $this->_resVector = $result;
}
/**
 * Returns IDs of all the documents containing term.
 *
 * Pure delegation: both arguments are handed unchanged to the wrapped index.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 */
public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    $documentIds = $this->_index->termDocs($term, $docsFilter);

    return $documentIds;
}
/**
 * Execute query in context of index reader.
 * It also initializes necessary internal structures.
 *
 * Builds the result vector as the intersection of the document sets of all
 * query terms and records the positions of every term.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter  accepted, but not referenced by this method body
 */
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
    $this->_resVector = null;

    if (count($this->_terms) == 0) {
        $this->_resVector = array();
    }

    $docSets       = array();
    $docSetSizes   = array();
    $docSetTermIds = array(); // is used to prevent arrays comparison

    foreach ($this->_terms as $termId => $term) {
        // Document IDs become keys so that intersection can be done via isset().
        $docSet = array_flip($reader->termDocs($term));

        $docSets[]       = $docSet;
        $docSetSizes[]   = count($docSet);
        $docSetTermIds[] = $termId;

        $this->_termsPositions[$termId] = $reader->termPositions($term);
    }

    // Sort the document sets in order of increasing cardinality.
    array_multisort($docSetSizes,   SORT_ASC, SORT_NUMERIC,
                    $docSetTermIds, SORT_ASC, SORT_NUMERIC,
                    $docSets);

    $matches = $this->_resVector;
    foreach ($docSets as $candidate) {
        if ($matches !== null) {
            // Manual key intersection (workaround for array_intersect_key() slowness).
            $kept = array();
            foreach ($matches as $docId => $payload) {
                if (isset($candidate[$docId])) {
                    $kept[$docId] = $payload;
                }
            }
            $matches = $kept;
        } else {
            $matches = $candidate;
        }

        if (count($matches) == 0) {
            // Empty result set, we don't need to check other terms
            break;
        }
    }
    $this->_resVector = $matches;

    // No ksort() here: per the original note, docs are returned ordered and
    // the intersection above doesn't change the order of the elements.

    // Initialize weight if it's not done yet
    $this->_initWeight($reader);
}
/**
 * Returns IDs of all the documents containing term.
 *
 * Pure delegation to the wrapped index.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return array
 */
public function termDocs(Zend_Search_Lucene_Index_Term $term)
{
    $documentIds = $this->_index->termDocs($term);

    return $documentIds;
}
/**
 * Look up the index document ID stored for a node.
 *
 * Searches the "node_url" field for the node's URL (converted with
 * SystemTextEncoding::toUTF8) and returns the first matching document ID.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @param AJXP_Node $ajxpNode
 * @return Number|null  first ID reported by termDocs(), or null when there is no match
 */
public function getIndexedDocumentId($index, $ajxpNode)
{
    $encodedUrl = SystemTextEncoding::toUTF8($ajxpNode->getUrl());
    $urlTerm    = new Zend_Search_Lucene_Index_Term($encodedUrl, "node_url");
    $matches    = $index->termDocs($urlTerm);

    return count($matches) ? $matches[0] : null;
}
/**
 * Calculate the result vector for a non-conjunction query
 * (like '+something -another').
 *
 * Required terms (sign === true) are intersected, prohibited terms
 * (sign === false) and optional terms (any other sign) are each unioned.
 * The result is the required intersection when at least one required term
 * exists, otherwise the optional union; prohibited documents are removed
 * and the vector is sorted by document ID. Term frequencies are recorded
 * for every term.
 *
 * @param Zend_Search_Lucene_Interface $reader
 */
private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
{
    $required   = null;
    $optional   = array();
    $prohibited = array();

    foreach ($this->_terms as $termId => $term) {
        // Document IDs become keys so the key-based set functions apply.
        $docSet = array_flip($reader->termDocs($term));

        if ($this->_signs[$termId] === true) {
            // required: first set is taken as is, later ones are intersected
            $required = ($required === null)
                      ? $docSet
                      : array_intersect_key($required, $docSet);
        } elseif ($this->_signs[$termId] === false) {
            // prohibited: array union
            $prohibited += $docSet;
        } else {
            // neither required, nor prohibited: array union
            $optional += $docSet;
        }

        $this->_termsFreqs[$termId] = $reader->termFreqs($term);
    }

    // Fall back to the optional matches when no term was required.
    $base = ($required !== null) ? $required : $optional;

    if (count($prohibited) > 0) {
        $this->_resVector = array_diff_key($base, $prohibited);
    } else {
        $this->_resVector = $base;
    }

    ksort($this->_resVector, SORT_NUMERIC);
}
/**
 * Execute query in context of index reader.
 * It also initializes necessary internal structures.
 *
 * Intersects the document sets of the query terms one by one, recording
 * term positions as it goes, and stops as soon as the intersection is empty
 * (positions are not fetched for the term that emptied it, nor for later ones).
 *
 * @param Zend_Search_Lucene_Interface $reader
 */
public function execute(Zend_Search_Lucene_Interface $reader)
{
    // An empty term list yields an empty result; otherwise start "unset".
    $this->_resVector = (count($this->_terms) == 0) ? array() : null;

    foreach ($this->_terms as $termId => $term) {
        // Document IDs become keys so that array_intersect_key() applies.
        $docSet = array_flip($reader->termDocs($term));

        $this->_resVector = ($this->_resVector === null)
                          ? $docSet
                          : array_intersect_key($this->_resVector, $docSet);

        if (count($this->_resVector) == 0) {
            // Empty result set, we don't need to check other terms
            break;
        }

        $this->_termsPositions[$termId] = $reader->termPositions($term);
    }

    ksort($this->_resVector, SORT_NUMERIC);

    // Initialize weight if it's not done yet
    $this->_initWeight($reader);
}
/**
 * Execute the query against an index reader.
 *
 * Collects, for every query term, its set of matching documents and its
 * positions, then reduces the sets (smallest first) to their intersection,
 * which becomes the result vector. Finally makes sure the weight is
 * initialized.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @param mixed $docsFilter  accepted, but not referenced by this method body
 */
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
    $this->_resVector = null;

    if (count($this->_terms) == 0) {
        $this->_resVector = array();
    }

    $termDocSets = array();
    $setSizes    = array();
    $setTermIds  = array(); // secondary sort column; keeps the sets themselves from being compared

    foreach ($this->_terms as $termId => $term) {
        $flipped = array_flip($reader->termDocs($term));

        $termDocSets[] = $flipped;
        $setSizes[]    = count($flipped);
        $setTermIds[]  = $termId;

        $this->_termsPositions[$termId] = $reader->termPositions($term);
    }

    // Smallest document set first.
    array_multisort($setSizes,   SORT_ASC, SORT_NUMERIC,
                    $setTermIds, SORT_ASC, SORT_NUMERIC,
                    $termDocSets);

    $intersection = $this->_resVector;
    foreach ($termDocSets as $currentSet) {
        if ($intersection === null) {
            $intersection = $currentSet;
        } else {
            // Keep only the documents that are also present in the current set.
            $narrowed = array();
            foreach ($intersection as $docId => $payload) {
                if (isset($currentSet[$docId])) {
                    $narrowed[$docId] = $payload;
                }
            }
            $intersection = $narrowed;
        }

        if (count($intersection) == 0) {
            // Nothing can match any more; skip the remaining sets.
            break;
        }
    }
    $this->_resVector = $intersection;

    $this->_initWeight($reader);
}