/**
 * Execute query in context of index reader.
 *
 * Fills $this->_resVector with the documents that contain every term of the query
 * (keys are the values returned by termDocs(), flipped into array keys), records
 * the reader's position data for each term in $this->_termsPositions, and finally
 * makes sure the query weight is initialized.
 *
 * NOTE(review): $docsFilter is accepted but never forwarded to termDocs() or
 * termPositions() in this method - confirm whether that is intentional.
 *
 * @param \Zend\Search\Lucene\IndexInterface $reader
 * @param \Zend\Search\Lucene\Index\DocsFilter|null $docsFilter
 */
public function execute(Lucene\IndexInterface $reader, $docsFilter = null)
{
    // A query without terms matches nothing; otherwise the result is seeded
    // from the first (smallest) document set further down.
    $this->_resVector = (count($this->_terms) == 0) ? array() : null;

    $docSets     = array();
    $docSetSizes = array();
    $docSetIds   = array(); // unique secondary sort key, so the sort never has to compare the doc sets themselves
    foreach ($this->_terms as $termId => $term) {
        $docSet = array_flip($reader->termDocs($term));

        $docSets[]     = $docSet;
        $docSetSizes[] = count($docSet);
        $docSetIds[]   = $termId;

        $this->_termsPositions[$termId] = $reader->termPositions($term);
    }

    // Order the document sets by ascending size, so the running intersection
    // starts from the smallest one.
    array_multisort($docSetSizes, SORT_ASC, SORT_NUMERIC,
                    $docSetIds,   SORT_ASC, SORT_NUMERIC,
                    $docSets);

    $matches = $this->_resVector;
    foreach ($docSets as $docSet) {
        if ($matches !== null) {
            // Hand-written equivalent of array_intersect_key($matches, $docSet),
            // kept as a workaround for the array_intersect_key() slowness problem.
            $narrowed = array();
            foreach ($matches as $docId => $value) {
                if (isset($docSet[$docId])) {
                    $narrowed[$docId] = $value;
                }
            }
            $matches = $narrowed;
        } else {
            $matches = $docSet;
        }

        if (count($matches) == 0) {
            // Nothing left to intersect with; the remaining terms cannot add matches.
            break;
        }
    }
    // No ksort() here: the filtering above keeps the keys in the order of the first set.
    $this->_resVector = $matches;

    // Initialize weight if it's not done yet
    $this->_initWeight($reader);
}
/**
 * Execute query in context of index reader.
 *
 * Stores the reader's termDocs() result for the query term in $this->_docVector
 * (flipped, so the returned values become array keys), stores the matching
 * termFreqs() result in $this->_termFreqs, and makes sure the query weight is
 * initialized. Both reader calls receive the same $docsFilter.
 *
 * @param \Zend\Search\Lucene\IndexInterface $reader
 * @param \Zend\Search\Lucene\Index\DocsFilter|null $docsFilter
 */
public function execute(Lucene\IndexInterface $reader, $docsFilter = null)
{
    // termDocs() is queried before termFreqs(); keep this order, since both
    // calls are handed the same filter object.
    $matchedDocs = $reader->termDocs($this->_term, $docsFilter);
    $this->_docVector = array_flip($matchedDocs);

    $this->_termFreqs = $reader->termFreqs($this->_term, $docsFilter);

    // Initialize weight if it's not done yet
    $this->_initWeight($reader);
}
/**
 * Calculate result vector for non Conjunction query
 * (like '+something -another')
 *
 * Each term is classified through $this->_signs: true means required, false means
 * prohibited, anything else means optional. The result stored in $this->_resVector
 * is the intersection of all required terms' documents (or the union of the
 * optional terms' documents when no term is required), minus every document of a
 * prohibited term, sorted by key. Term frequencies reported by the reader are
 * stored in $this->_termsFreqs along the way.
 *
 * @param \Zend\Search\Lucene\IndexInterface $reader
 */
private function _calculateNonConjunctionResult(Lucene\IndexInterface $reader)
{
    $requiredSets     = array();
    $requiredSetSizes = array();
    $requiredSetIds   = array(); // unique secondary sort key, so the sort never has to compare the doc sets themselves
    $optionalDocs     = array();
    $prohibitedDocs   = array();

    foreach ($this->_terms as $termId => $term) {
        $docSet = array_flip($reader->termDocs($term));

        if ($this->_signs[$termId] === false) {
            // prohibited: collect the union of all prohibited documents
            $prohibitedDocs += $docSet;
        } elseif ($this->_signs[$termId] === true) {
            // required: intersected later, once every set size is known
            $requiredSets[]     = $docSet;
            $requiredSetSizes[] = count($docSet);
            $requiredSetIds[]   = $termId;
        } else {
            // neither required, nor prohibited: collect the union
            $optionalDocs += $docSet;
        }

        $this->_termsFreqs[$termId] = $reader->termFreqs($term);
    }

    // Order the required sets by ascending size, so the running intersection
    // starts from the smallest one.
    array_multisort($requiredSetSizes, SORT_ASC, SORT_NUMERIC,
                    $requiredSetIds,   SORT_ASC, SORT_NUMERIC,
                    $requiredSets);

    $requiredDocs = null;
    foreach ($requiredSets as $docSet) {
        if ($requiredDocs !== null) {
            // Hand-written equivalent of array_intersect_key($requiredDocs, $docSet),
            // kept as a workaround for the array_intersect_key() slowness problem.
            $narrowed = array();
            foreach ($requiredDocs as $docId => $value) {
                if (isset($docSet[$docId])) {
                    $narrowed[$docId] = $value;
                }
            }
            $requiredDocs = $narrowed;
        } else {
            $requiredDocs = $docSet;
        }

        if (count($requiredDocs) == 0) {
            // Empty result set, the remaining required terms cannot add matches.
            break;
        }
    }

    // Optional terms only decide the result when no term is required at all.
    $result = ($requiredDocs !== null) ? $requiredDocs : $optionalDocs;

    if (count($prohibitedDocs) != 0) {
        // Hand-written equivalent of array_diff_key($result, $prohibitedDocs),
        // kept as a workaround for the array_diff_key() slowness problem.
        // The loop always walks the smaller of the two arrays.
        if (count($result) < count($prohibitedDocs)) {
            // Iterate over an untouched copy while removing from the result.
            $candidates = $result;
            foreach ($candidates as $docId => $value) {
                if (isset($prohibitedDocs[$docId])) {
                    unset($result[$docId]);
                }
            }
        } else {
            foreach ($prohibitedDocs as $docId => $value) {
                unset($result[$docId]);
            }
        }
    }

    $this->_resVector = $result;

    // The "+=" unions above append unseen keys at the end, so key order is not
    // guaranteed at this point; sort explicitly.
    ksort($this->_resVector, SORT_NUMERIC);
}