/**
 * Re-write this query into primitive queries in the context of the given index.
 *
 * - With no terms at all, an Empty query is returned.
 * - When every term already has a field name, the query itself is returned.
 * - Otherwise a boolean query is built that carries this query's boost and holds
 *   one rewritten term subquery per term, each added with the term's sign
 *   (or true when no signs list is set).
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if (count($this->_terms) == 0) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // Look for the first term that has no field assigned
    $hasUnqualifiedTerm = false;
    foreach ($this->_terms as $candidate) {
        if ($candidate->field === null) {
            $hasUnqualifiedTerm = true;
            break;
        }
    }

    if (!$hasUnqualifiedTerm) {
        // Nothing to expand: all terms are already bound to a field
        return $this;
    }

    $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
    $booleanQuery->setBoost($this->getBoost());

    foreach ($this->_terms as $key => $currentTerm) {
        $termQuery = new Zend_Search_Lucene_Search_Query_Term($currentTerm);
        $rewritten = $termQuery->rewrite($index);

        // Without a signs list every subquery is added with sign true
        if ($this->_signs === null) {
            $sign = true;
        } else {
            $sign = $this->_signs[$key];
        }

        $booleanQuery->addSubquery($rewritten, $sign);
    }

    return $booleanQuery;
}
/**
 * Re-write this phrase query into primitive queries in the context of the given index.
 *
 * - With no terms, an Empty query is returned.
 * - When the first term has a field name, the query itself is returned.
 * - Otherwise a boolean query (carrying this query's boost) is built with one
 *   phrase subquery per field name reported by $index->getFieldNames(true);
 *   each subquery repeats the phrase terms, offsets and slop for that field.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if (count($this->_terms) == 0) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // Only the first term is inspected to decide whether the phrase is qualified
    if ($this->_terms[0]->field !== null) {
        return $this;
    }

    $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
    $booleanQuery->setBoost($this->getBoost());

    foreach ($index->getFieldNames(true) as $indexedField) {
        $fieldPhrase = new Zend_Search_Lucene_Search_Query_Phrase();
        $fieldPhrase->setSlop($this->getSlop());

        foreach ($this->_terms as $key => $unqualified) {
            $fieldPhrase->addTerm(
                new Zend_Search_Lucene_Index_Term($unqualified->text, $indexedField),
                $this->_offsets[$key]
            );
        }

        $booleanQuery->addSubquery($fieldPhrase);
    }

    return $booleanQuery;
}
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * - With no terms at all, an Empty query is returned.
 * - When every term already has a field name, the query itself is returned.
 * - Otherwise the multi-term query is transformed into a boolean query that
 *   carries this query's boost; each term becomes a term subquery which is
 *   rewritten in turn and added with the term's sign (true when no signs
 *   list is set).
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if (count($this->_terms) == 0) {
        require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // Look for the first term that has no field assigned
    $hasUnqualifiedTerm = false;
    foreach ($this->_terms as $candidate) {
        if ($candidate->field === null) {
            $hasUnqualifiedTerm = true;
            break;
        }
    }

    if (!$hasUnqualifiedTerm) {
        // Nothing to expand: all terms are already bound to a field
        return $this;
    }

    require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
    $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
    $booleanQuery->setBoost($this->getBoost());

    require_once 'Zend/Search/Lucene/Search/Query/Term.php';
    foreach ($this->_terms as $key => $currentTerm) {
        $termQuery = new Zend_Search_Lucene_Search_Query_Term($currentTerm);
        $rewritten = $termQuery->rewrite($index);

        // Without a signs list every subquery is added with sign true
        if ($this->_signs === null) {
            $sign = true;
        } else {
            $sign = $this->_signs[$key];
        }

        $booleanQuery->addSubquery($rewritten, $sign);
    }

    return $booleanQuery;
}
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * Resolution order:
 *  1. No field name: a boolean query with one preprocessing-phrase subquery per
 *     search field (the default search field if one is configured, otherwise
 *     every field returned by $index->getFieldNames(true)), each rewritten in turn.
 *  2. The raw phrase exists in the index as a term of the field: a term query.
 *  3. The phrase is tokenized with the default analyzer:
 *     no tokens -> Insignificant query, one token -> term query,
 *     several tokens -> phrase query with the tokens' positions.
 *
 * Every resulting query carries this query's boost (the Insignificant query
 * excepted). As a side effect $this->_matches is set to the query terms of the
 * result, or to an empty array for the Insignificant case.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    // Wildcards are deliberately allowed within phrases: they are either removed
    // by the text analyzer or used as a part of the keyword for keyword fields,
    // so no QueryParserException is raised for '?' or '*' here.

    // Split query into subqueries if field name is not specified
    if ($this->_field === null) {
        require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
        $query = new Zend_Search_Lucene_Search_Query_Boolean();
        $query->setBoost($this->getBoost());

        require_once 'Zend/Search/Lucene.php';
        $defaultSearchField = Zend_Search_Lucene::getDefaultSearchField();
        if ($defaultSearchField === null) {
            $searchFields = $index->getFieldNames(true);
        } else {
            $searchFields = array($defaultSearchField);
        }

        foreach ($searchFields as $fieldName) {
            $subquery = new Zend_Search_Lucene_Search_Query_Preprocessing_Phrase($this->_phrase,
                                                                                 $this->_phraseEncoding,
                                                                                 $fieldName);
            $subquery->setSlop($this->getSlop());

            $query->addSubquery($subquery->rewrite($index));
        }

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
    // encoding is not used since we expect binary matching
    // (Index/Term.php is loaded once here; every later use is on this same path)
    require_once 'Zend/Search/Lucene/Index/Term.php';
    $term = new Zend_Search_Lucene_Index_Term($this->_phrase, $this->_field);
    if ($index->hasTerm($term)) {
        require_once 'Zend/Search/Lucene/Search/Query/Term.php';
        $query = new Zend_Search_Lucene_Search_Query_Term($term);
        $query->setBoost($this->getBoost());

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    // tokenize phrase using current analyzer and process it as a phrase query
    require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
    $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $this->_phraseEncoding);

    if (count($tokens) == 0) {
        $this->_matches = array();
        require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }

    if (count($tokens) == 1) {
        $term = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
        require_once 'Zend/Search/Lucene/Search/Query/Term.php';
        $query = new Zend_Search_Lucene_Search_Query_Term($term);
        $query->setBoost($this->getBoost());

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    // It's a non-trivial phrase query
    require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
    $query = new Zend_Search_Lucene_Search_Query_Phrase();
    // Propagate the boost exactly like the term and boolean branches do;
    // previously it was silently dropped for multi-token phrases.
    $query->setBoost($this->getBoost());

    // Positions start at -1 so that the first increment yields the first token's position
    $position = -1;
    foreach ($tokens as $token) {
        $position += $token->getPositionIncrement();
        $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
        $query->addTerm($term, $position);
    }

    // Slop does not depend on the tokens, so it is applied once instead of per token
    $query->setSlop($this->getSlop());

    $this->_matches = $query->getQueryTerms();
    return $query;
}
/**
 * Optimize query in the context of specified index
 *
 * Sign convention used throughout this method (as the checks below show):
 * true = required, null = optional, false = prohibited.
 *
 * Steps, in order:
 *  1. Optimize every subquery and collect its sign (true when no signs list is set).
 *  2. Drop Insignificant subqueries; return Insignificant if nothing is left or
 *     everything left is prohibited.
 *  3. Handle Empty subqueries: a required one makes the whole query Empty,
 *     others are dropped; return Empty if nothing is left or everything left
 *     is prohibited.
 *  4. A single remaining subquery is returned directly (cloned with the boosts
 *     multiplied when this query's boost is not 1).
 *  5. Otherwise try to decompose Term and MultiTerm subqueries into a flat term
 *     list and regroup them into Term/MultiTerm clauses. If the regrouping cannot
 *     consume all terms, the plain Boolean candidate built before decomposition
 *     is returned instead.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function optimize(Zend_Search_Lucene_Interface $index)
{
    $subqueries = array();
    $signs = array();

    // Optimize all subqueries
    // ($subqueries and $signs are filled in parallel, so they share the same keys)
    foreach ($this->_subqueries as $id => $subquery) {
        $subqueries[] = $subquery->optimize($index);
        $signs[] = $this->_signs === null ? true : $this->_signs[$id];
    }

    // Remove insignificant subqueries
    foreach ($subqueries as $id => $subquery) {
        if ($subquery instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
            // Insignificant subquery has to be removed anyway
            unset($subqueries[$id]);
            unset($signs[$id]);
        }
    }
    if (count($subqueries) == 0) {
        // Boolean query doesn't have any non-insignificant subqueries
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }

    // Check if all non-insignificant subqueries are prohibited
    $allProhibited = true;
    foreach ($signs as $sign) {
        if ($sign !== false) {
            $allProhibited = false;
            break;
        }
    }
    if ($allProhibited) {
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }

    // Check for empty subqueries
    foreach ($subqueries as $id => $subquery) {
        if ($subquery instanceof Zend_Search_Lucene_Search_Query_Empty) {
            if ($signs[$id] === true) {
                // Matching is required, but is actually empty
                return new Zend_Search_Lucene_Search_Query_Empty();
            } else {
                // Matching is optional or prohibited, but is empty
                // Remove it from subqueries and signs list
                unset($subqueries[$id]);
                unset($signs[$id]);
            }
        }
    }

    // Check, if reduced subqueries list is empty
    if (count($subqueries) == 0) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // Check if all non-empty subqueries are prohibited
    $allProhibited = true;
    foreach ($signs as $sign) {
        if ($sign !== false) {
            $allProhibited = false;
            break;
        }
    }
    if ($allProhibited) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // Check, if reduced subqueries list has only one entry
    if (count($subqueries) == 1) {
        // It's a query with only one required or optional clause
        // (it's already checked, that it's not a prohibited clause)
        if ($this->getBoost() == 1) {
            return reset($subqueries);
        }

        // Clone before changing the boost so the original subquery object is left untouched
        $optimizedQuery = clone reset($subqueries);
        $optimizedQuery->setBoost($optimizedQuery->getBoost() * $this->getBoost());

        return $optimizedQuery;
    }

    // Prepare first candidate for optimized query
    // (built before decomposition starts removing entries from $subqueries/$signs)
    $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
    $optimizedQuery->setBoost($this->getBoost());

    // Parallel lists describing the decomposed terms: term, its sign and the
    // boost of the subquery it came from
    $terms = array();
    $tsigns = array();
    $boostFactors = array();

    // Try to decompose term and multi-term subqueries
    foreach ($subqueries as $id => $subquery) {
        if ($subquery instanceof Zend_Search_Lucene_Search_Query_Term) {
            $terms[] = $subquery->getTerm();
            $tsigns[] = $signs[$id];
            $boostFactors[] = $subquery->getBoost();

            // remove subquery from a subqueries list
            unset($subqueries[$id]);
            unset($signs[$id]);
        } else {
            if ($subquery instanceof Zend_Search_Lucene_Search_Query_MultiTerm) {
                $subTerms = $subquery->getTerms();
                $subSigns = $subquery->getSigns();

                if ($signs[$id] === true) {
                    // It's a required multi-term subquery.
                    // Something like '... +(+term1 -term2 term3 ...) ...'

                    // Multi-term required subquery can be decomposed only if it contains
                    // required terms and doesn't contain prohibited terms:
                    // ... +(+term1 term2 ...) ... => ... +term1 term2 ...
                    //
                    // Check this
                    $hasRequired = false;
                    $hasProhibited = false;
                    if ($subSigns === null) {
                        // All subterms are required
                        $hasRequired = true;
                    } else {
                        foreach ($subSigns as $sign) {
                            if ($sign === true) {
                                $hasRequired = true;
                            } else {
                                if ($sign === false) {
                                    $hasProhibited = true;
                                    break;
                                }
                            }
                        }
                    }
                    // Continue if subquery has prohibited terms or doesn't have required terms
                    // (the subquery then stays in $subqueries untouched)
                    if ($hasProhibited || !$hasRequired) {
                        continue;
                    }

                    foreach ($subTerms as $termId => $term) {
                        $terms[] = $term;
                        $tsigns[] = $subSigns === null ? true : $subSigns[$termId];
                        $boostFactors[] = $subquery->getBoost();
                    }

                    // remove subquery from a subqueries list
                    unset($subqueries[$id]);
                    unset($signs[$id]);
                } else { // $signs[$id] === null || $signs[$id] === false
                    // It's an optional or prohibited multi-term subquery.
                    // Something like '... (+term1 -term2 term3 ...) ...'
                    // or
                    // something like '... -(+term1 -term2 term3 ...) ...'

                    // Multi-term optional and prohibited subqueries can be decomposed
                    // only if all terms are optional.
                    //
                    // Check if all terms are optional.
                    $onlyOptional = true;
                    if ($subSigns === null) {
                        // All subterms are required
                        $onlyOptional = false;
                    } else {
                        foreach ($subSigns as $sign) {
                            if ($sign !== null) {
                                $onlyOptional = false;
                                break;
                            }
                        }
                    }

                    // Continue if non-optional terms are present in this multi-term subquery
                    if (!$onlyOptional) {
                        continue;
                    }

                    // Each term inherits the sign of the enclosing subquery:
                    // optional stays optional (null), prohibited becomes prohibited (false)
                    foreach ($subTerms as $termId => $term) {
                        $terms[] = $term;
                        $tsigns[] = $signs[$id] === null ? null : false;
                        $boostFactors[] = $subquery->getBoost();
                    }

                    // remove subquery from a subqueries list
                    unset($subqueries[$id]);
                    unset($signs[$id]);
                }
            }
        }
    }

    // Check, if there are no decomposed subqueries
    if (count($terms) == 0) {
        // return prepared candidate
        return $optimizedQuery;
    }

    // Check, if all subqueries have been decomposed and all terms have the same boost factor
    if (count($subqueries) == 0 && count(array_unique($boostFactors)) == 1) {
        $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
        $optimizedQuery->setBoost(reset($boostFactors) * $this->getBoost());

        return $optimizedQuery;
    }

    // This boolean query can't be transformed to Term/MultiTerm query and still contains
    // several subqueries

    // Separate prohibited terms
    // (the three parallel lists are reduced together so their keys stay aligned)
    $prohibitedTerms = array();
    foreach ($terms as $id => $term) {
        if ($tsigns[$id] === false) {
            $prohibitedTerms[] = $term;

            unset($terms[$id]);
            unset($tsigns[$id]);
            unset($boostFactors[$id]);
        }
    }

    if (count($terms) == 1) {
        // A single remaining non-prohibited term becomes a Term clause with its own boost and sign
        $clause = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
        $clause->setBoost(reset($boostFactors));

        $subqueries[] = $clause;
        $signs[] = reset($tsigns);

        // Clear terms list
        $terms = array();
    } else {
        // Several terms can only be grouped when they all share one boost factor;
        // otherwise $terms is left non-empty and the first candidate is returned below
        if (count($terms) > 1 && count(array_unique($boostFactors)) == 1) {
            $clause = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
            $clause->setBoost(reset($boostFactors));

            $subqueries[] = $clause;
            // Clause sign is 'required' if clause contains required terms. 'Optional' otherwise.
            $signs[] = in_array(true, $tsigns) ? true : null;

            // Clear terms list
            $terms = array();
        }
    }

    if (count($prohibitedTerms) == 1) {
        // (boost factors are not significant for prohibited clauses)
        $subqueries[] = new Zend_Search_Lucene_Search_Query_Term(reset($prohibitedTerms));
        $signs[] = false;

        // Clear prohibited terms list
        $prohibitedTerms = array();
    } else {
        if (count($prohibitedTerms) > 1) {
            // prepare signs array
            $prohibitedSigns = array();
            foreach ($prohibitedTerms as $id => $term) {
                // all prohibited terms are grouped as optional into multi-term query
                $prohibitedSigns[$id] = null;
            }

            // (boost factors are not significant for prohibited clauses)
            $subqueries[] = new Zend_Search_Lucene_Search_Query_MultiTerm($prohibitedTerms, $prohibitedSigns);
            // Clause sign is 'prohibited'
            $signs[] = false;

            // Clear terms list
            $prohibitedTerms = array();
        }
    }

    /** @todo Group terms with the same boost factors together */

    // Check, that all terms are processed
    // Replace candidate for optimized query
    // (if some terms could not be regrouped, the candidate prepared before decomposition is kept)
    if (count($terms) == 0 && count($prohibitedTerms) == 0) {
        $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
        $optimizedQuery->setBoost($this->getBoost());
    }

    return $optimizedQuery;
}
/**
 * Re-write queries into primitive queries
 * Also used for query optimization and binding to the index
 *
 * Builds a fresh boolean query with this query's boost and adds the rewritten
 * form of every subquery to it, keeping each subquery's sign (true when no
 * signs list is set).
 *
 * @param Zend_Search_Lucene $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene $index)
{
    $rewrittenQuery = new Zend_Search_Lucene_Search_Query_Boolean();
    $rewrittenQuery->setBoost($this->getBoost());

    foreach ($this->_subqueries as $position => $child) {
        $primitive = $child->rewrite($index);

        // Without a signs list every subquery is added with sign true
        if ($this->_signs === null) {
            $sign = true;
        } else {
            $sign = $this->_signs[$position];
        }

        $rewrittenQuery->addSubquery($primitive, $sign);
    }

    return $rewrittenQuery;
}
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * - With no terms, an Empty query is returned.
 * - When the first term has a field name, the query itself is returned.
 * - Otherwise a boolean query (carrying this query's boost) is built with one
 *   phrase subquery per field name reported by $index->getFieldNames(true);
 *   each subquery repeats the phrase terms, offsets and slop for that field.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if (count($this->_terms) == 0) {
        require_once sfConfig::get('sf_lib_dir') . '/modules/search/lib/Lucene/Search/Query/Empty.php';
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // Only the first term is inspected to decide whether the phrase is qualified
    if ($this->_terms[0]->field !== null) {
        return $this;
    }

    require_once sfConfig::get('sf_lib_dir') . '/modules/search/lib/Lucene/Search/Query/Boolean.php';
    $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
    $booleanQuery->setBoost($this->getBoost());

    foreach ($index->getFieldNames(true) as $indexedField) {
        $fieldPhrase = new Zend_Search_Lucene_Search_Query_Phrase();
        $fieldPhrase->setSlop($this->getSlop());

        require_once sfConfig::get('sf_lib_dir') . '/modules/search/lib/Lucene/Index/Term.php';
        foreach ($this->_terms as $key => $unqualified) {
            $fieldPhrase->addTerm(
                new Zend_Search_Lucene_Index_Term($unqualified->text, $indexedField),
                $this->_offsets[$key]
            );
        }

        $booleanQuery->addSubquery($fieldPhrase);
    }

    return $booleanQuery;
}
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * Resolution order:
 *  1. No field name: a boolean query with one preprocessing-phrase subquery per
 *     search field (the default search field if one is configured, otherwise
 *     every field returned by $index->getFieldNames(true)), each rewritten in turn.
 *  2. The raw phrase exists in the index as a term of the field: a term query.
 *  3. The phrase is tokenized with the default analyzer:
 *     no tokens -> Insignificant query, one token -> term query,
 *     several tokens -> phrase query with the tokens' positions.
 *
 * Every resulting query carries this query's boost (the Insignificant query
 * excepted). As a side effect $this->_matches is set to the query terms of the
 * result, or to an empty array for the Insignificant case.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    // Split query into subqueries if field name is not specified
    if ($this->_field === null) {
        $query = new Zend_Search_Lucene_Search_Query_Boolean();
        $query->setBoost($this->getBoost());

        $defaultSearchField = Zend_Search_Lucene::getDefaultSearchField();
        if ($defaultSearchField === null) {
            $searchFields = $index->getFieldNames(true);
        } else {
            $searchFields = array($defaultSearchField);
        }

        foreach ($searchFields as $fieldName) {
            $subquery = new Zend_Search_Lucene_Search_Query_Preprocessing_Phrase($this->_phrase,
                                                                                 $this->_phraseEncoding,
                                                                                 $fieldName);
            $subquery->setSlop($this->getSlop());

            $query->addSubquery($subquery->rewrite($index));
        }

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    // Exact term matching: the untokenized phrase is looked up as a term of the field
    $term = new Zend_Search_Lucene_Index_Term($this->_phrase, $this->_field);
    if ($index->hasTerm($term)) {
        $query = new Zend_Search_Lucene_Search_Query_Term($term);
        $query->setBoost($this->getBoost());

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    // Tokenize the phrase using the default analyzer and process it as a phrase query
    $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $this->_phraseEncoding);

    if (count($tokens) == 0) {
        $this->_matches = array();
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }

    if (count($tokens) == 1) {
        $term = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
        $query = new Zend_Search_Lucene_Search_Query_Term($term);
        $query->setBoost($this->getBoost());

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    // It's a non-trivial phrase query
    $query = new Zend_Search_Lucene_Search_Query_Phrase();
    // Propagate the boost exactly like the term and boolean branches do;
    // previously it was silently dropped for multi-token phrases.
    $query->setBoost($this->getBoost());

    // Positions start at -1 so that the first increment yields the first token's position
    $position = -1;
    foreach ($tokens as $token) {
        $position += $token->getPositionIncrement();
        $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
        $query->addTerm($term, $position);
    }

    // Slop does not depend on the tokens, so it is applied once instead of per token
    $query->setSlop($this->getSlop());

    $this->_matches = $query->getQueryTerms();
    return $query;
}