/**
 * Runs a Lucene term search for the "q" GET parameter and hands the result to the view.
 *
 * The parameter is trimmed and tag-stripped through Zend_Filter_Input. The
 * HTML-escaped value is assigned to the view for display only; the filtered
 * but unescaped value is used to build the index term, so that characters
 * such as "&" or quotes are not searched for as HTML entities.
 *
 * View variables written: queryString, plus either hits (valid input) or
 * messages (validation failures). Nothing is assigned when the request
 * carries no string "q" parameter.
 *
 * @return void
 */
public function searchAction()
{
    $filters    = array('q' => array('StringTrim', 'StripTags'));
    $validators = array('q' => array('presence' => 'required'));
    $input      = new Zend_Filter_Input($filters, $validators, $_GET);

    if (!is_string($this->_request->getParam('q'))) {
        return;
    }

    // Escaped copy is for output only.
    $this->view->queryString = $input->getEscaped('q');

    if (!$input->isValid()) {
        $this->view->messages = $input->getMessages();
        return;
    }

    $config = Zend_Registry::get('config');
    $index  = App_Search_Lucene::open($config->luceneIndex);

    $query = new Zend_Search_Lucene_Search_Query_Boolean();

    // Required clause: the user's term, matched against the raw (unescaped) text.
    $userTerm = new Zend_Search_Lucene_Index_Term($input->getUnescaped('q'));
    $query->addSubquery(new Zend_Search_Lucene_Search_Query_Term($userTerm), true);

    // Required clause: fixed CreationDate value.
    // NOTE(review): the date is hard-coded in the original code - confirm this is intended.
    $dateTerm = new Zend_Search_Lucene_Index_Term('20091023', 'CreationDate');
    $query->addSubquery(new Zend_Search_Lucene_Search_Query_Term($dateTerm), true);

    try {
        $hits = $index->find($query);
    } catch (Zend_Search_Lucene_Exception $ex) {
        // A failing search is presented as "no results".
        $hits = array();
    }

    $this->view->hits = $hits;
}
/**
 * Writes one sitemap file per valid host plus a sitemap index into the sitemap directory.
 *
 * For every host returned by getValidHosts() the index is queried for documents
 * whose 'host' field equals the host and whose 'restrictionGroup_default' field
 * matches TRUE. Each hit's 'url' field (with the pimcore_outputfilters_disabled
 * parameter removed) becomes a <loc> entry in sitemap-<host-with-dashes>.xml.
 * Afterwards sitemap.xml is written as a sitemap index that links to every
 * per-host file through the first host of the list.
 *
 * An unavailable directory, a missing host list and a file that cannot be
 * opened for writing are logged; a per-host file that cannot be opened is skipped.
 *
 * @return void
 */
public function generateSitemap()
{
    $this->prepareSiteMapFolder();

    if (is_null($this->sitemapDir)) {
        \Pimcore\Logger::emerg('LuceneSearch: Cannot generate sitemap. Sitemap directory [ ' . $this->sitemapDir . ' ] not available/not writeable and cannot be created');
        return;
    }

    $hosts = $this->getValidHosts();
    if (!is_array($hosts)) {
        \Pimcore\Logger::debug('LuceneSearch: could not generate sitemaps, did not find any hosts in index.');
        return;
    }

    $outputFilterParams = array('?pimcore_outputfilters_disabled=1', '&pimcore_outputfilters_disabled=1');

    foreach ($hosts as $hostName) {
        $query = new \Zend_Search_Lucene_Search_Query_Boolean();

        $hostTerm = new \Zend_Search_Lucene_Index_Term($hostName, 'host');
        $query->addSubquery(new \Zend_Search_Lucene_Search_Query_Term($hostTerm), TRUE);

        $restrictionTerm = new \Zend_Search_Lucene_Index_Term(TRUE, 'restrictionGroup_default');
        $query->addSubquery(new \Zend_Search_Lucene_Search_Query_Term($restrictionTerm), TRUE);

        $hits = $this->index->find($query);

        $filePath = $this->sitemapDir . '/sitemap-' . str_replace('.', '-', $hostName) . '.xml';
        $fh = fopen($filePath, 'w');
        if ($fh === false) {
            // Without a handle every fwrite()/fclose() below would fail; skip this host.
            \Pimcore\Logger::emerg('LuceneSearch: Cannot write sitemap file [ ' . $filePath . ' ].');
            continue;
        }

        fwrite($fh, '<?xml version="1.0" encoding="UTF-8"?>' . "\r\n");
        fwrite($fh, '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">');
        fwrite($fh, "\r\n");

        foreach ($hits as $hit) {
            $url = $hit->getDocument()->getField('url');
            $uri = str_replace($outputFilterParams, '', $url->value);

            fwrite($fh, '<url>' . "\r\n");
            fwrite($fh, ' <loc>' . htmlspecialchars($uri, ENT_QUOTES) . '</loc>' . "\r\n");
            fwrite($fh, '</url>' . "\r\n");
        }

        fwrite($fh, '</urlset>' . "\r\n");
        fclose($fh);
    }

    $filePath = $this->sitemapDir . '/sitemap.xml';
    $fh = fopen($filePath, 'w');
    if ($fh === false) {
        \Pimcore\Logger::emerg('LuceneSearch: Cannot write sitemap file [ ' . $filePath . ' ].');
        return;
    }

    fwrite($fh, '<?xml version="1.0" encoding="UTF-8"?>' . "\r\n");
    fwrite($fh, '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">');
    fwrite($fh, "\r\n");

    foreach ($hosts as $hostName) {
        $name = str_replace('.', '-', $hostName);

        //first host must be main domain - see hint in plugin settings
        $currenthost = $hosts[0];

        fwrite($fh, '<sitemap>' . "\r\n");
        fwrite($fh, ' <loc>http://' . $currenthost . '/plugin/LuceneSearch/frontend/sitemap/?sitemap=sitemap-' . $name . '.xml' . '</loc>' . "\r\n");
        fwrite($fh, '</sitemap>' . "\r\n");

        \Pimcore\Logger::debug('LuceneSearch: ' . $hostName . ' for sitemap.xml added.');
    }

    fwrite($fh, '</sitemapindex>' . "\r\n");
    fclose($fh);
}
/**
 * Re-writes this multi-term query into primitive queries for the given index.
 *
 * Returns an empty query when there are no terms, the query itself when every
 * term already names its field, and otherwise a boolean query holding one
 * rewritten term query per term (same boost, same signs; all terms required
 * when no signs are set).
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if (count($this->_terms) == 0) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // The query can be used as-is only when no term lacks a field.
    $hasUnqualifiedTerm = false;
    foreach ($this->_terms as $candidate) {
        if ($candidate->field === null) {
            $hasUnqualifiedTerm = true;
            break;
        }
    }

    if (!$hasUnqualifiedTerm) {
        return $this;
    }

    $rewritten = new Zend_Search_Lucene_Search_Query_Boolean();
    $rewritten->setBoost($this->getBoost());

    foreach ($this->_terms as $position => $currentTerm) {
        $termQuery      = new Zend_Search_Lucene_Search_Query_Term($currentTerm);
        $rewrittenChild = $termQuery->rewrite($index);

        if ($this->_signs === null) {
            $rewritten->addSubquery($rewrittenChild, true);
        } else {
            $rewritten->addSubquery($rewrittenChild, $this->_signs[$position]);
        }
    }

    return $rewritten;
}
/**
 * Searches the index and loads the matching information objects for the current page.
 *
 * The default query requires the 'parentId' field to equal the routed
 * resource's id; when the request carries a 'query' value, that value
 * replaces the default query. The query is then passed through the ACL
 * repository and draft filters, the hits are paged, and the information
 * objects whose ids appear on the current page are stored in
 * $this->informationObjects.
 *
 * @param mixed $request Request object; 'limit', 'query' and 'page' are read from it
 * @return void
 */
public function execute($request)
{
    if (!isset($request->limit)) {
        $request->limit = sfConfig::get('app_hits_per_page');
    }

    $this->resource = $this->getRoute()->resource;

    // Check that this isn't the root
    if (!isset($this->resource->parent)) {
        $this->forward404();
    }

    $search = new QubitSearch();

    $parentTerm  = new Zend_Search_Lucene_Index_Term($this->resource->id, 'parentId');
    $parentQuery = new Zend_Search_Lucene_Search_Query_Term($parentTerm);

    $query = new Zend_Search_Lucene_Search_Query_Boolean();
    $query->addSubquery($parentQuery, true);

    // A query supplied with the request takes the place of the parent filter.
    if (isset($request->query)) {
        $query = $request->query;
    }

    $query = QubitAcl::searchFilterByRepository($query, 'read');
    $query = QubitAcl::searchFilterDrafts($query);

    $luceneIndex = $search->getEngine()->getIndex();

    $this->pager = new QubitArrayPager();
    $this->pager->hits = $luceneIndex->find($query);
    $this->pager->setMaxPerPage($request->limit);
    $this->pager->setPage($request->page);

    $pageIds = array();
    foreach ($this->pager->getResults() as $pageHit) {
        $pageIds[] = $pageHit->getDocument()->id;
    }

    $criteria = new Criteria();
    $criteria->add(QubitInformationObject::ID, $pageIds, Criteria::IN);

    $this->informationObjects = QubitInformationObject::get($criteria);
}
/**
 * Re-writes this boolean query by rewriting each of its subqueries against the index.
 *
 * The result is a new boolean query with the same boost. Every subquery keeps
 * its sign; when no signs are set, all subqueries are added as required.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query_Boolean
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    $rewritten = new Zend_Search_Lucene_Search_Query_Boolean();
    $rewritten->setBoost($this->getBoost());

    foreach ($this->_subqueries as $key => $child) {
        $rewrittenChild = $child->rewrite($index);

        if ($this->_signs === null) {
            $sign = true;
        } else {
            $sign = $this->_signs[$key];
        }

        $rewritten->addSubquery($rewrittenChild, $sign);
    }

    return $rewritten;
}
/**
 * Generates a Lucene boolean query from the expression objects.
 *
 * Each contained expression is converted through its own toString() method.
 * Every result that is not the empty string is added as a subquery, using the
 * sign that self::$_operators holds for this object's operator.
 *
 * @param array $types Associative list of variable or column names as keys and their corresponding types
 * @param array $translations Associative list of variable or column names that should be translated
 * @param array $plugins Associative list of item names and plugins implementing MW_Common_Criteria_Plugin_Interface
 * @return Zend_Search_Lucene_Search_Query_Boolean Combined search objects
 */
public function toString(array $types, array $translations = array(), array $plugins = array())
{
    $combined = new Zend_Search_Lucene_Search_Query_Boolean();

    foreach ($this->_expressions as $expression) {
        $part = $expression->toString($types, $translations, $plugins);

        if ($part === '') {
            continue;
        }

        $combined->addSubquery($part, self::$_operators[$this->_operator]);
    }

    return $combined;
}
/**
 * Runs a user-entered full-text query and returns one page of resolved components.
 *
 * The query string is parsed by the Lucene query parser; an ErrorException
 * raised while parsing is ignored, which leaves the search without a query and
 * therefore without hits. When $params['type'] is set, hits are additionally
 * required to carry that value in the 'type' field. Hits whose component can
 * no longer be resolved are left out of the page but still count in 'numHits'.
 *
 * @param Kwf_Component_Data $subroot     Subroot whose index is searched
 * @param string             $queryString Raw query entered by the user
 * @param int                $offset      Index of the first hit to return
 * @param int                $limit       Maximum number of hits to inspect
 * @param array              $params      Optional filters; only 'type' is read
 * @return array Keys: 'error' (false or a translated message), 'numHits' (int),
 *               'hits' (list of arrays with 'data' and 'content')
 */
public function userSearch(Kwf_Component_Data $subroot, $queryString, $offset, $limit, $params = array())
{
    $index = Kwf_Util_Fulltext_Lucene::getInstance($subroot);

    $error     = false;
    $userQuery = false;

    if ($queryString) {
        try {
            $userQuery = Zend_Search_Lucene_Search_QueryParser::parse($queryString);
        } catch (ErrorException $e) {
            //ignore iconv errors that happen with invalid input
        }
    }

    $hits = array();
    if ($userQuery) {
        $query = new Zend_Search_Lucene_Search_Query_Boolean();
        $query->addSubquery($userQuery, true);

        if (isset($params['type'])) {
            $typeTerm = new Zend_Search_Lucene_Index_Term($params['type'], 'type');
            $query->addSubquery(new Zend_Search_Lucene_Search_Query_Term($typeTerm), true);
        }

        try {
            $hits = $index->find($query);
        } catch (Zend_Search_Lucene_Exception $e) {
            $error = $subroot->trlKwf('Invalid search terms');
        }
    }

    $numHits = count($hits);

    $ret = array();
    if ($numHits) {
        $numEnd = min($numHits, $offset + $limit);
        for ($i = $offset; $i < $numEnd; $i++) {
            $hit       = $hits[$i];
            $component = Kwf_Component_Data_Root::getInstance()->getComponentById($hit->componentId);
            if ($component) {
                $ret[] = array('data' => $component, 'content' => $hit->content);
            }
        }
    }

    return array('error' => $error, 'numHits' => $numHits, 'hits' => $ret);
}
/**
 * Searches the user index by name/username prefix and returns the matching users.
 *
 * The query string is split on spaces. Every word must match, as a prefix
 * wildcard ("word*"), either the 'name' or the 'username' field. The result
 * set limit is set to 5 and the wildcard minimum prefix length to 1 (both are
 * static Zend_Search_Lucene settings). The logged-in user is removed from the
 * hits before the user rows are loaded.
 *
 * @param string $queryString Raw search text
 * @return array Always contains 'queryString'. Contains 'message' when the
 *               query is empty, 'users' when at least one other user matched
 *               (or an empty 'users' list when a friends-only search is
 *               requested and the friend list is empty).
 */
public function find($queryString)
{
    $queryString = trim((string) $queryString);

    // Compare against '' explicitly: empty() would also reject the query "0".
    if ($queryString === '') {
        return ["queryString" => $queryString, "message" => "No String"];
    }

    $index = \Zend_Search_Lucene::open($this->indexfile);

    \Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength(1);
    \Zend_Search_Lucene::setResultSetLimit(5);

    $query = new \Zend_Search_Lucene_Search_Query_Boolean();

    foreach (explode(' ', $queryString) as $word) {
        // Consecutive spaces yield empty pieces; skip only those.
        if ($word === '') {
            continue;
        }

        $prefixPattern = $word . "*";

        $nameQuery = new \Zend_Search_Lucene_Search_Query_Wildcard(
            new \Zend_Search_Lucene_Index_Term($prefixPattern, "name")
        );
        $usernameQuery = new \Zend_Search_Lucene_Search_Query_Wildcard(
            new \Zend_Search_Lucene_Index_Term($prefixPattern, "username")
        );

        // Either field may match (sign null), but the word itself is required.
        $wordQuery = new \Zend_Search_Lucene_Search_Query_Boolean();
        $wordQuery->addSubquery($nameQuery, null);
        $wordQuery->addSubquery($usernameQuery, null);

        $query->addSubquery($wordQuery, true);
    }

    $hits = $index->find($query);

    $results = [];
    if (!empty($hits)) {
        $ownUsername = (string) $_SESSION['user']->username;
        foreach ($hits as $hit) {
            // Strict string comparison: loose != treats numeric-looking names such as "007" and "7" as equal.
            if ((string) $hit->username !== $ownUsername) {
                $results[] = $hit->username;
            }
        }
    }

    if (empty($results)) {
        return ["queryString" => $queryString];
    }

    /** @noinspection PhpUndefinedMethodInspection */
    /** @var Users $users */
    $users = $_SESSION['user']->getTable();

    if (isset($_POST['friends'])) {
        /** @noinspection PhpUndefinedMethodInspection */
        $friends = $_SESSION['user']->getFriendList();
        if (empty($friends)) {
            return ["queryString" => $queryString, "users" => []];
        }

        // NOTE(review): the friend list is only checked for emptiness; the
        // matches are not restricted to friends here - confirm this is intended.
        $userresult = $users->getSet($results, 'u.username');
    } else {
        $userresult = $users->getSet($results, "u.username", ["u.userid", "u.username", "u.name"]);
    }

    return ["queryString" => $queryString, "users" => $userresult->toArray()];
}
/**
 * Parses the request's query string and wraps it as a required clause of a boolean query.
 *
 * When parsing throws, the exception message is stored in $this->error and
 * null is returned.
 *
 * @return Zend_Search_Lucene_Search_Query_Boolean|null
 */
public function parseQuery()
{
    try {
        // Parse query string
        $parsed = QubitSearch::getInstance()->parse($this->request->query);
    } catch (Exception $failure) {
        $this->error = $failure->getMessage();

        return null;
    }

    $wrapper = new Zend_Search_Lucene_Search_Query_Boolean();
    $wrapper->addSubquery($parsed, true);

    return $wrapper;
}
/**
 * Builds the Lucene query for a keyword string.
 *
 * The lower-cased keyword is split on spaces. The resulting boolean query
 * holds a multi-term query with one term per word and, when there is at least
 * one word, a prefix wildcard query ("word*") for the last word. Both
 * subqueries are added without an explicit sign.
 *
 * Side effect: sets the static wildcard minimum prefix length to 1.
 *
 * @param string $keyword Raw keyword string
 * @return Zend_Search_Lucene_Search_Query_Boolean
 */
protected function prepareLuceneQuery($keyword)
{
    $keyword = strtolower((string) $keyword);
    $tokens  = preg_split('/ /', $keyword, -1, PREG_SPLIT_NO_EMPTY);

    $query = new Zend_Search_Lucene_Search_Query_Boolean();

    # multiterm query
    $subquery1 = new Zend_Search_Lucene_Search_Query_MultiTerm();
    foreach ($tokens as $key) {
        // Compare against '' explicitly so that the word "0" is not dropped.
        if (trim($key) === '') {
            continue;
        }
        $subquery1->addTerm(new Zend_Search_Lucene_Index_Term($key));
    }
    $query->addSubquery($subquery1);

    # wildcard query
    Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength(1);

    // Only build the wildcard when a last word exists: an empty keyword would
    // otherwise produce the bare pattern "*", which has no prefix at all and
    // conflicts with the minimum prefix length of 1 set above.
    $lastWord = count($tokens) > 0 ? trim(array_pop($tokens)) : '';
    if ($lastWord !== '') {
        $pattern   = new Zend_Search_Lucene_Index_Term($lastWord . "*");
        $subquery2 = new Zend_Search_Lucene_Search_Query_Wildcard($pattern);
        $query->addSubquery($subquery2);
    }

    return $query;
}
/**
 * Builds a boolean query that requires a fuzzy match for every word of the input.
 *
 * Words are extracted with str_word_count(); each becomes a fuzzy query with a
 * minimum similarity of 0.4, added as a required clause.
 *
 * @param string $query Raw search text
 * @return Zend_Search_Lucene_Search_Query_Boolean
 */
protected function getLuceneQuery($query)
{
    $fuzzyQuery = new Zend_Search_Lucene_Search_Query_Boolean();

    foreach (str_word_count($query, 1) as $word) {
        $fuzzyQuery->addSubquery(
            new Zend_Search_Lucene_Search_Query_Fuzzy(new Zend_Search_Lucene_Index_Term($word), 0.4),
            true
        );
    }

    return $fuzzyQuery;
}
/**
 * Searches the Lucene index for documents matching the given query text.
 *
 * The text is wrapped as "+(<q>)" and parsed by the Lucene query parser, then
 * added as a required clause of a boolean query. Any exception is written to
 * the PHP error log (instead of being echoed into the output) and an empty
 * list is returned.
 *
 * NOTE(review): $q is concatenated into the query string unmodified, so query
 * syntax contained in it is interpreted by the parser - confirm callers expect that.
 *
 * @param string $q Query text in Lucene query syntax
 * @return array Hits returned by the index, or an empty array on failure
 */
function searchDocsByContent($q)
{
    $hits = array();

    try {
        $this->initLuceneEngine();
        $indexer = $this->zend->get_Zend_Search_Lucene();

        $query    = new Zend_Search_Lucene_Search_Query_Boolean();
        $subquery = Zend_Search_Lucene_Search_QueryParser::parse('+(' . $q . ')');
        $query->addSubquery($subquery, true);
        // An additional 'object_type' term filter was disabled in the original code.

        $hits = $indexer->find($query);
    } catch (Exception $e) {
        // Stack traces belong in the log, not in the response body.
        error_log('searchDocsByContent failed: ' . $e->getMessage() . "\n" . $e->getTraceAsString());
    }

    return $hits;
}
/**
 * Re-writes this phrase query into primitive queries for the given index.
 *
 * Returns an empty query when there are no terms and the query itself when the
 * first term names a field. Otherwise the phrase is repeated once per field
 * reported by $index->getFieldNames(true) (same slop, same term offsets) and
 * the copies are combined in a boolean query that carries this query's boost.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if (count($this->_terms) == 0) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    if ($this->_terms[0]->field !== null) {
        return $this;
    }

    $expanded = new Zend_Search_Lucene_Search_Query_Boolean();
    $expanded->setBoost($this->getBoost());

    foreach ($index->getFieldNames(true) as $indexedField) {
        $fieldPhrase = new Zend_Search_Lucene_Search_Query_Phrase();
        $fieldPhrase->setSlop($this->getSlop());

        foreach ($this->_terms as $position => $unqualified) {
            $fieldPhrase->addTerm(
                new Zend_Search_Lucene_Index_Term($unqualified->text, $indexedField),
                $this->_offsets[$position]
            );
        }

        $expanded->addSubquery($fieldPhrase);
    }

    return $expanded;
}
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * Walks the term stream of every searched field (all indexed fields when the
 * query term has no field), scores each candidate term against the query term
 * with a Levenshtein-based similarity and keeps the candidates whose
 * similarity exceeds the minimum similarity. Matches, their keys and their
 * scaled scores are stored in $this->_matches, $this->_termKeys and
 * $this->_scores.
 *
 * Returns an empty query for no match, a term query for exactly one match and
 * otherwise a boolean query of boosted term queries, sorted by score and
 * capped at self::MAX_CLAUSE_COUNT clauses.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 * @throws Zend_Search_Lucene_Exception When more terms match than the terms-per-query limit allows
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    $this->_matches  = array();
    $this->_scores   = array();
    $this->_termKeys = array();

    if ($this->_term->field === null) {
        // Search through all fields
        $fields = $index->getFieldNames(true);
    } else {
        $fields = array($this->_term->field);
    }

    $prefix           = Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
    $prefixByteLength = strlen($prefix);
    $prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);

    $termRest = substr($this->_term->text, $prefixByteLength);
    // we calculate length of the rest in bytes since levenshtein() is not UTF-8 compatible
    $termRestLength = strlen($termRest);

    $scaleFactor = 1 / (1 - $this->_minimumSimilarity);
    $maxTerms    = Zend_Search_Lucene::getTermsPerQueryLimit();

    foreach ($fields as $field) {
        $index->resetTermsStream();

        if ($prefix != '') {
            $index->skipTo(new Zend_Search_Lucene_Index_Term($prefix, $field));

            while ($index->currentTerm() !== null
                && $index->currentTerm()->field == $field
                && substr($index->currentTerm()->text, 0, $prefixByteLength) == $prefix
            ) {
                // Calculate similarity
                $target       = substr($index->currentTerm()->text, $prefixByteLength);
                $targetLength = strlen($target);

                $maxDistance = isset($this->_maxDistances[$targetLength])
                    ? $this->_maxDistances[$targetLength]
                    : $this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, $targetLength);

                if ($termRestLength == 0) {
                    // we don't have anything to compare. That means if we just add
                    // the letters for current term we get the new word
                    $similarity = $prefixUtf8Length == 0 ? 0 : 1 - $targetLength / $prefixUtf8Length;
                } elseif ($targetLength == 0) {
                    $similarity = $prefixUtf8Length == 0 ? 0 : 1 - $termRestLength / $prefixUtf8Length;
                } elseif ($maxDistance < abs($termRestLength - $targetLength)) {
                    // Just adding the characters of term to target or vice-versa results in too many edits.
                    // For example "pre" has length 3 and "prefixes" has length 8, so the edit distance
                    // cannot be less than abs(3-8) = 5. If the maximum edit distance is 4, the word can
                    // be discarded without looking at it.
                    $similarity = 0;
                } else {
                    $similarity = 1 - levenshtein($termRest, $target)
                        / ($prefixUtf8Length + min($termRestLength, $targetLength));
                }

                if ($similarity > $this->_minimumSimilarity) {
                    $this->_matches[]  = $index->currentTerm();
                    $this->_termKeys[] = $index->currentTerm()->key();
                    $this->_scores[]   = ($similarity - $this->_minimumSimilarity) * $scaleFactor;

                    if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
                        throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
                    }
                }

                $index->nextTerm();
            }
        } else {
            $index->skipTo(new Zend_Search_Lucene_Index_Term('', $field));

            while ($index->currentTerm() !== null && $index->currentTerm()->field == $field) {
                // Calculate similarity
                $target       = $index->currentTerm()->text;
                $targetLength = strlen($target);

                $maxDistance = isset($this->_maxDistances[$targetLength])
                    ? $this->_maxDistances[$targetLength]
                    : $this->_calculateMaxDistance(0, $termRestLength, $targetLength);

                $comparableLength = min($termRestLength, $targetLength);

                if ($maxDistance < abs($termRestLength - $targetLength)) {
                    // Length difference alone already exceeds the allowed edit distance
                    // (see the explanation in the prefix branch above).
                    $similarity = 0;
                } elseif ($comparableLength == 0) {
                    // Nothing to compare and no prefix: score 0, as the prefix branch does
                    // for a zero-length prefix, instead of dividing by zero.
                    $similarity = 0;
                } else {
                    $similarity = 1 - levenshtein($termRest, $target) / $comparableLength;
                }

                if ($similarity > $this->_minimumSimilarity) {
                    $this->_matches[]  = $index->currentTerm();
                    $this->_termKeys[] = $index->currentTerm()->key();
                    $this->_scores[]   = ($similarity - $this->_minimumSimilarity) * $scaleFactor;

                    if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
                        throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
                    }
                }

                $index->nextTerm();
            }
        }

        $index->closeTermsStream();
    }

    if (count($this->_matches) == 0) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    if (count($this->_matches) == 1) {
        return new Zend_Search_Lucene_Search_Query_Term(reset($this->_matches));
    }

    $rewrittenQuery = new Zend_Search_Lucene_Search_Query_Boolean();

    // Best score first; ties ordered by term key. $this->_matches is reordered alongside.
    array_multisort(
        $this->_scores, SORT_DESC, SORT_NUMERIC,
        $this->_termKeys, SORT_ASC, SORT_STRING,
        $this->_matches
    );

    $termCount = 0;
    foreach ($this->_matches as $id => $matchedTerm) {
        $subquery = new Zend_Search_Lucene_Search_Query_Term($matchedTerm);
        $subquery->setBoost($this->_scores[$id]);

        $rewrittenQuery->addSubquery($subquery);

        $termCount++;
        if ($termCount >= self::MAX_CLAUSE_COUNT) {
            break;
        }
    }

    return $rewrittenQuery;
}
/**
 * Generate 'boolean style' query from the context
 * 'term1 and term2 or term3 and (<subquery1>) and not (<subquery2>)'
 *
 * Each level of an expression is treated as a boolean expression in
 * Disjunctive Normal Form: AND has higher precedence than OR, so the logical
 * query is a disjunction of one or more conjunctions of one or more query
 * entries. Conjunctions that consist of negative entries only are dropped.
 *
 * Returns an "insignificant" query when no conjunction remains, the single
 * subquery when exactly one remains, and otherwise a boolean query holding
 * all conjunctions as optional subqueries.
 *
 * @return Zend_Search_Lucene_Search_Query
 * @throws Zend_Search_Lucene_Search_QueryParserException On a malformed boolean expression
 */
private function _booleanExpressionQuery()
{
    require_once 'Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php';
    $expressionRecognizer = new Zend_Search_Lucene_Search_BooleanExpressionRecognizer();

    require_once 'Zend/Search/Lucene/Exception.php';
    try {
        foreach ($this->_entries as $entry) {
            if ($entry instanceof Zend_Search_Lucene_Search_QueryEntry) {
                $expressionRecognizer->processLiteral($entry);
                continue;
            }

            switch ($entry) {
                case Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME:
                    $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_AND_OPERATOR);
                    break;

                case Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME:
                    $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_OR_OPERATOR);
                    break;

                case Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME:
                    $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_NOT_OPERATOR);
                    break;

                default:
                    // Must be an exception class: Zend_Search_Lucene itself is the index
                    // class and cannot be thrown. The catch below converts it.
                    throw new Zend_Search_Lucene_Exception('Boolean expression error. Unknown operator type.');
            }
        }

        $conjuctions = $expressionRecognizer->finishExpression();
    } catch (Zend_Search_Exception $e) {
        // It's query syntax error message and it should be user friendly.
        // So FSM message is omitted
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error.', 0, $e);
    }

    // Remove 'only negative' conjunctions
    foreach ($conjuctions as $conjuctionId => $conjuction) {
        $nonNegativeEntryFound = false;

        foreach ($conjuction as $conjuctionEntry) {
            if ($conjuctionEntry[1]) {
                $nonNegativeEntryFound = true;
                break;
            }
        }

        if (!$nonNegativeEntryFound) {
            unset($conjuctions[$conjuctionId]);
        }
    }

    $subqueries = array();
    foreach ($conjuctions as $conjuction) {
        // Check, if it's a one term conjuction
        if (count($conjuction) == 1) {
            $subqueries[] = $conjuction[0][0]->getQuery($this->_encoding);
            continue;
        }

        require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
        $subquery = new Zend_Search_Lucene_Search_Query_Boolean();

        foreach ($conjuction as $conjuctionEntry) {
            // Entry layout: [0] is the query entry, [1] is its sign.
            $subquery->addSubquery($conjuctionEntry[0]->getQuery($this->_encoding), $conjuctionEntry[1]);
        }

        $subqueries[] = $subquery;
    }

    if (count($subqueries) == 0) {
        require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }

    if (count($subqueries) == 1) {
        return $subqueries[0];
    }

    require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
    $query = new Zend_Search_Lucene_Search_Query_Boolean();

    foreach ($subqueries as $subquery) {
        // Non-required entry/subquery
        $query->addSubquery($subquery);
    }

    return $query;
}
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * Without a field the phrase is expanded into one preprocessing phrase query
 * per search field (the default search field when one is set, otherwise every
 * indexed field) and the rewritten results are combined in a boolean query.
 * With a field the phrase becomes, in this order of preference: a term query
 * when the exact phrase exists as a term in the index, an "insignificant"
 * query when the analyzer yields no tokens, a term query for a single token,
 * or a phrase query for several tokens. $this->_matches is updated to the
 * terms of the returned query (an empty array for the insignificant case).
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    // Wildcards are allowed within phrases: they are either removed by the text
    // analyzer or used as a part of keyword for keyword fields.

    // Split query into subqueries if field name is not specified
    if ($this->_field === null) {
        require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
        $query = new Zend_Search_Lucene_Search_Query_Boolean();
        $query->setBoost($this->getBoost());

        require_once 'Zend/Search/Lucene.php';
        $defaultField = Zend_Search_Lucene::getDefaultSearchField();
        $searchFields = ($defaultField === null)
            ? $index->getFieldNames(true)
            : array($defaultField);

        foreach ($searchFields as $fieldName) {
            $subquery = new Zend_Search_Lucene_Search_Query_Preprocessing_Phrase(
                $this->_phrase,
                $this->_phraseEncoding,
                $fieldName
            );
            $subquery->setSlop($this->getSlop());

            $query->addSubquery($subquery->rewrite($index));
        }

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
    // encoding is not used since we expect binary matching
    require_once 'Zend/Search/Lucene/Index/Term.php';
    $term = new Zend_Search_Lucene_Index_Term($this->_phrase, $this->_field);
    if ($index->hasTerm($term)) {
        require_once 'Zend/Search/Lucene/Search/Query/Term.php';
        $query = new Zend_Search_Lucene_Search_Query_Term($term);
        $query->setBoost($this->getBoost());

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    // tokenize phrase using current analyzer and process it as a phrase query
    require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
    $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $this->_phraseEncoding);

    if (count($tokens) == 0) {
        $this->_matches = array();
        require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }

    if (count($tokens) == 1) {
        require_once 'Zend/Search/Lucene/Index/Term.php';
        $term = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
        require_once 'Zend/Search/Lucene/Search/Query/Term.php';
        $query = new Zend_Search_Lucene_Search_Query_Term($term);
        $query->setBoost($this->getBoost());

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    //It's non-trivial phrase query
    require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
    $query = new Zend_Search_Lucene_Search_Query_Phrase();
    // The slop does not depend on the token, so it is set once instead of per token.
    $query->setSlop($this->getSlop());

    require_once 'Zend/Search/Lucene/Index/Term.php';
    $position = -1;
    foreach ($tokens as $token) {
        $position += $token->getPositionIncrement();
        $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
        $query->addTerm($term, $position);
    }

    $this->_matches = $query->getQueryTerms();
    return $query;
}
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * An empty term list yields an empty query. When every term is qualified with
 * a field the query is returned unchanged. Otherwise the multi-term query is
 * transformed into a boolean query of rewritten term queries that keeps the
 * boost and the per-term signs (all required when no signs are set).
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if (count($this->_terms) == 0) {
        require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // Look for the first term without a field; none found means nothing to rewrite.
    $needsRewrite = false;
    foreach ($this->_terms as $inspected) {
        if ($inspected->field === null) {
            $needsRewrite = true;
            break;
        }
    }

    if (!$needsRewrite) {
        return $this;
    }

    /** transform multiterm query to boolean and apply rewrite() method to subqueries. */
    require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
    $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
    $booleanQuery->setBoost($this->getBoost());

    require_once 'Zend/Search/Lucene/Search/Query/Term.php';
    foreach ($this->_terms as $termId => $term) {
        $termQuery = new Zend_Search_Lucene_Search_Query_Term($term);
        $rewritten = $termQuery->rewrite($index);

        $sign = true;
        if ($this->_signs !== null) {
            $sign = $this->_signs[$termId];
        }

        $booleanQuery->addSubquery($rewritten, $sign);
    }

    return $booleanQuery;
}
/**
 * Executes a frontend search and assigns results, paging data and suggestions to the view.
 *
 * Input: $_REQUEST['query'] and $_REQUEST['cat'] (both passed through
 * cleanRequestString()) and the request parameters groupByCategory,
 * omitSearchForm, categoryOrder, omitJsIncludes, perPage, page, fuzzy, field
 * and viewscript.
 *
 * The lower-cased query is parsed by the Lucene query parser and combined with
 * required 'lang' and 'cat' term filters when a search language / category is
 * present. With ownHostOnly set, hits whose URL does not contain the current
 * host are dropped. When fuzzySearch is enabled and the search produced no
 * results, similar index terms that yield at least one valid hit are offered
 * as suggestions.
 *
 * Any Exception is logged and results in an empty result list.
 *
 * @return void
 */
public function findAction()
{
    // Missing request keys are passed on as null without raising a notice.
    $queryFromRequest    = $this->cleanRequestString(isset($_REQUEST["query"]) ? $_REQUEST["query"] : null);
    $categoryFromRequest = $this->cleanRequestString(isset($_REQUEST["cat"]) ? $_REQUEST["cat"] : null);

    $searcher = new SearchPhp_Frontend_Searcher();

    $this->view->groupByCategory = $this->_getParam("groupByCategory");
    $this->view->omitSearchForm  = $this->_getParam("omitSearchForm");
    $this->view->categoryOrder   = $this->_getParam("categoryOrder");
    $this->view->omitJsIncludes  = $this->_getParam("omitJsIncludes");

    // Paging values arrive as request strings; normalise them to positive integers
    // so the offset arithmetic and the page count cannot go negative or divide by zero.
    $perPage = (int) $this->_getParam("perPage");
    if ($perPage < 1) {
        $perPage = 10;
    }

    $page = (int) $this->_getParam("page");
    if ($page < 1) {
        $page = 1;
    }

    $queryStr = strtolower($queryFromRequest);

    $this->view->category = $categoryFromRequest;
    $category = !empty($categoryFromRequest) ? $categoryFromRequest : null;

    $pluginConf = SearchPhp_Plugin::getSearchConfigArray();
    if (!empty($pluginConf["search"]["frontend"]["categories"])) {
        $this->view->availableCategories = explode(",", $pluginConf["search"]["frontend"]["categories"]);
    }

    $doFuzzy = $this->_getParam("fuzzy");

    try {
        $query = new Zend_Search_Lucene_Search_Query_Boolean();

        $field = $this->_getParam("field");
        if (!empty($field)) {
            Zend_Search_Lucene::setDefaultSearchField($field);
        }

        $searchResults = array();
        // Defined up front so the paging data below is valid for an empty query as well.
        $validHits = array();
        $lang = $this->_getSearchLanguageCode();

        if (!empty($queryStr)) {
            if ($doFuzzy) {
                // Append the fuzzy operator to every word.
                $queryStr = str_replace(" ", "~ ", $queryStr);
                $queryStr .= "~";
                Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength(3);
            }

            $userQuery = Zend_Search_Lucene_Search_QueryParser::parse($queryStr, 'utf-8');
            $query->addSubquery($userQuery, true);

            if ($lang !== null) {
                $languageTerm = new Zend_Search_Lucene_Index_Term($lang, 'lang');
                $query->addSubquery(new Zend_Search_Lucene_Search_Query_Term($languageTerm), true);
            }

            if (!empty($category)) {
                $categoryTerm = new Zend_Search_Lucene_Index_Term($category, 'cat');
                $query->addSubquery(new Zend_Search_Lucene_Search_Query_Term($categoryTerm), true);
            }

            $hits      = $this->frontendIndex->find($query);
            $validHits = $this->_filterHitsByCurrentHost($hits);

            $start = $perPage * ($page - 1);
            $end   = min($start + ($perPage - 1), count($validHits) - 1);

            for ($i = $start; $i <= $end; $i++) {
                $hit      = $validHits[$i];
                $document = $hit->getDocument();

                $url   = $document->getField("url");
                $title = $document->getField("title");

                $searchResult = array(
                    'boost'  => $document->boost,
                    'title'  => $title->value,
                    'url'    => $url->value,
                    'sumary' => $searcher->getSumaryForUrl($url->value, $queryStr),
                );

                try {
                    if ($hit->getDocument()->getField("h1")) {
                        $searchResult['h1'] = $hit->getDocument()->getField("h1")->value;
                    }
                } catch (Zend_Search_Lucene_Exception $e) {
                    // Document without an h1 field: leave the key unset.
                }

                // The loop variable must not be named $category: that name holds the
                // requested category, which the suggestion lookup below still needs.
                // NOTE(review): the document's single 'cat' value is appended once per
                // configured category, as in the original code - confirm this is intended.
                foreach ($this->categories as $configuredCategory) {
                    try {
                        $searchResult['categories'][] = $hit->getDocument()->getField("cat")->value;
                    } catch (Zend_Search_Lucene_Exception $e) {
                        // Document without a cat field: nothing to add.
                    }
                }

                $searchResults[] = $searchResult;
            }
        }

        $totalHits = count($validHits);

        $this->view->pages         = $totalHits < 1 ? 0 : ceil($totalHits / $perPage);
        $this->view->perPage       = $perPage;
        $this->view->page          = $page;
        $this->view->total         = $totalHits;
        $this->view->query         = $queryStr;
        $this->view->searchResults = $searchResults;

        if ($this->fuzzySearch) {
            //look for similar search terms
            if (!empty($queryStr) and empty($searchResults)) {
                $terms = SearchPhp_Plugin::fuzzyFindTerms($queryStr, $this->frontendIndex, 3);
                if (empty($terms)) {
                    $terms = SearchPhp_Plugin::fuzzyFindTerms($queryStr, $this->frontendIndex, 0);
                }

                $suggestions = array();
                if (is_array($terms)) {
                    $counter = 0;
                    foreach ($terms as $term) {
                        $t = $term->text;

                        //check if term can be found for current language
                        $suggestionQuery = new Zend_Search_Lucene_Search_Query_Boolean();

                        if ($lang !== null && $lang !== '') {
                            $languageTerm = new Zend_Search_Lucene_Index_Term($lang, 'lang');
                            $suggestionQuery->addSubquery(new Zend_Search_Lucene_Search_Query_Term($languageTerm), true);
                        }

                        if (!empty($category)) {
                            $categoryTerm = new Zend_Search_Lucene_Index_Term($category, 'cat');
                            $suggestionQuery->addSubquery(new Zend_Search_Lucene_Search_Query_Term($categoryTerm), true);
                        }

                        $userQuery = Zend_Search_Lucene_Search_QueryParser::parse($t, 'utf-8');
                        $suggestionQuery->addSubquery($userQuery, true);

                        // Same host rule as for the main search, including https:// URLs.
                        $suggestionHits = $this->_filterHitsByCurrentHost($this->frontendIndex->find($suggestionQuery));

                        if (count($suggestionHits) > 0 and !in_array($t, $suggestions)) {
                            $suggestions[] = $t;
                            if ($counter >= 20) {
                                break;
                            }
                            $counter++;
                        }
                    }
                }

                $this->view->suggestions = $suggestions;
            }
        }
    } catch (Exception $e) {
        Logger::log("An Exception occured during search:", Zend_Log::ERR);
        Logger::log($e, Zend_Log::ERR);
        $this->view->searchResults = array();
    }

    if ($this->_getParam("viewscript")) {
        $this->renderScript($this->_getParam("viewscript"));
    }
}

/**
 * Returns the search language with "_" and "-" removed, or null when no search language is set.
 *
 * @return string|null
 */
private function _getSearchLanguageCode()
{
    if (empty($this->searchLanguage)) {
        return null;
    }

    $lang = is_object($this->searchLanguage)
        ? $this->searchLanguage->toString()
        : $this->searchLanguage;

    return str_replace(array("_", "-"), "", $lang);
}

/**
 * Keeps only the hits whose 'url' field contains the current host (http or https) when ownHostOnly is set.
 *
 * @param array $hits Hits returned by the index
 * @return array The hits unchanged when ownHostOnly is off or there are no hits, otherwise the filtered list
 */
private function _filterHitsByCurrentHost($hits)
{
    if (!$this->ownHostOnly or $hits == null) {
        return $hits;
    }

    //get rid of hits from other hosts
    $currenthost = $_SERVER['HTTP_HOST'];

    $validHits = array();
    foreach ($hits as $hit) {
        $url = $hit->getDocument()->getField("url");
        if (strpos($url->value, "http://" . $currenthost) !== FALSE
            || strpos($url->value, "https://" . $currenthost) !== FALSE
        ) {
            $validHits[] = $hit;
        }
    }

    return $validHits;
}
/**
 * Builds a boolean query from the parsed query string, plus a fuzzy clause for short input.
 *
 * The query string is parsed with this object's encoding (which is also set as
 * the parser's default encoding) and added as an unsigned subquery. When the
 * default analyzer yields fewer than two tokens for the string, a fuzzy query
 * on the 'name' field (minimum similarity 0.4) for the whole string is added
 * as a second unsigned subquery.
 *
 * @param string $queryString
 * @return Zend_Search_Lucene_Search_Query $query
 */
public function createFuzzyQuery($queryString)
{
    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding($this->_encoding);

    $parsed = Zend_Search_Lucene_Search_QueryParser::parse($queryString, $this->_encoding);

    $combined = new Zend_Search_Lucene_Search_Query_Boolean();
    $combined->addSubquery($parsed);

    $analyzer   = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $tokenCount = count($analyzer->tokenize($queryString, $this->_encoding));

    if ($tokenCount >= 2) {
        return $combined;
    }

    $nameTerm = new Zend_Search_Lucene_Index_Term($queryString, 'name');
    $combined->addSubquery(new Zend_Search_Lucene_Search_Query_Fuzzy($nameTerm, 0.4));

    return $combined;
}
/**
 * Build a boolean query holding one term subquery per value of a field.
 *
 * Each subquery is added without a sign argument, so Zend's default sign
 * applies. An earlier MultiTerm-based variant of this method was abandoned
 * in favour of the boolean form.
 *
 * @param string $name     Index field the values are matched against
 * @param array  $val      Values to match
 * @param mixed  $required Not used by this implementation
 * @return Zend_Search_Lucene_Search_Query_Boolean
 */
protected function handleMulti($name, $val, $required)
{
    $anyOf = new Zend_Search_Lucene_Search_Query_Boolean();

    foreach ($val as $value) {
        $fieldTerm = new Zend_Search_Lucene_Index_Term($value, $name);
        $anyOf->addSubquery(new Zend_Search_Lucene_Search_Query_Term($fieldTerm));
    }

    return $anyOf;
}
/**
 * Translate the request's search fields into a Lucene boolean query.
 *
 * For every entry of $this->request->searchFields that has a non-empty
 * 'query', a textual term is built (lower-cased, quoted for phrase matches,
 * prefixed with "field:" when a field is given), parsed through QubitSearch
 * and added to a boolean query with a sign derived from 'operator':
 * 'not' => prohibited, 'or' => optional, anything else => required.
 * A human-readable description of each criterion is appended to
 * $this->queryTerms.
 *
 * @return Zend_Search_Lucene_Search_Query_Boolean Query wrapping the built
 *         criteria as a single required subquery
 */
public function parseQuery()
{
    QubitSearch::getInstance();

    $queryBuilt = new Zend_Search_Lucene_Search_Query_Boolean();

    foreach ($this->request->searchFields as $searchField) {
        // if no terms for this field, skip it
        if (empty($searchField['query'])) {
            continue;
        }

        // Normalise optional keys before their first use so that a missing
        // 'operator' or 'match' does not raise an undefined-index notice.
        if (!isset($searchField['operator'])) {
            $searchField['operator'] = null;
        }
        $isPhrase = isset($searchField['match']) && 'phrase' == $searchField['match'];
        $hasField = !empty($searchField['field']);

        // enclose phrase searches in quotes (strip existing ones)
        if ($isPhrase) {
            $term = '"' . str_replace(array('"', "'"), '', strtolower($searchField['query'])) . '"';
        } else {
            $term = strtolower($searchField['query']);
        }

        // The description shown to the user never carries the field prefix.
        $matchString = $term;

        // limit to specified field
        if ($hasField) {
            $term = $searchField['field'] . ':' . $term;
            $field = ucfirst($searchField['field']);
        } else {
            $field = $isPhrase
                ? $this->getContext()->i18n->__('Phrase')
                : $this->getContext()->i18n->__('Keyword(s)');
        }

        $this->queryTerms[] = array(
            'term' => $field . ': ' . $matchString,
            'operator' => $searchField['operator'],
        );

        // select which boolean operator to use
        switch ($searchField['operator']) {
            case 'not':
                $token = false;
                break;

            case 'or':
                $token = null;
                break;

            case 'and':
            default:
                $token = true;
                break;
        }

        $queryBuilt->addSubquery(QubitSearch::getInstance()->parse($term), $token);
    }

    $query = new Zend_Search_Lucene_Search_Query_Boolean();
    $query->addSubquery($queryBuilt, true);

    return $query;
}
/**
 * Search the Lucene index for the 'query' request parameter and hand a
 * paginator over the hits to the view.
 *
 * Reads the index path and default page size from the 'cfg' registry entry.
 * An empty query yields an empty result list without touching the index.
 * Lucene exceptions raised while parsing or executing the query result in an
 * empty hit list; a failure to open the index is not caught.
 *
 * View variables set: 'query', 'paginator', queryParams.
 */
public function searchAction()
{
    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('utf-8');
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive()
    );

    $cfg = Zend_Registry::get('cfg');
    $indexPath = $cfg['search']['indexpath'];
    $request = $this->getRequest();

    // A missing or non-scalar (e.g. array) parameter is treated as "no query"
    // instead of being passed to trim().
    $rawQuery = $request->getParam('query');
    $queryStr = is_scalar($rawQuery) ? trim((string) $rawQuery) : '';

    $hits = array();
    if (strlen($queryStr) > 0) {
        // Open the index once, and only when there is something to search for.
        $index = Zend_Search_Lucene::open($indexPath);

        try {
            $userQuery = Zend_Search_Lucene_Search_QueryParser::parse($queryStr);

            $query = new Zend_Search_Lucene_Search_Query_Boolean();
            $query->addSubquery($userQuery, true);

            $hits = $index->find($query);
        } catch (Zend_Search_Lucene_Exception $ex) {
            // An unparsable or unsupported query is shown as "no results".
            $hits = array();
        }
    }

    $view = $this->view;
    $view->assign('query', $queryStr);

    $paginator = Zend_Paginator::factory($hits);
    $paginator->setCurrentPageNumber($request->getParam('page', 0));
    $paginator->setItemCountPerPage($request->getParam('pagesize', $cfg['search']['pagesize']));

    Zend_Paginator::setDefaultScrollingStyle('Sliding');
    Zend_View_Helper_PaginationControl::setDefaultViewPartial('pagination_search_control.phtml');

    $view->assign('paginator', $paginator);
    $view->queryParams = $request->getParams();
}
/**
 * Tutorial search / browse action.
 *
 * - A posted search form is redirected to the same action with the form
 *   values turned into Cake named params.
 * - When named params other than 'page' are present, a Lucene query is built
 *   from 'term' (free text) plus the pipe-separated filters 'learning_goal',
 *   'resource_type' (numeric values only) and 'keyword', and the search
 *   index is paginated.
 * - Otherwise all indexed tutorials are listed by title.
 *
 * Sets the view variables 'tutorials', 'results_context', 'learningGoals'
 * and 'resourceTypes' and selects the 'institution' layout.
 */
function search()
{
    if (!empty($this->data['Tutorial'])) {
        // convert POST to Cake named params (it's prettier than GET)
        $this->redirect(array_merge($this->params['named'], $this->data['Tutorial']));
    }

    // default to boolean AND searching
    Zend_Search_Lucene_Search_QueryParser::setDefaultOperator(Zend_Search_Lucene_Search_QueryParser::B_AND);

    $query = '';

    // Are there any parameters besides page?
    $named_params = array_diff_key($this->params['named'], array('page' => ''));

    if (!empty($named_params)) {
        // Start from an empty boolean query so that $parsed_query is always
        // defined, including when 'term' is present but empty.
        $parsed_query = new Zend_Search_Lucene_Search_Query_Boolean();

        // The term is passed to the parser unsanitised; parsing exceptions
        // are enabled so that invalid Lucene syntax can be intercepted here.
        if (isset($this->params['named']['term'])) {
            $query = $this->params['named']['term'];

            if (!empty($query)) {
                // Intercept invalid queries
                try {
                    Zend_Search_Lucene_Search_QueryParser::dontSuppressQueryParsingExceptions();
                    $parsed_query = Zend_Search_Lucene_Search_QueryParser::parse($query);
                } catch (Zend_Search_Lucene_Exception $e) {
                    $this->Session->setFlash("We're not sure what you mean. Are your search terms correct?");
                    $this->redirect(array('action' => 'search', 'term' => Sanitize::paranoid($query, array(" "))));
                }
            }
        }

        // NOTE(review): the filters below call addSubquery() on whatever the
        // parser returned; this presumes the parser yields a boolean query
        // for every accepted term - confirm.
        try {
            // Numeric id filters: every listed id becomes a required term.
            foreach (array('learning_goal', 'resource_type') as $filter) {
                if (!isset($this->params['named'][$filter])) {
                    continue;
                }
                foreach (explode('|', $this->params['named'][$filter]) as $value) {
                    if (is_numeric($value)) {
                        $filter_term = new Zend_Search_Lucene_Index_Term($value, $filter);
                        $parsed_query->addSubquery(new Zend_Search_Lucene_Search_Query_Term($filter_term), true);
                    }
                }
            }

            if (isset($this->params['named']['keyword'])) {
                foreach (explode('|', $this->params['named']['keyword']) as $keyword) {
                    // NOTE(review): the pattern is not anchored, so any value
                    // containing one allowed character passes; the original
                    // comment asks "valid UUID?" - confirm intended strictness.
                    if (preg_match('/[A-Za-z0-9\\-]+/', $keyword)) {
                        $keyword_term = new Zend_Search_Lucene_Index_Term($keyword, 'keyword');
                        $parsed_query->addSubquery(new Zend_Search_Lucene_Search_Query_Term($keyword_term), true);
                    }
                }
            }
        } catch (Zend_Search_Lucene_Exception $e) {
            $this->Session->setFlash("We're not sure what you mean. Are your search terms correct?");
            $this->redirect(array('action' => 'search', 'query' => $query));
        }

        $this->paginate['SearchIndex'] = array(
            'limit' => 10,
            'conditions' => array('query' => $parsed_query),
            'highlight' => true
        );
        $this->set('tutorials', $this->paginate($this->Tutorial->SearchIndex));
    } else {
        $this->paginate = array(
            'published',
            'limit' => 10,
            'order' => 'Tutorial.title ASC',
            'conditions' => array('in_index' => true),
            'contain' => array('Tag')
        );
        $this->set('tutorials', $this->paginate($this->Tutorial));
    }

    $this->layout = 'institution';

    $results_context = array('model' => '', 'key' => '', 'id' => 0, 'name' => '');
    $this->set(compact('results_context'));
    $this->set('learningGoals', $this->Tutorial->LearningGoal->find('list'));
    $this->set('resourceTypes', $this->Tutorial->ResourceType->find('list'));
}
/**
 * Display search results.
 *
 * Builds a query from the free-text user variable 'q' and from every
 * configured search form element, runs it against either a Solr server
 * (HTTP POST to "<solrUrl>/select") or the plugin's Zend Lucene index, and
 * renders the plugin's results.tpl with a result iterator.
 *
 * In Solr mode the query is a string of `field:"value"` clauses; in Zend
 * mode it is a boolean query object with one required subquery per filled-in
 * form element. Date elements only contribute when both the '-from' and
 * '-to' values are present. An unknown element type is a fatal error.
 */
function searchResults()
{
    ZendSearchHandler::setupTemplate();
    $plugin =& PluginRegistry::getPlugin('generic', 'ZendSearchPlugin');
    $isUsingSolr = $plugin->isUsingSolr();

    if ($isUsingSolr) {
        // Prepare the HTTP request; the POST body is set once the query is complete.
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $plugin->getSetting('solrUrl') . '/select');
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_TIMEOUT, 10);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_ENCODING, '');
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($ch, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
        curl_setopt($ch, CURLOPT_POST, 1);
        $query = '';
    } else {
        $index =& $plugin->getIndex();
        $query = new Zend_Search_Lucene_Search_Query_Boolean();
    }

    // Free-text part of the search.
    $q = Request::getUserVar('q');
    if (!empty($q)) {
        if ($isUsingSolr) {
            $query .= 'text:"' . ZendSearchHandler::luceneEscape($q) . '" ';
        } else {
            $query->addSubquery(Zend_Search_Lucene_Search_QueryParser::parse($q));
        }
    }

    // One clause per search form element that the user filled in.
    $searchFormElementDao =& DAORegistry::getDAO('SearchFormElementDAO');
    $searchFormElements =& $searchFormElementDao->getSearchFormElements();
    while ($searchFormElement =& $searchFormElements->next()) {
        $searchFormElementId = $searchFormElement->getSearchFormElementId();
        $symbolic = $searchFormElement->getSymbolic();

        switch ($searchFormElement->getType()) {
            case SEARCH_FORM_ELEMENT_TYPE_SELECT:
            case SEARCH_FORM_ELEMENT_TYPE_STRING:
                $term = Request::getUserVar($symbolic);
                if (!empty($term)) {
                    if ($isUsingSolr) {
                        $query .= $symbolic . ':"' . ZendSearchHandler::luceneEscape($term) . '" ';
                    } else {
                        $query->addSubquery(new Zend_Search_Lucene_Search_Query_Term(new Zend_Search_Lucene_Index_Term($term, $symbolic)), true);
                    }
                }
                break;

            case SEARCH_FORM_ELEMENT_TYPE_DATE:
                $from = Request::getUserDateVar($symbolic . '-from');
                $to = Request::getUserDateVar($symbolic . '-to');
                if (!empty($from) && !empty($to)) {
                    if ($isUsingSolr) {
                        $query .= $symbolic . ':[' . strftime('%Y-%m-%dT%H:%M:%SZ', $from) . ' TO ' . strftime('%Y-%m-%dT%H:%M:%SZ', $to) . '] ';
                    } else {
                        $fromTerm = new Zend_Search_Lucene_Index_Term($from, $symbolic);
                        $toTerm = new Zend_Search_Lucene_Index_Term($to, $symbolic);
                        $query->addSubquery(new Zend_Search_Lucene_Search_Query_Range($fromTerm, $toTerm, true), true);
                    }
                }
                break;

            default:
                fatalError('Unknown element type!');
        }

        // Break the reference before the next iteration rebinds it.
        unset($searchFormElement);
    }

    $rangeInfo =& PKPHandler::getRangeInfo('results');

    if ($isUsingSolr) {
        $itemsPerPage = Config::getVar('interface', 'items_per_page');
        // NOTE(review): the start offset is page * itemsPerPage; whether
        // getPage() is 0- or 1-based is not visible here - confirm.
        curl_setopt($ch, CURLOPT_POSTFIELDS, 'q=' . trim(urlencode($query)) . '&rows=' . urlencode($itemsPerPage) . ($rangeInfo ? '&start=' . ($rangeInfo->getPage() * $itemsPerPage) : ''));
        $data = curl_exec($ch);
        // Release the handle as soon as the response has been read.
        curl_close($ch);

        $xmlParser = new XMLParser();
        $result = null;
        $numFound = 0;
        // Parse errors are suppressed; an unusable response leaves $result falsy.
        @($result =& $xmlParser->parseTextStruct($data, array('str', 'result')));

        // Collect the record ids and the total hit count from the response nodes.
        $recordIds = array();
        if ($result) {
            foreach ($result as $nodeSet) {
                foreach ($nodeSet as $node) {
                    if (isset($node['attributes']['name']) && $node['attributes']['name'] == 'id') {
                        $recordIds[] = $node['value'];
                    } elseif (isset($node['attributes']['numFound'])) {
                        $numFound = $node['attributes']['numFound'];
                    }
                }
            }
        }

        $plugin->import('SolrResultIterator');
        $resultsIterator =& SolrResultIterator::fromRangeInfo($recordIds, $numFound, $rangeInfo);
        unset($recordIds);
    } else {
        $resultsArray = $index->find($query);
        $plugin->import('ZendSearchResultIterator');
        $resultsIterator =& ZendSearchResultIterator::fromRangeInfo($resultsArray, $rangeInfo);
        unset($resultsArray);
    }

    $templateMgr =& TemplateManager::getManager();
    $templateMgr->assign_by_ref('recordDao', DAORegistry::getDAO('RecordDAO'));
    $templateMgr->assign_by_ref('results', $resultsIterator);
    $templateMgr->assign_by_ref('q', $q);
    $templateMgr->display($plugin->getTemplatePath() . 'results.tpl');
}
/**
 * Search the page index and return the matches as IndexedSearchResult objects.
 *
 * The lower-cased user query is parsed (using APP_CHARSET) and added as a
 * required subquery of a boolean query, followed by the caller-supplied
 * subqueries. The result set is limited to 200 hits.
 *
 * @param string $query      User query string
 * @param array  $subqueries Additional subqueries. Each entry is either a
 *                           query itself (treated as required) or an array
 *                           with a 'query' key and an optional 'required' key
 * @return array List of IndexedSearchResult
 */
public static function search( $query, $subqueries = array())
{
    Loader::library('3rdparty/Zend/Search/Lucene');
    Loader::library('3rdparty/StandardAnalyzer/Analyzer/Standard/English');

    $index = new Zend_Search_Lucene(DIR_FILES_CACHE_PAGES);
    $index->setResultSetLimit(200);
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(new StandardAnalyzer_Analyzer_Standard_English());

    $userQuery = Zend_Search_Lucene_Search_QueryParser::parse(strtolower($query), APP_CHARSET);

    $combined = new Zend_Search_Lucene_Search_Query_Boolean();
    $combined->addSubquery($userQuery, true);

    foreach ($subqueries as $spec) {
        $isSpecArray = is_array($spec);
        $part = ($isSpecArray && isset($spec['query'])) ? $spec['query'] : $spec;
        $isRequired = ($isSpecArray && isset($spec['required'])) ? $spec['required'] : true;
        $combined->addSubquery($part, $isRequired);
    }

    // NOTE(review): utf8_encode() receives the query object here, so what is
    // handed to find() is presumably its string form rather than the object
    // itself - confirm this round-trip is intended.
    $encodedQuery = utf8_encode($combined);

    $results = array();
    foreach ($index->find($encodedQuery) as $hit) {
        $results[] = new IndexedSearchResult($hit->cID, $hit->cName, $hit->cDescription, $hit->score, $hit->cPath, $hit->cBody);
    }

    return $results;
}
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * - No terms: an empty query is returned.
 * - First term already bound to a field: the query is returned unchanged.
 * - Otherwise: a boolean query is returned that holds one phrase subquery
 *   per indexed field, each with this query's slop and term offsets; the
 *   boolean query carries this query's boost.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if (count($this->_terms) == 0) {
        require_once sfConfig::get('sf_lib_dir') . '/modules/search/lib/Lucene/Search/Query/Empty.php';

        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    if ($this->_terms[0]->field !== null) {
        return $this;
    }

    require_once sfConfig::get('sf_lib_dir') . '/modules/search/lib/Lucene/Search/Query/Boolean.php';
    $perFieldQuery = new Zend_Search_Lucene_Search_Query_Boolean();
    $perFieldQuery->setBoost($this->getBoost());

    foreach ($index->getFieldNames(true) as $fieldName) {
        $phrase = new Zend_Search_Lucene_Search_Query_Phrase();
        $phrase->setSlop($this->getSlop());

        require_once sfConfig::get('sf_lib_dir') . '/modules/search/lib/Lucene/Index/Term.php';
        foreach ($this->_terms as $position => $unqualified) {
            // Same text, now bound to the current field.
            $phrase->addTerm(
                new Zend_Search_Lucene_Index_Term($unqualified->text, $fieldName),
                $this->_offsets[$position]
            );
        }

        $perFieldQuery->addSubquery($phrase);
    }

    return $perFieldQuery;
}
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * Without a field, the word is rewritten once per search field (the default
 * search field if one is set, otherwise every indexed field) and the
 * significant results are combined. With a field, the word is tried as an
 * exact index term first, then rejected if it contains wildcards, and
 * finally analysed: no token gives an insignificant query, one token gives a
 * fuzzy query, several tokens are an error. $this->_matches is updated on
 * every returning path.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if ($this->_field === null) {
        require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
        $combined = new Zend_Search_Lucene_Search_Query_Boolean();
        $sawInsignificant = false;

        require_once 'Zend/Search/Lucene.php';
        $defaultField = Zend_Search_Lucene::getDefaultSearchField();
        $fieldsToSearch = ($defaultField === null)
            ? $index->getFieldNames(true)
            : array($defaultField);

        require_once 'Zend/Search/Lucene/Search/Query/Preprocessing/Fuzzy.php';
        foreach ($fieldsToSearch as $fieldName) {
            $perField = new Zend_Search_Lucene_Search_Query_Preprocessing_Fuzzy(
                $this->_word,
                $this->_encoding,
                $fieldName,
                $this->_minimumSimilarity
            );
            $rewritten = $perField->rewrite($index);

            // Insignificant results are remembered but not added.
            if ($rewritten instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
                $sawInsignificant = true;
                continue;
            }
            // Empty results are dropped.
            if ($rewritten instanceof Zend_Search_Lucene_Search_Query_Empty) {
                continue;
            }
            $combined->addSubquery($rewritten);
        }

        $parts = $combined->getSubqueries();
        $partCount = count($parts);

        if ($partCount == 0) {
            $this->_matches = array();

            if ($sawInsignificant) {
                require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
                return new Zend_Search_Lucene_Search_Query_Insignificant();
            }

            require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
            return new Zend_Search_Lucene_Search_Query_Empty();
        }

        // A single subquery is returned directly instead of the wrapper.
        $result = ($partCount == 1) ? reset($parts) : $combined;
        $result->setBoost($this->getBoost());
        $this->_matches = $result->getQueryTerms();

        return $result;
    }

    // -------------------------------------
    // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
    // encoding is not used since we expect binary matching
    require_once 'Zend/Search/Lucene/Index/Term.php';
    $exactTerm = new Zend_Search_Lucene_Index_Term($this->_word, $this->_field);
    if ($index->hasTerm($exactTerm)) {
        require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';
        $fuzzy = new Zend_Search_Lucene_Search_Query_Fuzzy($exactTerm, $this->_minimumSimilarity);
        $fuzzy->setBoost($this->getBoost());

        // rewrite() must run before getQueryTerms(): it fills the container
        // of matched terms.
        $primitive = $fuzzy->rewrite($index);
        $this->_matches = $fuzzy->getQueryTerms();

        return $primitive;
    }

    // -------------------------------------
    // Recognize wildcard queries
    /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
    $pcreHasUnicode = (@preg_match('/\\pL/u', 'a') == 1);
    if ($pcreHasUnicode) {
        $pieces = preg_split('/[*?]/u', iconv($this->_encoding, 'UTF-8', $this->_word));
    } else {
        $pieces = preg_split('/[*?]/', $this->_word);
    }
    if (count($pieces) > 1) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search doesn\'t support wildcards (except within Keyword fields).');
    }

    // -------------------------------------
    // Recognize one-term multi-term and "insignificant" queries
    require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
    $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);
    $tokenCount = count($tokens);

    if ($tokenCount == 0) {
        $this->_matches = array();
        require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }

    if ($tokenCount == 1) {
        require_once 'Zend/Search/Lucene/Index/Term.php';
        $tokenTerm = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
        require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';
        $fuzzy = new Zend_Search_Lucene_Search_Query_Fuzzy($tokenTerm, $this->_minimumSimilarity);
        $fuzzy->setBoost($this->getBoost());

        // Same ordering requirement as above: rewrite first, then read the
        // matched terms.
        $primitive = $fuzzy->rewrite($index);
        $this->_matches = $fuzzy->getQueryTerms();

        return $primitive;
    }

    // Word is tokenized into several tokens
    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
    throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search is supported only for non-multiple word terms');
}
// addField() with a scalar value: the call must not throw, and the first
// subquery of the criteria's query must equal a term query built from the
// same two arguments.
try {
    $criteria->addField('string', 'foo');
    $t->pass('->addField() accepted a string value');
    $s = $criteria->getQuery()->getSubqueries();
    $t->ok($s[0] == new Zend_Search_Lucene_Search_Query_Term(new Zend_Search_Lucene_Index_Term('string', 'foo')), '->addField() registers the new field');
} catch (Exception $e) {
    // Any exception fails the first check and skips the dependent one.
    $t->fail('->addField() accepted a string value');
    $t->skip('->addField() registers the new field');
}

// addField() with an array value: the second subquery must equal a boolean
// query holding one term query per array element.
try {
    $criteria->addField(range(1, 10), 'bar');
    $t->pass('->addField() accepted an array value');
    $s = $criteria->getQuery()->getSubqueries();
    $q = new Zend_Search_Lucene_Search_Query_Boolean();
    foreach (range(1, 10) as $value) {
        // NOTE(review): `true` is passed to the Query_Term constructor here,
        // not to addSubquery() - confirm this matches what addField() builds.
        $q->addSubquery(new Zend_Search_Lucene_Search_Query_Term(new Zend_Search_Lucene_Index_Term($value, 'bar'), true));
    }
    $t->ok($s[1] == $q, '->addField() registers the array field');
} catch (Exception $e) {
    $t->fail('->addField() accepted an array value');
    $t->skip('->addField() registers the array field');
}

// addField() with an object value: an exception is the expected outcome.
try {
    $criteria->addField(new Foo());
    $t->fail('->addField() rejects invalid values');
} catch (Exception $e) {
    $t->pass('->addField() rejects invalid values');
}

$t->diag('testing addMultiTerm()');
// Subqueries produced by addMultiTerm() on a fresh instance, and the start of
// the expected multi-term query they are compared against further on.
$s = inst()->addMultiTerm(range(1, 10), 'foo')->getQuery()->getSubqueries();
$q = new Zend_Search_Lucene_Search_Query_MultiTerm();
/**
 * Combine child queries into one boolean query.
 *
 * Every child is added with the given sign, except a boolean child that
 * consists of exactly one prohibited subquery (a NOT): its inner subquery is
 * added directly as prohibited instead.
 *
 * @param array     $childNodes Child queries to combine
 * @param bool|null $required   Sign passed to addSubquery() for regular children
 * @return Zend_Search_Lucene_Search_Query_Boolean
 */
private function buildCondition($childNodes, $required)
{
    $result = new Zend_Search_Lucene_Search_Query_Boolean();

    foreach ($childNodes as $child) {
        // Detect if child is a NOT, and reformulate on the fly to support the syntax
        if ($child instanceof Zend_Search_Lucene_Search_Query_Boolean) {
            $signs = $child->getSigns();
            // The is_array() guard keeps count() away from a non-array
            // (e.g. null) signs value.
            if (is_array($signs) && count($signs) === 1 && $signs[0] === false) {
                // reset() takes its argument by reference, so it needs a variable.
                $negatedSubqueries = $child->getSubqueries();
                $result->addSubquery(reset($negatedSubqueries), false);
                continue;
            }
        }

        $result->addSubquery($child, $required);
    }

    return $result;
}
/**
 * Remove a record's entries from its table's Lucene index.
 *
 * Looks up documents whose 'primarykey' field equals the object's id (and,
 * when a culture is given, whose 'culture' field equals the normalised
 * culture) and deletes every hit.
 *
 * The lookup is built from term objects rather than a query string because,
 * per the original author's note (2009-05-06), numbers such as ids get
 * stripped from parsed query strings.
 *
 * @param Doctrine_Record $object  Record to remove from the index
 * @param string|null     $culture Optional culture to restrict the deletion to
 */
public static function deleteFromLuceneIndex(Doctrine_Record $object, $culture = null)
{
    $index = $object->getTable()->getLuceneIndex();

    // Field => value pairs that a document must all match.
    $criteria = array('primarykey' => $object->getId());
    if ($culture !== null) {
        $criteria['culture'] = self::normalizeCulture($culture);
    }

    $query = new Zend_Search_Lucene_Search_Query_Boolean();
    foreach ($criteria as $fieldName => $fieldValue) {
        $fieldTerm = new Zend_Search_Lucene_Index_Term($fieldValue, $fieldName);
        $query->addSubquery(new Zend_Search_Lucene_Search_Query_Term($fieldTerm), true);
    }

    $hits = $index->find($query);
    if (!$hits) {
        return;
    }

    // $hit->id is the index's internal document id, not the record's id.
    // One hit per id and culture is expected, but every hit is removed.
    foreach ($hits as $hit) {
        $index->delete($hit->id);
    }
}