Example #1
0
 /**
  * Rewrites this multi-term query in the context of the given index.
  *
  * - no terms: an empty query is returned;
  * - every term names a field: this query is returned unchanged;
  * - otherwise: a boolean query is built from the rewritten single-term
  *   queries, each keeping the sign it had here (or "required" when no signs are set).
  *
  * @param Zend_Search_Lucene_Interface $index
  * @return Zend_Search_Lucene_Search_Query
  */
 public function rewrite(Zend_Search_Lucene_Interface $index)
 {
     if (count($this->_terms) == 0) {
         return new Zend_Search_Lucene_Search_Query_Empty();
     }

     // Look for the first term that carries no field name
     $hasUnqualifiedTerm = false;
     foreach ($this->_terms as $candidate) {
         if ($candidate->field === null) {
             $hasUnqualifiedTerm = true;
             break;
         }
     }
     if (!$hasUnqualifiedTerm) {
         return $this;
     }

     $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
     $booleanQuery->setBoost($this->getBoost());
     foreach ($this->_terms as $position => $currentTerm) {
         $termQuery = new Zend_Search_Lucene_Search_Query_Term($currentTerm);
         $rewritten = $termQuery->rewrite($index);
         if ($this->_signs === null) {
             $sign = true;
         } else {
             $sign = $this->_signs[$position];
         }
         $booleanQuery->addSubquery($rewritten, $sign);
     }
     return $booleanQuery;
 }
 /**
  * Searches below the routed resource and loads the matching information objects.
  *
  * Sets $this->resource, $this->pager and $this->informationObjects.
  *
  * @param mixed $request Request object; its limit, query and page properties are read
  */
 public function execute($request)
 {
     // Use the configured page size when the request does not carry one
     if (!isset($request->limit)) {
         $request->limit = sfConfig::get('app_hits_per_page');
     }

     $this->resource = $this->getRoute()->resource;

     // A resource without a parent is treated as the root and is not listed
     if (!isset($this->resource->parent)) {
         $this->forward404();
     }

     $search = new QubitSearch();

     // Default query: documents whose parentId is the current resource
     $query = new Zend_Search_Lucene_Search_Query_Boolean();
     $parentTerm = new Zend_Search_Lucene_Index_Term($this->resource->id, 'parentId');
     $query->addSubquery(new Zend_Search_Lucene_Search_Query_Term($parentTerm), true);

     // A query supplied on the request replaces the default one entirely
     if (isset($request->query)) {
         $query = $request->query;
     }

     $query = QubitAcl::searchFilterByRepository($query, 'read');
     $query = QubitAcl::searchFilterDrafts($query);

     $pager = new QubitArrayPager();
     $this->pager = $pager;
     $this->pager->hits = $search->getEngine()->getIndex()->find($query);
     $this->pager->setMaxPerPage($request->limit);
     $this->pager->setPage($request->page);

     // Collect the document ids of the current page
     $pageIds = array();
     foreach ($this->pager->getResults() as $pageHit) {
         $pageIds[] = $pageHit->getDocument()->id;
     }

     $byIds = new Criteria();
     $byIds->add(QubitInformationObject::ID, $pageIds, Criteria::IN);
     $this->informationObjects = QubitInformationObject::get($byIds);
 }
 /**
  * Runs a Lucene search for the "q" request parameter and passes the result to the view.
  *
  * Nothing is done unless "q" is a string. When input validation fails the validation
  * messages are assigned to the view; when the index raises a
  * Zend_Search_Lucene_Exception the hit list is empty.
  */
 public function searchAction()
 {
     $input = new Zend_Filter_Input(
         array('q' => array('StringTrim', 'StripTags')),
         array('q' => array('presence' => 'required')),
         $_GET
     );

     if (!is_string($this->_request->getParam('q'))) {
         return;
     }

     $queryString = $input->getEscaped('q');
     $this->view->queryString = $queryString;

     if (!$input->isValid()) {
         $this->view->messages = $input->getMessages();
         return;
     }

     $config = Zend_Registry::get('config');
     $index = App_Search_Lucene::open($config->luceneIndex);

     $query = new Zend_Search_Lucene_Search_Query_Boolean();

     // Required: the user's term, without a field name
     $userTerm = new Zend_Search_Lucene_Index_Term($queryString);
     $query->addSubquery(new Zend_Search_Lucene_Search_Query_Term($userTerm), true);

     // Required: the hard-coded creation date
     $dateTerm = new Zend_Search_Lucene_Index_Term('20091023', 'CreationDate');
     $query->addSubquery(new Zend_Search_Lucene_Search_Query_Term($dateTerm), true);

     try {
         $found = $index->find($query);
     } catch (Zend_Search_Lucene_Exception $ex) {
         $found = array();
     }
     $this->view->hits = $found;
 }
Example #4
0
 /**
  * Builds a Lucene boolean query from the contained expression objects.
  *
  * Each expression is converted with its own toString(); results that are the empty
  * string are skipped, all others are added with the sign mapped from this object's operator.
  *
  * @param array $types Associative list of variable or column names as keys and their corresponding types
  * @param array $translations Associative list of variable or column names that should be translated
  * @param array $plugins Associative list of item names and plugins implementing MW_Common_Criteria_Plugin_Interface
  * @return Zend_Search_Lucene_Search_Query_Boolean Combined search objects
  */
 public function toString(array $types, array $translations = array(), array $plugins = array())
 {
     $combined = new Zend_Search_Lucene_Search_Query_Boolean();

     foreach ($this->_expressions as $expression) {
         $part = $expression->toString($types, $translations, $plugins);
         if ($part === '') {
             continue;
         }
         $combined->addSubquery($part, self::$_operators[$this->_operator]);
     }

     return $combined;
 }
 /**
  * Writes one sitemap file per valid host plus a sitemap index into the sitemap directory.
  *
  * For every host the index is searched for documents whose "host" field matches and whose
  * "restrictionGroup_default" field is set; each hit's "url" field becomes a <loc> entry in
  * sitemap-<host with dots replaced by dashes>.xml. Afterwards sitemap.xml is written, listing
  * all per-host files under the first host.
  *
  * A file that cannot be opened for writing is logged and skipped instead of being
  * passed to fwrite() as an invalid handle.
  */
 public function generateSitemap()
 {
     $this->prepareSiteMapFolder();

     if (is_null($this->sitemapDir)) {
         \Pimcore\Logger::emerg('LuceneSearch: Cannot generate sitemap. Sitemap directory [ ' . $this->sitemapDir . ' ]  not available/not writeable and cannot be created');
         return;
     }

     $hosts = $this->getValidHosts();
     if (!is_array($hosts)) {
         \Pimcore\Logger::debug('LuceneSearch: could not generate sitemaps, did not find any hosts in index.');
         return;
     }

     foreach ($hosts as $hostName) {
         // Both conditions are required: matching host and default restriction group
         $query = new \Zend_Search_Lucene_Search_Query_Boolean();
         $hostTerm = new \Zend_Search_Lucene_Index_Term($hostName, 'host');
         $query->addSubquery(new \Zend_Search_Lucene_Search_Query_Term($hostTerm), TRUE);
         $restrictionTerm = new \Zend_Search_Lucene_Index_Term(TRUE, 'restrictionGroup_default');
         $query->addSubquery(new \Zend_Search_Lucene_Search_Query_Term($restrictionTerm), TRUE);
         $hits = $this->index->find($query);

         $name = str_replace('.', '-', $hostName);
         $filePath = $this->sitemapDir . '/sitemap-' . $name . '.xml';
         $fh = fopen($filePath, 'w');
         if ($fh === false) {
             // fopen() returns false on failure; writing to it would only raise errors
             \Pimcore\Logger::emerg('LuceneSearch: Cannot generate sitemap. Could not open [ ' . $filePath . ' ] for writing');
             continue;
         }

         fwrite($fh, '<?xml version="1.0" encoding="UTF-8"?>' . "\r\n");
         fwrite($fh, '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">');
         fwrite($fh, "\r\n");
         foreach ($hits as $hit) {
             $url = $hit->getDocument()->getField('url');
             // Strip the output-filter switch from the indexed URL
             $uri = str_replace(array('?pimcore_outputfilters_disabled=1', '&pimcore_outputfilters_disabled=1'), '', $url->value);
             fwrite($fh, '<url>' . "\r\n");
             fwrite($fh, '    <loc>' . htmlspecialchars($uri, ENT_QUOTES) . '</loc>' . "\r\n");
             fwrite($fh, '</url>' . "\r\n");
         }
         fwrite($fh, '</urlset>' . "\r\n");
         fclose($fh);
     }

     $filePath = $this->sitemapDir . '/sitemap.xml';
     $fh = fopen($filePath, 'w');
     if ($fh === false) {
         \Pimcore\Logger::emerg('LuceneSearch: Cannot generate sitemap. Could not open [ ' . $filePath . ' ] for writing');
         return;
     }

     //first host must be main domain - see hint in plugin settings
     $currenthost = isset($hosts[0]) ? $hosts[0] : null;

     fwrite($fh, '<?xml version="1.0" encoding="UTF-8"?>' . "\r\n");
     fwrite($fh, '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">');
     fwrite($fh, "\r\n");
     foreach ($hosts as $hostName) {
         $name = str_replace('.', '-', $hostName);
         fwrite($fh, '<sitemap>' . "\r\n");
         fwrite($fh, '    <loc>http://' . $currenthost . '/plugin/LuceneSearch/frontend/sitemap/?sitemap=sitemap-' . $name . '.xml' . '</loc>' . "\r\n");
         fwrite($fh, '</sitemap>' . "\r\n");
         \Pimcore\Logger::debug('LuceneSearch: ' . $hostName . ' for sitemap.xml added.');
     }
     fwrite($fh, '</sitemapindex>' . "\r\n");
     fclose($fh);
 }
Example #6
0
 /**
  * Looks up users whose name or username starts with every word of the query string.
  *
  * The query string is trimmed and split on single spaces; each non-empty word must match,
  * as a prefix wildcard, either the "name" or the "username" field. The current session
  * user is removed from the hits.
  *
  * Words and query strings are compared against the empty string explicitly, so that
  * "0" is searched for instead of being treated as missing input.
  *
  * @param string $queryString
  * @return array "queryString" is always present; "message" is set for an empty query,
  *               "users" when matching users (other than the session user) were found
  */
 public function find($queryString)
 {
     $queryString = trim($queryString);
     if ($queryString === '') {
         return ["queryString" => $queryString, "message" => "No String"];
     }

     $index = \Zend_Search_Lucene::open($this->indexfile);
     // Both settings are static, i.e. they stay in effect after this call
     \Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength(1);
     \Zend_Search_Lucene::setResultSetLimit(5);

     $query = new \Zend_Search_Lucene_Search_Query_Boolean();
     foreach (explode(' ', $queryString) as $word) {
         if ($word === '') {
             // consecutive spaces produce empty pieces
             continue;
         }
         $prefixPattern = $word . "*";
         $nameQuery = new \Zend_Search_Lucene_Search_Query_Wildcard(
             new \Zend_Search_Lucene_Index_Term($prefixPattern, "name")
         );
         $usernameQuery = new \Zend_Search_Lucene_Search_Query_Wildcard(
             new \Zend_Search_Lucene_Index_Term($prefixPattern, "username")
         );
         // Either field may match (optional sign) ...
         $wordQuery = new \Zend_Search_Lucene_Search_Query_Boolean();
         $wordQuery->addSubquery($nameQuery, null);
         $wordQuery->addSubquery($usernameQuery, null);
         // ... but every word has to match somewhere (required sign)
         $query->addSubquery($wordQuery, true);
     }

     $hits = $index->find($query);
     if (empty($hits)) {
         return ["queryString" => $queryString];
     }

     // Compare as strings: a loose comparison treats numeric-looking names such as "007" and "7" as equal
     $ownUsername = (string) $_SESSION['user']->username;
     $usernames = [];
     foreach ($hits as $hit) {
         if ((string) $hit->username !== $ownUsername) {
             $usernames[] = $hit->username;
         }
     }
     if (empty($usernames)) {
         return ["queryString" => $queryString];
     }

     /** @noinspection PhpUndefinedMethodInspection */
     /** @var Users $users */
     $users = $_SESSION['user']->getTable();
     if (isset($_POST['friends'])) {
         /** @noinspection PhpUndefinedMethodInspection */
         $friends = $_SESSION['user']->getFriendList();
         if (empty($friends)) {
             return ["queryString" => $queryString, "users" => []];
         }
         // NOTE(review): the friend list is only checked for emptiness; the result set is not
         // restricted to friends here - confirm whether getSet() is expected to do that.
         $userresult = $users->getSet($usernames, 'u.username');
     } else {
         $userresult = $users->getSet($usernames, "u.username", ["u.userid", "u.username", "u.name"]);
     }

     return ["queryString" => $queryString, "users" => $userresult->toArray()];
 }
 /**
  * Parses the request's query string and wraps the result in a boolean query.
  *
  * @return Zend_Search_Lucene_Search_Query_Boolean|null The parsed query as a required
  *         subquery, or null when parsing failed (the message is stored in $this->error)
  */
 public function parseQuery()
 {
     try {
         $parsed = QubitSearch::getInstance()->parse($this->request->query);
     } catch (Exception $parseFailure) {
         // Remember why parsing failed and signal the failure to the caller
         $this->error = $parseFailure->getMessage();

         return null;
     }

     $wrapper = new Zend_Search_Lucene_Search_Query_Boolean();
     $wrapper->addSubquery($parsed, true);

     return $wrapper;
 }
Example #8
0
 /**
  * Turns a free-text query into a boolean query of required fuzzy term queries.
  *
  * The text is split with str_word_count(); every resulting word becomes a fuzzy
  * query with a minimum similarity of 0.4 and all of them are required.
  *
  * @param string $query Free-text search input
  * @return Zend_Search_Lucene_Search_Query_Boolean
  */
 protected function getLuceneQuery($query)
 {
     $words = str_word_count($query, 1);

     $fuzzyQuery = new Zend_Search_Lucene_Search_Query_Boolean();
     foreach ($words as $word) {
         $fuzzyQuery->addSubquery(
             new Zend_Search_Lucene_Search_Query_Fuzzy(new Zend_Search_Lucene_Index_Term($word), 0.4),
             true
         );
     }

     return $fuzzyQuery;
 }
Example #9
0
 /**
  * Passes the query normalization factor, multiplied by the query's boost, to every sub-weight.
  *
  * @param float $queryNorm
  */
 public function normalize($queryNorm)
 {
     // The boost of the owning query is folded into the factor before it is handed down
     $boostedNorm = $queryNorm * $this->_query->getBoost();

     foreach ($this->_weights as $subWeight) {
         $subWeight->normalize($boostedNorm);
     }
 }
Example #10
0
 /**
  * Searches the Lucene index for documents matching the given query text.
  *
  * The text is parsed as '+(<text>)' and added as a required subquery. When any
  * exception occurs its stack trace is echoed and an empty array is returned.
  *
  * @param string $q Query text in Lucene query syntax
  * @return array Hits returned by the index, or an empty array on failure
  */
 function searchDocsByContent($q)
 {
     $found = array();

     try {
         $this->initLuceneEngine();
         $lucene = $this->zend->get_Zend_Search_Lucene();

         $wrapper = new Zend_Search_Lucene_Search_Query_Boolean();
         $wrapper->addSubquery(Zend_Search_Lucene_Search_QueryParser::parse('+(' . $q . ')'), true);

         $found = $lucene->find($wrapper);
     } catch (Exception $e) {
         echo $e->getTraceAsString();
     }

     return $found;
 }
Example #11
0
 /**
  * Runs a user-entered full-text query and returns one page of resolved components.
  *
  * @param Kwf_Component_Data $subroot Subroot whose index is searched
  * @param string $queryString Query in Lucene query syntax; an empty value yields no hits
  * @param int $offset Index of the first hit to return; values below 0 are treated as 0
  * @param int $limit Maximum number of hits to inspect starting at $offset
  * @param array $params Optional filters; 'type' restricts the hits to that type
  * @return array 'error' (false or a translated message), 'numHits' (total number of hits)
  *               and 'hits' (list of arrays with 'data' and 'content'); hits whose
  *               component cannot be resolved are left out
  */
 public function userSearch(Kwf_Component_Data $subroot, $queryString, $offset, $limit, $params = array())
 {
     $index = Kwf_Util_Fulltext_Lucene::getInstance($subroot);
     $error = false;
     $userQuery = false;
     if ($queryString) {
         try {
             $userQuery = Zend_Search_Lucene_Search_QueryParser::parse($queryString);
         } catch (ErrorException $e) {
             //ignore iconv errors that happen with invalid input
         }
     }

     $hits = array();
     if ($userQuery) {
         $query = new Zend_Search_Lucene_Search_Query_Boolean();
         $query->addSubquery($userQuery, true);
         if (isset($params['type'])) {
             $typeTerm = new Zend_Search_Lucene_Index_Term($params['type'], 'type');
             $query->addSubquery(new Zend_Search_Lucene_Search_Query_Term($typeTerm), true);
         }
         try {
             $hits = $index->find($query);
         } catch (Zend_Search_Lucene_Exception $e) {
             $error = $subroot->trlKwf('Invalid search terms');
         }
     }

     $numHits = count($hits);
     $ret = array();

     // Clamp the window so a negative or non-numeric offset cannot index outside $hits
     $numStart = max(0, (int) $offset);
     $numEnd = min($numHits, $numStart + (int) $limit);
     for ($i = $numStart; $i < $numEnd; $i++) {
         $h = $hits[$i];
         $c = Kwf_Component_Data_Root::getInstance()->getComponentById($h->componentId);
         if ($c) {
             $ret[] = array('data' => $c, 'content' => $h->content);
         }
     }

     return array('error' => $error, 'numHits' => $numHits, 'hits' => $ret);
 }
Example #12
0
 /**
  * Builds the Lucene query for a keyword search.
  *
  * The lower-cased keyword is split on spaces. All words form a multi-term subquery and
  * the last word additionally forms a prefix wildcard subquery ("word*"); both subqueries
  * are added without a sign.
  *
  * The word list is computed once and shared by both subqueries, so a word such as "0"
  * is no longer dropped from the multi-term part, and no bare "*" wildcard is created
  * when the keyword contains no words at all.
  *
  * @param string $keyword
  * @return Zend_Search_Lucene_Search_Query_Boolean
  */
 protected function prepareLuceneQuery($keyword)
 {
     $keyword = strtolower($keyword);
     $query = new Zend_Search_Lucene_Search_Query_Boolean();

     // Collect the non-blank words once
     $words = array();
     foreach (preg_split('/ /', $keyword, -1, PREG_SPLIT_NO_EMPTY) as $piece) {
         $piece = trim($piece);
         if ($piece !== '') {
             $words[] = $piece;
         }
     }

     # multiterm query
     $subquery1 = new Zend_Search_Lucene_Search_Query_MultiTerm();
     foreach ($words as $word) {
         $subquery1->addTerm(new Zend_Search_Lucene_Index_Term($word));
     }
     $query->addSubquery($subquery1);

     # wildcard query
     // Static setting; applied unconditionally as before
     Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength(1);
     if (count($words) > 0) {
         $pattern = new Zend_Search_Lucene_Index_Term($words[count($words) - 1] . "*");
         $subquery2 = new Zend_Search_Lucene_Search_Query_Wildcard($pattern);
         $query->addSubquery($subquery2);
     }

     return $query;
 }
Example #13
0
 /**
  * Rewrites this phrase query in the context of the given index.
  *
  * - no terms: an empty query is returned;
  * - the first term names a field: this query is returned unchanged;
  * - otherwise: a boolean query is returned holding one phrase query per field name
  *   reported by $index->getFieldNames(true), each with the same terms, offsets and slop.
  *
  * @param Zend_Search_Lucene_Interface $index
  * @return Zend_Search_Lucene_Search_Query
  */
 public function rewrite(Zend_Search_Lucene_Interface $index)
 {
     if (count($this->_terms) == 0) {
         return new Zend_Search_Lucene_Search_Query_Empty();
     }

     if ($this->_terms[0]->field !== null) {
         return $this;
     }

     $expanded = new Zend_Search_Lucene_Search_Query_Boolean();
     $expanded->setBoost($this->getBoost());

     foreach ($index->getFieldNames(true) as $indexField) {
         $fieldPhrase = new Zend_Search_Lucene_Search_Query_Phrase();
         $fieldPhrase->setSlop($this->getSlop());

         foreach ($this->_terms as $termPosition => $unqualified) {
             $fieldPhrase->addTerm(
                 new Zend_Search_Lucene_Index_Term($unqualified->text, $indexField),
                 $this->_offsets[$termPosition]
             );
         }

         $expanded->addSubquery($fieldPhrase);
     }

     return $expanded;
 }
 /**
  * Generate 'boolean style' query from the context
  * 'term1 and term2   or   term3 and (<subquery1>) and not (<subquery2>)'
  *
  * The entries are fed into a boolean expression recognizer, conjunctions that
  * consist only of negated entries are discarded, and the remaining conjunctions
  * are combined as optional subqueries.
  *
  * @return Zend_Search_Lucene_Search_Query An Insignificant query when nothing is left,
  *         the single remaining subquery when there is exactly one, a Boolean query otherwise
  * @throws Zend_Search_Lucene_Search_QueryParserException When the expression is malformed
  */
 private function _booleanExpressionQuery()
 {
     /**
      * We treat each level of an expression as a boolean expression in
      * a Disjunctive Normal Form
      *
      * AND operator has higher precedence than OR
      *
      * Thus logical query is a disjunction of one or more conjunctions of
      * one or more query entries
      */
     require_once 'Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php';
     $expressionRecognizer = new Zend_Search_Lucene_Search_BooleanExpressionRecognizer();
     require_once 'Zend/Search/Lucene/Exception.php';
     try {
         foreach ($this->_entries as $entry) {
             if ($entry instanceof Zend_Search_Lucene_Search_QueryEntry) {
                 $expressionRecognizer->processLiteral($entry);
             } else {
                 switch ($entry) {
                     case Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME:
                         $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_AND_OPERATOR);
                         break;
                     case Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME:
                         $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_OR_OPERATOR);
                         break;
                     case Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME:
                         $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_NOT_OPERATOR);
                         break;
                     default:
                         // Zend_Search_Lucene itself is the index class and not throwable;
                         // the exception class loaded above is the one meant here.
                         // NOTE(review): presumably Zend_Search_Lucene_Exception derives from
                         // Zend_Search_Exception and is therefore converted by the catch below - confirm.
                         throw new Zend_Search_Lucene_Exception('Boolean expression error. Unknown operator type.');
                 }
             }
         }
         $conjunctions = $expressionRecognizer->finishExpression();
     } catch (Zend_Search_Exception $e) {
         // throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error. Error message: \'' .
         //                                                          $e->getMessage() . '\'.' );
         // It's query syntax error message and it should be user friendly. So FSM message is omitted
         require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
         throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error.', 0, $e);
     }

     // Remove 'only negative' conjunctions
     // Each conjunction entry is a pair: [0] the query entry, [1] its sign (false-like means negated)
     foreach ($conjunctions as $conjunctionId => $conjunction) {
         $nonNegativeEntryFound = false;
         foreach ($conjunction as $conjunctionEntry) {
             if ($conjunctionEntry[1]) {
                 $nonNegativeEntryFound = true;
                 break;
             }
         }
         if (!$nonNegativeEntryFound) {
             unset($conjunctions[$conjunctionId]);
         }
     }

     $subqueries = array();
     foreach ($conjunctions as $conjunction) {
         // Check, if it's a one term conjunction
         if (count($conjunction) == 1) {
             $subqueries[] = $conjunction[0][0]->getQuery($this->_encoding);
         } else {
             require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
             $subquery = new Zend_Search_Lucene_Search_Query_Boolean();
             foreach ($conjunction as $conjunctionEntry) {
                 $subquery->addSubquery($conjunctionEntry[0]->getQuery($this->_encoding), $conjunctionEntry[1]);
             }
             $subqueries[] = $subquery;
         }
     }

     if (count($subqueries) == 0) {
         require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
         return new Zend_Search_Lucene_Search_Query_Insignificant();
     }
     if (count($subqueries) == 1) {
         return $subqueries[0];
     }

     require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
     $query = new Zend_Search_Lucene_Search_Query_Boolean();
     foreach ($subqueries as $subquery) {
         // Non-required entry/subquery
         $query->addSubquery($subquery);
     }
     return $query;
 }
Example #15
0
 /**
  * Re-write query into primitive queries in the context of specified index
  *
  * Walks the index's term stream for every searched field, keeps the terms whose
  * similarity to the query term exceeds $this->_minimumSimilarity, and returns:
  *  - an Empty query when nothing matched,
  *  - a Term query when exactly one term matched,
  *  - a Boolean query of boosted Term queries otherwise (best scores first,
  *    at most self::MAX_CLAUSE_COUNT of them).
  *
  * Resets and fills $this->_matches, $this->_scores and $this->_termKeys as a side effect.
  *
  * @param Zend_Search_Lucene_Interface $index
  * @return Zend_Search_Lucene_Search_Query
  * @throws Zend_Search_Lucene_Exception When more terms match than the terms-per-query limit allows
  */
 public function rewrite(Zend_Search_Lucene_Interface $index)
 {
     $this->_matches = array();
     $this->_scores = array();
     $this->_termKeys = array();
     if ($this->_term->field === null) {
         // Search through all fields
         $fields = $index->getFieldNames(true);
     } else {
         $fields = array($this->_term->field);
     }
     // NOTE(review): a garbled marker stood here, presumably a stripped
     // require_once of 'Zend/Search/Lucene/Index/Term.php' - confirm.
     $prefix = Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
     $prefixByteLength = strlen($prefix);
     // presumably the prefix length in characters rather than bytes - confirm against getLength()
     $prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);
     // NOTE(review): $termLength is not read again anywhere in this method
     $termLength = Zend_Search_Lucene_Index_Term::getLength($this->_term->text);
     $termRest = substr($this->_term->text, $prefixByteLength);
     // we calculate length of the rest in bytes since levenshtein() is not UTF-8 compatible
     $termRestLength = strlen($termRest);
     // Rescales (similarity - minimumSimilarity) so that a similarity of 1 yields a score of 1
     $scaleFactor = 1 / (1 - $this->_minimumSimilarity);
     // NOTE(review): garbled marker, presumably a stripped require_once of 'Zend/Search/Lucene.php' - confirm.
     $maxTerms = Zend_Search_Lucene::getTermsPerQueryLimit();
     foreach ($fields as $field) {
         $index->resetTermsStream();
         // NOTE(review): garbled marker, presumably a stripped require_once of 'Zend/Search/Lucene/Index/Term.php' - confirm.
         if ($prefix != '') {
             // Only terms of this field that start with the prefix are candidates
             $index->skipTo(new Zend_Search_Lucene_Index_Term($prefix, $field));
             while ($index->currentTerm() !== null && $index->currentTerm()->field == $field && substr($index->currentTerm()->text, 0, $prefixByteLength) == $prefix) {
                 // Calculate similarity
                 $target = substr($index->currentTerm()->text, $prefixByteLength);
                 // Use the stored maximum distance for this target length when there is one, otherwise compute it
                 $maxDistance = isset($this->_maxDistances[strlen($target)]) ? $this->_maxDistances[strlen($target)] : $this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, strlen($target));
                 if ($termRestLength == 0) {
                     // we don't have anything to compare.  That means if we just add
                     // the letters for current term we get the new word
                     $similarity = $prefixUtf8Length == 0 ? 0 : 1 - strlen($target) / $prefixUtf8Length;
                 } else {
                     if (strlen($target) == 0) {
                         // The index term is exactly the prefix; the whole rest of the query term is missing
                         $similarity = $prefixUtf8Length == 0 ? 0 : 1 - $termRestLength / $prefixUtf8Length;
                     } else {
                         if ($maxDistance < abs($termRestLength - strlen($target))) {
                             //just adding the characters of term to target or vice-versa results in too many edits
                             //for example "pre" length is 3 and "prefixes" length is 8.  We can see that
                             //given this optimal circumstance, the edit distance cannot be less than 5.
                             //which is 8-3 or more precisely abs(3-8).
                             //if our maximum edit distance is 4, then we can discard this word
                             //without looking at it.
                             $similarity = 0;
                         } else {
                             $similarity = 1 - levenshtein($termRest, $target) / ($prefixUtf8Length + min($termRestLength, strlen($target)));
                         }
                     }
                 }
                 if ($similarity > $this->_minimumSimilarity) {
                     $this->_matches[] = $index->currentTerm();
                     $this->_termKeys[] = $index->currentTerm()->key();
                     $this->_scores[] = ($similarity - $this->_minimumSimilarity) * $scaleFactor;
                     // A limit of 0 disables the check
                     if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
                         // NOTE(review): garbled marker, presumably a stripped require_once of 'Zend/Search/Lucene/Exception.php' - confirm.
                         throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
                     }
                 }
                 $index->nextTerm();
             }
         } else {
             // No prefix: every term of the field is a candidate
             $index->skipTo(new Zend_Search_Lucene_Index_Term('', $field));
             while ($index->currentTerm() !== null && $index->currentTerm()->field == $field) {
                 // Calculate similarity
                 $target = $index->currentTerm()->text;
                 $maxDistance = isset($this->_maxDistances[strlen($target)]) ? $this->_maxDistances[strlen($target)] : $this->_calculateMaxDistance(0, $termRestLength, strlen($target));
                 if ($maxDistance < abs($termRestLength - strlen($target))) {
                     //just adding the characters of term to target or vice-versa results in too many edits
                     //for example "pre" length is 3 and "prefixes" length is 8.  We can see that
                     //given this optimal circumstance, the edit distance cannot be less than 5.
                     //which is 8-3 or more precisely abs(3-8).
                     //if our maximum edit distance is 4, then we can discard this word
                     //without looking at it.
                     $similarity = 0;
                 } else {
                     // NOTE(review): if both $termRestLength and strlen($target) are 0 and this branch
                     // is reached, the divisor is 0 - confirm whether empty terms can occur here.
                     $similarity = 1 - levenshtein($termRest, $target) / min($termRestLength, strlen($target));
                 }
                 if ($similarity > $this->_minimumSimilarity) {
                     $this->_matches[] = $index->currentTerm();
                     $this->_termKeys[] = $index->currentTerm()->key();
                     $this->_scores[] = ($similarity - $this->_minimumSimilarity) * $scaleFactor;
                     // A limit of 0 disables the check
                     if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
                         // NOTE(review): garbled marker, presumably a stripped require_once of 'Zend/Search/Lucene/Exception.php' - confirm.
                         throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
                     }
                 }
                 $index->nextTerm();
             }
         }
         $index->closeTermsStream();
     }
     if (count($this->_matches) == 0) {
         // NOTE(review): garbled marker, presumably a stripped require_once of 'Zend/Search/Lucene/Search/Query/Empty.php' - confirm.
         return new Zend_Search_Lucene_Search_Query_Empty();
     } else {
         if (count($this->_matches) == 1) {
             // NOTE(review): garbled marker, presumably a stripped require_once of 'Zend/Search/Lucene/Search/Query/Term.php' - confirm.
             return new Zend_Search_Lucene_Search_Query_Term(reset($this->_matches));
         } else {
             // NOTE(review): garbled marker, presumably a stripped require_once of 'Zend/Search/Lucene/Search/Query/Boolean.php' - confirm.
             $rewrittenQuery = new Zend_Search_Lucene_Search_Query_Boolean();
             // Order by score (descending), ties by term key (ascending); $this->_matches is reordered along
             array_multisort($this->_scores, SORT_DESC, SORT_NUMERIC, $this->_termKeys, SORT_ASC, SORT_STRING, $this->_matches);
             $termCount = 0;
             // NOTE(review): garbled marker, presumably a stripped require_once of 'Zend/Search/Lucene/Search/Query/Term.php' - confirm.
             foreach ($this->_matches as $id => $matchedTerm) {
                 $subquery = new Zend_Search_Lucene_Search_Query_Term($matchedTerm);
                 $subquery->setBoost($this->_scores[$id]);
                 $rewrittenQuery->addSubquery($subquery);
                 $termCount++;
                 // Only the best-scoring terms are kept once the clause limit is hit
                 if ($termCount >= self::MAX_CLAUSE_COUNT) {
                     break;
                 }
             }
             return $rewrittenQuery;
         }
     }
 }
Example #16
0
 /**
  * Re-write query into primitive queries in the context of specified index
  *
  * An empty term list yields an Empty query. When every term names a field the query
  * is returned as is; otherwise it is converted into a Boolean query whose subqueries
  * are the rewritten single-term queries, each with the sign of its term.
  *
  * @param Zend_Search_Lucene_Interface $index
  * @return Zend_Search_Lucene_Search_Query
  */
 public function rewrite(Zend_Search_Lucene_Interface $index)
 {
     if (count($this->_terms) == 0) {
         require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
         return new Zend_Search_Lucene_Search_Query_Empty();
     }

     // Find out whether any term lacks a field name
     $unqualifiedFound = false;
     foreach ($this->_terms as $candidate) {
         if ($candidate->field === null) {
             $unqualifiedFound = true;
             break;
         }
     }
     if (!$unqualifiedFound) {
         return $this;
     }

     /** transform multiterm query to boolean and apply rewrite() method to subqueries. */
     require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
     $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
     $booleanQuery->setBoost($this->getBoost());

     require_once 'Zend/Search/Lucene/Search/Query/Term.php';
     foreach ($this->_terms as $position => $currentTerm) {
         $termQuery = new Zend_Search_Lucene_Search_Query_Term($currentTerm);
         $rewritten = $termQuery->rewrite($index);
         // Without a signs list every term counts as required
         if ($this->_signs === null) {
             $sign = true;
         } else {
             $sign = $this->_signs[$position];
         }
         $booleanQuery->addSubquery($rewritten, $sign);
     }

     return $booleanQuery;
 }
Example #17
0
 /**
  * Re-write query into primitive queries in the context of specified index
  *
  * Without a field name the phrase is expanded into a Boolean query with one rewritten
  * subquery per search field (the default search field, or all field names reported by
  * $index->getFieldNames(true) when no default is set). With a field name the result is:
  *  - a Term query when the whole phrase exists as a term of that field,
  *  - an Insignificant query when the analyzer produces no tokens,
  *  - a Term query when the analyzer produces exactly one token,
  *  - a Phrase query over all tokens otherwise.
  *
  * Every returned Term, Phrase or Boolean query carries this query's boost, and
  * $this->_matches is set to the terms of the returned query.
  *
  * @param Zend_Search_Lucene_Interface $index
  * @return Zend_Search_Lucene_Search_Query
  */
 public function rewrite(Zend_Search_Lucene_Interface $index)
 {
     // Allow to use wildcards within phrases
     // They are either removed by text analyzer or used as a part of keyword for keyword fields
     //
     //        if (strpos($this->_phrase, '?') !== false || strpos($this->_phrase, '*') !== false) {
     //            require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
     //            throw new Zend_Search_Lucene_Search_QueryParserException('Wildcards are only allowed in a single terms.');
     //        }
     // Split query into subqueries if field name is not specified
     if ($this->_field === null) {
         require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
         $query = new Zend_Search_Lucene_Search_Query_Boolean();
         $query->setBoost($this->getBoost());
         require_once 'Zend/Search/Lucene.php';
         if (Zend_Search_Lucene::getDefaultSearchField() === null) {
             $searchFields = $index->getFieldNames(true);
         } else {
             $searchFields = array(Zend_Search_Lucene::getDefaultSearchField());
         }
         foreach ($searchFields as $fieldName) {
             // The per-field subquery keeps its default boost; the boost lives on the outer query
             $subquery = new Zend_Search_Lucene_Search_Query_Preprocessing_Phrase($this->_phrase, $this->_phraseEncoding, $fieldName);
             $subquery->setSlop($this->getSlop());
             $query->addSubquery($subquery->rewrite($index));
         }
         $this->_matches = $query->getQueryTerms();
         return $query;
     }
     // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
     // encoding is not used since we expect binary matching
     require_once 'Zend/Search/Lucene/Index/Term.php';
     $term = new Zend_Search_Lucene_Index_Term($this->_phrase, $this->_field);
     if ($index->hasTerm($term)) {
         require_once 'Zend/Search/Lucene/Search/Query/Term.php';
         $query = new Zend_Search_Lucene_Search_Query_Term($term);
         $query->setBoost($this->getBoost());
         $this->_matches = $query->getQueryTerms();
         return $query;
     }
     // tokenize phrase using current analyzer and process it as a phrase query
     require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
     $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $this->_phraseEncoding);
     if (count($tokens) == 0) {
         $this->_matches = array();
         require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
         return new Zend_Search_Lucene_Search_Query_Insignificant();
     }
     if (count($tokens) == 1) {
         require_once 'Zend/Search/Lucene/Index/Term.php';
         $term = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
         require_once 'Zend/Search/Lucene/Search/Query/Term.php';
         $query = new Zend_Search_Lucene_Search_Query_Term($term);
         $query->setBoost($this->getBoost());
         $this->_matches = $query->getQueryTerms();
         return $query;
     }
     //It's non-trivial phrase query
     $position = -1;
     require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
     $query = new Zend_Search_Lucene_Search_Query_Phrase();
     require_once 'Zend/Search/Lucene/Index/Term.php';
     foreach ($tokens as $token) {
         // Positions follow the analyzer's increments, so gaps (e.g. removed words) are kept
         $position += $token->getPositionIncrement();
         $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
         $query->addTerm($term, $position);
     }
     // Slop does not depend on the token, so it is set once after the loop
     $query->setSlop($this->getSlop());
     // Carry the boost over like every other branch of this method does
     $query->setBoost($this->getBoost());
     $this->_matches = $query->getQueryTerms();
     return $query;
 }
Example #18
0
 /**
  * Optimize query in the context of specified index
  *
  * Sign convention used throughout: true = required, null = optional,
  * false = prohibited. A null $this->_signs means every subquery is required.
  *
  * Outcomes, in the order they are checked:
  *  - Insignificant query, if no significant subquery remains or all of them are prohibited;
  *  - Empty query, if a required subquery is empty, or nothing but prohibited
  *    subqueries remains after empty ones are dropped;
  *  - the single remaining subquery (cloned with a combined boost when this
  *    query's boost is not 1);
  *  - a MultiTerm query, if every subquery decomposes into terms sharing one boost;
  *  - otherwise a Boolean query over the (partially regrouped) subqueries.
  *
  * @param Zend_Search_Lucene_Interface $index
  * @return Zend_Search_Lucene_Search_Query
  */
 public function optimize(Zend_Search_Lucene_Interface $index)
 {
     $subqueries = array();
     $signs = array();
     // Optimize all subqueries
     // ($subqueries and $signs are re-indexed from 0 here and stay key-aligned below)
     foreach ($this->_subqueries as $id => $subquery) {
         $subqueries[] = $subquery->optimize($index);
         $signs[] = $this->_signs === null ? true : $this->_signs[$id];
     }
     // Remove insignificant subqueries
     foreach ($subqueries as $id => $subquery) {
         if ($subquery instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
             // Insignificant subquery has to be removed anyway
             unset($subqueries[$id]);
             unset($signs[$id]);
         }
     }
     if (count($subqueries) == 0) {
         // Boolean query has no significant subqueries left
         return new Zend_Search_Lucene_Search_Query_Insignificant();
     }
     // Check if all non-insignificant subqueries are prohibited
     $allProhibited = true;
     foreach ($signs as $sign) {
         if ($sign !== false) {
             $allProhibited = false;
             break;
         }
     }
     if ($allProhibited) {
         return new Zend_Search_Lucene_Search_Query_Insignificant();
     }
     // Check for empty subqueries
     foreach ($subqueries as $id => $subquery) {
         if ($subquery instanceof Zend_Search_Lucene_Search_Query_Empty) {
             if ($signs[$id] === true) {
                 // Matching is required, but is actually empty
                 return new Zend_Search_Lucene_Search_Query_Empty();
             } else {
                 // Matching is optional or prohibited, but is empty
                 // Remove it from subqueries and signs list
                 unset($subqueries[$id]);
                 unset($signs[$id]);
             }
         }
     }
     // Check, if reduced subqueries list is empty
     if (count($subqueries) == 0) {
         return new Zend_Search_Lucene_Search_Query_Empty();
     }
     // Check if all non-empty subqueries are prohibited
     $allProhibited = true;
     foreach ($signs as $sign) {
         if ($sign !== false) {
             $allProhibited = false;
             break;
         }
     }
     if ($allProhibited) {
         return new Zend_Search_Lucene_Search_Query_Empty();
     }
     // Check, if reduced subqueries list has only one entry
     if (count($subqueries) == 1) {
         // It's a query with only one required or optional clause
         // (it's already checked, that it's not a prohibited clause)
         if ($this->getBoost() == 1) {
             return reset($subqueries);
         }
         // Clone so the boost change does not leak into the subquery object itself
         $optimizedQuery = clone reset($subqueries);
         $optimizedQuery->setBoost($optimizedQuery->getBoost() * $this->getBoost());
         return $optimizedQuery;
     }
     // Prepare first candidate for optimized query
     // (built before decomposition, so it still holds every remaining subquery)
     $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
     $optimizedQuery->setBoost($this->getBoost());
     // Parallel lists describing the decomposed terms: term, its sign, its boost
     $terms = array();
     $tsigns = array();
     $boostFactors = array();
     // Try to decompose term and multi-term subqueries
     foreach ($subqueries as $id => $subquery) {
         if ($subquery instanceof Zend_Search_Lucene_Search_Query_Term) {
             $terms[] = $subquery->getTerm();
             $tsigns[] = $signs[$id];
             $boostFactors[] = $subquery->getBoost();
             // remove subquery from a subqueries list
             unset($subqueries[$id]);
             unset($signs[$id]);
         } else {
             if ($subquery instanceof Zend_Search_Lucene_Search_Query_MultiTerm) {
                 $subTerms = $subquery->getTerms();
                 $subSigns = $subquery->getSigns();
                 if ($signs[$id] === true) {
                     // It's a required multi-term subquery.
                     // Something like '... +(+term1 -term2 term3 ...) ...'
                     // Multi-term required subquery can be decomposed only if it contains
                     // required terms and doesn't contain prohibited terms:
                     // ... +(+term1 term2 ...) ... => ... +term1 term2 ...
                     //
                     // Check this
                     $hasRequired = false;
                     $hasProhibited = false;
                     if ($subSigns === null) {
                         // All subterms are required
                         $hasRequired = true;
                     } else {
                         foreach ($subSigns as $sign) {
                             if ($sign === true) {
                                 $hasRequired = true;
                             } else {
                                 if ($sign === false) {
                                     $hasProhibited = true;
                                     break;
                                 }
                             }
                         }
                     }
                     // Continue if subquery has prohibited terms or doesn't have required terms
                     // (the subquery then stays in $subqueries untouched)
                     if ($hasProhibited || !$hasRequired) {
                         continue;
                     }
                     foreach ($subTerms as $termId => $term) {
                         $terms[] = $term;
                         $tsigns[] = $subSigns === null ? true : $subSigns[$termId];
                         $boostFactors[] = $subquery->getBoost();
                     }
                     // remove subquery from a subqueries list
                     unset($subqueries[$id]);
                     unset($signs[$id]);
                 } else {
                     // $signs[$id] === null  ||  $signs[$id] === false
                     // It's an optional or prohibited multi-term subquery.
                     // Something like '... (+term1 -term2 term3 ...) ...'
                     // or
                     // something like '... -(+term1 -term2 term3 ...) ...'
                     // Multi-term optional and prohibited subqueries can be decomposed
                     // only if all terms are optional.
                     //
                     // Check if all terms are optional.
                     $onlyOptional = true;
                     if ($subSigns === null) {
                         // All subterms are required
                         $onlyOptional = false;
                     } else {
                         foreach ($subSigns as $sign) {
                             if ($sign !== null) {
                                 $onlyOptional = false;
                                 break;
                             }
                         }
                     }
                     // Continue if non-optional terms are presented in this multi-term subquery
                     if (!$onlyOptional) {
                         continue;
                     }
                     // Every term inherits the sign of the enclosing subquery:
                     // optional stays optional (null), prohibited becomes prohibited (false)
                     foreach ($subTerms as $termId => $term) {
                         $terms[] = $term;
                         $tsigns[] = $signs[$id] === null ? null : false;
                         $boostFactors[] = $subquery->getBoost();
                     }
                     // remove subquery from a subqueries list
                     unset($subqueries[$id]);
                     unset($signs[$id]);
                 }
             }
         }
     }
     // Check, if there are no decomposed subqueries
     if (count($terms) == 0) {
         // return prepared candidate
         return $optimizedQuery;
     }
     // Check, if all subqueries have been decomposed and all terms has the same boost factor
     if (count($subqueries) == 0 && count(array_unique($boostFactors)) == 1) {
         $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
         $optimizedQuery->setBoost(reset($boostFactors) * $this->getBoost());
         return $optimizedQuery;
     }
     // This boolean query can't be transformed to Term/MultiTerm query and still contains
     // several subqueries
     // Separate prohibited terms
     // (after this loop $terms/$tsigns/$boostFactors hold only required and optional terms)
     $prohibitedTerms = array();
     foreach ($terms as $id => $term) {
         if ($tsigns[$id] === false) {
             $prohibitedTerms[] = $term;
             unset($terms[$id]);
             unset($tsigns[$id]);
             unset($boostFactors[$id]);
         }
     }
     if (count($terms) == 1) {
         // A single remaining term goes back as a plain Term clause with its own sign and boost
         $clause = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
         $clause->setBoost(reset($boostFactors));
         $subqueries[] = $clause;
         $signs[] = reset($tsigns);
         // Clear terms list
         $terms = array();
     } else {
         // Several terms are regrouped into one MultiTerm clause only when they share a boost;
         // otherwise $terms stays non-empty and the first candidate is returned at the end
         if (count($terms) > 1 && count(array_unique($boostFactors)) == 1) {
             $clause = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
             $clause->setBoost(reset($boostFactors));
             $subqueries[] = $clause;
             // Clause sign is 'required' if clause contains required terms. 'Optional' otherwise.
             $signs[] = in_array(true, $tsigns) ? true : null;
             // Clear terms list
             $terms = array();
         }
     }
     if (count($prohibitedTerms) == 1) {
         // (boost factors are not significant for prohibited clauses)
         $subqueries[] = new Zend_Search_Lucene_Search_Query_Term(reset($prohibitedTerms));
         $signs[] = false;
         // Clear prohibited terms list
         $prohibitedTerms = array();
     } else {
         if (count($prohibitedTerms) > 1) {
             // prepare signs array
             $prohibitedSigns = array();
             foreach ($prohibitedTerms as $id => $term) {
                 // all prohibited term are grouped as optional into multi-term query
                 $prohibitedSigns[$id] = null;
             }
             // (boost factors are not significant for prohibited clauses)
             $subqueries[] = new Zend_Search_Lucene_Search_Query_MultiTerm($prohibitedTerms, $prohibitedSigns);
             // Clause sign is 'prohibited'
             $signs[] = false;
             // Clear terms list
             $prohibitedTerms = array();
         }
     }
     /** @todo Group terms with the same boost factors together */
     // Check, that all terms are processed
     // Replace candidate for optimized query
     // (if some terms could not be regrouped, the first candidate built above is kept)
     if (count($terms) == 0 && count($prohibitedTerms) == 0) {
         $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
         $optimizedQuery->setBoost($this->getBoost());
     }
     return $optimizedQuery;
 }
 /**
  * Display search results.
  *
  * Builds a query from the free-text "q" request variable and from every
  * configured search form element, runs it either against Solr (HTTP POST to
  * <solrUrl>/select) or against the plugin's Zend Lucene index, and renders
  * the plugin's results.tpl with an iterator over the hits.
  *
  * With Solr the query is accumulated as a string of field:"value" clauses;
  * with Zend Lucene it is a boolean query object.
  */
 function searchResults()
 {
     ZendSearchHandler::setupTemplate();
     $plugin =& PluginRegistry::getPlugin('generic', 'ZendSearchPlugin');
     $isUsingSolr = $plugin->isUsingSolr();
     if ($isUsingSolr) {
         $query = '';
     } else {
         $index =& $plugin->getIndex();
         $query = new Zend_Search_Lucene_Search_Query_Boolean();
     }
     $q = Request::getUserVar('q');
     if (!empty($q)) {
         if ($isUsingSolr) {
             $query .= 'text:"' . ZendSearchHandler::luceneEscape($q) . '" ';
         } else {
             $query->addSubquery(Zend_Search_Lucene_Search_QueryParser::parse($q));
         }
     }
     $searchFormElementDao =& DAORegistry::getDAO('SearchFormElementDAO');
     $searchFormElements =& $searchFormElementDao->getSearchFormElements();
     while ($searchFormElement =& $searchFormElements->next()) {
         $searchFormElementId = $searchFormElement->getSearchFormElementId();
         $symbolic = $searchFormElement->getSymbolic();
         switch ($searchFormElement->getType()) {
             case SEARCH_FORM_ELEMENT_TYPE_SELECT:
             case SEARCH_FORM_ELEMENT_TYPE_STRING:
                 $term = Request::getUserVar($symbolic);
                 if (!empty($term)) {
                     if ($isUsingSolr) {
                         $query .= $symbolic . ':"' . ZendSearchHandler::luceneEscape($term) . '" ';
                     } else {
                         $query->addSubquery(new Zend_Search_Lucene_Search_Query_Term(new Zend_Search_Lucene_Index_Term($term, $symbolic)), true);
                     }
                 }
                 break;
             case SEARCH_FORM_ELEMENT_TYPE_DATE:
                 // A date element only constrains the search when both bounds are given
                 $from = Request::getUserDateVar($symbolic . '-from');
                 $to = Request::getUserDateVar($symbolic . '-to');
                 if (!empty($from) && !empty($to)) {
                     if ($isUsingSolr) {
                         // NOTE(review): strftime() formats local time but the literal "Z"
                         // labels it as UTC; confirm the server timezone assumption.
                         $query .= $symbolic . ':[' . strftime('%Y-%m-%dT%H:%M:%SZ', $from) . ' TO ' . strftime('%Y-%m-%dT%H:%M:%SZ', $to) . '] ';
                     } else {
                         $fromTerm = new Zend_Search_Lucene_Index_Term($from, $symbolic);
                         $toTerm = new Zend_Search_Lucene_Index_Term($to, $symbolic);
                         $query->addSubquery(new Zend_Search_Lucene_Search_Query_Range($fromTerm, $toTerm, true), true);
                     }
                 }
                 break;
             default:
                 fatalError('Unknown element type!');
         }
         // Break the reference before the next iteration rebinds it
         unset($searchFormElement);
     }
     $rangeInfo =& PKPHandler::getRangeInfo('results');
     if ($isUsingSolr) {
         $itemsPerPage = Config::getVar('interface', 'items_per_page');
         // The handle is created only once the query is complete, so it cannot
         // be left open by an early exit while the query is being built.
         $ch = curl_init();
         curl_setopt($ch, CURLOPT_URL, $plugin->getSetting('solrUrl') . '/select');
         curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
         curl_setopt($ch, CURLOPT_TIMEOUT, 10);
         curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
         curl_setopt($ch, CURLOPT_ENCODING, '');
         curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
         curl_setopt($ch, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
         curl_setopt($ch, CURLOPT_POST, 1);
         // Trim BEFORE encoding: once encoded, the trailing space is a "+" and
         // trim() can no longer remove it.
         curl_setopt($ch, CURLOPT_POSTFIELDS, 'q=' . urlencode(trim($query)) . '&rows=' . urlencode($itemsPerPage) . ($rangeInfo ? '&start=' . $rangeInfo->getPage() * $itemsPerPage : ''));
         $data = curl_exec($ch);
         curl_close($ch);
         $result = null;
         $numFound = 0;
         $recordIds = array();
         // curl_exec() returns false on failure; treat that as "no results"
         // instead of handing a boolean to the XML parser.
         if ($data !== false) {
             $xmlParser = new XMLParser();
             @($result =& $xmlParser->parseTextStruct($data, array('str', 'result')));
         }
         if ($result) {
             foreach ($result as $nodeSet) {
                 foreach ($nodeSet as $node) {
                     if (isset($node['attributes']['name']) && $node['attributes']['name'] == 'id') {
                         $recordIds[] = $node['value'];
                     } elseif (isset($node['attributes']['numFound'])) {
                         $numFound = $node['attributes']['numFound'];
                     }
                 }
             }
         }
         $plugin->import('SolrResultIterator');
         $resultsIterator =& SolrResultIterator::fromRangeInfo($recordIds, $numFound, $rangeInfo);
         unset($recordIds);
     } else {
         $resultsArray = $index->find($query);
         $plugin->import('ZendSearchResultIterator');
         $resultsIterator =& ZendSearchResultIterator::fromRangeInfo($resultsArray, $rangeInfo);
         unset($resultsArray);
     }
     $templateMgr =& TemplateManager::getManager();
     $templateMgr->assign_by_ref('recordDao', DAORegistry::getDAO('RecordDAO'));
     $templateMgr->assign_by_ref('results', $resultsIterator);
     $templateMgr->assign_by_ref('q', $q);
     $templateMgr->display($plugin->getTemplatePath() . 'results.tpl');
 }
 /**
  * Build a Lucene boolean query from the request's search fields.
  *
  * Each entry of $this->request->searchFields with a non-empty 'query' is
  * lower-cased, optionally wrapped in quotes (when 'match' is 'phrase'),
  * optionally prefixed with "<field>:", parsed, and added to a boolean query
  * with a sign derived from 'operator' ('not' => prohibited, 'or' => optional,
  * anything else => required). A human-readable description of every clause
  * is appended to $this->queryTerms.
  *
  * @return Zend_Search_Lucene_Search_Query_Boolean the built query wrapped as
  *         the single required subquery of an outer boolean query
  */
 public function parseQuery()
 {
     QubitSearch::getInstance();
     $queryBuilt = new Zend_Search_Lucene_Search_Query_Boolean();
     foreach ($this->request->searchFields as $searchField) {
         // if no terms for this field, skip it
         if (empty($searchField['query'])) {
             continue;
         }
         // Resolve the optional keys once, before anything reads them, so a
         // field submitted without 'operator' or 'match' raises no notice.
         $operator = isset($searchField['operator']) ? $searchField['operator'] : null;
         $isPhrase = isset($searchField['match']) && 'phrase' == $searchField['match'];
         $hasField = !empty($searchField['field']);
         // enclose phrase searches in quotes (strip existing ones)
         if ($isPhrase) {
             $term = '"' . str_replace(array('"', "'"), '', strtolower($searchField['query'])) . '"';
         } else {
             $term = strtolower($searchField['query']);
         }
         $matchString = $term;
         if ($hasField) {
             // limit to specified field
             $term = $searchField['field'] . ':' . $term;
             $field = ucfirst($searchField['field']);
         } else {
             $field = $isPhrase ? $this->getContext()->i18n->__('Phrase') : $this->getContext()->i18n->__('Keyword(s)');
         }
         $this->queryTerms[] = array('term' => $field . ': ' . $matchString, 'operator' => $operator);
         // select which boolean operator to use
         switch ($operator) {
             case 'not':
                 $token = false;
                 break;
             case 'or':
                 $token = null;
                 break;
             case 'and':
             default:
                 $token = true;
                 break;
         }
         $queryBuilt->addSubquery(QubitSearch::getInstance()->parse($term), $token);
     }
     $query = new Zend_Search_Lucene_Search_Query_Boolean();
     $query->addSubquery($queryBuilt, true);
     return $query;
 }
// Test script for ->addField(). $t is the test harness object and inst() is a
// helper defined outside this excerpt (presumably returns a fresh criteria
// object; confirm against its definition).
$criteria = inst();
$t->diag('testing ->addField()');
// Case 1: a scalar value must be accepted and show up as the first subquery,
// as a term query with text 'string' on field 'foo'.
try {
    $criteria->addField('string', 'foo');
    $t->pass('->addField() accepted a string value');
    $s = $criteria->getQuery()->getSubqueries();
    $t->ok($s[0] == new Zend_Search_Lucene_Search_Query_Term(new Zend_Search_Lucene_Index_Term('string', 'foo')), '->addField() registers the new field');
} catch (Exception $e) {
    // An exception fails the first check; the dependent check is skipped
    $t->fail('->addField() accepted a string value');
    $t->skip('->addField() registers the new field');
}
// Case 2: an array value must be accepted and show up as the second subquery,
// equal to a boolean query holding one term query per array element.
try {
    $criteria->addField(range(1, 10), 'bar');
    $t->pass('->addField() accepted an array value');
    $s = $criteria->getQuery()->getSubqueries();
    $q = new Zend_Search_Lucene_Search_Query_Boolean();
    foreach (range(1, 10) as $value) {
        // NOTE(review): `true` is passed to the Query_Term constructor, not to
        // addSubquery(); confirm this matches what addField() builds.
        $q->addSubquery(new Zend_Search_Lucene_Search_Query_Term(new Zend_Search_Lucene_Index_Term($value, 'bar'), true));
    }
    $t->ok($s[1] == $q, '->addField() registers the array field');
} catch (Exception $e) {
    $t->fail('->addField() accepted an array value');
    $t->skip('->addField() registers the array field');
}
// Case 3: an unsupported value (an object) must make addField() throw.
try {
    $criteria->addField(new Foo());
    $t->fail('->addField() rejects invalid values');
} catch (Exception $e) {
    $t->pass('->addField() rejects invalid values');
}
$t->diag('testing addMultiTerm()');
 /**
  * Search tutorials.
  *
  * POSTed form data is first converted to named params via a redirect. When
  * named params other than 'page' are present, a Lucene query is built from
  * the free-text 'term' plus the pipe-separated 'learning_goal',
  * 'resource_type' and 'keyword' filters and paginated through the
  * SearchIndex model; otherwise the indexed tutorials are listed by title.
  */
 function search()
 {
     if (!empty($this->data['Tutorial'])) {
         // convert POST to Cake named params (it's prettier than GET)
         $this->redirect(array_merge($this->params['named'], $this->data['Tutorial']));
     }
     // default to boolean AND searching
     Zend_Search_Lucene_Search_QueryParser::setDefaultOperator(Zend_Search_Lucene_Search_QueryParser::B_AND);
     $query = '';
     // Are there any parameters besides page?
     $named_params = array_diff_key($this->params['named'], array('page' => ''));
     if (!empty($named_params)) {
         // Always start from a boolean container. The parser may return a
         // term or phrase query, which has no addSubquery(), and an empty
         // 'term' param would otherwise leave $parsed_query undefined for the
         // filter clauses below.
         $parsed_query = new Zend_Search_Lucene_Search_Query_Boolean();
         // The user's term is passed to the parser unsanitized; invalid
         // syntax is intercepted through the parser exception below.
         // (Open question from the original author: can Zend Lucene validate
         // a query ahead of time?)
         if (isset($this->params['named']['term'])) {
             $query = $this->params['named']['term'];
             if (!empty($query)) {
                 // Intercept invalid queries
                 try {
                     Zend_Search_Lucene_Search_QueryParser::dontSuppressQueryParsingExceptions();
                     $parsed_query->addSubquery(Zend_Search_Lucene_Search_QueryParser::parse($query), true);
                 } catch (Zend_Search_Lucene_Exception $e) {
                     // Why can't I catch Zend_Search_Lucene_Search_QueryParserException?
                     $this->Session->setFlash("We're not sure what you mean. Are your search terms correct?");
                     $this->redirect(array('action' => 'search', 'term' => Sanitize::paranoid($query, array(" "))));
                 }
             }
         }
         try {
             if (isset($this->params['named']['learning_goal'])) {
                 $learning_goals = explode('|', $this->params['named']['learning_goal']);
                 foreach ($learning_goals as $learning_goal) {
                     if (is_numeric($learning_goal)) {
                         $learning_goal_term = new Zend_Search_Lucene_Index_Term($learning_goal, 'learning_goal');
                         $learning_goal_query = new Zend_Search_Lucene_Search_Query_Term($learning_goal_term);
                         $parsed_query->addSubquery($learning_goal_query, true);
                     }
                 }
             }
             if (isset($this->params['named']['resource_type'])) {
                 $resource_types = explode('|', $this->params['named']['resource_type']);
                 foreach ($resource_types as $resource_type) {
                     if (is_numeric($resource_type)) {
                         $resource_type_term = new Zend_Search_Lucene_Index_Term($resource_type, 'resource_type');
                         $resource_type_query = new Zend_Search_Lucene_Search_Query_Term($resource_type_term);
                         $parsed_query->addSubquery($resource_type_query, true);
                     }
                 }
             }
             if (isset($this->params['named']['keyword'])) {
                 $keywords = explode('|', $this->params['named']['keyword']);
                 foreach ($keywords as $keyword) {
                     // valid UUID? Anchored so the WHOLE value must consist of
                     // the allowed characters; unanchored, any value containing
                     // a single alphanumeric character passed.
                     if (preg_match('/^[A-Za-z0-9\\-]+\\z/', $keyword)) {
                         $keyword_term = new Zend_Search_Lucene_Index_Term($keyword, 'keyword');
                         $keyword_query = new Zend_Search_Lucene_Search_Query_Term($keyword_term);
                         $parsed_query->addSubquery($keyword_query, true);
                     }
                 }
             }
         } catch (Zend_Search_Lucene_Exception $e) {
             // Why can't I catch Zend_Search_Lucene_Search_QueryParserException?
             // NOTE(review): this redirect passes 'query', while the action reads 'term'; confirm intent.
             $this->Session->setFlash("We're not sure what you mean. Are your search terms correct?");
             $this->redirect(array('action' => 'search', 'query' => $query));
         }
         $this->paginate['SearchIndex'] = array('limit' => 10, 'conditions' => array('query' => $parsed_query), 'highlight' => true);
         $this->set('tutorials', $this->paginate($this->Tutorial->SearchIndex));
     } else {
         $this->paginate = array('published', 'limit' => 10, 'order' => 'Tutorial.title ASC', 'conditions' => array('in_index' => true), 'contain' => array('Tag'));
         $this->set('tutorials', $this->paginate($this->Tutorial));
     }
     $this->layout = 'institution';
     $results_context = array('model' => '', 'key' => '', 'id' => 0, 'name' => '');
     $this->set(compact('results_context'));
     $this->set('learningGoals', $this->Tutorial->LearningGoal->find('list'));
     $this->set('resourceTypes', $this->Tutorial->ResourceType->find('list'));
 }
Example #23
0
 /**
  * Combine several values of one field into a single boolean query.
  *
  * Every value becomes a term query on field $name and is added to the
  * boolean query without an explicit sign.
  *
  * @param mixed $name field name handed to the index term
  * @param array|Traversable $val values to turn into term subqueries
  * @param mixed $required accepted for signature compatibility; not used
  * @return Zend_Search_Lucene_Search_Query_Boolean
  */
 protected function handleMulti($name, $val, $required)
 {
     $combined = new Zend_Search_Lucene_Search_Query_Boolean();
     foreach ($val as $value) {
         $fieldTerm = new Zend_Search_Lucene_Index_Term($value, $name);
         $termQuery = new Zend_Search_Lucene_Search_Query_Term($fieldTerm);
         $combined->addSubquery($termQuery);
     }
     return $combined;
 }
 /**
  * Run a full-text search and hand a paginator over the hits to the view.
  *
  * Reads the 'query', 'page' and 'pagesize' request params. An empty query
  * produces an empty result list without touching the index. A
  * Zend_Search_Lucene_Exception raised while searching is treated as
  * "no hits".
  */
 public function searchAction()
 {
     Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('utf-8');
     Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive());
     $cfg = Zend_Registry::get('cfg');
     $indexPath = $cfg['search']['indexpath'];
     $hits = array();
     $queryStr = trim($this->getRequest()->getParam('query'));
     if (strlen($queryStr) > 0) {
         // Open the index once, and only when there is something to search
         // for (it used to be opened twice per search, and once even for an
         // empty query).
         $index = Zend_Search_Lucene::open($indexPath);
         $userQuery = Zend_Search_Lucene_Search_QueryParser::parse($queryStr);
         $query = new Zend_Search_Lucene_Search_Query_Boolean();
         $query->addSubquery($userQuery, true);
         try {
             $hits = $index->find($query);
         } catch (Zend_Search_Lucene_Exception $ex) {
             $hits = array();
         }
     }
     $view = $this->view;
     $view->assign('query', $queryStr);
     $paginator = Zend_Paginator::factory($hits);
     $paginator->setCurrentPageNumber($this->getRequest()->getParam('page', 0));
     $paginator->setItemCountPerPage($this->getRequest()->getParam('pagesize', $cfg['search']['pagesize']));
     Zend_Paginator::setDefaultScrollingStyle('Sliding');
     Zend_View_Helper_PaginationControl::setDefaultViewPartial('pagination_search_control.phtml');
     $view->assign('paginator', $paginator);
     // Expose all request params so the pagination partial can rebuild links
     $view->queryParams = $this->getRequest()->getParams();
 }
 /**
  * Rewrite this boolean query into primitive queries.
  *
  * Produces a new boolean query carrying this query's boost, in which every
  * subquery is replaced by its own rewritten form. Signs are kept as they
  * are; when no signs array is set, every subquery is added with sign true.
  *
  * @param Zend_Search_Lucene $index
  * @return Zend_Search_Lucene_Search_Query
  */
 public function rewrite(Zend_Search_Lucene $index)
 {
     $rewritten = new Zend_Search_Lucene_Search_Query_Boolean();
     $rewritten->setBoost($this->getBoost());
     foreach ($this->_subqueries as $key => $child) {
         $primitive = $child->rewrite($index);
         if ($this->_signs === null) {
             $sign = true;
         } else {
             $sign = $this->_signs[$key];
         }
         $rewritten->addSubquery($primitive, $sign);
     }
     return $rewritten;
 }
Example #26
0
	/**
	 * Search the page index.
	 *
	 * The user query string is lower-cased, parsed with APP_CHARSET and added
	 * as a required clause of a boolean query; each entry of $subqueries is
	 * added as a further clause.
	 *
	 * @param string $query user-entered query string
	 * @param array $subqueries each entry is either a query object (added as
	 *        required) or an array with key 'query' and optional key
	 *        'required' (the sign handed to addSubquery(); defaults to true)
	 * @return IndexedSearchResult[] at most 200 results (the result-set limit set below)
	 */
	public static function search( $query, $subqueries = array()) {
		
		$query = strtolower($query);
		
		Loader::library('3rdparty/Zend/Search/Lucene');
		Loader::library('3rdparty/StandardAnalyzer/Analyzer/Standard/English');
		
		$index = new Zend_Search_Lucene(DIR_FILES_CACHE_PAGES);
		$index->setResultSetLimit(200);
		
		Zend_Search_Lucene_Analysis_Analyzer::setDefault(new StandardAnalyzer_Analyzer_Standard_English());

		$mainQuery = Zend_Search_Lucene_Search_QueryParser::parse($query, APP_CHARSET);

		$booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
		$booleanQuery->addSubquery($mainQuery, true);
		
		foreach($subqueries as $subQ) {
			if( !is_array($subQ) || !isset( $subQ['query'] ) ) {
				$subQuery = $subQ;
			} else {
				$subQuery = $subQ['query'];
			}
			
			if( !is_array($subQ) || !isset($subQ['required']) ) {
				$required = true;
			} else {
				$required = $subQ['required'];
			}
			
			$booleanQuery->addSubquery( $subQuery, $required );
		}
		
		// Hand the query OBJECT to find(). Running it through utf8_encode()
		// cast it to a string, so the index had to re-parse a rendering of
		// the query, and non-ASCII text that was already UTF-8 was mangled.
		$resultsTmp = $index->find($booleanQuery);

		$results = array();
		foreach($resultsTmp as $r) {
			$results[] = new IndexedSearchResult($r->cID, $r->cName, $r->cDescription, $r->score, $r->cPath, $r->cBody);
		}
		
		return $results;
	}
Example #27
0
 /**
  * Re-write query into primitive queries in the context of specified index
  *
  * Without a field, the word is rewritten once per search field (the default
  * search field, or every field name returned by the index) and the
  * significant results are combined into a boolean query. With a field, the
  * word is tried as an exact index term first, then tokenized with the
  * default analyzer. $this->_matches is filled on every returning path.
  *
  * @param Zend_Search_Lucene_Interface $index
  * @return Zend_Search_Lucene_Search_Query
  * @throws Zend_Search_Lucene_Search_QueryParserException if the word contains
  *         wildcards ('*' or '?') or is tokenized into more than one token
  */
 public function rewrite(Zend_Search_Lucene_Interface $index)
 {
     if ($this->_field === null) {
         require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
         $query = new Zend_Search_Lucene_Search_Query_Boolean();
         $hasInsignificantSubqueries = false;
         require_once 'Zend/Search/Lucene.php';
         if (Zend_Search_Lucene::getDefaultSearchField() === null) {
             $searchFields = $index->getFieldNames(true);
         } else {
             $searchFields = array(Zend_Search_Lucene::getDefaultSearchField());
         }
         require_once 'Zend/Search/Lucene/Search/Query/Preprocessing/Fuzzy.php';
         // Rewrite the same word once per field; keep only significant, non-empty results
         foreach ($searchFields as $fieldName) {
             $subquery = new Zend_Search_Lucene_Search_Query_Preprocessing_Fuzzy($this->_word, $this->_encoding, $fieldName, $this->_minimumSimilarity);
             $rewrittenSubquery = $subquery->rewrite($index);
             if (!($rewrittenSubquery instanceof Zend_Search_Lucene_Search_Query_Insignificant || $rewrittenSubquery instanceof Zend_Search_Lucene_Search_Query_Empty)) {
                 $query->addSubquery($rewrittenSubquery);
             }
             if ($rewrittenSubquery instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
                 $hasInsignificantSubqueries = true;
             }
         }
         $subqueries = $query->getSubqueries();
         if (count($subqueries) == 0) {
             $this->_matches = array();
             // Nothing usable: report Insignificant if any field produced it, Empty otherwise
             if ($hasInsignificantSubqueries) {
                 require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
                 return new Zend_Search_Lucene_Search_Query_Insignificant();
             } else {
                 require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
                 return new Zend_Search_Lucene_Search_Query_Empty();
             }
         }
         // A single subquery is returned directly instead of a one-clause boolean query
         if (count($subqueries) == 1) {
             $query = reset($subqueries);
         }
         $query->setBoost($this->getBoost());
         $this->_matches = $query->getQueryTerms();
         return $query;
     }
     // -------------------------------------
     // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
     // encoding is not used since we expect binary matching
     require_once 'Zend/Search/Lucene/Index/Term.php';
     $term = new Zend_Search_Lucene_Index_Term($this->_word, $this->_field);
     if ($index->hasTerm($term)) {
         require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';
         $query = new Zend_Search_Lucene_Search_Query_Fuzzy($term, $this->_minimumSimilarity);
         $query->setBoost($this->getBoost());
         // Get rewritten query. Important! It also fills terms matching container.
         $rewrittenQuery = $query->rewrite($index);
         $this->_matches = $query->getQueryTerms();
         return $rewrittenQuery;
     }
     // -------------------------------------
     // Recognize wildcard queries
     /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
     // The probe match tells whether PCRE was built with Unicode support;
     // if so the word is converted to UTF-8 and split with the /u modifier
     if (@preg_match('/\\pL/u', 'a') == 1) {
         $subPatterns = preg_split('/[*?]/u', iconv($this->_encoding, 'UTF-8', $this->_word));
     } else {
         $subPatterns = preg_split('/[*?]/', $this->_word);
     }
     // More than one piece means the word contained at least one '*' or '?'
     if (count($subPatterns) > 1) {
         require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
         throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search doesn\'t support wildcards (except within Keyword fields).');
     }
     // -------------------------------------
     // Recognize one-term, multi-term and "insignificant" queries
     require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
     $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);
     if (count($tokens) == 0) {
         $this->_matches = array();
         require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
         return new Zend_Search_Lucene_Search_Query_Insignificant();
     }
     if (count($tokens) == 1) {
         require_once 'Zend/Search/Lucene/Index/Term.php';
         $term = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
         require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';
         $query = new Zend_Search_Lucene_Search_Query_Fuzzy($term, $this->_minimumSimilarity);
         $query->setBoost($this->getBoost());
         // Get rewritten query. Important! It also fills terms matching container.
         $rewrittenQuery = $query->rewrite($index);
         $this->_matches = $query->getQueryTerms();
         return $rewrittenQuery;
     }
     // Word is tokenized into several tokens
     require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
     throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search is supported only for non-multiple word terms');
 }
Example #28
0
 /**
  * Rewrite this phrase query into primitive queries for the given index.
  *
  * - A phrase without terms collapses to an Empty query.
  * - A phrase whose first term already names a field is returned unchanged.
  * - Otherwise a Boolean query is built that holds one Phrase subquery per
  *   field name reported by $index->getFieldNames(true); each subquery reuses
  *   the term texts, offsets and slop of this phrase, and the Boolean query
  *   inherits this phrase's boost.
  *
  * @param Zend_Search_Lucene_Interface $index
  * @return Zend_Search_Lucene_Search_Query
  */
 public function rewrite(Zend_Search_Lucene_Interface $index)
 {
     // Guard: nothing to match at all.
     if (count($this->_terms) == 0) {
         require_once sfConfig::get('sf_lib_dir') . '/modules/search/lib/Lucene/Search/Query/Empty.php';

         return new Zend_Search_Lucene_Search_Query_Empty();
     }

     // Guard: only the first term is inspected to decide whether the phrase
     // is already bound to a field.
     if ($this->_terms[0]->field !== null) {
         return $this;
     }

     // Unqualified phrase: fan it out over the index fields.
     require_once sfConfig::get('sf_lib_dir') . '/modules/search/lib/Lucene/Search/Query/Boolean.php';
     $expanded = new Zend_Search_Lucene_Search_Query_Boolean();
     $expanded->setBoost($this->getBoost());

     foreach ($index->getFieldNames(true) as $fieldName) {
         $fieldPhrase = new Zend_Search_Lucene_Search_Query_Phrase();
         $fieldPhrase->setSlop($this->getSlop());

         require_once sfConfig::get('sf_lib_dir') . '/modules/search/lib/Lucene/Index/Term.php';
         foreach ($this->_terms as $position => $unqualified) {
             // Same text, now bound to the current field, at the same offset.
             $fieldPhrase->addTerm(
                 new Zend_Search_Lucene_Index_Term($unqualified->text, $fieldName),
                 $this->_offsets[$position]
             );
         }

         $expanded->addSubquery($fieldPhrase);
     }

     return $expanded;
 }
Example #29
0
 /**
  * Combine child query nodes into a single Boolean query.
  *
  * Each child is added with the sign given in $required, except for a child
  * that is itself a Boolean query wrapping exactly one subquery whose sign is
  * false (a NOT expression): that inner subquery is unwrapped and added
  * directly with sign false.
  *
  * @param array|Traversable $childNodes queries to combine
  * @param bool|null         $required   sign handed to addSubquery() for ordinary children
  *                                      (NOTE(review): presumably true = required,
  *                                      null = optional — confirm against callers)
  * @return Zend_Search_Lucene_Search_Query_Boolean
  */
 private function buildCondition($childNodes, $required)
 {
     $result = new Zend_Search_Lucene_Search_Query_Boolean();
     foreach ($childNodes as $child) {
         // Detect if child is a NOT, and reformulate on the fly to support the syntax
         if ($child instanceof Zend_Search_Lucene_Search_Query_Boolean) {
             $signs = $child->getSigns();
             // getSigns() may yield null rather than an array (a Boolean query
             // can keep its signs as null), and count(null) is an error on
             // PHP 8, so check the type before counting.
             if (is_array($signs) && count($signs) === 1 && $signs[0] === false) {
                 // reset() takes its argument by reference, so it needs a real
                 // variable rather than the method's return value.
                 $negated = $child->getSubqueries();
                 $result->addSubquery(reset($negated), false);
                 continue;
             }
         }
         $result->addSubquery($child, $required);
     }
     return $result;
 }
Example #30
0
 /**
  * Delete the entries for a record from its table's Lucene index.
  *
  * Searches the index for hits whose 'primarykey' term equals the record id
  * and, when $culture is given, whose 'culture' term equals the normalized
  * culture; every hit found is then deleted from the index.
  *
  * @param Doctrine_Record $object  record whose index entries are removed
  * @param mixed           $culture optional culture; when not null it is passed
  *                                 through self::normalizeCulture() and used as
  *                                 an additional required term
  */
 public static function deleteFromLuceneIndex(Doctrine_Record $object, $culture = null)
 {
     $luceneIndex = $object->getTable()->getLuceneIndex();
     $recordId = $object->getId();

     // The query is assembled through the Zend Search API instead of a query
     // string: numbers (such as IDs) would get stripped from a parsed query
     // string. See
     // http://framework.zend.com/manual/en/zend.search.lucene.searching.html#zend.search.lucene.searching.query_building
     $criteria = new Zend_Search_Lucene_Search_Query_Boolean();
     $criteria->addSubquery(
         new Zend_Search_Lucene_Search_Query_Term(
             new Zend_Search_Lucene_Index_Term($recordId, 'primarykey')
         ),
         true
     );

     if ($culture !== null) {
         // The culture subquery must wrap the culture term, not the
         // primary-key term.
         $normalized = self::normalizeCulture($culture);
         $criteria->addSubquery(
             new Zend_Search_Lucene_Search_Query_Term(
                 new Zend_Search_Lucene_Index_Term($normalized, 'culture')
             ),
             true
         );
     }

     $matches = $luceneIndex->find($criteria);
     if (!$matches) {
         return;
     }

     // $match->id is the internal Zend search index id, which is not the same
     // thing as the id of the record. Normally a single hit is expected for a
     // given id and culture.
     foreach ($matches as $match) {
         $luceneIndex->delete($match->id);
     }
 }