/**
 * Tutorial search action.
 *
 * A POSTed search form is first redirected to the same action with the form
 * fields converted to Cake named params. When any named param other than
 * "page" is present, a Zend Lucene query is built from the optional "term"
 * param plus the pipe-separated "learning_goal", "resource_type" and
 * "keyword" filters (each valid value is added as a required subquery) and
 * the search index is paginated. Otherwise the tutorials flagged in_index
 * are listed through the model's "published" find type, ordered by title.
 *
 * Sets the view variables: tutorials, results_context, learningGoals and
 * resourceTypes.
 *
 * NOTE(review): the code after each redirect() assumes redirect() halts the
 * request - confirm for this controller.
 */
function search() {
    if (!empty($this->data['Tutorial'])) {
        // convert POST to Cake named params (it's prettier than GET)
        $this->redirect(array_merge($this->params['named'], $this->data['Tutorial']));
    }

    // default to boolean AND searching
    Zend_Search_Lucene_Search_QueryParser::setDefaultOperator(Zend_Search_Lucene_Search_QueryParser::B_AND);

    // Shown whenever the query cannot be parsed or assembled.
    $invalid_query_message = "We're not sure what you mean. \nAre your search terms correct?";

    $query = '';

    // Are there any parameters besides page?
    $named_params = array_diff_key($this->params['named'], array('page' => ''));

    if (!empty($named_params)) {
        $parsed_query = null;

        // NOTE(review): the term is handed to the Lucene parser unsanitized;
        // only the parser's own exceptions guard against malformed input.
        if (isset($this->params['named']['term'])) {
            $query = $this->params['named']['term'];
            if (!empty($query)) {
                // Intercept invalid queries
                try {
                    Zend_Search_Lucene_Search_QueryParser::dontSuppressQueryParsingExceptions();
                    $parsed_query = Zend_Search_Lucene_Search_QueryParser::parse($query);
                } catch (Zend_Search_Lucene_Exception $e) {
                    $this->Session->setFlash($invalid_query_message);
                    $this->redirect(array('action' => 'search', 'term' => Sanitize::paranoid($query, array(" "))));
                }
            }
        }

        // A missing OR empty term (or a failed parse) must still leave a
        // usable query object for the filters and the paginator below.
        if ($parsed_query === null) {
            $parsed_query = new Zend_Search_Lucene_Search_Query_Boolean();
        }

        try {
            // filter field => whether values are identifiers (true) or numeric ids (false)
            $filters = array(
                'learning_goal' => false,
                'resource_type' => false,
                'keyword'       => true,
            );

            $filter_queries = array();
            foreach ($filters as $field => $is_identifier) {
                if (!isset($this->params['named'][$field])) {
                    continue;
                }
                foreach (explode('|', $this->params['named'][$field]) as $value) {
                    if ($is_identifier) {
                        // Anchored so the WHOLE value must look like a UUID-style
                        // token, not merely contain one valid character.
                        $is_valid = (preg_match('/^[A-Za-z0-9-]+\z/', $value) === 1);
                    } else {
                        $is_valid = is_numeric($value);
                    }
                    if (!$is_valid) {
                        continue;
                    }
                    $filter_term = new Zend_Search_Lucene_Index_Term($value, $field);
                    $filter_queries[] = new Zend_Search_Lucene_Search_Query_Term($filter_term);
                }
            }

            if (!empty($filter_queries)) {
                // The parser is only documented to return a generic
                // Zend_Search_Lucene_Search_Query; wrap anything that is not a
                // Boolean query so that addSubquery() is guaranteed to exist.
                if (!($parsed_query instanceof Zend_Search_Lucene_Search_Query_Boolean)) {
                    $combined_query = new Zend_Search_Lucene_Search_Query_Boolean();
                    $combined_query->addSubquery($parsed_query, true);
                    $parsed_query = $combined_query;
                }
                foreach ($filter_queries as $filter_query) {
                    $parsed_query->addSubquery($filter_query, true);
                }
            }
        } catch (Zend_Search_Lucene_Exception $e) {
            $this->Session->setFlash($invalid_query_message);
            // Retry with the term only (filters dropped), using the same
            // "term" param that this action actually reads.
            $retry_url = array('action' => 'search');
            if (!empty($query)) {
                $retry_url['term'] = $query;
            }
            $this->redirect($retry_url);
        }

        $this->paginate['SearchIndex'] = array(
            'limit' => 10,
            'conditions' => array('query' => $parsed_query),
            'highlight' => true,
        );
        $this->set('tutorials', $this->paginate($this->Tutorial->SearchIndex));
    } else {
        $this->paginate = array(
            'published',
            'limit' => 10,
            'order' => 'Tutorial.title ASC',
            'conditions' => array('in_index' => true),
            'contain' => array('Tag'),
        );
        $this->set('tutorials', $this->paginate($this->Tutorial));
    }

    $this->layout = 'institution';

    $results_context = array('model' => '', 'key' => '', 'id' => 0, 'name' => '');
    $this->set(compact('results_context'));

    $this->set('learningGoals', $this->Tutorial->LearningGoal->find('list'));
    $this->set('resourceTypes', $this->Tutorial->ResourceType->find('list'));
}
/**
 * Returns the Lucene search index, opening it on first use.
 *
 * The index lives in the "searchdb" directory below the application's
 * runtime path. If it cannot be opened, a new index is created at that
 * location. The result is stored in $this->index and reused by later calls.
 *
 * @return mixed the index object returned by Zend_Search_Lucene::open() or
 *               Zend_Search_Lucene::create()
 */
public function getIndex() {
    $cached = $this->index;
    if ($cached != null) {
        return $cached;
    }

    // Global Zend Lucene configuration. The UTF-8 encoding matters for
    // GUID searches as well (e.g. delete).
    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('utf-8');
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive()
    );
    Zend_Search_Lucene_Search_QueryParser::setDefaultOperator(
        Zend_Search_Lucene_Search_QueryParser::B_AND
    );

    $directory = Yii::app()->getRuntimePath() . DIRECTORY_SEPARATOR . "searchdb" . DIRECTORY_SEPARATOR;

    try {
        $opened = Zend_Search_Lucene::open($directory);
    } catch (Exception $ex) {
        // Any failure to open falls back to creating the index.
        $opened = Zend_Search_Lucene::create($directory);
    }

    $this->index = $opened;

    return $opened;
}
/**
 * Searches the Lucene index and returns one page of results.
 *
 * Side effects: appends the text of every parsed query term to
 * $this->terms and stores the total number of hits in $this->hits_count.
 * NOTE(review): $this->terms is never reset here, so terms accumulate
 * across calls - confirm that is intended.
 *
 * @param mixed $Model passed through to open_index()
 * @param mixed $query query string handed to the Zend Lucene query parser
 * @param int $limit maximum number of results per page
 * @param int $page 1-based page number; values below 1 are treated as 1
 * @access public
 * @return mixed false when the index cannot be opened; null when there are
 *               no hits or the search failed (the error is logged); otherwise
 *               a list of array('Result' => array(field name => field value)),
 *               which is empty when $page lies beyond the last hit
 */
function search(&$Model, $query, $limit = 20, $page = 1) {
    // open the index
    if (!$this->open_index($Model)) {
        return false;
    }

    // Pre-initialised so a failed search is reported as "no hits" instead of
    // counting an undefined variable.
    $Hits = new ArrayObject();

    try {
        // set the default encoding
        Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('utf-8');

        // zend search results limiting (We will use the LimitIterator)
        // we can use it for some maximum value like 1000 if its likely that there could be more results
        Zend_Search_Lucene::setResultSetLimit(1000);

        // set the parser default operator to AND
        Zend_Search_Lucene_Search_QueryParser::setDefaultOperator(Zend_Search_Lucene_Search_QueryParser::B_AND);

        // utf-8 num analyzer
        Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive());

        // parse the query and remember its terms
        $Query = Zend_Search_Lucene_Search_QueryParser::parse($query);
        foreach ($Query->getQueryTerms() as $Term) {
            $this->terms[] = $Term->text;
        }

        // do the search
        $Hits = new ArrayObject($this->Index->find($Query));
    } catch (Zend_Search_Lucene_Exception $e) {
        $this->log("Zend_Search_Lucene error: " . $e->getMessage(), 'searchable');
    }

    $this->hits_count = count($Hits);

    if ($this->hits_count === 0) {
        return null;
    }

    // A page below 1 would yield a negative offset, which LimitIterator rejects.
    $offset = (max(1, (int) $page) - 1) * $limit;

    // Seeking past the last hit makes the underlying ArrayIterator throw, so
    // an out-of-range page is answered with an empty result page instead.
    if ($offset >= $this->hits_count) {
        return array();
    }

    $Hits = new LimitIterator($Hits->getIterator(), $offset, $limit);

    $results = array();
    foreach ($Hits as $Hit) {
        $Document = $Hit->getDocument();

        // copy every stored field of the document into the result row
        $result = array();
        foreach ($Document->getFieldNames() as $field) {
            $result['Result'][$field] = $Document->{$field};
        }
        $results[] = $result;
    }

    return $results;
}
/**
 * Returns the Lucene index instance that backs $table.
 *
 * Instances are cached per table name in $this->con_db. The index directory
 * is the configured "path" field joined with the table name; it is opened
 * when the directory exists and created otherwise. If that attempt raises a
 * Zend_Search_Lucene_Exception, creation is tried once more after clearing
 * the stat cache and a short pause.
 *
 * @param MingoTable $table the table whose index is wanted
 * @return mixed the object returned by Zend_Search_Lucene::open()/create()
 * @throws UnexpectedValueException when no index instance could be obtained
 */
protected function getLucene(MingoTable $table) {
    $name = $table->getName();

    // canary: reuse an already opened index
    if (isset($this->con_db[$name])) {
        return $this->con_db[$name];
    }

    $index_dir = implode(DIRECTORY_SEPARATOR, array($this->getField('path'), $name));

    $lucene = null;
    try {
        // install the index if it doesn't already exist...
        $lucene = is_dir($index_dir)
            ? Zend_Search_Lucene::open($index_dir)
            : Zend_Search_Lucene::create($index_dir);
    } catch (Zend_Search_Lucene_Exception $e) {
        clearstatcache();
        usleep(10);
        $lucene = Zend_Search_Lucene::create($index_dir);
    }

    if ($lucene === null) {
        throw new UnexpectedValueException('could not create Lucene connection');
    }

    // Index tuning: per the original author's note, MaxMergeDocs, MergeFactor
    // and MaxBufferedDocs were suspected to be behind HTTP 500 timeouts when
    // adding to the index.
    $lucene->setMaxMergeDocs((int) $this->max_merged_docs);
    $lucene->setMaxBufferedDocs((int) $this->max_buffered_docs);
    $lucene->setMergeFactor((int) $this->merge_factor);

    // treat numbers like words, don't worry about case
    // see: http://framework.zend.com/manual/en/zend.search.lucene.charset.html 38.6.2
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive()
    );

    // default to AND, instead of OR, when there is no operator
    // http://framework.zend.com/manual/en/zend.search.lucene.query-language.html#zend.search.lucene.query-language.boolean.no-operator
    Zend_Search_Lucene_Search_QueryParser::setDefaultOperator(
        Zend_Search_Lucene_Search_QueryParser::B_AND
    );

    $this->con_db[$name] = $lucene;

    return $lucene;
}
/**
 * Transforms an nc_search_query into a Lucene query.
 *
 * The query string is extended, in this order, with zero-padded integer
 * ranges (when range search is allowed), an optional last_modified range
 * and an optional area condition, and is then parsed by the Zend Lucene
 * query parser with parsing exceptions suppressed.
 *
 * @param nc_search_query $query
 * @return Zend_Search_Lucene_Search_Query
 */
protected function get_lucene_query(nc_search_query $query) {
    Zend_Search_Lucene_Search_QueryParser::suppressQueryParsingExceptions();

    if ($this->get_setting('DefaultBooleanOperator') == 'AND') {
        Zend_Search_Lucene_Search_QueryParser::setDefaultOperator(Zend_Search_Lucene_Search_QueryParser::B_AND);
    }

    $query_string = $query->to_string();

    // range search for integers: pad both bounds via pad_integer()
    // (strict comparison: an offset of 0 is still a match)
    if (nc_search::should('AllowRangeSearch') && strpos($query_string, ' TO ') !== false) {
        preg_match_all("/(\\[|\\{)\\s*(\\d+)\\s+TO\\s+(\\d+)\\s*(\\]|\\})/", $query_string, $matches, PREG_SET_ORDER);
        foreach ($matches as $m) {
            $query_string = str_replace(
                $m[0],
                $m[1] . $this->pad_integer($m[2]) . " TO " . $this->pad_integer($m[3]) . $m[4],
                $query_string
            );
        }
    }

    // add a time range should it be required
    $modified_after = $query->get('modified_after');
    $modified_before = $query->get('modified_before');
    if ($modified_before || $modified_after) {
        // date('YmdHis') yields the same digits as the deprecated
        // strftime("%Y%m%d%H%M%S"). A date that strtotime() cannot parse falls
        // back to the open-ended bound instead of silently becoming 1970.
        $after_timestamp = $modified_after ? strtotime($modified_after) : false;
        $before_timestamp = $modified_before ? strtotime($modified_before) : false;

        $modified_after = ($after_timestamp !== false)
            ? date("YmdHis", $after_timestamp)
            : "19000101000000";
        $modified_before = ($before_timestamp !== false)
            ? date("YmdHis", $before_timestamp)
            : "22000101000000";

        $query_string = "({$query_string}) last_modified:[{$modified_after} TO {$modified_before}]";
    }

    // add area
    $area = $query->get('area');
    if ($area) {
        if (!$area instanceof nc_search_area) {
            $area = new nc_search_area($area);
        }
        // the area condition is joined with AND for boolean-style queries and
        // with a "+" (required) sign otherwise
        $is_boolean = nc_search_util::is_boolean_query($query_string);
        $query_string = "({$query_string}) " .
            ($is_boolean ? " AND " : "+") .
            $area->get_field_condition($is_boolean);
    }

    // parse string into Lucene Query
    $zend_query = Zend_Search_Lucene_Search_QueryParser::parse($query_string, 'UTF-8');

    return $zend_query;
}
/**
 * Sets up the Lucene engine for this index: installs an xfLuceneEngine
 * pointing at "<sf_data_dir>/index", opens it, and makes AND the default
 * operator of the Zend Lucene query parser.
 *
 * @see xfIndex
 */
protected function initialize() {
    $indexLocation = sfConfig::get('sf_data_dir') . '/index';
    $engine = new xfLuceneEngine($indexLocation);

    $this->setEngine($engine);
    $this->getEngine()->open();

    Zend_Search_Lucene_Search_QueryParser::setDefaultOperator(
        Zend_Search_Lucene_Search_QueryParser::B_AND
    );
}