/**
 * Performs a search by calling the search() method on the underlying search engine plugin.
 *
 * Searches that are actually executed (i.e. not served from the search cache) are logged
 * via Searchlog; cached results are returned without writing a log entry.
 *
 * The query syntax is always Lucene syntax, regardless of the back-end search engine. The
 * expression is parsed, its access points are rewritten as needed, and the resulting
 * Lucene-compliant query string (plus the rewritten query object) is handed to the engine
 * plugin, which may use the string as-is or interpret the query object however it wishes.
 *
 * @param string $ps_search The search to perform; engine takes Lucene syntax query
 * @param SearchResult $po_result A newly instantiated sub-class of SearchResult to place search results into and return. If this is not set, then a generic SearchResult object will be returned.
 * @param array $pa_options Optional array of options for the search. Options include:
 *		sort = field or attribute to sort on in <table name>.<field or attribute name> format (eg. ca_objects.idno); default is to sort on relevance (aka. sort='_natural')
 *		sort_direction = direction to sort results by, either 'asc' for ascending order or 'desc' for descending order; passed through to sortHits() (null when not set)
 *		no_cache = if true, search is performed regardless of whether results for the search are already cached; default is false
 *		limit = if set to a value greater than zero, it is passed to the engine result when primary keys are fetched (before set and ACL filtering are applied). If not set then all results are returned.
 *		form_id = optional form identifier string to record in log for search
 *		log_details = optional form description to record in log for search
 *		search_source = optional source indicator text to record in log for search
 *		checkAccess = optional array of access values to filter results on
 *		showDeleted = if set to true, items that have been deleted are not filtered out. Default is false.
 *		deletedOnly = if set to true, only deleted items are returned. Default is false.
 *		limitToModifiedOn = NOTE(review): documented historically as limiting results to rows modified within a date range, but this method does not read the option itself; confirm whether it is handled elsewhere.
 *		sets = if value is a list of set_ids, only rows that are members of those sets will be returned
 *		user_id = if set, item level access control is performed relative to specified user_id, otherwise defaults to logged in user
 *		dontFilterByACL = if true ACL checking is not performed on results
 *		appendToSearch = text appended to the search expression (separated by a single space) before any other processing
 *
 * @return SearchResult Results packaged in a SearchResult object, or sub-class of SearchResult if an instance was passed in $po_result
 */
public function doSearch($ps_search, $po_result = null, $pa_options = null)
{
    global $AUTH_CURRENT_USER_ID;

    if ($vs_append_to_search = isset($pa_options['appendToSearch']) ? ' ' . $pa_options['appendToSearch'] : '') {
        $ps_search .= $vs_append_to_search;
    }

    // The special [BLANK] search term, which returns records that have *no* content in a
    // specific field, has to be quoted in order to protect the square brackets from the parser.
    $ps_search = str_replace("[BLANK]", '"[BLANK]"', $ps_search);

    $t = new Timer();
    if (!is_array($pa_options)) {
        $pa_options = array();
    }
    $vn_limit = isset($pa_options['limit']) && $pa_options['limit'] > 0 ? (int) $pa_options['limit'] : null;

    //
    // Process suffixes list... if search conforms to regex then we append a suffix.
    // This is useful, for example, to allow auto-wildcarding of accession numbers: if the
    // search looks like an accession regex-wise we can append a "*"
    //
    $va_suffixes = $this->opo_search_config->getAssoc('search_suffixes');
    if (is_array($va_suffixes) && sizeof($va_suffixes) && !preg_match('!"!', $ps_search)) {
        // don't add suffix wildcards when quoting
        foreach ($va_suffixes as $vs_preg => $vs_suffix) {
            if (preg_match("!{$vs_preg}!", $ps_search)) {
                $ps_search = preg_replace("!({$vs_preg})[\\*]*!", "\$1{$vs_suffix}", $ps_search);
            }
        }
    }

    // Strip apostrophes *before* consulting the cache. The expression passed to
    // SearchCache::save() below is the stripped one, so the lookup has to use the same
    // string; otherwise a search containing an apostrophe is looked up under a different
    // expression than it was saved under.
    $ps_search = preg_replace('![\']+!', '', $ps_search);

    $vb_no_cache = isset($pa_options['no_cache']) ? $pa_options['no_cache'] : false;
    unset($pa_options['no_cache']);

    $t_table = $this->opo_datamodel->getInstanceByTableName($this->ops_tablename, true);

    $o_cache = new SearchCache();
    if (!$vb_no_cache && $o_cache->load($ps_search, $this->opn_tablenum, $pa_options)) {
        // Cached hits: only re-sort if a non-natural sort was requested.
        $va_hits = $o_cache->getResults();
        if (isset($pa_options['sort']) && $pa_options['sort'] && $pa_options['sort'] != '_natural') {
            $va_hits = $this->sortHits($va_hits, $pa_options['sort'], isset($pa_options['sort_direction']) ? $pa_options['sort_direction'] : null);
        }
        $o_res = new WLPlugSearchEngineCachedResult(array_keys($va_hits), $this->opn_tablenum);
    } else {
        $vs_char_set = $this->opo_app_config->get('character_set');

        $o_query_parser = new LuceneSyntaxParser();
        $o_query_parser->setEncoding($vs_char_set);
        $o_query_parser->setDefaultOperator(LuceneSyntaxParser::B_AND);

        try {
            $o_parsed_query = $o_query_parser->parse($ps_search, $vs_char_set);
        } catch (Exception $e) {
            // Retry search with all non-alphanumeric characters removed
            try {
                $o_parsed_query = $o_query_parser->parse(preg_replace("![^A-Za-z0-9 ]+!", " ", $ps_search), $vs_char_set);
            } catch (Exception $e) {
                // Last resort: fall back to an empty query rather than failing the request
                $o_parsed_query = $o_query_parser->parse("", $vs_char_set);
            }
        }

        $va_rewrite_results = $this->_rewriteQuery($o_parsed_query);
        $o_rewritten_query = new Zend_Search_Lucene_Search_Query_Boolean($va_rewrite_results['terms'], $va_rewrite_results['signs']);
        $vs_search = $this->_queryToString($o_rewritten_query);

        // Filter deleted records out of final result (or keep only deleted ones)
        if (isset($pa_options['deletedOnly']) && $pa_options['deletedOnly'] && $t_table->hasField('deleted')) {
            $this->addResultFilter($this->ops_tablename . '.deleted', '=', '1');
        } else {
            if ((!isset($pa_options['showDeleted']) || !$pa_options['showDeleted']) && $t_table->hasField('deleted')) {
                $this->addResultFilter($this->ops_tablename . '.deleted', '=', '0');
            }
        }

        if (isset($pa_options['checkAccess']) && (is_array($pa_options['checkAccess']) && sizeof($pa_options['checkAccess']))) {
            $va_access_values = $pa_options['checkAccess'];
            $this->addResultFilter($this->ops_tablename . '.access', 'IN', join(",", $va_access_values));
        }

        if (is_array($va_type_ids = $this->getTypeRestrictionList()) && sizeof($va_type_ids)) {
            $this->addResultFilter($this->ops_tablename . '.type_id', 'IN', join(",", $va_type_ids));
        }

        $o_res = $this->opo_engine->search($this->opn_tablenum, $vs_search, $this->opa_result_filters, $o_rewritten_query);

        // Hits are keyed by primary key value from here on
        $va_hits = array_flip($o_res->getPrimaryKeyValues($vn_limit));
        $o_res->seek(0);

        if (isset($pa_options['sets']) && $pa_options['sets']) {
            $va_hits = $this->filterHitsBySets($va_hits, $pa_options['sets'], array('search' => $vs_search));
        }

        $vn_user_id = isset($pa_options['user_id']) && (int) $pa_options['user_id'] ? (int) $pa_options['user_id'] : (int) $AUTH_CURRENT_USER_ID;
        if ((!isset($pa_options['dontFilterByACL']) || !$pa_options['dontFilterByACL']) && $this->opo_app_config->get('perform_item_level_access_checking') && method_exists($t_table, "supportsACL") && $t_table->supportsACL()) {
            $va_hits = $this->filterHitsByACL($va_hits, $vn_user_id, __CA_ACL_READONLY_ACCESS__);
        }

        if (isset($pa_options['sort']) && $pa_options['sort'] && $pa_options['sort'] != '_natural') {
            $va_hits = $this->sortHits($va_hits, $pa_options['sort'], isset($pa_options['sort_direction']) ? $pa_options['sort_direction'] : null);
        }

        $va_hit_values = array_keys($va_hits);
        $o_res = new WLPlugSearchEngineCachedResult($va_hit_values, $this->opn_tablenum);

        // cache for later use
        $o_cache->save($ps_search, $this->opn_tablenum, $va_hits, null, null, array_merge($pa_options, array('filters' => $this->getResultFilters())));

        // log search
        $o_log = new Searchlog();

        $vn_search_form_id = isset($pa_options['form_id']) ? $pa_options['form_id'] : null;
        $vs_log_details = isset($pa_options['log_details']) ? $pa_options['log_details'] : '';
        $vs_search_source = isset($pa_options['search_source']) ? $pa_options['search_source'] : '';

        // REMOTE_ADDR is not defined when running outside a web request (CLI, cron),
        // so guard the lookup instead of reading the key unconditionally.
        $vs_ip_addr = !empty($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : null;

        $vn_execution_time = $t->getTime(4);
        $o_log->log(array(
            'user_id' => $vn_user_id,
            'table_num' => $this->opn_tablenum,
            'search_expression' => $ps_search,
            'num_hits' => sizeof($va_hit_values),
            'form_id' => $vn_search_form_id,
            'ip_addr' => $vs_ip_addr,
            'details' => $vs_log_details,
            'search_source' => $vs_search_source,
            'execution_time' => $vn_execution_time
        ));
    }

    if ($po_result) {
        $po_result->init($o_res, $this->opa_tables, $pa_options);
        return $po_result;
    } else {
        return new SearchResult($o_res, $this->opa_tables);
    }
}
/**
 * Produces a display version of a search expression in which field qualifiers are
 * swapped for the display labels reported by the table instance.
 *
 * The expression is parsed with LuceneSyntaxParser to discover which fields it
 * references. A qualifier of the form "field/relationship_type" is rendered through
 * the "%1 [as %2]" translation string. Qualifiers for which the instance returns no
 * display label are left untouched. When the table name cannot be resolved to an
 * instance the expression is returned unchanged; otherwise apostrophes are stripped
 * from the returned expression as part of parsing.
 *
 * @param string $ps_search Search expression to translate
 * @param mixed $ps_table Table name used to look up the instance providing display labels
 * @return string Expression with qualifiers replaced by display labels
 */
public static function getSearchExpressionForDisplay($ps_search, $ps_table)
{
    $o_datamodel = Datamodel::load();
    $o_app_config = Configuration::load();

    $t_subject = $o_datamodel->getInstanceByTableName($ps_table, true);
    if (!$t_subject) {
        // Nothing to translate against
        return $ps_search;
    }

    $vs_encoding = $o_app_config->get('character_set');

    $o_parser = new LuceneSyntaxParser();
    $o_parser->setEncoding($vs_encoding);
    $o_parser->setDefaultOperator(LuceneSyntaxParser::B_AND);

    $ps_search = preg_replace('![\']+!', '', $ps_search);

    try {
        $o_query = $o_parser->parse($ps_search, $vs_encoding);
    } catch (Exception $e) {
        // Second attempt: everything except letters, digits and spaces becomes a space
        try {
            $o_query = $o_parser->parse(preg_replace("![^A-Za-z0-9 ]+!", " ", $ps_search), $vs_encoding);
        } catch (Exception $e) {
            // Final fallback: parse an empty expression
            $o_query = $o_parser->parse("", $vs_encoding);
        }
    }

    foreach (SearchEngine::_getFieldList($o_query) as $vs_qualifier) {
        $va_parts = explode('/', $vs_qualifier);
        $vb_has_rel_type = (sizeof($va_parts) > 1);

        $vs_base_field = $vb_has_rel_type ? $va_parts[0] : $vs_qualifier;
        $vs_relationship_type = $vb_has_rel_type ? $va_parts[1] : null;

        $vs_label = $t_subject->getDisplayLabel($vs_base_field);
        if (!$vs_label) {
            continue;
        }

        if ($vs_relationship_type) {
            $vs_replacement = _t("%1 [as %2]", $vs_label, $vs_relationship_type);
        } else {
            $vs_replacement = $vs_label;
        }

        $ps_search = str_replace($vs_qualifier, $vs_replacement, $ps_search);
    }

    return $ps_search;
}
<?php
/*
 * Ad-hoc comparison script: runs a fixed list of sample search expressions through both
 * LuceneSyntaxParser and Zend_Search_Lucene_Search_QueryParser and prints the string form
 * of each resulting parse tree so the two can be compared by eye.
 */

// Anchor the path on this file's directory; a bare '../…' path is resolved against the
// current working directory, which only works when the script is launched from its own folder.
require_once dirname(__FILE__) . '/../../../../../../setup.php';
require_once __CA_LIB_DIR__ . "/core/Search/Common/Parsers/LuceneSyntaxParser.php";
require_once __CA_LIB_DIR__ . "/core/Zend/Search/Lucene.php";
require_once __CA_LIB_DIR__ . "/core/Zend/Search/Lucene/Search/QueryParser.php";

$vo_old_parser = new Zend_Search_Lucene_Search_QueryParser();
$vo_parser = new LuceneSyntaxParser();

// Sample expressions covering field qualifiers, ranges, wildcards, fuzzy/proximity
// operators, boosts, boolean operators, paths and escaped characters.
$va_searches = array(
    'accession:X91298',
    'idno:SDK.2008.43',
    'length:24\\"',
    'location:41.442N,-74.433W',
    'content:(+deer -bear)',
    'Kai likes Lego',
    'length:{4m to 8m}',
    'title:"The Right Way" AND text:go',
    'title:Do it right',
    'test*',
    't?st',
    'roam~0.8',
    '"jakarta apache"~10',
    'jakarta^4 apache',
    '"jakarta apache" NOT "Apache Lucene"',
    '"jakarta apache" -"Apache Lucene"',
    'title:(+return +"pink panther")',
    'media/strawberry_flag/159_Strawberry Gazette #5_2010.08.29/Strawberry Gazette 5.pdf',
    '\\(1\\+1\\)\\:2'
);

foreach ($va_searches as $vs_search) {
    print "SEARCH TEXT: {$vs_search}\n";

    // Each parser is guarded separately so that one expression a parser rejects
    // does not abort the comparison for the remaining expressions.
    try {
        $vo_query = $vo_parser->parse($vs_search);
        print "NEW QUERY PARSE TREE TO STRING: {$vo_query->__toString()}\n";
    } catch (Exception $e) {
        print "NEW QUERY PARSE FAILED: {$e->getMessage()}\n";
    }

    try {
        $vo_old_query = $vo_old_parser->parse($vs_search);
        print "OLD QUERY PARSE TREE TO STRING: {$vo_old_query->__toString()}\n";
    } catch (Exception $e) {
        print "OLD QUERY PARSE FAILED: {$e->getMessage()}\n";
    }

    print "# ---------------------------------\n";
}