/**
 * Outputs the complete query page: HTML head, (optional) visible page header,
 * the advanced CQL query form and the (optional) visible page footer.
 *
 * The visible header and footer are omitted in mobile view ('viewType=Mobile')
 * and whenever the global '$client' starts with "inc" (include mechanisms).
 *
 * @param mixed $operation requested operation; currently not evaluated, since the
 *                         advanced form is always shown (see TODO below)
 * @param mixed $viewType  view type, passed on to 'displayHTMLhead()' and the query form
 * @param mixed $showRows  passed on unchanged to the query form
 * @param mixed $rowOffset passed on unchanged to the query form
 */
function showQueryPage($operation, $viewType, $showRows, $rowOffset)
{
    global $officialDatabaseName; // defined in 'ini.inc.php'
    global $displayType;
    global $loc; // defined in 'locales/core.php'
    global $client;

    // Use the message stored in the session (if any), otherwise fall back to the default heading.
    // The session variable gets cleared, but its value stays available locally for header & footer:
    if (isset($_SESSION['HeaderString'])) {
        $pageMessage = $_SESSION['HeaderString'];
        deleteSessionVariable("HeaderString"); // function 'deleteSessionVariable()' is defined in 'include.inc.php'
    } else {
        $pageMessage = $loc["SearchDB"] . ":";
    }

    // For HTML output the '$displayType' variable must be blanked; otherwise the 'originalDisplayType'
    // parameter of the 'quickSearch' form in the page header would carry a stale value (such as "Export").
    // With an empty '$displayType', 'show.php' falls back to the view given in session variable 'userDefaultView'.
    $displayType = "";

    // Show the login status:
    showLogin(); // function 'showLogin()' is defined in 'include.inc.php'

    // HTML head ('displayHTMLhead()' and 'showPageHeader()' are defined in 'header.inc.php'):
    $pageTitle = encodeHTML($officialDatabaseName) . " -- " . $loc["Search"];
    $pageDescription = "Search the " . encodeHTML($officialDatabaseName);
    displayHTMLhead($pageTitle, "index,follow", $pageDescription, "", true, "", $viewType, array());

    if (!(preg_match("/^Mobile\$/i", $viewType) or preg_match("/^inc/i", $client))) {
        showPageHeader($pageMessage);
    }

    // ---- Drop-down 1: text relations (menu value => key of the localized label in '$loc') ----
    $textRelationLocKeys = array(
        "contains"         => "contains",
        "does not contain" => "contains not",
        "is equal to"      => "equal to",
        "is not equal to"  => "equal to not",
        "starts with"      => "starts with",
        "ends with"        => "ends with",
    );
    $textRelations = array();
    foreach ($textRelationLocKeys as $relation => $locKey) {
        $textRelations[$relation] = $loc[$locKey];
    }
    $dropDownItems1 = buildSelectMenuOptions($textRelations, "//", "\t\t\t", true); // function 'buildSelectMenuOptions()' is defined in 'include.inc.php'

    // ---- Drop-down 2: numeric relations (menu value and '$loc' key are identical) ----
    $numericRelations = array();
    foreach (array("is greater than", "is less than", "is within range", "is within list") as $relation) {
        $numericRelations[$relation] = $loc[$relation];
    }
    $dropDownItems2 = buildSelectMenuOptions($numericRelations, "//", "\t\t\t", true);

    // ---- Drop-down 3: field names ----
    // Field name => key of the localized label in '$loc'; the bare "" entries are separator items.
    $fieldMenuSpec = array(
        "author"                => "DropDownFieldName_Author",
        "address"               => "DropDownFieldName_Address",
        "corporate_author"      => "DropDownFieldName_CorporateAuthor",
        "thesis"                => "DropDownFieldName_Thesis",
        "",
        "title"                 => "DropDownFieldName_Title",
        "orig_title"            => "DropDownFieldName_OrigTitle",
        "",
        "year"                  => "DropDownFieldName_Year",
        "publication"           => "DropDownFieldName_Publication",
        "abbrev_journal"        => "DropDownFieldName_AbbrevJournal",
        "editor"                => "DropDownFieldName_Editor",
        "",
        "volume_numeric"        => "DropDownFieldName_Volume",
        "issue"                 => "DropDownFieldName_Issue",
        "pages"                 => "DropDownFieldName_Pages",
        "",
        "series_title"          => "DropDownFieldName_SeriesTitle",
        "abbrev_series_title"   => "DropDownFieldName_AbbrevSeriesTitle",
        "series_editor"         => "DropDownFieldName_SeriesEditor",
        "series_volume_numeric" => "DropDownFieldName_SeriesVolume",
        "series_issue"          => "DropDownFieldName_SeriesIssue",
        "",
        "publisher"             => "DropDownFieldName_Publisher",
        "place"                 => "DropDownFieldName_Place",
        "",
        "edition"               => "DropDownFieldName_Edition",
        "medium"                => "DropDownFieldName_Medium",
        "issn"                  => "DropDownFieldName_Issn",
        "isbn"                  => "DropDownFieldName_Isbn",
        "",
        "language"              => "DropDownFieldName_Language",
        "summary_language"      => "DropDownFieldName_SummaryLanguage",
        "",
        "keywords"              => "DropDownFieldName_Keywords",
        "abstract"              => "DropDownFieldName_Abstract",
        "",
        "area"                  => "DropDownFieldName_Area",
        "expedition"            => "DropDownFieldName_Expedition",
        "conference"            => "DropDownFieldName_Conference",
        "",
        "doi"                   => "DropDownFieldName_Doi",
        "url"                   => "DropDownFieldName_Url",
        "file"                  => "DropDownFieldName_File",
        "",
        "notes"                 => "DropDownFieldName_Notes",
        "location"              => "DropDownFieldName_Location",
        "call_number"           => "DropDownFieldName_CallNumber",
        "",
        "serial"                => "DropDownFieldName_Serial",
        "type"                  => "DropDownFieldName_Type",
        "approved"              => "DropDownFieldName_Approved",
        "",
        "created_date"          => "DropDownFieldName_CreatedDate",
        "created_time"          => "DropDownFieldName_CreatedTime",
        "created_by"            => "DropDownFieldName_CreatedBy",
        "",
        "modified_date"         => "DropDownFieldName_ModifiedDate",
        "modified_time"         => "DropDownFieldName_ModifiedTime",
        "modified_by"           => "DropDownFieldName_ModifiedBy",
    );

    // These fields are only offered if the user is logged in:
    $loginOnlyFields = array("file", "location", "created_by", "modified_by");
    $userIsLoggedIn = isset($_SESSION['loginEmail']);

    $fieldMenuEntries = array();
    foreach ($fieldMenuSpec as $fieldName => $locKey) {
        if (is_int($fieldName)) {
            // separator item (gets the next free numeric key)
            $fieldMenuEntries[] = "";
        } elseif ($userIsLoggedIn or !in_array($fieldName, $loginOnlyFields, true)) {
            $fieldMenuEntries[$fieldName] = $loc[$locKey];
        }
    }
    $dropDownItems3 = buildSelectMenuOptions($fieldMenuEntries, "//", "\t\t\t", true);

    // ---- Drop-down 4: output formats ----
    $outputFormats = array(
        "html"     => "html",
        "atom"     => "Atom XML",
        "rss"      => "RSS XML",
        "srw_dc"   => "SRW_DC XML",
        "srw_mods" => "SRW_MODS XML",
    );
    $dropDownItems4 = buildSelectMenuOptions($outputFormats, "//", "\t\t\t", true);

    // Map CQL indexes to refbase field names:
    $indexNamesArray = mapCQLIndexes(); // function 'mapCQLIndexes()' is defined in 'webservice.inc.php'

    // --------------------------------------------------------------------

    // TODO: once the simple CQL Query Builder interface is done, a call to 'opensearch.php' (or
    //       'opensearch.php?operation=simple') should activate that simple GUI-based interface
    //       ('showQueryFormSimple()', which lets you build a CQL query via drop-down menus), and the
    //       advanced form below should only be shown if '$operation' matches "/^(advanced|CQL)$/i".
    //       Until then, the advanced interface (which lets you enter a standard CQL query directly)
    //       is shown unconditionally.
    showQueryFormAdvanced($dropDownItems1, $dropDownItems2, $dropDownItems3, $dropDownItems4, $showRows, $rowOffset, $indexNamesArray, $viewType);

    // --------------------------------------------------------------------

    // HTML footer ('showPageFooter()' and 'displayHTMLfoot()' are defined in 'footer.inc.php'):
    if (!(preg_match("/^Mobile\$/i", $viewType) or preg_match("/^inc/i", $client))) {
        showPageFooter($pageMessage);
    }

    displayHTMLfoot();
}
/**
 * Parses a simple CQL query ("[set.]index relation term") and translates it into
 * a search array holding the corresponding SQL WHERE clause part.
 *
 * Only a single search clause is supported: neither relation modifiers nor multiple
 * query parts connected with boolean operators are handled.
 *
 * If the query contains no index/relation, a default is prepended: "main_fields all"
 * for '$operation=suggest', "cql.serverChoice all" otherwise. For an empty query, a
 * clause matching every record with a non-empty 'serial' field is returned.
 *
 * On invalid input the function calls 'returnDiagnostic()' and terminates the script:
 *   - 15: unsupported context set
 *   - 16: unsupported index
 *   - 10: unsupported combination of context set & index
 *   - 36: 'within' relation without two space-separated dimensions
 *
 * @param mixed  $sruVersion SRU version; currently not evaluated by this function
 * @param string $sruQuery   the CQL query string
 * @param string $operation  if it matches "suggest" (case-insensitive), every search word
 *                           gets right-truncated (unless the relation is 'exact')
 *
 * @return array array(array("_boolean" => "", "_query" => array(array("_boolean" => "", "_query" => <WHERE clause part>))))
 */
function parseCQL($sruVersion, $sruQuery, $operation = "")
{
    global $word, $patternModifiers; // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'

    // map CQL indexes to refbase field names:
    $indexNamesArray = mapCQLIndexes();

    $searchArray = array(); // will hold information about context set, index name, relation and search value
    $searchSubArray1 = array();

    // --------------------------------

    // NOTE: we deliberately don't use 'empty()' here since it would treat the valid query "0" as missing
    if ((string) $sruQuery !== "") {
        // check for presence of context set/index name and any of the main relations:
        if (!preg_match('/^[^\\" <>=]+( +(all|any|exact|within) +| *(<>|<=|>=|<|>|=) *)/', $sruQuery)) {
            // if no context set/index name and relation was given we'll add meaningful defaults:
            if (preg_match("/^suggest\$/i", $operation)) {
                $sruQuery = "main_fields all " . $sruQuery;
            } else {
                // 'main_fields' isn't yet supported for regular OpenSearch queries
                $sruQuery = "cql.serverChoice all " . $sruQuery;
            }
        }

        // extract the context set:
        if (preg_match('/^([^\\" <>=.]+)\\./', $sruQuery)) {
            $contextSet = preg_replace('/^([^\\" <>=.]+)\\..*/', '\\1', $sruQuery);
        } else {
            $contextSet = ""; // use the default context set
        }

        // extract the index:
        $indexName = preg_replace('/^(?:[^\\" <>=.]+\\.)?([^\\" <>=.]+).*/', '\\1', $sruQuery);

        // ----------------

        // return a fatal diagnostic if the CQL query does contain an unrecognized 'set.index' identifier:
        // (a) verify that the given context set (if any) is recognized:
        //     (strict comparison instead of 'empty()', so that a bogus context set "0" gets rejected)
        if ((string) $contextSet !== "") {
            $contextSetIndexConnector = ".";
            $contextSetLabel = "context set '" . $contextSet . "'";

            if (!preg_match("/^(dc|bath|rec|bib|cql)\$/", $contextSet)) {
                returnDiagnostic(15, $contextSet); // unsupported context set (function 'returnDiagnostic()' is defined in 'opensearch.php' and 'sru.php')
                exit;
            }
        } else {
            $contextSetIndexConnector = "";
            $contextSetLabel = "empty context set";
        }

        // (b) verify that the given 'set.index' term is recognized:
        $indexKey = $contextSet . $contextSetIndexConnector . $indexName;

        if (!isset($indexNamesArray[$indexKey])) {
            if (isset($indexNamesArray[$indexName])
                or isset($indexNamesArray["dc." . $indexName])
                or isset($indexNamesArray["bath." . $indexName])
                or isset($indexNamesArray["rec." . $indexName])
                or isset($indexNamesArray["bib." . $indexName])
                or isset($indexNamesArray["cql." . $indexName])
            ) {
                returnDiagnostic(10, "Unsupported combination of " . $contextSetLabel . " with index '" . $indexName . "'"); // unsupported combination of context set & index
            } else {
                returnDiagnostic(16, $indexName); // unsupported index
            }
            exit;
        }

        // ----------------

        // extract the main relation (relation modifiers aren't supported yet!):
        $mainRelation = preg_replace('/^[^\\" <>=]+( +(all|any|exact|within) +| *(<>|<=|>=|<|>|=) *).*/', '\\1', $sruQuery);
        // remove any runs of leading or trailing whitespace:
        $mainRelation = trim($mainRelation);

        // ----------------

        // extract the search term:
        $searchTerm = preg_replace('/^[^\\" <>=]+(?: +(?:all|any|exact|within) +| *(?:<>|<=|>=|<|>|=) *)(.*)/', '\\1', $sruQuery);

        // remove slashes from search term if 'magic_quotes_gpc = On':
        $searchTerm = stripSlashesIfMagicQuotes($searchTerm); // function 'stripSlashesIfMagicQuotes()' is defined in 'include.inc.php'

        // remove any leading or trailing quotes from the search term:
        // (note that multiple query parts connected with boolean operators aren't supported yet!)
        $searchTerm = preg_replace('/^\\"/', '', $searchTerm);
        $searchTerm = preg_replace('/\\"$/', '', $searchTerm);

        // Keep an unmodified copy of the search term for relations that compare field contents
        // literally ('exact', 'within', '<', '<=', '>', '>='). Everything done to '$searchTerm' below
        // (regex escaping, mask & anchor conversion) is only meaningful for the RLIKE based relations
        // and would otherwise inject backslashes or regex fragments into a literal comparison
        // (e.g. "2004-01-01" would get compared as "2004\-01\-01").
        $literalSearchTerm = $searchTerm;

        // OpenSearch search suggestions ('$operation=suggest'): since CQL matches full words (not sub-strings),
        // we need to make sure that every search term ends with the '*' masking character:
        // NOTE: the possessive quantifier ('++') is required here; with a plain '+' the regex engine would
        //       backtrack into a word that's already followed by a masking/anchor character and turn
        //       'foo*' into 'fo*o*'.
        if (preg_match("/^suggest\$/i", $operation) and $mainRelation !== "exact") {
            $searchTerm = preg_replace("/([{$word}]++)(?![?*^])/{$patternModifiers}", "\\1*", $searchTerm);
        }

        // escape meta characters (including '/' that is used as delimiter for the PCRE replace functions below and which gets passed as second argument):
        $searchTerm = preg_quote($searchTerm, "/"); // escape special regular expression characters: . \ + * ? [ ^ ] $ ( ) { } = ! < > | :

        // account for CQL anchoring ('^') and masking ('*' and '?') characters:
        // NOTE: in the code block above we quote everything to escape possible meta characters,
        //       so all special chars in the block below have to be matched in their escaped form!
        //       (The expression '\\\\' in the patterns below describes only *one* backslash! -> '\'.
        //        The reason for this is that before the regex engine can interpret the \\ into \, PHP interprets it.
        //        Thus, you have to escape your backslashes twice: once for PHP, and once for the regex engine.)
        //
        // more info about masking characters in CQL: <http://zing.z3950.org/cql/intro.html#6>
        // more info about word anchoring in CQL: <http://zing.z3950.org/cql/intro.html#6.1>

        // recognize any anchor at the beginning of a search term (like '^foo'):
        // (in CQL, a word beginning with ^ must be the first in its field)
        $searchTerm = preg_replace('/(^| )\\\\\\^/', '\\1^', $searchTerm);

        // convert any anchor at the end of a search term (like 'foo^') to the correct MySQL variant ('foo$'):
        // (in CQL, a word ending with ^ must be the last in its field)
        $searchTerm = preg_replace('/\\\\\\^( |$)/', '$\\1', $searchTerm);

        // recognize any masking ('*' and '?') characters:
        // Note: by "character" we do refer to *word* characters here, i.e., any character that is not a space or punctuation character (see below);
        //       however, it's unclear whether the masking characters '*' and '?' should also include non-word characters!
        $searchTerm = preg_replace('/(?<!\\\\)\\\\\\*/', '[^[:space:][:punct:]]*', $searchTerm); // a single asterisk ('*') is used to mask zero or more characters
        $searchTerm = preg_replace('/(?<!\\\\)\\\\\\?/', '[^[:space:][:punct:]]', $searchTerm); // a single question mark ('?') is used to mask a single character, thus N consecutive question-marks means mask N characters

        // ----------------

        // construct the WHERE clause:
        $fieldName = $indexNamesArray[$indexKey];
        $whereClausePart = $fieldName; // start WHERE clause with field name

        // Regex fragments that delimit a full word:
        // NOTE: For word-matching relations (like 'all', 'any' or '=') we could also use the word boundaries
        //       [[:<:]] and [[:>:]] which would be more (too?) restrictive. They match the beginning and end
        //       of words, respectively, where a word character is an alphanumeric character or an underscore.
        $wordStart = '(^|[[:space:][:punct:]])';
        $wordEnd = '([[:space:][:punct:]]|$)';

        if ($mainRelation === "all") {
            // matches full words (not sub-strings); all of the given words must be present.
            // Empty fragments (caused by leading/trailing/repeated spaces) are dropped since an
            // empty word would yield a pattern that matches almost any field contents:
            $searchWords = preg_split("/ +/", $searchTerm, -1, PREG_SPLIT_NO_EMPTY);

            if (empty($searchWords)) {
                $searchWords = array(""); // empty (or blank) search term
            }

            $whereClauseSubPartsArray = array();

            foreach ($searchWords as $searchWord) {
                $whereClauseSubPartsArray[] = " RLIKE " . quote_smart($wordStart . $searchWord . $wordEnd);
            }

            $whereClausePart .= implode(" AND " . $fieldName, $whereClauseSubPartsArray);
        } elseif ($mainRelation === "any") {
            // matches full words (not sub-strings); at least one of the given words must be present.
            // Leading/trailing spaces are trimmed first: 'splitAndMerge()' presumably splits on the given
            // pattern and joins the pieces with '|', so an empty piece would become an empty
            // alternative that matches everything (TODO: confirm against 'include.inc.php')
            $searchWordAlternatives = splitAndMerge("/ +/", "|", trim($searchTerm, " ")); // function 'splitAndMerge()' is defined in 'include.inc.php'
            $whereClausePart .= " RLIKE " . quote_smart($wordStart . "(" . $searchWordAlternatives . ")" . $wordEnd);
        } elseif ($mainRelation === "exact") {
            // 'exact' is used for exact string matching, i.e., it matches field contents exactly
            $whereClausePart .= " = " . quote_smart($literalSearchTerm);
        } elseif ($mainRelation === "within") {
            // 'within' requires a lower and an upper bound, separated by space(s)
            $rangeBounds = preg_split("/ +/", $literalSearchTerm, -1, PREG_SPLIT_NO_EMPTY);

            if (count($rangeBounds) >= 2) {
                $whereClausePart .= " >= " . quote_smart($rangeBounds[0]) . " AND " . $fieldName . " <= " . quote_smart($rangeBounds[1]);
            } else {
                returnDiagnostic(36, "Search term requires two space-separated dimensions. Example: dc.date within \"2004 2005\"");
                exit;
            }
        } elseif ($mainRelation === "=") {
            // matches full words (not sub-strings); '=' is used for word adjacency, the words appear in that order with no others intervening
            $whereClausePart .= " RLIKE " . quote_smart($wordStart . $searchTerm . $wordEnd);
        } elseif ($mainRelation === "<>") {
            // NOTE: it's unclear whether '<>' should also match full words (and not sub-strings)
            $whereClausePart .= " NOT RLIKE " . quote_smart($wordStart . $searchTerm . $wordEnd);
        } elseif (in_array($mainRelation, array("<", "<=", ">", ">="), true)) {
            // ordering relations map directly onto the SQL operator of the same name
            $whereClausePart .= " " . $mainRelation . " " . quote_smart($literalSearchTerm);
        }

        $searchSubArray1[] = array("_boolean" => "", "_query" => $whereClausePart);
    } else {
        // no query given -> match every record that has a non-empty 'serial' field
        $searchSubArray1[] = array("_boolean" => "", "_query" => "serial RLIKE " . quote_smart(".+"));
    }

    // --------------------------------

    if (!empty($searchSubArray1)) {
        $searchArray[] = array("_boolean" => "", "_query" => $searchSubArray1);
    }

    return $searchArray;
}