Exemplo n.º 1
0
} elseif (empty($sruQuery)) {
    returnDiagnostic(7, "query");
} elseif (empty($sruVersion)) {
    returnDiagnostic(7, "version");
} elseif ($sruVersion != "1.1") {
    returnDiagnostic(5, "1.1");
} elseif (!preg_match("#^((srw_)?mods|info:srw/schema/1/mods-v3\\.2|http://www\\.loc\\.gov/mods/v3)\$#i", $sruRecordSchema) and !preg_match("#^((oai_|srw_)?dc|info:srw/schema/1/dc-v1\\.1|http://purl\\.org/dc/elements/1\\.1/)\$#i", $sruRecordSchema)) {
    returnDiagnostic(66, $sruRecordSchema);
} elseif (!preg_match("/^xml\$/i", $sruRecordPacking)) {
    returnDiagnostic(71, "Only 'recordPacking=xml' is supported");
} elseif (!empty($sruRecordXPath)) {
    returnDiagnostic(72, "");
} elseif (!empty($sruSortKeys)) {
    returnDiagnostic(80, "");
} elseif (!empty($sruResultSetTTL)) {
    returnDiagnostic(50, "");
} else {
    // use an appropriate default stylesheet:
    if ($exportStylesheet == "DEFAULT") {
        if (preg_match("#^((oai_|srw_)?dc|info:srw/schema/1/dc-v1\\.1|http://purl\\.org/dc/elements/1\\.1/)\$#i", $sruRecordSchema)) {
            // simple Dublin Core was requested as record schema
            $exportStylesheet = "srwdc2html.xsl";
        } else {
            // use a stylesheet that's appropriate for SRW+MODS XML:
            $exportStylesheet = "srwmods2html.xsl";
        }
    }
    //		// NOTE: the generation of SQL queries (or parts of) should REALLY be modular and be moved to separate dedicated functions!
    // CONSTRUCT SQL QUERY:
    // TODO: build the complete SQL query using functions 'buildFROMclause()' and 'buildORDERclause()'
    // Note: the 'verifySQLQuery()' function that gets called by 'search.php' to process query data with "$formType = sqlSearch" will add the user-specific fields to the 'SELECT' clause
Exemplo n.º 2
0
    // in '$contentTypeCharset' (which is defined in 'ini.inc.php'):
    setHeaderContentType("application/opensearchdescription+xml", $contentTypeCharset);
    // function 'setHeaderContentType()' is defined in 'include.inc.php'
    echo openSearchDescription($exportStylesheet);
    // function 'openSearchDescription()' is defined in 'opensearch.inc.php'
} elseif (preg_match("/^suggest\$/i", $operation) and preg_match("/^(html|json)\$/i", $recordSchema)) {
    // Set the appropriate mimetype & set the character encoding to the one given
    // in '$contentTypeCharset' (which is defined in 'ini.inc.php'):
    setHeaderContentType($exportContentType, $contentTypeCharset);
    echo searchSuggestions($cqlQuery, $query);
} elseif (!isset($_REQUEST['query']) and !isset($_REQUEST['recordSchema']) and !isset($_REQUEST['maximumRecords']) and !isset($_REQUEST['startRecord']) and !isset($_REQUEST['stylesheet'])) {
    showQueryPage($operation, $viewType, $showRows, $rowOffset);
} elseif (empty($cqlQuery)) {
    returnDiagnostic(7, "query");
} elseif (!preg_match("/^((atom|rss)([ _]?xml)?|srw([ _]?(mods|dc))?([ _]?xml)?|html|json)\$/i", $recordSchema)) {
    returnDiagnostic(66, $recordSchema);
} else {
    // Write the current OpenSearch/CQL query into a session variable:
    // (this session variable is used by functions 'atomCollection()' and 'citeRecords()' (in 'cite_html.php') to re-establish the original OpenSearch/CQL query;
    //  function 'atomCollection()' uses the OpenSearch/CQL query to output 'opensearch.php' URLs instead of 'show.php' URLs)
    saveSessionVariable("cqlQuery", $cqlQuery);
    // function 'saveSessionVariable()' is defined in 'include.inc.php'
    // Build the correct query URL:
    // (we skip unnecessary parameters here since function 'generateURL()' and 'show.php' will use their default values for them)
    $queryParametersArray = array("where" => $query, "submit" => $displayType, "viewType" => $viewType, "exportStylesheet" => $exportStylesheet);
    // NOTE: The 'show.php' script allows anonymous users to query the 'cite_key' field (if a valid 'userID' is included in the query URL).
    //       However, this requires that the cite key is passed in the 'cite_key' URL parameter. Since 'opensearch.php' uses the 'where'
    //       parameter to pass its query, anonymous querying of the 'cite_key' field currently does not work for 'opensearch.php'. But
    //       querying of user-specific fields will work if a user is logged in.
    if (isset($_SESSION['loginEmail'])) {
        // we only include the 'userID' parameter if the user is logged in
Exemplo n.º 3
0
// Parse a (single-clause) CQL query and translate it into a refbase search array.
//
// Parameters:
//   $sruVersion - the SRU version given by the caller (accepted for interface compatibility,
//                 it is not evaluated by this function)
//   $sruQuery   - the CQL query string, e.g. 'dc.title all "foo bar"'; if it has no leading
//                 'index relation' part, a default index with relation 'all' is prepended
//   $operation  - if this matches 'suggest' (case-insensitive), the default index is 'main_fields'
//                 (instead of 'cql.serverChoice') and every word of the search term gets a
//                 trailing '*' masking character (unless the relation is 'exact')
//
// Returns:
//   array(array("_boolean" => "", "_query" => array(array("_boolean" => "", "_query" => <WHERE clause part>))))
//   An empty '$sruQuery' yields the catch-all clause 'serial RLIKE ".+"'.
//
// Errors:
//   For an unsupported context set (diagnostic 15), an unsupported index (16), an unsupported
//   set/index combination (10) or a malformed 'within' term (36), function 'returnDiagnostic()'
//   is called and the script exits.
//
// Note that relation modifiers and multiple query parts connected with boolean operators
// aren't supported yet.
function parseCQL($sruVersion, $sruQuery, $operation = "")
{
    global $word, $patternModifiers;
    // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'

    // map CQL indexes to refbase field names:
    $indexNamesArray = mapCQLIndexes();
    $searchArray = array();
    // initialize array that will hold information about context set, index name, relation and search value
    $searchSubArray1 = array();
    // --------------------------------
    if (!empty($sruQuery)) {
        // Matches '<set.index> <relation> <search term>':
        //   group 1 = the main relation (including any surrounding whitespace)
        //   group 2 = the search term (everything that follows the relation)
        $relationPattern = '/^[^\\" <>=]+( +(?:all|any|exact|within) +| *(?:<>|<=|>=|<|>|=) *)(.*)/s';

        // check for presence of context set/index name and any of the main relations:
        if (!preg_match($relationPattern, $sruQuery, $queryParts)) {
            // if no context set/index name and relation was given we'll add meaningful defaults:
            if (preg_match("/^suggest\$/i", $operation)) {
                $sruQuery = "main_fields all " . $sruQuery;
            } else {
                // we currently use 'cql.serverChoice' (since 'main_fields' isn't yet supported for regular OpenSearch queries)
                $sruQuery = "cql.serverChoice all " . $sruQuery;
            }
            // the prefixed query always matches the pattern:
            preg_match($relationPattern, $sruQuery, $queryParts);
        }

        // extract the context set (an empty string means: use the default context set):
        if (preg_match('/^([^\\" <>=.]+)\\./', $sruQuery)) {
            $contextSet = preg_replace('/^([^\\" <>=.]+)\\..*/', '\\1', $sruQuery);
        } else {
            $contextSet = "";
        }

        // extract the index:
        $indexName = preg_replace('/^(?:[^\\" <>=.]+\\.)?([^\\" <>=.]+).*/', '\\1', $sruQuery);
        // ----------------
        // return a fatal diagnostic if the CQL query does contain an unrecognized 'set.index' identifier:
        // (a) verify that the given context set (if any) is recognized:
        if (!empty($contextSet)) {
            $contextSetIndexConnector = ".";
            $contextSetLabel = "context set '" . $contextSet . "'";
            if (!preg_match("/^(dc|bath|rec|bib|cql)\$/", $contextSet)) {
                returnDiagnostic(15, $contextSet);
                // unsupported context set (function 'returnDiagnostic()' is defined in 'opensearch.php' and 'sru.php')
                exit;
            }
        } else {
            $contextSetIndexConnector = "";
            $contextSetLabel = "empty context set";
        }

        // (b) verify that the given 'set.index' term is recognized:
        $indexKey = $contextSet . $contextSetIndexConnector . $indexName;
        if (!isset($indexNamesArray[$indexKey])) {
            $indexKnownInOtherSet = isset($indexNamesArray[$indexName]);
            foreach (array("dc", "bath", "rec", "bib", "cql") as $knownContextSet) {
                if (isset($indexNamesArray[$knownContextSet . "." . $indexName])) {
                    $indexKnownInOtherSet = true;
                }
            }
            if ($indexKnownInOtherSet) {
                // unsupported combination of context set & index
                returnDiagnostic(10, "Unsupported combination of " . $contextSetLabel . " with index '" . $indexName . "'");
            } else {
                // unsupported index
                returnDiagnostic(16, $indexName);
            }
            exit;
        }
        $fieldName = $indexNamesArray[$indexKey];
        // ----------------
        // extract the main relation (relation modifiers aren't supported yet!)
        // and remove any runs of leading or trailing whitespace:
        $mainRelation = trim($queryParts[1]);
        // ----------------
        // extract the search term and remove slashes from it if 'magic_quotes_gpc = On':
        $searchTerm = stripSlashesIfMagicQuotes($queryParts[2]);
        // function 'stripSlashesIfMagicQuotes()' is defined in 'include.inc.php'
        // remove any leading or trailing quotes from the search term:
        // (note that multiple query parts connected with boolean operators aren't supported yet!)
        $searchTerm = preg_replace('/^\\"/', '', $searchTerm);
        $searchTerm = preg_replace('/\\"$/', '', $searchTerm);

        // Keep the term as entered for the relations that get translated into plain SQL comparisons
        // ('exact', 'within', '<', '<=', '>', '>='). These must NOT receive the regex-escaped variant
        // built below, otherwise a term like 'Smith, J.' would be compared as 'Smith, J\.'.
        $literalTerm = $searchTerm;

        // OpenSearch search suggestions ('$operation=suggest'): since CQL matches full words (not sub-strings),
        // we need to make sure that every search term ends with the '*' masking character.
        // The possessive quantifier ('++') prevents the regex engine from backtracking into a word that is
        // already followed by '?', '*' or '^' (which would turn 'fo*' into 'f*o*').
        if (preg_match("/^suggest\$/i", $operation) and $mainRelation != "exact") {
            $searchTerm = preg_replace("/([{$word}]++)(?![?*^])/{$patternModifiers}", "\\1*", $searchTerm);
        }
        // escape meta characters (including '/' that is used as delimiter for the PCRE replace functions below and which gets passed as second argument):
        $searchTerm = preg_quote($searchTerm, "/");
        // account for CQL anchoring ('^') and masking ('*' and '?') characters:
        // NOTE: above we quote everything to escape possible meta characters,
        //       so all special chars in the patterns below have to be matched in their escaped form!
        //       (The expression '\\\\' in the patterns below describes only *one* backslash! -> '\'.
        //        The reason for this is that before the regex engine can interpret the \\ into \, PHP interprets it.
        //        Thus, you have to escape your backslashes twice: once for PHP, and once for the regex engine.)
        //
        // more info about masking characters in CQL:  <http://zing.z3950.org/cql/intro.html#6>
        // more info about word anchoring in CQL:      <http://zing.z3950.org/cql/intro.html#6.1>
        // recognize any anchor at the beginning of a search term (like '^foo'):
        // (in CQL, a word beginning with ^ must be the first in its field)
        $searchTerm = preg_replace('/(^| )\\\\\\^/', '\\1^', $searchTerm);
        // convert any anchor at the end of a search term (like 'foo^') to the correct MySQL variant ('foo$'):
        // (in CQL, a word ending with ^ must be the last in its field)
        $searchTerm = preg_replace('/\\\\\\^( |$)/', '$\\1', $searchTerm);
        // recognize any masking ('*' and '?') characters:
        // Note: by "character" we do refer to *word* characters here, i.e., any character that is not a space or punctuation character;
        //       however, it is unclear whether the masking characters '*' and '?' should also include non-word characters!
        // a single asterisk ('*') is used to mask zero or more characters:
        $searchTerm = preg_replace('/(?<!\\\\)\\\\\\*/', '[^[:space:][:punct:]]*', $searchTerm);
        // a single question mark ('?') is used to mask a single character, thus N consecutive question-marks means mask N characters:
        $searchTerm = preg_replace('/(?<!\\\\)\\\\\\?/', '[^[:space:][:punct:]]', $searchTerm);
        // ----------------
        // construct the WHERE clause (it starts with the field name):
        $whereClausePart = $fieldName;

        // Regex fragments that delimit a full word for the word-matching relations.
        // NOTE: For word-matching relations (like 'all', 'any' or '=') we could also use word boundaries
        //       ([[:<:]] and [[:>:]]) which would be more (too?) restrictive. They match the beginning and end
        //       of words, respectively, where a word character is an alphanumeric character or an underscore (_).
        $wordStart = "(^|[[:space:][:punct:]])";
        $wordEnd = "([[:space:][:punct:]]|\$)";

        switch ($mainRelation) {
            case "all":
                if (preg_match("/ /", $searchTerm)) {
                    // every given word must occur in the field:
                    $whereClauseSubPartsArray = array();
                    foreach (preg_split("/ +/", $searchTerm) as $searchTermItem) {
                        $whereClauseSubPartsArray[] = " RLIKE " . quote_smart($wordStart . $searchTermItem . $wordEnd);
                    }
                    $whereClausePart .= implode(" AND " . $fieldName, $whereClauseSubPartsArray);
                } else {
                    $whereClausePart .= " RLIKE " . quote_smart($wordStart . $searchTerm . $wordEnd);
                }
                break;

            case "any":
                // at least one of the given words must occur in the field:
                $searchTerm = splitAndMerge("/ +/", "|", $searchTerm);
                // function 'splitAndMerge()' is defined in 'include.inc.php'
                $whereClausePart .= " RLIKE " . quote_smart($wordStart . "(" . $searchTerm . ")" . $wordEnd);
                break;

            case "exact":
                // 'exact' is used for exact string matching, i.e., it matches field contents exactly
                $whereClausePart .= " = " . quote_smart($literalTerm);
                break;

            case "within":
                if (preg_match("/[^ ]+ [^ ]+/", $literalTerm)) {
                    $searchTermArray = preg_split("/ +/", $literalTerm);
                    $whereClausePart .= " >= " . quote_smart($searchTermArray[0]) . " AND " . $fieldName . " <= " . quote_smart($searchTermArray[1]);
                } else {
                    returnDiagnostic(36, "Search term requires two space-separated dimensions. Example: dc.date within \"2004 2005\"");
                    exit;
                }
                break;

            case "=":
                // matches full words (not sub-strings); '=' is used for word adjacency, the words appear in that order with no others intervening
                $whereClausePart .= " RLIKE " . quote_smart($wordStart . $searchTerm . $wordEnd);
                break;

            case "<>":
                // does this also match full words (and not sub-strings) ?:-/
                $whereClausePart .= " NOT RLIKE " . quote_smart($wordStart . $searchTerm . $wordEnd);
                break;

            case "<":
            case "<=":
            case ">":
            case ">=":
                // ordering relations map directly onto the SQL operator of the same name
                $whereClausePart .= " " . $mainRelation . " " . quote_smart($literalTerm);
                break;
        }
        $searchSubArray1[] = array("_boolean" => "", "_query" => $whereClausePart);
    } else {
        // no query given: match every record that has a serial
        $searchSubArray1[] = array("_boolean" => "", "_query" => "serial RLIKE " . quote_smart(".+"));
    }
    // --------------------------------
    if (!empty($searchSubArray1)) {
        $searchArray[] = array("_boolean" => "", "_query" => $searchSubArray1);
    }
    return $searchArray;
}