Example #1
0
/**
 * Finds duplicate records within the results of the given SQL query and
 * builds a new query that displays just these duplicates.
 *
 * The fields to compare and the normalization options are read from the
 * request ('matchFieldsSelector', 'ignoreWhitespace', 'ignorePunctuation',
 * 'ignoreCharacterCase', 'ignoreAuthorInitials', 'nonASCIIChars').
 * For every row, the values of the selected fields are concatenated into an
 * identifier string; rows sharing the same (normalized) identifier string are
 * treated as duplicates of each other.
 *
 * If the form data are invalid, the errors and form variables are saved to
 * the session, the user is redirected to 'duplicate_search.php' and script
 * execution ends.
 *
 * @param string $sqlQuery            the original SQL query (must start with "SELECT")
 * @param string $originalDisplayType the view that was active when the user started the duplicate search
 *
 * @return array two elements: the SQL query that displays the duplicates, and
 *               the display type (NULL if '$originalDisplayType' was empty)
 */
function findDuplicates($sqlQuery, $originalDisplayType)
{
    global $tableRefs, $tableUserData;
    // defined in 'db.inc.php'
    global $alnum, $alpha, $cntrl, $dash, $digit, $graph, $lower, $print, $punct, $space, $upper, $word, $patternModifiers;
    // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'

    // Re-assign the correct display type (i.e. the view that was active when the user clicked the 'dups' link in the header).
    // '$displayType' is explicitly initialized so that the return statement never reads an undefined variable:
    $displayType = null;
    if (!empty($originalDisplayType)) {
        $displayType = $originalDisplayType;
    }

    // Extract form variables provided by the 'duplicateSearch' form in 'duplicate_search.php':
    // The field list may be given as a string containing a (e.g. comma delimited) list of field names, or as an array.
    // Since the field names get inserted verbatim into the SELECT and ORDER BY clauses, BOTH variants are reduced
    // to names that consist only of lowercase letters and underscores (anything else acts as a delimiter):
    $selectedFieldsArray = array();
    if (isset($_REQUEST['matchFieldsSelector'])) {
        $requestedFields = $_REQUEST['matchFieldsSelector'];
        if (!is_array($requestedFields)) {
            $requestedFields = array($requestedFields);
        }
        foreach ($requestedFields as $requestedField) {
            if (!is_string($requestedField)) {
                continue; // ignore nested arrays and other unexpected input
            }
            foreach (preg_split("/[^a-z_]+/", $requestedField, -1, PREG_SPLIT_NO_EMPTY) as $sanitizedField) {
                $selectedFieldsArray[] = $sanitizedField;
            }
        }
    }

    // Checkbox options are only considered as set if they were submitted with a value of "1":
    $ignoreWhitespace = (isset($_REQUEST['ignoreWhitespace']) and $_REQUEST['ignoreWhitespace'] == "1") ? "1" : "0";
    $ignorePunctuation = (isset($_REQUEST['ignorePunctuation']) and $_REQUEST['ignorePunctuation'] == "1") ? "1" : "0";
    $ignoreCharacterCase = (isset($_REQUEST['ignoreCharacterCase']) and $_REQUEST['ignoreCharacterCase'] == "1") ? "1" : "0";
    $ignoreAuthorInitials = (isset($_REQUEST['ignoreAuthorInitials']) and $_REQUEST['ignoreAuthorInitials'] == "1") ? "1" : "0";

    // How to treat non-ASCII characters ("strip", "transliterate"; any other value keeps them as is):
    $nonASCIIChars = isset($_REQUEST['nonASCIIChars']) ? $_REQUEST['nonASCIIChars'] : "keep";

    // VALIDATE FORM DATA:
    $errors = array();

    // Validate the field selector:
    if (empty($selectedFieldsArray)) {
        $errors["matchFieldsSelector"] = "You must select at least one field:";
    }

    // Validate the 'SQL Query' field:
    if (empty($sqlQuery)) {
        $errors["sqlQuery"] = "You must specify a query string:";
    } elseif (!preg_match("/^SELECT/i", $sqlQuery)) {
        $errors["sqlQuery"] = "You can only execute SELECT queries:";
    }

    // Check if there were any errors:
    if (count($errors) > 0) {
        // In case of an error, we write all form variables back to the '$formVars' array
        // (which 'duplicate_search.php' requires to reload form values):
        $formVars = array();
        foreach ($_REQUEST as $varname => $value) {
            $formVars[$varname] = $value;
        }

        // Since checkbox form fields do only get included in the '$_REQUEST' array if they were marked,
        // we have to add appropriate array elements for all checkboxes that weren't set:
        foreach (array("ignoreWhitespace", "ignorePunctuation", "ignoreCharacterCase", "ignoreAuthorInitials", "showLinks") as $checkboxName) {
            if (!isset($formVars[$checkboxName])) {
                $formVars[$checkboxName] = "0";
            }
        }

        // Write back session variables:
        saveSessionVariable("errors", $errors);
        // function 'saveSessionVariable()' is defined in 'include.inc.php'
        saveSessionVariable("formVars", $formVars);

        // There are errors. Relocate back to 'duplicate_search.php':
        header("Location: duplicate_search.php");
        exit;
        // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> !EXIT! <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
    }

    // CONSTRUCT SQL QUERY (1. DUPLICATE SEARCH):
    // To identify any duplicates within the results of the original query, we build a new query based on the original SQL query:
    $query = $sqlQuery;

    // Replace SELECT list of columns with those from '$selectedFieldsArray' (plus the 'serial' column):
    $selectedFieldsString = implode(", ", $selectedFieldsArray);
    $query = newSELECTclause("SELECT " . $selectedFieldsString . ", serial", $query, false);
    // function 'newSELECTclause()' is defined in 'include.inc.php'

    // Replace any existing ORDER BY clause with the list of columns given in '$selectedFieldsArray':
    $query = newORDERclause("ORDER BY " . $selectedFieldsString, $query, false);
    // function 'newORDERclause()' is defined in 'include.inc.php'

    // Fix escape sequences within the SQL query:
    $query = stripSlashesIfMagicQuotes($query);

    // RUN the query on the database through the connection:
    $result = queryMySQLDatabase($query);
    // function 'queryMySQLDatabase()' is defined in 'include.inc.php'

    // PROCESS RESULTS:
    $recordSerialsArray = array(); // maps each record identifier string to the serials of all records that produced it
    $duplicateRecordSerialsArray = array();
    $rowsFound = @mysql_num_rows($result);

    // Identify any records with matching field data:
    if ($rowsFound > 0) {
        // Count the number of fields:
        $fieldsFound = mysql_num_fields($result);

        // Loop over each row in the result set:
        while ($row = @mysql_fetch_array($result)) {
            $recordIdentifier = "";
            // make sure our buffer variable is empty

            // For each row, loop over each field (except for the last one which is the 'serial' field):
            for ($i = 0; $i < $fieldsFound - 1; $i++) {
                // fetch the current attribute name:
                $fieldName = getMySQLFieldInfo($result, $i, "name");
                // function 'getMySQLFieldInfo()' is defined in 'include.inc.php'

                // normalize author names:
                if ($fieldName == "author" and $ignoreAuthorInitials == "1") {
                    // this is a stupid hack that maps the names of the '$row' array keys to those used
                    // by the '$formVars' array (which is required by function 'parsePlaceholderString()')
                    // (eventually, the '$formVars' array should use the MySQL field names as names for its array keys)
                    $formVars = buildFormVarsArray($row);
                    // function 'buildFormVarsArray()' is defined in 'include.inc.php'

                    // ignore initials in author names:
                    $row[$i] = parsePlaceholderString($formVars, "<:authors[0||]:>", "");
                    // function 'parsePlaceholderString()' is defined in 'include.inc.php'
                }

                // merge all field values to form a unique record identifier string:
                $recordIdentifier .= $row[$i];
            }

            // Normalize record identifier string:
            if ($ignoreWhitespace == "1") {
                // ignore whitespace
                $recordIdentifier = preg_replace("/\\s+/", "", $recordIdentifier);
            }
            if ($ignorePunctuation == "1") {
                // ignore punctuation
                $recordIdentifier = preg_replace("/[{$punct}]+/{$patternModifiers}", "", $recordIdentifier);
            }
            if ($ignoreCharacterCase == "1") {
                // ignore character case
                $recordIdentifier = strtolower($recordIdentifier);
            }
            if ($nonASCIIChars == "strip") {
                // strip non-ASCII characters
                $recordIdentifier = handleNonASCIIAndUnwantedCharacters($recordIdentifier, "\\S\\s", "strip");
            } elseif ($nonASCIIChars == "transliterate") {
                // transliterate non-ASCII characters
                $recordIdentifier = handleNonASCIIAndUnwantedCharacters($recordIdentifier, "\\S\\s", "transliterate");
            }

            // Store this record's serial number in the sub-array that belongs to its identifier string
            // (a new sub-array is created if the identifier string hasn't been seen yet):
            if (isset($recordSerialsArray[$recordIdentifier])) {
                $recordSerialsArray[$recordIdentifier][] = $row["serial"];
            } else {
                $recordSerialsArray[$recordIdentifier] = array($row["serial"]);
            }
        }

        // Collect the serials of all records whose identifier string occurred more than once:
        foreach ($recordSerialsArray as $recordSerials) {
            if (count($recordSerials) > 1) {
                foreach ($recordSerials as $recordSerial) {
                    $duplicateRecordSerialsArray[] = $recordSerial;
                }
            }
        }
    } else {
        // TODO!
    }

    // if no duplicate records were found, the non-existing serial number '0' will result in a "nothing found" feedback:
    if (empty($duplicateRecordSerialsArray)) {
        $duplicateRecordSerialsArray[] = "0";
    }

    // CONSTRUCT SQL QUERY (2. DUPLICATES DISPLAY):
    // To display any duplicates that were found within the results of the original query, we build again a new query based on the original SQL query:
    $query = $sqlQuery;

    // Replace WHERE clause:
    // (the pattern only matches text that follows "WHERE ", so a query without a WHERE clause is left unchanged here)
    // TODO: maybe make this into a generic function? (compare with function 'extractWHEREclause()' in 'include.inc.php')
    $duplicateRecordSerialsString = implode("|", $duplicateRecordSerialsArray);
    $query = preg_replace("/(?<=WHERE )(.+?)(?= ORDER BY| LIMIT| GROUP BY| HAVING| PROCEDURE| FOR UPDATE| LOCK IN|[ ;]+(SELECT|INSERT|UPDATE|DELETE|CREATE|ALTER|DROP|FILE)\\b|\$)/i", "serial RLIKE \"^(" . $duplicateRecordSerialsString . ")\$\"", $query);

    // Replace any existing ORDER BY clause with the list of columns given in '$selectedFieldsArray':
    $query = newORDERclause("ORDER BY " . $selectedFieldsString, $query, false);

    return array($query, $displayType);
}
Example #2
0
/**
 * Modifies the given SQL query according to the "Search within Results" and
 * "Display Options" forms of the results header.
 *
 * Depending on the submit button that was pressed ('$displayType'), this
 * function either restricts the WHERE clause by an additional search term
 * ("Search"), shows or hides fields in the SELECT clause ("Show"/"Hide"), or
 * switches the field that's summarized in Browse view ("Browse").
 *
 * Form values are read from the request ('refineSearchSelector',
 * 'refineSearchName', 'refineSearchExclude', 'displayOptionsSelector').
 * Wherever the chosen field name gets inserted into the SQL query, it must be
 * a plain (optionally dot-qualified) identifier; otherwise the respective
 * modification is skipped and the query is left as it was.
 *
 * @param string $queryTable          name of the queried table (compared against '$tableRefs' and '$tableUsers')
 * @param string $displayType         title of the submit button that was pressed
 * @param string $originalDisplayType the view that was active when the form was submitted
 * @param string $query               the current SQL query
 * @param mixed  $showLinks           passed through to 'buildSELECTclause()'
 * @param string $citeOrder           sort order for Citation view ('author', 'year', 'type', 'type-year' or 'creation-date')
 * @param mixed  $userID              ID of the current user; inserted as is into the 'LEFT JOIN' statement
 *                                    (NOTE(review): assumed to be a trusted numeric value -- confirm at the call sites)
 *
 * @return array two elements: the modified SQL query and '$originalDisplayType'
 */
function extractFormElementsRefineDisplay($queryTable, $displayType, $originalDisplayType, $query, $showLinks, $citeOrder, $userID)
{
    global $tableRefs, $tableUserData, $tableUsers;
    // defined in 'db.inc.php'
    global $loc;
    // '$loc' is made globally available in 'core.php'

    $encodedDisplayType = encodeHTML($displayType);
    // note that we need to HTML encode '$displayType' for comparison with the HTML encoded locales

    // Determine which submit button was pressed. The locale strings are compared literally
    // (instead of being embedded into a regular expression, where special characters would change their meaning):
    $isSearchAction = ($encodedDisplayType == $loc["ButtonTitle_Search"]);
    $isShowAction = ($encodedDisplayType == $loc["ButtonTitle_Show"]);
    $isHideAction = ($encodedDisplayType == $loc["ButtonTitle_Hide"]);
    $isBrowseAction = ($encodedDisplayType == $loc["ButtonTitle_Browse"]);

    // extract form variables:
    // (the defaults avoid 'Undefined variable...' messages, e.g. when a user has changed the language setting on the
    //  options page, and then reloads an existing page whose URL still has a 'submit' value in the previously used language)
    $fieldSelector = "";
    $refineSearchName = "";
    $refineSearchActionCheckbox = "0";
    // "0" means that the user did NOT mark the checkbox next to "Exclude matches"

    if ($isSearchAction) {
        if (isset($_REQUEST['refineSearchSelector'])) {
            $fieldSelector = $_REQUEST['refineSearchSelector'];
            // extract field name chosen by the user
        }
        if (isset($_REQUEST['refineSearchName'])) {
            $refineSearchName = $_REQUEST['refineSearchName'];
            // extract search text entered by the user
        }
        if (isset($_REQUEST['refineSearchExclude'])) {
            $refineSearchActionCheckbox = $_REQUEST['refineSearchExclude'];
            // extract user option whether matched records should be included or excluded
        }
    } elseif ($isShowAction or $isHideAction or $isBrowseAction) {
        if (isset($_REQUEST['displayOptionsSelector'])) {
            $fieldSelector = $_REQUEST['displayOptionsSelector'];
        }
    }

    // request parameters may also arrive as arrays; we only accept plain strings:
    if (!is_string($fieldSelector)) {
        $fieldSelector = "";
    }
    if (!is_string($refineSearchName)) {
        $refineSearchName = "";
    }

    // '$fieldSelector' is user input. It may only be inserted into the SQL query (or used as part of a
    // search pattern) if it is a plain, optionally dot-qualified, identifier:
    $fieldSelectorIsIdentifier = (preg_match('/^[A-Za-z_][A-Za-z0-9_.]*$/D', $fieldSelector) == 1);

    // extract the fields of the SELECT clause from the current SQL query:
    $previousSelectClause = extractSELECTclause($query);

    // ensure to add any required fields to the SELECT clause:
    // - 'search.php' (table '$tableRefs'): required fields get added by function 'buildSELECTclause()'
    // - 'users.php' (table '$tableUsers'): we'll add any required fields to the SELECT clause below
    // - any other table: no required fields get added (this variable was previously left undefined in that case)
    // TODO: this wouldn't be necessary if function 'buildSELECTclause()' would handle the requirements of 'users.php'
    $addRequiredFields = ($queryTable == $tableRefs);

    $additionalFields = "";

    if ($isSearchAction) {
        // rebuild the current SELECT clause:
        $newSelectClause = buildSELECTclause($originalDisplayType, $showLinks, $additionalFields, false, $addRequiredFields, $previousSelectClause);

        // replace current SELECT clause:
        $query = newSELECTclause($newSelectClause, $query, false);

        // if the user did NOT type a search string into the text entry field (or if no valid field name
        // was given), we simply keep the old WHERE clause:
        if ($refineSearchName != "" and $fieldSelectorIsIdentifier) {
            $quotedSearchName = quote_smart($refineSearchName);

            // Depending on the chosen output action, construct an appropriate search condition:
            if ($refineSearchActionCheckbox == "0") {
                // for the fields 'marked=no', 'copy=false' and 'selected=no', force NULL values to be matched:
                $matchNullValues = (($fieldSelector == "marked" && $refineSearchName == "no")
                    || ($fieldSelector == "copy" && $refineSearchName == "false")
                    || ($fieldSelector == "selected" && $refineSearchName == "no"));

                if ($matchNullValues) {
                    $searchCondition = "({$fieldSelector} RLIKE {$quotedSearchName} OR {$fieldSelector} IS NULL)";
                } else {
                    $searchCondition = "{$fieldSelector} RLIKE {$quotedSearchName}";
                }
            } else {
                // exclude matched records:
                $searchCondition = "({$fieldSelector} NOT RLIKE {$quotedSearchName} OR {$fieldSelector} IS NULL)";
            }

            // 'preg_replace()' interprets backslashes and '$n' within its replacement string. Without escaping them,
            // the doubled backslashes produced by 'quote_smart()' would get collapsed again (which would undo the SQL
            // escaping), and a search term such as '$1' would be treated as a back-reference:
            $replacement = strtr(" WHERE " . $searchCondition . " AND ", array('\\' => '\\\\', '$' => '\\$'));

            // ...add search field name & value to the SQL query:
            $query = preg_replace("/ WHERE /i", $replacement, $query);

            // remove any 'AND serial RLIKE ".+"' which isn't required anymore:
            $query = preg_replace('/ AND serial RLIKE "\\.\\+"/i', '', $query);
        }
    } elseif ($isShowAction or $isHideAction) {
        if (preg_match("/^Cite\$/i", $originalDisplayType)) {
            // generate a SELECT clause that's appropriate for Citation view (or Details view):
            $newSelectClause = buildSELECTclause($originalDisplayType, $showLinks, $additionalFields, false, $addRequiredFields);

            // rebuild the current ORDER clause:
            $citeOrderClauses = array(
                // supply the default ORDER BY pattern (which is suitable for citation in a journal etc.):
                "author" => "ORDER BY first_author, author_count, author, year, title",
                // sort records first by year (descending):
                "year" => "ORDER BY year DESC, first_author, author_count, author, title",
                // sort records first by record type and thesis type (descending):
                "type" => "ORDER BY type DESC, thesis DESC, first_author, author_count, author, year, title",
                // sort records first by record type and thesis type (descending), then by year (descending):
                "type-year" => "ORDER BY type DESC, thesis DESC, year DESC, first_author, author_count, author, title",
                // sort records such that newly added/edited records get listed top of the list:
                "creation-date" => "ORDER BY created_date DESC, created_time DESC, modified_date DESC, modified_time DESC, serial DESC"
            );

            // the sort order is matched case-insensitively (so that e.g. 'Year' selects the same clause as 'year'):
            $citeOrderKey = is_string($citeOrder) ? strtolower($citeOrder) : "";

            if (isset($citeOrderClauses[$citeOrderKey])) {
                // replace current ORDER clause:
                $query = newORDERclause($citeOrderClauses[$citeOrderKey], $query, false);
            }
            // else if any other or no '$citeOrder' parameter is specified, we keep the current ORDER BY clause
            // NOTE: this behaviour is different from functions 'extractFormElementsQueryResults()' and 'extractFormElementsExtract()'
            //       where we always use 'ORDER BY first_author, author_count, author, year, title' as default ORDER BY clause
            //       (to ensure correct sorting for output to bibliographic reference lists)
        } elseif (preg_match("/^Display\$/i", $originalDisplayType)) {
            // NOTE: the below code for displaying & hiding of fields in Details view must be adopted if either layout or field names are changed!
            // (in Details view, '$fieldSelector' is only compared against a fixed set of aliases and never inserted into the query)
            $fieldsList = "";

            if ($fieldSelector == "all fields") {
                // generate a SELECT clause that shows all fields in Details view:
                $newSelectClause = buildSELECTclause($originalDisplayType, $showLinks, $additionalFields, true, $addRequiredFields);
            } else {
                if ($isShowAction) {
                    // '$matchField' is the field after which the chosen fields get inserted:
                    $matchField = "pages";

                    if ($fieldSelector == "keywords, abstract") {
                        $fieldsList = ", keywords, abstract";
                    } elseif ($fieldSelector == "additional fields") {
                        $fieldsList = ", address, corporate_author, thesis, publisher, place, editor, language, summary_language, orig_title, series_editor, series_title, abbrev_series_title, series_volume, series_issue, edition, issn, isbn, medium, area, expedition, conference, notes, approved";

                        // we only add the 'location' field if the user is logged in:
                        if (isset($_SESSION['loginEmail'])) {
                            $fieldsList .= ", location";
                        }

                        if (preg_match("/\\babstract\\b/i", $previousSelectClause)) {
                            $matchField = "abstract";
                        }
                    } elseif ($fieldSelector == "my fields") {
                        $fieldsList = ", marked, copy, selected, user_keys, user_notes, user_file, user_groups, cite_key";

                        if (preg_match("/\\bserial\\b/i", $previousSelectClause)) {
                            $matchField = "serial";
                        } elseif (preg_match("/\\babstract\\b/i", $previousSelectClause)) {
                            $matchField = "abstract";
                        }
                    }

                    // if the chosen list of fields isn't currently part of the SELECT clause...
                    if (!empty($fieldsList) and !preg_match("/\\b" . $fieldsList . "\\b/i", $previousSelectClause)) {
                        // ...add the chosen fields to the current SELECT clause:
                        $previousSelectClause = preg_replace("/(?<=\\b" . $matchField . "\\b)/i", $fieldsList, $previousSelectClause);
                    }
                }

                if ($isHideAction) {
                    if ($fieldSelector == "keywords, abstract") {
                        $fieldsList = "\\b(keywords|abstract)\\b";
                    } elseif ($fieldSelector == "additional fields") {
                        $fieldsList = "\\b(corporate_author|thesis|address|publisher|place|editor|language|summary_language|orig_title|series_editor|series_title|abbrev_series_title|series_volume|series_issue|edition|issn|isbn|medium|area|expedition|conference|notes|approved|location)\\b";
                    } elseif ($fieldSelector == "my fields") {
                        $fieldsList = "\\b(marked|copy|selected|user_keys|user_notes|user_file|user_groups|cite_key)\\b";
                    }

                    // if any of the chosen fields is currently displayed...
                    if (!empty($fieldsList) and preg_match("/\\b" . $fieldsList . "\\b/i", $previousSelectClause)) {
                        // ...remove the chosen fields from the fields given in the current SELECT clause:
                        $previousSelectClause = preg_replace("/ *, *" . $fieldsList . " */i", "", $previousSelectClause);
                        // all columns except the first
                        $previousSelectClause = preg_replace("/ *" . $fieldsList . " *, */i", "", $previousSelectClause);
                        // all columns except the last
                    }
                }

                // rebuild the current SELECT clause, but include (or exclude) the chosen fields:
                $newSelectClause = buildSELECTclause($originalDisplayType, $showLinks, $additionalFields, false, $addRequiredFields, $previousSelectClause);
            }
        } else {
            // any other view: '$fieldSelector' is expected to name a single field.
            // An empty or invalid field name is ignored (an empty name would otherwise produce a pattern
            // that matches at every word boundary and thus corrupt the SELECT clause):
            if ($fieldSelectorIsIdentifier) {
                $fieldPattern = "\\b" . preg_quote($fieldSelector, "/") . "\\b";
                $fieldIsDisplayed = (preg_match("/" . $fieldPattern . "/i", $previousSelectClause) == 1);

                if ($isShowAction) {
                    // if the chosen field is *not* already displayed...
                    if (!$fieldIsDisplayed) {
                        // ...add the chosen field to the current SELECT clause:
                        $additionalFields = $fieldSelector;
                    }
                } elseif ($isHideAction) {
                    // if the chosen field is currently displayed...
                    if ($fieldIsDisplayed) {
                        // ...remove the chosen field from the fields given in the current SELECT clause:
                        $previousSelectClause = preg_replace("/ *, *" . $fieldPattern . " */i", "", $previousSelectClause);
                        // all columns except the first
                        $previousSelectClause = preg_replace("/ *" . $fieldPattern . " *, */i", "", $previousSelectClause);
                        // all columns except the last
                    }
                }
            }

            // rebuild the current SELECT clause, but include (or exclude) the chosen field:
            $newSelectClause = buildSELECTclause("", $showLinks, $additionalFields, false, $addRequiredFields, $previousSelectClause);
        }

        // replace current SELECT clause:
        $query = newSELECTclause($newSelectClause, $query, false);
    } elseif ($isBrowseAction) {
        // extract the field that was previously used in Browse view:
        $previousField = preg_replace("/^SELECT (\\w+).+/i", "\\1", $query);

        // if a valid field was chosen that differs from the previously used one...
        if ($fieldSelectorIsIdentifier and strcasecmp($fieldSelector, $previousField) != 0) {
            // ...modify the SQL query to show a summary for the new field that was chosen by the user:
            // (NOTE: these replace patterns aren't 100% safe and may fail if the user has modified the query using 'sql_search.php'!)
            $previousFieldPattern = preg_quote($previousField, "/");
            // ('$fieldSelector' has been validated above and thus cannot contain any '\' or '$' characters
            //  that would have a special meaning within a replacement string)

            $query = preg_replace("/^SELECT " . $previousFieldPattern . "/i", "SELECT " . $fieldSelector, $query);
            // use the field that was chosen by the user for Browse view
            $query = preg_replace("/GROUP BY " . $previousFieldPattern . "/i", "GROUP BY " . $fieldSelector, $query);
            // group data by the field that was chosen by the user
            $query = preg_replace("/ORDER BY( records( DESC)?,)? " . $previousFieldPattern . "/i", "ORDER BY\\1 " . $fieldSelector, $query);
            // order data by the field that was chosen by the user
        }
    }

    // re-establish the original display type:
    // (resetting '$displayType' to its original value is required for Browse view; for List view, it does also correct incorrect
    //  display types such as 'Search' or 'Show'/'Hide' which stem from the submit buttons in the forms of the results header)
    $displayType = $originalDisplayType;

    // the following changes to the SQL query are performed for both forms ("Search within Results" and "Display Options"):
    if ($queryTable == $tableRefs) {
        // if the chosen field is one of the user-specific fields from table 'user_data': 'marked', 'copy', 'selected', 'user_keys', 'user_notes', 'user_file', 'user_groups', 'cite_key' or 'related'
        // ('my fields' is used in Details view as an alias for all user-specific fields)
        if (preg_match("/^(marked|copy|selected|user_keys|user_notes|user_file|user_groups|cite_key|related|my fields)\$/i", $fieldSelector)) {
            // ...and if the 'LEFT JOIN...' statement isn't already part of the 'FROM' clause...
            if (!preg_match("/LEFT JOIN {$tableUserData}/i", $query)) {
                // ...add the 'LEFT JOIN...' part to the 'FROM' clause:
                $query = preg_replace("/ FROM {$tableRefs}/i", " FROM {$tableRefs} LEFT JOIN {$tableUserData} ON serial = record_id AND user_id = {$userID}", $query);
            }
        }
    } elseif ($queryTable == $tableUsers) {
        // TODO: this wouldn't be necessary if function 'buildSELECTclause()' would handle the requirements of 'users.php' (see also above)
        $query = preg_replace("/ FROM {$tableUsers}/i", ", user_id FROM {$tableUsers}", $query);
        // add 'user_id' column (although it won't be visible the 'user_id' column gets included in every search query)
        // (which is required in order to obtain unique checkbox names as well as for use in the 'getUserID()' function)
    }

    return array($query, $displayType);
}