// Builds an SQL query that displays all records from the results of '$sqlQuery' which
// hold matching data in the fields chosen in the 'duplicateSearch' form of 'duplicate_search.php'.
//
// Steps:
//   1. read & validate the form options from '$_REQUEST' (on error, the errors and form values
//      are saved to the session, the user is redirected to 'duplicate_search.php' and the script exits)
//   2. run a variant of '$sqlQuery' that selects only the chosen fields plus 'serial'
//   3. build a normalized identifier string per record and group the record serials by it
//   4. rewrite the WHERE clause of '$sqlQuery' so that it matches only serials of duplicate records
//
// Parameters:
//   $sqlQuery            - the original SQL query (must start with 'SELECT')
//   $originalDisplayType - the view that was active when the user clicked the 'dups' link in the header
//
// Returns:
//   array($query, $displayType) - the rewritten SQL query and the display type to use
//   ('$displayType' is NULL if '$originalDisplayType' was empty)
function findDuplicates($sqlQuery, $originalDisplayType)
{
	global $tableRefs, $tableUserData; // defined in 'db.inc.php'
	global $alnum, $alpha, $cntrl, $dash, $digit, $graph, $lower, $print, $punct, $space, $upper, $word, $patternModifiers; // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'

	// re-assign the correct display type (i.e. the view that was active when the user clicked the 'dups' link in the header):
	// (we initialize the variable explicitly so that the returned value is always defined)
	$displayType = null;

	if (!empty($originalDisplayType))
		$displayType = $originalDisplayType;


	// Extract form variables provided by the 'duplicateSearch' form in 'duplicate_search.php':
	$selectedFieldsArray = array();

	if (isset($_REQUEST['matchFieldsSelector']))
	{
		if (is_string($_REQUEST['matchFieldsSelector']))
		{
			// we accept a string containing a (e.g. comma delimited) list of field names
			$selectedFieldsArray = preg_split("/[^a-z_]+/", $_REQUEST['matchFieldsSelector'], -1, PREG_SPLIT_NO_EMPTY);
		}
		else
		{
			// the field list is provided as array:
			// since the field names get inserted verbatim into the SELECT and ORDER BY clauses below,
			// we only accept items that consist of the same characters that are allowed in the string variant
			foreach ((array) $_REQUEST['matchFieldsSelector'] as $requestedField)
			{
				if (is_string($requestedField) and preg_match('/^[a-z_]+\z/', $requestedField))
					$selectedFieldsArray[] = $requestedField;
			}
		}
	}

	// checkboxes are only present in '$_REQUEST' if they were marked:
	$ignoreWhitespace = (isset($_REQUEST['ignoreWhitespace']) and $_REQUEST['ignoreWhitespace'] == "1") ? "1" : "0";
	$ignorePunctuation = (isset($_REQUEST['ignorePunctuation']) and $_REQUEST['ignorePunctuation'] == "1") ? "1" : "0";
	$ignoreCharacterCase = (isset($_REQUEST['ignoreCharacterCase']) and $_REQUEST['ignoreCharacterCase'] == "1") ? "1" : "0";
	$ignoreAuthorInitials = (isset($_REQUEST['ignoreAuthorInitials']) and $_REQUEST['ignoreAuthorInitials'] == "1") ? "1" : "0";

	$nonASCIIChars = isset($_REQUEST['nonASCIIChars']) ? $_REQUEST['nonASCIIChars'] : "keep";


	// VALIDATE FORM DATA:
	$errors = array();

	// Validate the field selector:
	if (empty($selectedFieldsArray))
		$errors["matchFieldsSelector"] = "You must select at least one field:";

	// Validate the 'SQL Query' field:
	if (empty($sqlQuery))
		$errors["sqlQuery"] = "You must specify a query string:";
	elseif (!preg_match("/^SELECT/i", $sqlQuery))
		$errors["sqlQuery"] = "You can only execute SELECT queries:";

	// Check if there were any errors:
	if (count($errors) > 0)
	{
		// In case of an error, we write all form variables back to the '$formVars' array
		// (which 'duplicate_search.php' requires to reload form values):
		$formVars = $_REQUEST;

		// Since checkbox form fields do only get included in the '$_REQUEST' array if they were marked,
		// we have to add appropriate array elements for all checkboxes that weren't set:
		foreach (array("ignoreWhitespace", "ignorePunctuation", "ignoreCharacterCase", "ignoreAuthorInitials", "showLinks") as $checkboxName)
		{
			if (!isset($formVars[$checkboxName]))
				$formVars[$checkboxName] = "0";
		}

		// Write back session variables:
		saveSessionVariable("errors", $errors); // function 'saveSessionVariable()' is defined in 'include.inc.php'
		saveSessionVariable("formVars", $formVars);

		// There are errors. Relocate back to 'duplicate_search.php':
		header("Location: duplicate_search.php");

		exit; // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> !EXIT! <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
	}


	// CONSTRUCT SQL QUERY (1. DUPLICATE SEARCH):
	// To identify any duplicates within the results of the original query, we build a new query based on the original SQL query:
	$query = $sqlQuery;

	// Replace SELECT list of columns with those from '$selectedFieldsArray' (plus the 'serial' column):
	$selectedFieldsString = implode(", ", $selectedFieldsArray);
	$query = newSELECTclause("SELECT " . $selectedFieldsString . ", serial", $query, false); // function 'newSELECTclause()' is defined in 'include.inc.php'

	// Replace any existing ORDER BY clause with the list of columns given in '$selectedFieldsArray':
	$query = newORDERclause("ORDER BY " . $selectedFieldsString, $query, false); // function 'newORDERclause()' is defined in 'include.inc.php'

	// Fix escape sequences within the SQL query:
	$query = stripSlashesIfMagicQuotes($query);

	// RUN the query on the database through the connection:
	$result = queryMySQLDatabase($query); // function 'queryMySQLDatabase()' is defined in 'include.inc.php'


	// PROCESS RESULTS:
	$recordSerialsArray = array(); // maps each record identifier string to the serials of all records that produced it
	$duplicateRecordSerialsArray = array();

	$rowsFound = @mysql_num_rows($result);

	// Identify any records with matching field data:
	// (TODO: there's currently no special handling for the case that the query returned no rows)
	if ($rowsFound > 0)
	{
		// Count the number of fields:
		$fieldsFound = mysql_num_fields($result);

		// Fetch the attribute names of all fields except for the last one (which is the 'serial' field):
		// (the column names are the same for every row, so we look them up only once)
		$fieldNames = array();

		for ($i = 0; $i < $fieldsFound - 1; $i++)
			$fieldNames[$i] = getMySQLFieldInfo($result, $i, "name"); // function 'getMySQLFieldInfo()' is defined in 'include.inc.php'

		// Loop over each row in the result set:
		while ($row = @mysql_fetch_array($result))
		{
			// NOTE(review): the field values are merged without any delimiter, so different field
			//               combinations may yield the same identifier (e.g. "ab"+"c" vs. "a"+"bc")
			$recordIdentifier = "";

			foreach ($fieldNames as $i => $fieldName)
			{
				// normalize author names:
				if ($fieldName == "author" and $ignoreAuthorInitials == "1")
				{
					// this is a stupid hack that maps the names of the '$row' array keys to those used
					// by the '$formVars' array (which is required by function 'parsePlaceholderString()')
					// (eventually, the '$formVars' array should use the MySQL field names as names for its array keys)
					$formVars = buildFormVarsArray($row); // function 'buildFormVarsArray()' is defined in 'include.inc.php'

					// ignore initials in author names:
					$row[$i] = parsePlaceholderString($formVars, "<:authors[0||]:>", ""); // function 'parsePlaceholderString()' is defined in 'include.inc.php'
				}

				$recordIdentifier .= $row[$i]; // merge all field values to form a unique record identifier string
			}

			// Normalize record identifier string:
			if ($ignoreWhitespace == "1") // ignore whitespace
				$recordIdentifier = preg_replace("/\\s+/", "", $recordIdentifier);

			if ($ignorePunctuation == "1") // ignore punctuation
				$recordIdentifier = preg_replace("/[{$punct}]+/{$patternModifiers}", "", $recordIdentifier);

			if ($ignoreCharacterCase == "1") // ignore character case
				$recordIdentifier = strtolower($recordIdentifier);

			if (in_array($nonASCIIChars, array("strip", "transliterate"), true)) // strip or transliterate non-ASCII characters
				$recordIdentifier = handleNonASCIIAndUnwantedCharacters($recordIdentifier, "\\S\\s", $nonASCIIChars);

			// Store this record's serial number within the sub-array of its record identifier string
			// (the sub-array gets created automatically if this identifier hasn't been seen yet):
			$recordSerialsArray[$recordIdentifier][] = $row["serial"];
		}

		// Collect all serial numbers from '$recordSerialsArray' whose sub-array contains more than one serial number:
		foreach ($recordSerialsArray as $recordSerials)
		{
			if (count($recordSerials) > 1)
			{
				foreach ($recordSerials as $recordSerial)
					$duplicateRecordSerialsArray[] = $recordSerial; // add this record's serial number to the array of duplicate record serials
			}
		}
	}

	if (empty($duplicateRecordSerialsArray))
		$duplicateRecordSerialsArray[] = "0"; // if no duplicate records were found, the non-existing serial number '0' will result in a "nothing found" feedback


	// CONSTRUCT SQL QUERY (2. DUPLICATES DISPLAY):
	// To display any duplicates that were found within the results of the original query, we build again a new query based on the original SQL query:
	$query = $sqlQuery;

	// Replace WHERE clause:
	// TODO: maybe make this into a generic function? (compare with function 'extractWHEREclause()' in 'include.inc.php')
	$duplicateRecordSerialsString = implode("|", $duplicateRecordSerialsArray);
	$query = preg_replace("/(?<=WHERE )(.+?)(?= ORDER BY| LIMIT| GROUP BY| HAVING| PROCEDURE| FOR UPDATE| LOCK IN|[ ;]+(SELECT|INSERT|UPDATE|DELETE|CREATE|ALTER|DROP|FILE)\\b|\$)/i", "serial RLIKE \"^(" . $duplicateRecordSerialsString . ")\$\"", $query);

	// Replace any existing ORDER BY clause with the list of columns given in '$selectedFieldsArray':
	$query = newORDERclause("ORDER BY " . $selectedFieldsString, $query, false);

	return array($query, $displayType);
}
// Modifies the given SQL query according to the form that was submitted from the results header:
//   - "Search within Results": adds a condition for the chosen field to the WHERE clause
//   - "Display Options" (Show/Hide): adds fields to (or removes fields from) the SELECT clause
//   - Browse view: switches the query to the field chosen by the user
//
// Parameters:
//   $queryTable          - the table that's queried (compared against '$tableRefs' and '$tableUsers')
//   $displayType         - title of the submit button that was pressed (gets compared with the localized button titles)
//   $originalDisplayType - the display type of the current view (e.g. 'Cite' or 'Display')
//   $query               - the current SQL query
//   $showLinks           - passed through to function 'buildSELECTclause()'
//   $citeOrder           - sort order for Citation view ('author', 'year', 'type', 'type-year' or 'creation-date')
//   $userID              - ID of the current user (used when joining the user-specific data table)
//
// Returns:
//   array($query, $displayType) - the modified SQL query and the re-established original display type
function extractFormElementsRefineDisplay($queryTable, $displayType, $originalDisplayType, $query, $showLinks, $citeOrder, $userID)
{
	global $tableRefs, $tableUserData, $tableUsers; // defined in 'db.inc.php'
	global $loc; // '$loc' is made globally available in 'core.php'

	$encodedDisplayType = encodeHTML($displayType); // note that we need to HTML encode '$displayType' for comparison with the HTML encoded locales

	// figure out which button was pressed:
	// (we use plain comparisons here since the localized button titles may contain characters that have a special meaning in regular expressions)
	$isSearchAction = ($encodedDisplayType == $loc["ButtonTitle_Search"]);
	$isShowAction = ($encodedDisplayType == $loc["ButtonTitle_Show"]);
	$isHideAction = ($encodedDisplayType == $loc["ButtonTitle_Hide"]);
	$isBrowseAction = ($encodedDisplayType == $loc["ButtonTitle_Browse"]);

	// extract form variables:
	// (the defaults avoid 'Undefined variable...' messages when a user has changed the language setting on the options page,
	//  and then reloads an existing page whose URL still has a 'submit' value in the previously used language)
	$fieldSelector = "";
	$refineSearchName = "";
	$refineSearchActionCheckbox = "0"; // "0" means that the user did NOT mark the checkbox next to "Exclude matches"

	if ($isSearchAction)
	{
		if (isset($_REQUEST['refineSearchSelector']))
			$fieldSelector = $_REQUEST['refineSearchSelector']; // extract field name chosen by the user

		if (isset($_REQUEST['refineSearchName']))
			$refineSearchName = $_REQUEST['refineSearchName']; // extract search text entered by the user

		if (isset($_REQUEST['refineSearchExclude']))
			$refineSearchActionCheckbox = $_REQUEST['refineSearchExclude']; // extract user option whether matched records should be included or excluded
	}
	elseif ($isShowAction or $isHideAction or $isBrowseAction)
	{
		if (isset($_REQUEST['displayOptionsSelector']))
			$fieldSelector = $_REQUEST['displayOptionsSelector'];
	}

	if (!is_string($fieldSelector))
		$fieldSelector = ""; // ignore non-string input (such as arrays)

	// The field selector is user input that gets inserted into SQL statements and regular expression patterns below.
	// Wherever it's used as a column name, we require it to be a plain identifier (letters, digits & underscores):
	// NOTE(review): this assumes that all field selector values offered by the forms are plain column names -- confirm
	$fieldIsColumnName = (preg_match('/^[a-z_][a-z0-9_]*\z/i', $fieldSelector) == 1);


	// extract the fields of the SELECT clause from the current SQL query:
	$previousSelectClause = extractSELECTclause($query);

	// ensure to add any required fields to the SELECT clause:
	// ('search.php' queries table '$tableRefs'; for 'users.php', we'll add any required fields to the SELECT clause below)
	// TODO: this wouldn't be necessary if function 'buildSELECTclause()' would handle the requirements of 'users.php'
	$addRequiredFields = ($queryTable == $tableRefs);

	$additionalFields = "";

	if ($isSearchAction)
	{
		// rebuild the current SELECT clause:
		$newSelectClause = buildSELECTclause($originalDisplayType, $showLinks, $additionalFields, false, $addRequiredFields, $previousSelectClause);

		// replace current SELECT clause:
		$query = newSELECTclause($newSelectClause, $query, false);

		// if the user did NOT type a search string into the text entry field (or if no valid field was chosen), we simply keep the old WHERE clause
		if ($refineSearchName != "" and $fieldIsColumnName)
		{
			$quotedSearchName = quote_smart($refineSearchName);

			// Depending on the chosen output action, construct an appropriate condition:
			if ($refineSearchActionCheckbox == "0")
			{
				// for the fields 'marked=no', 'copy=false' and 'selected=no', force NULL values to be matched:
				if (($fieldSelector == "marked" and $refineSearchName == "no") or ($fieldSelector == "copy" and $refineSearchName == "false") or ($fieldSelector == "selected" and $refineSearchName == "no"))
					$newCondition = "({$fieldSelector} RLIKE " . $quotedSearchName . " OR {$fieldSelector} IS NULL)";
				else // add default condition:
					$newCondition = "{$fieldSelector} RLIKE " . $quotedSearchName;
			}
			else // exclude matches:
			{
				$newCondition = "({$fieldSelector} NOT RLIKE " . $quotedSearchName . " OR {$fieldSelector} IS NULL)";
			}

			// ...add search field name & value to the SQL query:
			// (we split the query at the first ' WHERE ' and concatenate the parts instead of using 'preg_replace()', since
			//  'preg_replace()' would interpret backslashes and '$n' references within the replacement string and thus alter the escaped search text)
			$queryParts = preg_split("/ WHERE /i", $query, 2);

			if (count($queryParts) == 2)
				$query = $queryParts[0] . " WHERE " . $newCondition . " AND " . $queryParts[1];

			$query = preg_replace('/ AND serial RLIKE "\\.\\+"/i', '', $query); // remove any 'AND serial RLIKE ".+"' which isn't required anymore
		}
	}
	elseif ($isShowAction or $isHideAction)
	{
		if (preg_match("/^Cite\$/i", $originalDisplayType))
		{
			// generate a SELECT clause that's appropriate for Citation view (or Details view):
			$newSelectClause = buildSELECTclause($originalDisplayType, $showLinks, $additionalFields, false, $addRequiredFields);

			// rebuild the current ORDER clause:
			$citeOrderClauses = array(
				// sort records first by year (descending):
				"year" => "ORDER BY year DESC, first_author, author_count, author, title",
				// sort records first by record type and thesis type (descending):
				"type" => "ORDER BY type DESC, thesis DESC, first_author, author_count, author, year, title",
				// sort records first by record type and thesis type (descending), then by year (descending):
				"type-year" => "ORDER BY type DESC, thesis DESC, year DESC, first_author, author_count, author, title",
				// sort records such that newly added/edited records get listed top of the list:
				"creation-date" => "ORDER BY created_date DESC, created_time DESC, modified_date DESC, modified_time DESC, serial DESC",
				// supply the default ORDER BY pattern (which is suitable for citation in a journal etc.):
				"author" => "ORDER BY first_author, author_count, author, year, title"
			);

			// the cite order is matched case-insensitively:
			$citeOrderKey = is_string($citeOrder) ? strtolower($citeOrder) : "";

			if (isset($citeOrderClauses[$citeOrderKey]))
				$query = newORDERclause($citeOrderClauses[$citeOrderKey], $query, false); // replace current ORDER clause

			// else if any other or no '$citeOrder' parameter is specified, we keep the current ORDER BY clause
			// NOTE: this behaviour is different from functions 'extractFormElementsQueryResults()' and 'extractFormElementsExtract()'
			//       where we always use 'ORDER BY first_author, author_count, author, year, title' as default ORDER BY clause
			//       (to ensure correct sorting for output to bibliographic reference lists)
		}
		elseif (preg_match("/^Display\$/i", $originalDisplayType))
		{
			// NOTE: the below code for displaying & hiding of fields in Details view must be adopted if either layout or field names are changed!
			$fieldsList = "";

			if ($fieldSelector == "all fields")
			{
				// generate a SELECT clause that shows all fields in Details view:
				$newSelectClause = buildSELECTclause($originalDisplayType, $showLinks, $additionalFields, true, $addRequiredFields);
			}
			else
			{
				if ($isShowAction)
				{
					$matchField = "pages"; // the chosen fields get inserted after this field

					if ($fieldSelector == "keywords, abstract")
					{
						$fieldsList = ", keywords, abstract";
					}
					elseif ($fieldSelector == "additional fields")
					{
						$fieldsList = ", address, corporate_author, thesis, publisher, place, editor, language, summary_language, orig_title, series_editor, series_title, abbrev_series_title, series_volume, series_issue, edition, issn, isbn, medium, area, expedition, conference, notes, approved";

						if (isset($_SESSION['loginEmail']))
							$fieldsList .= ", location"; // we only add the 'location' field if the user is logged in

						if (preg_match("/\\babstract\\b/i", $previousSelectClause))
							$matchField = "abstract";
					}
					elseif ($fieldSelector == "my fields")
					{
						$fieldsList = ", marked, copy, selected, user_keys, user_notes, user_file, user_groups, cite_key";

						if (preg_match("/\\bserial\\b/i", $previousSelectClause))
							$matchField = "serial";
						elseif (preg_match("/\\babstract\\b/i", $previousSelectClause))
							$matchField = "abstract";
					}

					// if none of the chosen fields are currently displayed...
					if (!empty($fieldsList) and !preg_match("/\\b" . $fieldsList . "\\b/i", $previousSelectClause))
						$previousSelectClause = preg_replace("/(?<=\\b" . $matchField . "\\b)/i", $fieldsList, $previousSelectClause); // ...add the chosen fields to the current SELECT clause
				}

				if ($isHideAction)
				{
					if ($fieldSelector == "keywords, abstract")
						$fieldsList = "\\b(keywords|abstract)\\b";
					elseif ($fieldSelector == "additional fields")
						$fieldsList = "\\b(corporate_author|thesis|address|publisher|place|editor|language|summary_language|orig_title|series_editor|series_title|abbrev_series_title|series_volume|series_issue|edition|issn|isbn|medium|area|expedition|conference|notes|approved|location)\\b";
					elseif ($fieldSelector == "my fields")
						$fieldsList = "\\b(marked|copy|selected|user_keys|user_notes|user_file|user_groups|cite_key)\\b";

					if (!empty($fieldsList) and preg_match("/\\b" . $fieldsList . "\\b/i", $previousSelectClause))
					{
						// ...remove the chosen fields from the fields given in the current SELECT clause:
						$previousSelectClause = preg_replace("/ *, *" . $fieldsList . " */i", "", $previousSelectClause); // all columns except the first
						$previousSelectClause = preg_replace("/ *" . $fieldsList . " *, */i", "", $previousSelectClause); // all columns except the last
					}
				}

				// rebuild the current SELECT clause, but include (or exclude) the chosen fields:
				$newSelectClause = buildSELECTclause($originalDisplayType, $showLinks, $additionalFields, false, $addRequiredFields, $previousSelectClause);
			}
		}
		else
		{
			// (an empty or invalid field selector is ignored here, since it would otherwise damage the SELECT clause)
			if ($fieldIsColumnName)
			{
				$fieldIsDisplayed = (preg_match("/\\b" . $fieldSelector . "\\b/i", $previousSelectClause) == 1);

				if ($isShowAction)
				{
					if (!$fieldIsDisplayed) // ...and the chosen field is *not* already displayed...
						$additionalFields = $fieldSelector; // ...add the chosen field to the current SELECT clause
				}
				elseif ($isHideAction and $fieldIsDisplayed)
				{
					// ...remove the chosen field from the fields given in the current SELECT clause:
					$previousSelectClause = preg_replace("/ *, *\\b" . $fieldSelector . "\\b */i", "", $previousSelectClause); // all columns except the first
					$previousSelectClause = preg_replace("/ *\\b" . $fieldSelector . "\\b *, */i", "", $previousSelectClause); // all columns except the last
				}
			}

			// rebuild the current SELECT clause, but include (or exclude) the chosen field:
			$newSelectClause = buildSELECTclause("", $showLinks, $additionalFields, false, $addRequiredFields, $previousSelectClause);
		}

		// replace current SELECT clause:
		$query = newSELECTclause($newSelectClause, $query, false);
	}
	elseif ($isBrowseAction)
	{
		// extract the field that was previously used in Browse view:
		// (the query is only modified if both, the previous field and the newly chosen field, are plain column names)
		if ($fieldIsColumnName and preg_match("/^SELECT (\\w+)/i", $query, $previousFieldMatch))
		{
			$previousField = $previousFieldMatch[1];

			if (strcasecmp($fieldSelector, $previousField) != 0)
			{
				// ...modify the SQL query to show a summary for the new field that was chosen by the user:
				// (NOTE: these replace patterns aren't 100% safe and may fail if the user has modified the query using 'sql_search.php'!)
				$query = preg_replace("/^SELECT {$previousField}/i", "SELECT {$fieldSelector}", $query); // use the field that was chosen by the user for Browse view
				$query = preg_replace("/GROUP BY {$previousField}/i", "GROUP BY {$fieldSelector}", $query); // group data by the field that was chosen by the user
				$query = preg_replace("/ORDER BY( records( DESC)?,)? {$previousField}/i", "ORDER BY\\1 {$fieldSelector}", $query); // order data by the field that was chosen by the user
			}
		}
	}

	// re-establish the original display type:
	// (resetting '$displayType' to its original value is required for Browse view; for List view, it does also correct incorrect
	//  display types such as 'Search' or 'Show'/'Hide' which stem from the submit buttons in the forms of the results header)
	$displayType = $originalDisplayType;


	// the following changes to the SQL query are performed for both forms ("Search within Results" and "Display Options"):
	if ($queryTable == $tableRefs)
	{
		// if the chosen field is one of the user-specific fields from table 'user_data': 'marked', 'copy', 'selected', 'user_keys', 'user_notes', 'user_file', 'user_groups', 'cite_key' or 'related'
		// ('my fields' is used in Details view as an alias for all user-specific fields)
		if (preg_match("/^(marked|copy|selected|user_keys|user_notes|user_file|user_groups|cite_key|related|my fields)\$/i", $fieldSelector))
		{
			// ...and if the 'LEFT JOIN...' statement isn't already part of the 'FROM' clause...
			if (!preg_match("/LEFT JOIN {$tableUserData}/i", $query))
			{
				// ...add the 'LEFT JOIN...' part to the 'FROM' clause:
				// (the user ID is forced to an integer so that it can't break out of the JOIN condition)
				// NOTE(review): this assumes that user IDs are always integers -- confirm
				$query = preg_replace("/ FROM {$tableRefs}/i", " FROM {$tableRefs} LEFT JOIN {$tableUserData} ON serial = record_id AND user_id = " . intval($userID), $query);
			}
		}
	}
	elseif ($queryTable == $tableUsers)
	{
		// TODO: this wouldn't be necessary if function 'buildSELECTclause()' would handle the requirements of 'users.php' (see also above)
		// add 'user_id' column (although it won't be visible the 'user_id' column gets included in every search query)
		// (which is required in order to obtain unique checkbox names as well as for use in the 'getUserID()' function)
		$query = preg_replace("/ FROM {$tableUsers}/i", ", user_id FROM {$tableUsers}", $query);
	}

	return array($query, $displayType);
}