/**
 * Build an SRW/SRU 'searchRetrieveResponse' document that carries a single diagnostic.
 *
 * @param mixed  $diagCode         SRU/W diagnostic number; it is appended to "info:srw/diagnostic/1/"
 *                                 and used as key into the array returned by 'mapSRWDiagnostics()'
 * @param string $diagDetails      optional details text; when not empty it is written (XML-encoded)
 *                                 into a 'details' element
 * @param string $exportStylesheet optional stylesheet URL; when not empty, an 'xml-stylesheet'
 *                                 processing instruction is inserted before the root element
 *
 * @return string the serialized XML document
 */
function srwDiagnostics($diagCode, $diagDetails, $exportStylesheet)
{
    global $contentTypeCharset; // defined in 'ini.inc.php'

    // Map SRU/W diagnostic numbers to their corresponding messages:
    $diagMessages = mapSRWDiagnostics(); // function 'mapSRWDiagnostics()' is defined in 'webservice.inc.php'

    if (isset($diagMessages[$diagCode])) {
        $diagMessage = $diagMessages[$diagCode];
    } else {
        $diagMessage = "Unknown error";
    }

    $srwCollectionDoc = new XMLDocument();
    $srwCollectionDoc->setEncoding($contentTypeCharset);

    $srwCollection = srwGenerateBaseTags("searchRetrieveResponse");

    $diagnosticsBranch = new XMLBranch("srw:diagnostics");

    // since we've defined the 'diag' namespace in the <searchRetrieveResponse> element (see function 'srwGenerateBaseTags()'),
    // we can simply use '<diag:diagnostic>' below; otherwise we should use '<diagnostic xmlns="http://www.loc.gov/zing/srw/diagnostic/">':
    $diagnosticsBranch->setTagContent("info:srw/diagnostic/1/" . $diagCode, "srw:diagnostics/diag:diagnostic/uri");
    $diagnosticsBranch->setTagContent($diagMessage, "srw:diagnostics/diag:diagnostic/message");

    if (!empty($diagDetails)) {
        $diagnosticsBranch->setTagContent(encodeHTMLspecialchars($diagDetails), "srw:diagnostics/diag:diagnostic/details");
    }

    $srwCollection->addXMLBranch($diagnosticsBranch);

    $srwCollectionDoc->setXML($srwCollection);
    $srwCollectionString = $srwCollectionDoc->getXMLString();

    // Add the XML Stylesheet definition:
    // Note that this is just a hack (that should get fixed) since I don't know how to do it properly using the ActiveLink PHP XML Package ?:-/
    if (!empty($exportStylesheet)) {
        // The stylesheet URL ends up inside a quoted pseudo-attribute, so markup characters in it
        // must be encoded; otherwise a '"' or '?>' in the URL would break the generated document.
        // NOTE(review): this relies on 'encodeHTMLspecialchars()' encoding '&', '<', '>' and '"'
        //               without double-encoding existing entities -- confirm in 'include.inc.php'
        $stylesheetInstruction = "<?xml-stylesheet type=\"text/xsl\" href=\"" . encodeHTMLspecialchars($exportStylesheet) . "\"?>\n";

        // Insert by position instead of via 'preg_replace()': in a regex replacement string, any
        // '$n' or '\n' sequence within the URL would get interpreted as a back-reference.
        $rootTagPosition = stripos($srwCollectionString, "<srw:searchRetrieveResponse");

        if ($rootTagPosition !== false) {
            $srwCollectionString = substr_replace($srwCollectionString, $stylesheetInstruction, $rootTagPosition, 0);
        }
    }

    return $srwCollectionString;
}
/**
 * Generate an RSS 2.0 feed for the records of a MySQL result set.
 *
 * @param resource $result                MySQL result set that is read via 'mysql_fetch_array()'
 * @param int      $showRows              maximum number of rows that will be written as RSS items
 * @param string   $rssChannelDescription text for the channel's 'description' element
 *
 * @return string the complete RSS document
 */
function generateRSS($result, $showRows, $rssChannelDescription)
{
    global $officialDatabaseName; // these variables are defined in 'ini.inc.php'
    global $databaseBaseURL;
    global $feedbackEmail;
    global $defaultCiteStyle;
    global $contentTypeCharset;
    global $logoImageURL;

    global $transtab_refbase_html; // defined in 'transtab_refbase_html.inc.php'

    // Note that we only convert those entities that are supported by XML (by use of the 'encodeHTMLspecialchars()' function).
    // All other higher ASCII chars are left unencoded and valid feed output is only possible if the '$contentTypeCharset' variable is set correctly in 'ini.inc.php'.
    // (The only exception is the item description which will contain HTML tags & entities that were defined by '$transtab_refbase_html' or by the 'reArrangeAuthorContents()' function)

    // Define inline text markup to be used by the 'citeRecord()' function:
    // (the citation is emitted as HTML within the item description, therefore markup characters are given
    //  as HTML entities; numeric entities are used for the dashes so that they don't depend on the charset
    //  of this source file)
    $markupPatternsArray = array(
        "bold-prefix"      => "<b>",
        "bold-suffix"      => "</b>",
        "italic-prefix"    => "<i>",
        "italic-suffix"    => "</i>",
        "underline-prefix" => "<u>",
        "underline-suffix" => "</u>",
        "endash"           => "&#8211;",
        "emdash"           => "&#8212;",
        "ampersand"        => "&amp;",
        "double-quote"     => "&quot;",
        "single-quote"     => "'",
        "less-than"        => "&lt;",
        "greater-than"     => "&gt;",
        "newline"          => "\n<br>\n"
    );

    $currentDateTimeStamp = generateRFC2822TimeStamp(); // get the current date & time (in UNIX/RFC-2822 time stamp format => "date('r')" or "date('D, j M Y H:i:s O')")

    // write RSS header:
    $rssData = "<?xml version=\"1.0\" encoding=\"" . $contentTypeCharset . "\"?>"
             . "\n<rss version=\"2.0\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\">";

    // write channel info:
    $rssData .= "\n\t<channel>"
              . "\n\t\t<title>" . encodeHTMLspecialchars($officialDatabaseName) . "</title>"
              . "\n\t\t<link>" . $databaseBaseURL . "</link>"
              . "\n\t\t<description>" . encodeHTMLspecialchars($rssChannelDescription) . "</description>"
              . "\n\t\t<language>en</language>"
              . "\n\t\t<pubDate>" . $currentDateTimeStamp . "</pubDate>"
              . "\n\t\t<lastBuildDate>" . $currentDateTimeStamp . "</lastBuildDate>"
              . "\n\t\t<webMaster>" . $feedbackEmail . "</webMaster>";

    // write image data:
    $rssData .= "\n\n\t\t<image>"
              . "\n\t\t\t<url>" . $databaseBaseURL . $logoImageURL . "</url>"
              . "\n\t\t\t<title>" . encodeHTMLspecialchars($officialDatabaseName) . "</title>"
              . "\n\t\t\t<link>" . $databaseBaseURL . "</link>"
              . "\n\t\t</image>";

    // The citation style file depends only on '$defaultCiteStyle', so it is looked up and included just once
    // (lazily, upon the first row, so that an empty result set still doesn't trigger the lookup/include):
    $citeStyleFile = null;

    // fetch results: upto the limit specified in '$showRows', fetch a row into the '$row' array and write out a RSS item:
    // NOTE(review): the 'mysql_*' functions are not available in PHP 7 and later; '@' hides any fetch error
    for ($rowCounter = 0; $rowCounter < $showRows && ($row = @mysql_fetch_array($result)); $rowCounter++) {
        $origTitle = $row['title']; // save the original title contents before applying any search & replace actions

        // Perform search & replace actions on the text of the 'title' field:
        // (the array '$transtab_refbase_html' in 'transtab_refbase_html.inc.php' defines which search & replace actions will be employed)
        $row['title'] = searchReplaceText($transtab_refbase_html, $row['title'], true);
        // this will provide for correct rendering of italic, super/sub-script and greek letters in item descriptions (which are enclosed by '<![CDATA[...]]>' to ensure well-formed XML);
        // item titles are still served in raw format, though, since the use of HTML in item titles breaks many news readers

        if ($citeStyleFile === null) {
            $citeStyleFile = getStyleFile($defaultCiteStyle); // fetch the name of the citation style file that's associated with the style given in '$defaultCiteStyle' (which, in turn, is defined in 'ini.inc.php')

            // include the found citation style file *once*:
            include_once "cite/" . $citeStyleFile;
        }

        // Generate a proper citation for this record, ordering attributes according to the chosen output style & record type:
        $record = citeRecord($row, $defaultCiteStyle, "", $markupPatternsArray, true); // function 'citeRecord()' is defined in the citation style file given in '$citeStyleFile'

        // A literal ']]>' within the citation would terminate the CDATA section prematurely,
        // so we split any such sequence across two adjacent CDATA sections:
        $record = str_replace("]]>", "]]]]><![CDATA[>", $record);

        // To avoid advertising email adresses in public RSS output, we remove the email address from contents of the 'modified_by' field which
        // get displayed in item descriptions. However, note that email adresses are NOT stripped from contents of the 'created_by' field
        // since a valid RSS feed must include an email address in the '<author>' element.
        // The following pattern does not attempt to do fancy parsing of email addresses but simply assumes the string format
        // of the 'modified_by' field (table 'refs'). If you change the string format, you must modify this pattern as well!
        $editorName = preg_replace("/(.+?) \\([^)]+\\)/", "\\1", $row['modified_by']);

        // append a RSS item for the current record:
        $rssData .= "\n\n\t\t<item>"
                  . "\n\t\t\t<title>" . encodeHTMLspecialchars($origTitle) . "</title>"
                  . "\n\t\t\t<link>" . $databaseBaseURL . "show.php?record=" . $row['serial'] . "</link>"
                  . "\n\t\t\t<description><![CDATA[" . $record
                  . "\n\t\t\t<br><br>Edited by " . encodeHTMLspecialchars($editorName) . " on " . generateRFC2822TimeStamp($row['modified_date'], $row['modified_time']) . ".]]></description>"
                  . "\n\t\t\t<guid isPermaLink=\"true\">" . $databaseBaseURL . "show.php?record=" . $row['serial'] . "</guid>"
                  . "\n\t\t\t<pubDate>" . generateRFC2822TimeStamp($row['created_date'], $row['created_time']) . "</pubDate>"
                  . "\n\t\t\t<author>" . generateRFC2822EmailAddress($row['created_by']) . "</author>"
                  . "\n\t\t</item>";
    }

    // finish RSS data:
    $rssData .= "\n\n\t</channel>"
              . "\n</rss>\n";

    return $rssData;
}
/**
 * Create the root 'feed' element of an Atom document together with its feed-level child elements.
 *
 * @param string $atomOperation name of the current operation; the value "Error" yields a fixed
 *                              "Search error!" subtitle and skips the charset conversion of the database name
 *
 * @return XML the 'feed' element (more links get added to it in function 'atomCollection()')
 */
function atomGenerateBaseTags($atomOperation)
{
    global $officialDatabaseName; // these variables are specified in 'ini.inc.php'
    global $databaseBaseURL;
    global $feedbackEmail;
    global $contentTypeCharset;
    global $convertExportDataToUTF8;
    global $logoImageURL;
    global $faviconImageURL;

    global $query;

    $isErrorFeed = ($atomOperation == "Error");

    // ----------------------------------------------------------
    // Root element & namespace declarations (written in the order given here):

    $feed = new XML("feed");

    $namespaceAttributes = array(
        "xmlns"            => "http://www.w3.org/2005/Atom",
        "xmlns:opensearch" => "http://a9.com/-/spec/opensearch/1.1/",
        "xmlns:unapi"      => "http://unapi.info/", // NOTE: is the unAPI namespace ok? Or should we use "http://unapi.info/specs/", or maybe something like "http://purl.org/unapi/ns/" ?
        "xmlns:dc"         => "http://purl.org/dc/elements/1.1/",
        "xmlns:dcterms"    => "http://purl.org/dc/terms/",
        "xmlns:prism"      => "http://prismstandard.org/namespaces/1.2/basic/"
    );

    foreach ($namespaceAttributes as $attributeName => $namespaceURI) {
        $feed->setTagAttribute($attributeName, $namespaceURI);
    }

    // ----------------------------------------------------------
    // Prepare the database name, which serves as feed title, author name & search link title:

    $databaseNameXML = encodeHTMLspecialchars($officialDatabaseName); // function 'encodeHTMLspecialchars()' is defined in 'include.inc.php'

    // for regular (non-error) feeds, convert the database name to UTF-8
    // (if '$convertExportDataToUTF8' is set to "yes" in 'ini.inc.php' and character encoding is not UTF-8 already):
    if (!$isErrorFeed and $convertExportDataToUTF8 == "yes" and $contentTypeCharset != "UTF-8") {
        $databaseNameXML = convertToCharacterEncoding("UTF-8", "IGNORE", $databaseNameXML); // function 'convertToCharacterEncoding()' is defined in 'include.inc.php'
    }

    // ----------------------------------------------------------
    // Add feed-level tags:
    // (not yet used: category, contributor, rights)

    // - 'title':
    addNewBranch($feed, "title", array("type" => "text"), $databaseNameXML);

    // - 'subtitle':
    if ($isErrorFeed) {
        $subtitleText = "Search error!";
    } else {
        // extract the 'WHERE' clause from the SQL query and include a natural-language version
        // (well, sort of) of it as feed description:
        // TODO: For Atom XML, the query string should not get HTML encoded!
        $whereClause = extractWHEREclause($query); // function 'extractWHEREclause()' is defined in 'include.inc.php'
        $explainedQuery = explainSQLQuery($whereClause); // functions 'encodeHTML()' and 'explainSQLQuery()' are defined in 'include.inc.php'
        $subtitleText = "Displays records where " . encodeHTML($explainedQuery);
    }

    addNewBranch($feed, "subtitle", array(), $subtitleText);

    // - 'updated':
    //   (TODO: the timestamp in the 'updated' element should really only get updated if any of the matching records was updated, right?)
    $feedTimeStamp = generateISO8601TimeStamp(); // function 'generateISO8601TimeStamp()' is defined in 'include.inc.php'
    addNewBranch($feed, "updated", array(), $feedTimeStamp);

    // - 'author':
    $feedAuthor = new XMLBranch("author");
    $feedAuthor->setTagContent($databaseNameXML, "author/name");
    $feedAuthor->setTagContent($feedbackEmail, "author/email");
    $feedAuthor->setTagContent($databaseBaseURL, "author/uri");
    $feed->addXMLBranch($feedAuthor);

    // - 'generator', 'icon', 'logo':
    $generatorAttributes = array("uri" => "http://www.refbase.net/", "version" => "0.9.5");
    addNewBranch($feed, "generator", $generatorAttributes, "Web Reference Database (http://refbase.sourceforge.net)");
    addNewBranch($feed, "icon", array(), $databaseBaseURL . $faviconImageURL);
    addNewBranch($feed, "logo", array(), $databaseBaseURL . $logoImageURL);

    // - 'link' (more links will be added in function 'atomCollection()'):
    //   - link to OpenSearch Description file:
    $openSearchURL = $databaseBaseURL . "opensearch.php?operation=explain";
    atomLink($feed, $openSearchURL, "search", "OpenSearch", $databaseNameXML);

    //   - link to unAPI server:
    $unapiURL = $databaseBaseURL . "unapi.php";
    atomLink($feed, $unapiURL, "unapi:unapi-server", "unAPI", "unAPI");

    return $feed;
}
/**
 * Return the contents of the first given refbase field that is not empty in '$row'.
 *
 * @param array        $row           record data, keyed by refbase field name
 * @param string|array $refbaseFields a single refbase field name, or a list of field names
 *                                    that is tried in order
 *
 * @return mixed the field contents, or NULL if none of the given fields holds any data
 */
function parseRecordFirstNonEmptyField($row, $refbaseFields)
{
    if (!is_array($refbaseFields)) {
        $refbaseFields = array($refbaseFields);
    }

    foreach ($refbaseFields as $refbaseField) {
        if (!empty($refbaseField) and !empty($row[$refbaseField])) {
            return $row[$refbaseField];
        }
    }

    return null;
}

/**
 * Map the fields of a single record to an array that is keyed by ODF index names.
 *
 * @param array $row                                    record data, keyed by refbase field name
 * @param array $odfIndexesToRefbaseFieldsArray         maps each ODF index name either to a refbase field name,
 *                                                      or to an array that's keyed by reference type (plus the
 *                                                      optional defaults 'Other' and 'Any'), whose elements are
 *                                                      a field name or a list of field names
 * @param array $referenceTypesToRefbaseTypesArray      maps refbase type names to ODF type numbers
 * @param array $universalSearchReplaceActionsArray     search & replace actions that are applied to every
 *                                                      non-empty field
 * @param array $fieldSpecificSearchReplaceActionsArray list of arrays with a 'fields' and an 'actions' element
 *
 * @return array the processed field data, keyed by ODF index name
 */
function parseRecord($row, $odfIndexesToRefbaseFieldsArray, $referenceTypesToRefbaseTypesArray, $universalSearchReplaceActionsArray, $fieldSpecificSearchReplaceActionsArray)
{
    global $officialDatabaseName; // these variables are defined in 'ini.inc.php'
    global $databaseBaseURL;
    global $contentTypeCharset;
    global $convertExportDataToUTF8;

    $fieldParametersArray = array();

    // this is a stupid hack that maps the names of the '$row' array keys to those used
    // by the '$formVars' array (which is required by function 'generateCiteKey()')
    // (eventually, the '$formVars' array should use the MySQL field names as names for its array keys)
    $formVars = buildFormVarsArray($row); // function 'buildFormVarsArray()' is defined in 'include.inc.php'

    // generate or extract the cite key for this record
    $citeKey = generateCiteKey($formVars); // function 'generateCiteKey()' is defined in 'include.inc.php'

    // PARSE RECORD:

    // parse the '$odfIndexesToRefbaseFieldsArray' which maps ODF indexes to refbase field names and assign fields accordingly:
    foreach ($odfIndexesToRefbaseFieldsArray as $odfIndex => $fieldSpec) {
        $fieldData = null; // NULL means "nothing found (yet)"

        if (empty($fieldSpec)) {
            // for any unsupported ODF index we'll insert an empty string (see below)
        } elseif (!is_array($fieldSpec)) {
            // copy row field data (using the corresponding ODF index name as element key):
            $fieldData = parseRecordFirstNonEmptyField($row, $fieldSpec);
        } else {
            // ...we'll extract field data from different refbase fields depending on the current record's reference type:
            $useDefault = true;

            foreach ($fieldSpec as $referenceType => $typeSpecificFields) {
                if ($row['type'] == $referenceType) {
                    $useDefault = false;
                    $fieldData = parseRecordFirstNonEmptyField($row, $typeSpecificFields);
                    break;
                }
            }

            // 'Other' is used as default for all refbase types that were NOT explicitly specified:
            if ($useDefault and isset($fieldSpec['Other'])) {
                $fieldData = parseRecordFirstNonEmptyField($row, $fieldSpec['Other']);
            }

            // if this ODF field is still not set, 'Any' is used as default, no matter whether any refbase types were specified explicitly or not:
            if ($fieldData === null and isset($fieldSpec['Any'])) {
                $fieldData = parseRecordFirstNonEmptyField($row, $fieldSpec['Any']);
            }
        }

        // if this ODF field isn't set yet, provide an empty string:
        $fieldParametersArray[$odfIndex] = ($fieldData === null) ? "" : $fieldData;
    }

    // POST-PROCESS FIELD DATA:

    // currently, we'll always overwrite the record serial in the 'Identifier' field with the generated cite key:
    // (this means that NO identifier will be exported if you've unchecked the export option "Include cite keys on export")
    $fieldParametersArray['Identifier'] = $citeKey;

    // convert refbase type names into ODF type numbers:
    // (an unmapped or missing type yields an empty string instead of raising an "undefined index" error)
    $refbaseType = isset($fieldParametersArray['BibliographyType']) ? $fieldParametersArray['BibliographyType'] : "";

    if (isset($referenceTypesToRefbaseTypesArray[$refbaseType])) {
        $fieldParametersArray['BibliographyType'] = $referenceTypesToRefbaseTypesArray[$refbaseType];
    } else {
        $fieldParametersArray['BibliographyType'] = "";
    }

    // for theses, set the correct ODF type:
    if (!empty($row['thesis'])) {
        if ($row['thesis'] == "Ph.D. thesis" or $row['thesis'] == "Doctoral thesis") {
            $fieldParametersArray['BibliographyType'] = "11";
        } else {
            $fieldParametersArray['BibliographyType'] = "9"; // Thesis
        }

        // All mapped fields have been initialized with (at least) an empty string above, so we must test
        // for actual content here; otherwise an empty 'Annote' field would end up as "; <thesis>":
        if (!empty($fieldParametersArray['Annote'])) {
            $fieldParametersArray['Annote'] .= "; " . $row['thesis'];
        } else {
            $fieldParametersArray['Annote'] = $row['thesis'];
        }
    }

    // if a DOI was copied to the URL field, we'll need to add the DOI resolver:
    // (the registrant code after the dot may be longer than four digits, e.g. "10.12345/...")
    if (!empty($row['doi']) and !empty($fieldParametersArray['URL']) and preg_match("/^\\d{2}\\.\\d{4,}\\//", $fieldParametersArray['URL'])) {
        $fieldParametersArray['URL'] = "http://dx.doi.org/" . $fieldParametersArray['URL'];
    }

    // use the series volume as volume if 'series_volume' contains some info, but 'volume' doesn't:
    if (empty($row['volume']) and !empty($row['series_volume'])) {
        $fieldParametersArray['Volume'] = $row['series_volume'];
    }

    // set the fourth ODF custom field to a refbase database attribution string and the database URL:
    $fieldParametersArray['Custom4'] = "exported from " . $officialDatabaseName . " (" . $databaseBaseURL . ")";

    // set the fifth ODF custom field to the record's permanent database URL:
    $fieldParametersArray['Custom5'] = $databaseBaseURL . "show.php?record=" . $row['serial'];

    // apply universal search & replace actions, encode special chars and charset conversions to every field that shall be exported:
    foreach ($fieldParametersArray as $fieldName => $fieldValue) {
        if (!empty($fieldValue)) {
            // perform universal search & replace actions:
            if (!empty($universalSearchReplaceActionsArray)) {
                $fieldParametersArray[$fieldName] = searchReplaceText($universalSearchReplaceActionsArray, $fieldParametersArray[$fieldName], true); // function 'searchReplaceText()' is defined in 'include.inc.php'
            }

            // we only convert those special chars to entities which are supported by XML:
            $fieldParametersArray[$fieldName] = encodeHTMLspecialchars($fieldParametersArray[$fieldName]); // function 'encodeHTMLspecialchars()' is defined in 'include.inc.php'

            // convert field data to UTF-8 (if '$convertExportDataToUTF8' is set to "yes" in 'ini.inc.php' and character encoding is not UTF-8 already):
            // (note that charset conversion can only be done *after* the cite key has been generated, otherwise cite key generation will produce garbled text!)
            if ($convertExportDataToUTF8 == "yes" and $contentTypeCharset != "UTF-8") {
                $fieldParametersArray[$fieldName] = convertToCharacterEncoding("UTF-8", "IGNORE", $fieldParametersArray[$fieldName]); // function 'convertToCharacterEncoding()' is defined in 'include.inc.php'
            }
        }
    }

    // apply field-specific search & replace 'actions' to all fields that are listed in the 'fields' element of the arrays contained in '$fieldSpecificSearchReplaceActionsArray':
    foreach ($fieldSpecificSearchReplaceActionsArray as $fieldActionsArray) {
        foreach ($fieldParametersArray as $fieldName => $fieldValue) {
            if (in_array($fieldName, $fieldActionsArray['fields'])) {
                $fieldParametersArray[$fieldName] = searchReplaceText($fieldActionsArray['actions'], $fieldValue, true); // function 'searchReplaceText()' is defined in 'include.inc.php'
            }
        }
    }

    return $fieldParametersArray;
}
function oaidcRecord($row, $metadataPrefix = "oai_dc", $addNameSpaceInfo = true) { global $databaseBaseURL; // these variables are defined in 'ini.inc.php' global $contentTypeCharset; global $fileVisibility; global $fileVisibilityException; global $filesBaseURL; global $convertExportDataToUTF8; global $defaultCiteStyle; global $citeStyle; global $alnum, $alpha, $cntrl, $dash, $digit, $graph, $lower, $print, $punct, $space, $upper, $word, $patternModifiers; // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php' // The array '$transtab_refbase_unicode' contains search & replace patterns for conversion from refbase markup to Unicode entities. global $transtab_refbase_unicode; // defined in 'transtab_refbase_unicode.inc.php' // The array '$transtab_refbase_ascii' contains search & replace patterns for conversion from refbase markup to plain text. global $transtab_refbase_ascii; // defined in 'transtab_refbase_ascii.inc.php' // Define inline text markup to generate a plain text citation string: // (to be included within a 'dcterms:bibliographicCitation' element) $markupPatternsArrayPlain = array("bold-prefix" => "", "bold-suffix" => "", "italic-prefix" => "", "italic-suffix" => "", "underline-prefix" => "", "underline-suffix" => "", "endash" => "-", "emdash" => "-", "ampersand" => "&", "double-quote" => '"', "double-quote-left" => '"', "double-quote-right" => '"', "single-quote" => "'", "single-quote-left" => "'", "single-quote-right" => "'", "less-than" => "<", "greater-than" => ">", "newline" => "\n"); // This is a stupid hack that maps the names of the '$row' array keys to those used // by the '$formVars' array (which is required by function 'generateCiteKey()') // (eventually, the '$formVars' array should use the MySQL field names as names for its array keys) $formVars = buildFormVarsArray($row); // function 'buildFormVarsArray()' is defined in 'include.inc.php' // Generate or extract the cite key for this record: // (to be included 
within a 'dc:identifier' element) $citeKey = generateCiteKey($formVars); // function 'generateCiteKey()' is defined in 'include.inc.php' // Generate OpenURL data: // (to be included within a 'dc:identifier' element) $openURL = openURL($row, "openurl:"); // function 'openURL()' is defined in 'openurl.inc.php' // Encode special chars and perform charset conversions: foreach ($row as $rowFieldName => $rowFieldValue) { // We only convert those special chars to entities which are supported by XML: // function 'encodeHTMLspecialchars()' is defined in 'include.inc.php' $row[$rowFieldName] = encodeHTMLspecialchars($row[$rowFieldName]); // Convert field data to UTF-8: // (if '$convertExportDataToUTF8' is set to "yes" in 'ini.inc.php' and character encoding is not UTF-8 already) // (Note that charset conversion can only be done *after* the cite key has been generated, otherwise cite key // generation will produce garbled text!) // function 'convertToCharacterEncoding()' is defined in 'include.inc.php' if ($convertExportDataToUTF8 == "yes" and $contentTypeCharset != "UTF-8") { $row[$rowFieldName] = convertToCharacterEncoding("UTF-8", "IGNORE", $row[$rowFieldName]); } } // Defines field-specific search & replace 'actions' that will be applied to all those refbase fields that are listed in the corresponding 'fields' element: // (If you don't want to perform any search and replace actions, specify an empty array, like: '$fieldSpecificSearchReplaceActionsArray = array();'. // Note that the search patterns MUST include the leading & trailing slashes -- which is done to allow for mode modifiers such as 'imsxU'.) 
// "/Search Pattern/" => "Replace Pattern" $fieldSpecificSearchReplaceActionsArray = array(); if ($convertExportDataToUTF8 == "yes") { $fieldSpecificSearchReplaceActionsArray[] = array('fields' => array("title", "publication", "abbrev_journal", "address", "keywords", "abstract", "orig_title", "series_title", "abbrev_series_title", "notes"), 'actions' => $transtab_refbase_unicode); } // Apply field-specific search & replace 'actions' to all fields that are listed in the 'fields' element of the arrays contained in '$fieldSpecificSearchReplaceActionsArray': foreach ($fieldSpecificSearchReplaceActionsArray as $fieldActionsArray) { foreach ($row as $rowFieldName => $rowFieldValue) { if (in_array($rowFieldName, $fieldActionsArray['fields'])) { $row[$rowFieldName] = searchReplaceText($fieldActionsArray['actions'], $rowFieldValue, true); } } } // function 'searchReplaceText()' is defined in 'include.inc.php' // Fetch the name of the citation style file that's associated with the style given in '$citeStyle': $citeStyleFile = getStyleFile($citeStyle); // function 'getStyleFile()' is defined in 'include.inc.php' if (empty($citeStyleFile)) { $citeStyle = $defaultCiteStyle; // if the given cite style could not be found, we'll use the default cite style which is defined by the '$defaultCiteStyle' variable in 'ini.inc.php' $citeStyleFile = getStyleFile($citeStyle); } // Include the found citation style file *once*: include_once "cite/" . 
$citeStyleFile; // Generate a proper citation for this record, ordering attributes according to the chosen output style & record type: // - Plain text version of citation string: $recordCitationPlain = citeRecord($row, $citeStyle, "", $markupPatternsArrayPlain, false); // function 'citeRecord()' is defined in the citation style file given in '$citeStyleFile' (which, in turn, must reside in the 'styles' directory of the refbase root directory) // Convert any refbase markup that remains in the citation string (such as _italic_ or **bold**) to plain text: $recordCitationPlain = searchReplaceText($transtab_refbase_ascii, $recordCitationPlain, true); // Convert any remaining refbase markup in the 'title', 'keywords' & 'abstract' fields to plain text: $row['title'] = searchReplaceText($transtab_refbase_ascii, $row['title'], true); $row['keywords'] = searchReplaceText($transtab_refbase_ascii, $row['keywords'], true); $row['abstract'] = searchReplaceText($transtab_refbase_ascii, $row['abstract'], true); // Strip any " (ed)" or " (eds)" suffix from author/editor string: if (preg_match("/ *\\(eds?\\)\$/", $row['author'])) { $row['author'] = preg_replace("/[ \r\n]*\\(eds?\\)/i", "", $row['author']); } if (preg_match("/ *\\(eds?\\)\$/", $row['editor'])) { $row['editor'] = preg_replace("/[ \r\n]*\\(eds?\\)/i", "", $row['editor']); } // Include a link to any corresponding file if one of the following conditions is met: // - the variable '$fileVisibility' (defined in 'ini.inc.php') is set to 'everyone' // - the variable '$fileVisibility' is set to 'login' AND the user is logged in // - the variable '$fileVisibility' is set to 'user-specific' AND the 'user_permissions' session variable contains 'allow_download' // - the array variable '$fileVisibilityException' (defined in 'ini.inc.php') contains a pattern (in array element 1) that matches the contents of the field given (in array element 0) // // TODO: - the URL-generating code should be made into a dedicated function (since it's 
shared with 'modsxml.inc.php' and 'atomxml.inc.php') $printURL = false; if ($fileVisibility == "everyone" or $fileVisibility == "login" and isset($_SESSION['loginEmail']) or $fileVisibility == "user-specific" and (isset($_SESSION['user_permissions']) and preg_match("/allow_download/", $_SESSION['user_permissions'])) or !empty($fileVisibilityException) and preg_match($fileVisibilityException[1], $row[$fileVisibilityException[0]])) { if (!empty($row['file'])) { if (preg_match('#^(https?|ftp|file)://#i', $row['file'])) { $URLprefix = ""; // we don't alter the URL given in the 'file' field } else { // use the base URL of the standard files directory as prefix: if (preg_match('#^/#', $filesBaseURL)) { // absolute path -> file dir is located outside of refbase root dir $URLprefix = 'http://' . $_SERVER['HTTP_HOST'] . $filesBaseURL; } else { // relative path -> file dir is located within refbase root dir $URLprefix = $databaseBaseURL . $filesBaseURL; } } $printURL = true; } } // ---------------------------------------------------------- // Start OAI_DC XML record: if (!empty($metadataPrefix)) { $recordPrefix = $metadataPrefix . ":"; } $record = new XML($recordPrefix . 
"dc"); // create an XML object for a single record if ($addNameSpaceInfo) { if ($metadataPrefix == "oai_dc") { $record->setTagAttribute("xmlns:oai_dc", "http://www.openarchives.org/OAI/2.0/oai_dc/"); } elseif ($metadataPrefix == "srw_dc") { $record->setTagAttribute("xmlns:srw_dc", "info:srw/schema/1/dc-v1.1"); } $record->setTagAttribute("xmlns:dc", "http://purl.org/dc/elements/1.1/"); if ($metadataPrefix == "oai_dc") { $record->setTagAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); $record->setTagAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"); } elseif ($metadataPrefix == "srw_dc") { $record->setTagAttribute("xmlns:prism", "http://prismstandard.org/namespaces/1.2/basic/"); } } // Add Dublin Core elements: // NOTE: With a few exceptions, we try to adhere to the guidelines given at // "Using simple Dublin Core to describe eprints" by Andy Powell et al. // See: <http://eprints-uk.rdn.ac.uk/project/docs/simpledc-guidelines/> // - 'dc:title': if (!empty($row['title'])) { addMetaElement($record, "dc", "title", array(), $row['title']); } // function 'addMetaElement()' is defined in 'webservice.inc.php' // - 'dc:creator': if (!empty($row['author']) and $row['author'] != $row['editor']) { addMetaElement($record, "dc", "creator", array(), $row['author']); } // - 'dc:creator': // TODO: add refbase corporate author(s) as 'dc:creator' // - 'dc:contributor': if (!empty($row['editor'])) { addMetaElement($record, "dc", "contributor", array(), $row['editor']); } // - 'dc:description': if (!empty($row['abstract'])) { addMetaElement($record, "dc", "description", array(), $row['abstract']); } // - 'dc:identifier': // - DOI: if (!empty($row['doi'])) { addMetaElement($record, "dc", "identifier", array(), $row['doi'], "doi"); } // - PMID: if (!empty($row['notes']) and preg_match("/PMID *: *\\d+/i", $row['notes'])) { addMetaElement($record, "dc", "identifier", array(), $row['notes'], 
"pmid"); } // - arXiv: if (!empty($row['notes']) and preg_match("/arXiv *: *[^ ;]+/i", $row['notes'])) { addMetaElement($record, "dc", "identifier", array(), $row['notes'], "arxiv"); } // - ISBN: if (!empty($row['isbn'])) { addMetaElement($record, "dc", "identifier", array(), $row['isbn'], "isbn"); } // - OpenURL: addMetaElement($record, "dc", "identifier", array(), $openURL, "openurl"); // - refbase ID: addMetaElement($record, "dc", "identifier", array(), $databaseBaseURL . generateURL("show.php", "html", array("record" => $row['serial']), true), "url"); // - Cite key: addMetaElement($record, "dc", "identifier", array(), $citeKey, "citekey"); // - Bibliographic citation: // NOTE: In 'atomxml.inc.php', the bibliographic citation is put into a // 'dcterms:bibliographicCitation' element so that it can be uniquely // identified and extracted easily. However, in case of simple Dublin // Core output, we just put it into a 'dc:identifier' element and // use a "citation:" prefix. addMetaElement($record, "dc", "identifier", array(), encodeHTMLspecialchars($recordCitationPlain), "citation"); // - 'dc:source': // NOTE: - In <http://eprints-uk.rdn.ac.uk/project/docs/simpledc-guidelines/>, // Andy Powell et al. recommend that this element should NOT be used! // However, we use 'dc:source' elements for publication & series info // (publication/series title plus volume & issue) to provide a dedicated // source string that's easily readable and parsable. // Example: <dc:source>Polar Biology, Vol. 25, No. 10</dc:source> // - While we could also append the page info to the publication // 'dc:source' element, this info is more pertinent to the article // itself and is thus not included. For 'srw_dc:dc' output, page info is // included in PRISM elements (see below). // - All metadata (including the page info) are also provided as a machine // parsable citation in form of an OpenURL ContextObject (see above). 
// - Publication info: // NOTE: We only include the 'dc:source' element for 'oai_dc:dc' output. In case of 'srw_dc:dc' // output, we use the more fine-grained PRISM elements instead (see below) if ($metadataPrefix == "oai_dc" and (!empty($row['publication']) or !empty($row['abbrev_journal']))) { if (!empty($row['publication'])) { $source = $row['publication']; } elseif (!empty($row['abbrev_journal'])) { $source = $row['abbrev_journal']; } if (!empty($row['volume'])) { $source .= ", Vol. " . $row['volume']; } if (!empty($row['issue'])) { $source .= ", No. " . $row['issue']; } if (!empty($source)) { addMetaElement($record, "dc", "source", array(), $source); } } // - Series info: if (!empty($row['series_title']) or !empty($row['abbrev_series_title'])) { if (!empty($row['series_title'])) { $series = $row['series_title']; } elseif (!empty($row['abbrev_series_title'])) { $series = $row['abbrev_series_title']; } if (!empty($row['series_volume'])) { $series .= ", Vol. " . $row['series_volume']; } if (!empty($row['series_issue'])) { $series .= ", No. " . $row['series_issue']; } if (!empty($series)) { addMetaElement($record, "dc", "source", array(), $series); } // NOTE: To distinguish between regular publication & series info, // should we better use a "series:" prefix here? 
If so, use: // addMetaElement($record, "dc", "source", array(), $series, "series"); } // - ISSN: // NOTE: for 'srw_dc:dc' output, we put the ISSN into the 'prism:issn' element if ($metadataPrefix == "oai_dc" and !empty($row['issn'])) { addMetaElement($record, "dc", "source", array(), $row['issn'], "issn"); } // - 'dc:date': if (!empty($row['year'])) { addMetaElement($record, "dc", "date", array(), $row['year']); } // - 'dc:type': if (!empty($row['type'])) { addMetaElement($record, "dc", "type", array(), $row['type'], $row['thesis']); } // In case of a thesis, we add another 'dc:type' element with the actual thesis type: if (!empty($row['thesis'])) { addMetaElement($record, "dc", "type", array(), $row['thesis']); } // - 'dc:format': // TODO: ideally, we should parse the content of the refbase 'medium' field and map it // to a media-type term from <http://www.iana.org/assignments/media-types/> if (!empty($row['medium'])) { $mediaType = $row['medium']; } else { $mediaType = "text"; } addMetaElement($record, "dc", "format", array(), $mediaType); // - 'dc:subject': // TODO: add user-specific keywords (from field 'user_keys') if the user is logged in if (!empty($row['keywords'])) { addMetaElement($record, "dc", "subject", array(), $row['keywords']); } // - 'dc:coverage': // TODO: should we add contents from the refbase 'area' field as 'dc:coverage' element(s)? // - 'dc:relation': // - Related URL: if (!empty($row['url'])) { addMetaElement($record, "dc", "relation", array(), $row['url'], "url"); } // - Related FILE: if ($printURL) { addMetaElement($record, "dc", "relation", array(), $URLprefix . $row['file'], "file"); } // - 'dc:publisher': if (!empty($row['publisher'])) { addMetaElement($record, "dc", "publisher", array(), $row['publisher']); } // - 'dc:language': // TODO: convert to ISO notation (i.e. 
"en" instead of "English", etc) if (!empty($row['language'])) { addMetaElement($record, "dc", "language", array(), $row['language']); } // ---------------------------------------------------------- // Add PRISM elements: // NOTE: When using the 'srw_dc' namespace (i.e. 'info:srw/schema/1/dc-v1.1' as detailed at // <http://www.loc.gov/standards/sru/resources/dc-schema.html>), I don't think it's allowed // to include anything but the fifteen elements from simple Dublin Core. Is this correct? // If so, then: // // TODO: Do we need to put the PRISM elements in <extraRecordData> instead? Or can we put them within // a separate branch outside of (and next to) the '<srw_dc:dc>' element? Or shall we better omit // them entirely? // More info on SRU Extra Data>: <http://www.loc.gov/standards/sru/specs/extra-data.html> // // See also "Mixing DC metadata with other metadata schemas" in "Guidelines for implementing // Dublin Core in XML" <http://dublincore.org/documents/dc-xml-guidelines/> if ($metadataPrefix == "srw_dc") { // - 'prism:issn': if (!empty($row['issn'])) { addMetaElement($record, "prism", "issn", array(), $row['issn']); } // - 'prism:publicationName': if (!empty($row['publication'])) { addMetaElement($record, "prism", "publicationName", array(), $row['publication']); } elseif (!empty($row['abbrev_journal'])) { addMetaElement($record, "prism", "publicationName", array(), $row['abbrev_journal']); } // - 'prism:publicationDate': if (!empty($row['year'])) { addMetaElement($record, "prism", "publicationDate", array(), $row['year']); } // - 'prism:volume': if (!empty($row['volume'])) { addMetaElement($record, "prism", "volume", array(), $row['volume']); } // - 'prism:number': if (!empty($row['issue'])) { addMetaElement($record, "prism", "number", array(), $row['issue']); } // - 'prism:startingPage', 'prism:endingPage': // TODO: Similar code is used in 'include.in.php', 'modsxml.inc.php' and 'openurl.inc.php', // so this should be made into a dedicated function! 
if (!empty($row['pages']) and preg_match("/\\d+/i", $row['pages'])) { $pages = preg_replace("/^\\D*(\\d+)( *[{$dash}]+ *\\d+)?.*/i{$patternModifiers}", "\\1\\2", $row['pages']); // extract page range (if there's any), otherwise just the first number $startPage = preg_replace("/^\\D*(\\d+).*/i", "\\1", $row['pages']); // extract starting page $endPage = extractDetailsFromField("pages", $pages, "/\\D+/", "[-1]"); // extract ending page (function 'extractDetailsFromField()' is defined in 'include.inc.php') // NOTE: To extract the ending page, we'll use function 'extractDetailsFromField()' // instead of just grabbing a matched regex pattern since it'll also work // when just a number but no range is given (e.g. when startPage = endPage) // - 'prism:startingPage': if (preg_match("/\\d+ *[{$dash}]+ *\\d+/i{$patternModifiers}", $row['pages'])) { // if there's a page range addMetaElement($record, "prism", "startingPage", array(), $startPage); } // - 'prism:endingPage': addMetaElement($record, "prism", "endingPage", array(), $endPage); } } return $record; }