/**
 * Builds the unAPI "formats" XML document that lists the export formats on offer.
 *
 * When a non-empty '$unapiID' is given, it is written to the 'id' attribute of
 * the root <formats> element.
 *
 * @param mixed $unapiID  identifier to advertise the formats for (may be empty)
 * @return mixed          whatever 'XMLDocument::getXMLString()' returns for the built document
 */
function unapiExplainResponse($unapiID)
{
    global $contentTypeCharset; // these variables are specified in 'ini.inc.php'

    // Attribute sets of the advertised <format> elements, in output order.
    // Recommended format names are given at <http://unapi.stikipad.com/unapi/show/existing+formats>
    // TODO: add 'ISI', 'ODF XML' and 'Word XML'
    $formatAttributeSets = array(
        array("name" => "bibtex", "type" => "text/plain", "docs" => "http://en.wikipedia.org/wiki/BibTeX"),
        array("name" => "endnote", "type" => "text/plain", "docs" => "http://www.ecst.csuchico.edu/~jacobsd/bib/formats/endnote.html"),
        array("name" => "ris", "type" => "text/plain", "docs" => "http://www.adeptscience.co.uk/kb/article/A626"),
        array("name" => "atom", "type" => "application/atom+xml", "docs" => "http://www.atomenabled.org/developers/syndication/"),
        array("name" => "mods", "type" => "application/xml", "docs" => "http://www.loc.gov/standards/mods/"),
        array("name" => "oai_dc", "type" => "application/xml", "docs" => "http://www.openarchives.org/OAI/openarchivesprotocol.html#dublincore"),
        array("name" => "srw_dc", "type" => "application/xml", "docs" => "http://www.loc.gov/standards/sru/"),
        array("name" => "srw_mods", "type" => "application/xml", "docs" => "http://www.loc.gov/standards/sru/"),
        array("name" => "html", "type" => "text/html", "docs" => "http://www.w3.org/MarkUp/"),
        array("name" => "rtf", "type" => "application/rtf", "docs" => "http://en.wikipedia.org/wiki/Rich_Text_Format"),
        array("name" => "pdf", "type" => "application/pdf", "docs" => "http://partners.adobe.com/public/developer/pdf/index_reference.html"),
        array("name" => "latex", "type" => "application/x-latex", "docs" => "http://en.wikipedia.org/wiki/LaTeX"),
        array("name" => "markdown", "type" => "text/plain", "docs" => "http://daringfireball.net/projects/markdown/"),
        array("name" => "text", "type" => "text/plain"), // no 'docs' attribute for plain text
    );

    // Root element, optionally tagged with the requested record identifier:
    $formatsElement = new XML("formats");

    if (!empty($unapiID)) {
        $formatsElement->setTagAttribute("id", $unapiID);
    }

    // One empty <format> element per attribute set:
    foreach ($formatAttributeSets as $formatAttributes) {
        addNewBranch($formatsElement, "format", $formatAttributes, ""); // function 'addNewBranch()' is defined in 'webservice.inc.php'
    }

    // Wrap the element in a document that carries the configured charset:
    $formatsDocument = new XMLDocument();
    $formatsDocument->setEncoding($contentTypeCharset);
    $formatsDocument->setXML($formatsElement);

    return $formatsDocument->getXMLString();
}
/**
 * Creates the root element of an SRW response together with its namespace
 * declarations and the <srw:version> child.
 *
 * @param string $srwOperation  name of the SRW operation; used verbatim as the local
 *                              name of the root element ("srw:" . $srwOperation)
 * @return XML                  the root element
 */
function srwGenerateBaseTags($srwOperation)
{
    global $exportFormat; // this is needed so that we can distinguish between "SRW_DC XML" and "SRW_MODS XML" record formats

    // Namespace declarations, collected in the order in which they get written:
    $namespaceDeclarations = array(
        "xmlns:srw" => "http://www.loc.gov/zing/srw/",
    );

    if ($srwOperation == "searchRetrieveResponse") {
        $namespaceDeclarations["xmlns:diag"] = "http://www.loc.gov/zing/srw/diagnostic/";
        $namespaceDeclarations["xmlns:xcql"] = "http://www.loc.gov/zing/cql/xcql/";

        if (preg_match("/DC/i", $exportFormat)) {
            // namespace declarations used when the export format name contains "DC":
            $namespaceDeclarations["xmlns:srw_dc"] = "info:srw/schema/1/dc-v1.1";
            $namespaceDeclarations["xmlns:dc"] = "http://purl.org/dc/elements/1.1/";
            $namespaceDeclarations["xmlns:prism"] = "http://prismstandard.org/namespaces/1.2/basic/";
        } else {
            // add namespace declarations for "SRW_MODS XML":
            $namespaceDeclarations["xmlns:mods"] = "http://www.loc.gov/mods/v3";
        }
    }
    // NOTE: an "explainResponse" operation currently gets no extra namespace; the
    //       'xmlns:zr' declaration ("http://explain.z3950.org/dtd/2.0/") is not emitted.

    $srwRoot = new XML("srw:" . $srwOperation);

    foreach ($namespaceDeclarations as $attributeName => $namespaceURI) {
        $srwRoot->setTagAttribute($attributeName, $namespaceURI);
    }

    addNewBranch($srwRoot, "srw:version", array(), "1.1"); // function 'addNewBranch()' is defined in 'webservice.inc.php'

    return $srwRoot;
}
/**
 * Removes an editor marker ("(ed)" / "(eds)") from a names string.
 *
 * The string is only touched when it ends with the (lower-case) marker; in that
 * case every marker occurrence is removed, matched case-insensitively and together
 * with any spaces/line breaks in front of it.
 *
 * @param string $names  contents of an 'author' or 'editor' field
 * @return string        the names string without editor marker(s)
 */
function modsStripEditorMarker($names)
{
    if (preg_match("/ *\\(eds?\\)\$/", $names)) {
        return preg_replace("/[ \r\n]*\\(eds?\\)/i", "", $names);
    }

    return $names;
}

/**
 * Converts a keyword-style field into a list of <subject> branches.
 *
 * Semicolons separate "unrelated" terms (one <subject> each), commas separate
 * "related" terms (sibling <topic> elements within the same <subject>).
 *
 * @param string $fieldName   field name that gets passed on to 'encodeXMLField()'
 * @param string $fieldValue  raw field contents
 * @return array              list of XMLBranch objects, in field order
 */
function modsSubjectBranches($fieldName, $fieldValue)
{
    $subjectBranches = array();

    foreach (preg_split("/\\s*;\\s*/", $fieldValue) as $singleSubject) { // "unrelated" terms
        $subjectBranch = new XMLBranch("subject");

        foreach (preg_split("/\\s*,\\s*/", $singleSubject) as $singleTopic) { // "related" terms
            $topicBranch = new XMLBranch("topic");
            $topicBranch->setTagContent(encodeXMLField($fieldName, $singleTopic));
            $subjectBranch->addXMLBranch($topicBranch);
        }

        $subjectBranches[] = $subjectBranch;
    }

    return $subjectBranches;
}

/**
 * Builds an <identifier> branch with the given 'type' attribute.
 *
 * @param string $identifierType  value of the 'type' attribute
 * @param string $content         element content; passed through as is, i.e. the
 *                                caller is responsible for any encoding
 * @return XMLBranch
 */
function modsIdentifierBranch($identifierType, $content)
{
    $identifier = new XMLBranch("identifier");
    $identifier->setTagContent($content);
    $identifier->setTagAttribute("type", $identifierType);

    return $identifier;
}

/**
 * Builds the <name type="corporate"> branch for the 'corporate_author' field.
 *
 * We treat a 'corporate_author' similar to how Bibutils converts the BibTeX
 * 'organization' field to MODS XML, i.e., we add a separate name element with
 * a 'type="corporate"' attribute and an 'author' role (or a 'degree grantor'
 * role in case of theses).
 *
 * @param array $row  record data; reads 'corporate_author' and 'thesis'
 * @return XMLBranch
 */
function modsCorporateNameBranch($row)
{
    $nameBranch = new XMLBranch("name");
    $nameBranch->setTagAttribute("type", "corporate");
    $nameBranch->setTagContent(encodeXMLField('corporate_author', $row['corporate_author']), "name/namePart");

    if (empty($row['thesis'])) {
        $nameBranch->setTagContent("author", "name/role/roleTerm");
    } else { // thesis
        $nameBranch->setTagContent("degree grantor", "name/role/roleTerm");
    }

    $nameBranch->setTagAttribute("authority", "marcrelator", "name/role/roleTerm");
    $nameBranch->setTagAttribute("type", "text", "name/role/roleTerm");

    return $nameBranch;
}

/**
 * Builds the <name type="conference"> branch for the 'conference' field.
 *
 * @param string $conference  raw contents of the 'conference' field
 * @return XMLBranch
 */
function modsConferenceNameBranch($conference)
{
    $nameBranch = new XMLBranch("name");
    $nameBranch->setTagAttribute("type", "conference");
    $nameBranch->setTagContent(encodeXMLField('conference', $conference), "name/namePart");

    return $nameBranch;
}

/**
 * Builds the two <genre> branches that describe a thesis: the MARC genre
 * ("thesis", authority 'marcgt') followed by the specific thesis type.
 *
 * @param string $thesisType  contents of the 'thesis' field
 * @return array              array($marcGenreBranch, $thesisTypeBranch)
 */
function modsThesisGenreBranches($thesisType)
{
    // tweak thesis names so that Bibutils will recognize them:
    if ($thesisType == "Master's thesis") {
        $thesisType = "Masters thesis";
    }

    $thesismarc = new XMLBranch("genre");
    $thesismarc->setTagContent("thesis");
    $thesismarc->setTagAttribute("authority", "marcgt");

    $thesis = new XMLBranch("genre");
    $thesis->setTagContent(encodeXMLField('thesis', $thesisType));

    return array($thesismarc, $thesis);
}

/**
 * Generates the MODS XML element (<mods version="3.2">) for a single record.
 *
 * Fields that apply to every record type are written first. After that, the
 * remaining details (editor, corporate/conference names, genre, pages, ISBN/ISSN,
 * series) are attached either directly to the record or -- for ABSTRACT, BOOK
 * CHAPTER, CONFERENCE ARTICLE, JOURNAL ARTICLE, MAGAZINE ARTICLE and NEWSPAPER
 * ARTICLE -- to a <relatedItem type="host"> branch.
 *
 * @param array $row  record data, keyed by field name
 * @return XML        the <mods> element
 */
function modsRecord($row)
{
    global $databaseBaseURL; // these variables are defined in 'ini.inc.php'
    global $fileVisibility;
    global $fileVisibilityException;
    global $filesBaseURL;

    // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'
    global $upper, $word, $patternModifiers;

    $exportPrivate = true;   // This will be a global variable or will be used
                             // when modsRow is called and will determine if we
                             // export user-specific data

    $exportRecordURL = true; // Specifies whether an attribution string containing
                             // the URL to the refbase database record (and the last
                             // modification date) shall be written to the notes branch.
                             // Note that this string is required by the "-A|--append"
                             // feature of the 'refbase' command line client

    // Patterns that are handed to function 'separateNames()' for every name field:
    $namesDelimiter = "/\\s*;\\s*/";
    $namePartsDelimiter = "/\\s*,\\s*/";
    $initialsPattern = "/(?<=^|[{$word}])[^-{$word}]+|(?<=^|[{$upper}])(?=\$|[{$upper}])/{$patternModifiers}";

    // convert this record's modified date/time info to UNIX time stamp format:
    // => "date('D, j M Y H:i:s O')", e.g. "Sat, 15 Jul 2006 22:24:16 +0200"
    // function 'generateRFC2822TimeStamp()' is defined in 'include.inc.php'
    $currentDateTimeStamp = generateRFC2822TimeStamp($row['modified_date'], $row['modified_time']);

    // --- BEGIN TYPE * ---
    //   |
    //   | These apply to everything

    // this is a stupid hack that maps the names of the '$row' array keys to those used
    // by the '$formVars' array (which is required by function 'generateCiteKey()')
    // (eventually, the '$formVars' array should use the MySQL field names as names for its array keys)
    $formVars = buildFormVarsArray($row); // function 'buildFormVarsArray()' is defined in 'include.inc.php'

    // generate or extract the cite key for this record
    // (note that charset conversion can only be done *after* the cite key has been generated,
    //  otherwise cite key generation will produce garbled text!)
    $citeKey = generateCiteKey($formVars); // function 'generateCiteKey()' is defined in 'include.inc.php'

    // Create an XML object for a single record.
    $record = new XML("mods");
    $record->setTagAttribute("version", "3.2");

    if (!empty($citeKey)) {
        $record->setTagAttribute("ID", $citeKey);
    }

    // titleInfo
    //   Regular Title
    if (!empty($row['title'])) {
        $record->setTagContent(encodeXMLField('title', $row['title']), "mods/titleInfo/title");
    }

    //   Translated Title
    //   NOTE: This field is excluded by the default cite SELECT method
    if (!empty($row['orig_title'])) {
        $translatedTitle = new XMLBranch("titleInfo");
        $translatedTitle->setTagAttribute("type", "translated");
        $translatedTitle->setTagContent(encodeXMLField('orig_title', $row['orig_title']), "titleInfo/title");
        $record->addXMLBranch($translatedTitle);
    }

    // name
    //   author
    if (!empty($row['author'])) {
        if (preg_match("/ *\\(eds?\\)\$/", $row['author'])) {
            // the 'author' field actually holds editor(s)
            $authorNames = preg_replace("/[ \r\n]*\\(eds?\\)/i", "", $row['author']);
            $authorRole = "editor";
        } elseif ($row['type'] == "Map") {
            $authorNames = $row['author'];
            $authorRole = "cartographer";
        } else {
            $authorNames = $row['author'];
            $authorRole = "author";
        }

        $nameArray = separateNames("author", $namesDelimiter, $namePartsDelimiter, $initialsPattern, $authorNames, "personal", $authorRole);

        foreach ($nameArray as $singleName) {
            $record->addXMLBranch($singleName);
        }
    }

    // originInfo
    if (!empty($row['year']) || !empty($row['publisher']) || !empty($row['place'])) {
        $origin = new XMLBranch("originInfo");

        // dateIssued
        if (!empty($row['year'])) {
            $origin->setTagContent(encodeXMLField('year', $row['year']), "originInfo/dateIssued");
        }

        // Book Chapters and Journal Articles only have a dateIssued
        // (editions, places, and publishers are associated with the host)
        if (!preg_match("/^(Book Chapter|Journal Article)\$/", $row['type'])) {
            // publisher
            if (!empty($row['publisher'])) {
                $origin->setTagContent(encodeXMLField('publisher', $row['publisher']), "originInfo/publisher");
            }

            // place
            if (!empty($row['place'])) {
                $origin->setTagContent(encodeXMLField('place', $row['place']), "originInfo/place/placeTerm");
                $origin->setTagAttribute("type", "text", "originInfo/place/placeTerm");
            }

            // edition
            if (!empty($row['edition'])) {
                $origin->setTagContent(encodeXMLField('edition', $row['edition']), "originInfo/edition");
            }
        }

        if ($origin->hasBranch()) {
            $record->addXMLBranch($origin);
        }
    }

    // language
    if (!empty($row['language'])) {
        $record->setTagContent(encodeXMLField('language', $row['language']), "mods/language");
    }

    // abstract
    // NOTE: This field is excluded by the default cite SELECT method
    if (!empty($row['abstract'])) {
        $abstract = new XMLBranch("abstract");
        $abstract->setTagContent(encodeXMLField('abstract', $row['abstract']));

        if (!empty($row['summary_language'])) {
            $abstract->setTagAttribute("lang", encodeXMLField('summary_language', $row['summary_language']));
        }

        $record->addXMLBranch($abstract);
    }

    // subject
    //   keywords
    if (!empty($row['keywords'])) {
        foreach (modsSubjectBranches('keywords', $row['keywords']) as $subjectBranch) {
            $record->addXMLBranch($subjectBranch);
        }
    }

    //   user_keys
    if (!empty($row['user_keys']) && $exportPrivate) {
        foreach (modsSubjectBranches('user_keys', $row['user_keys']) as $subjectBranch) {
            $record->addXMLBranch($subjectBranch);
        }
    }

    //   user_groups
    if (!empty($row['user_groups']) && $exportPrivate) {
        foreach (modsSubjectBranches('user_groups', $row['user_groups']) as $subjectBranch) {
            $record->addXMLBranch($subjectBranch);
        }
    }

    // notes
    if (!empty($row['notes'])) {
        $record->setTagContent(encodeXMLField('notes', $row['notes']), "mods/note");
    }

    // user_notes
    if (!empty($row['user_notes']) && $exportPrivate) { // replaces any generic notes
        $record->setTagContent(encodeXMLField('user_notes', $row['user_notes']), "mods/note");
    }

    // refbase attribution string
    if ($exportRecordURL) {
        $attributionBranch = new XMLBranch("note");
        $attributionBranch->setTagContent("exported from refbase (" . $databaseBaseURL . "show.php?record=" . $row['serial'] . "), last updated on " . $currentDateTimeStamp);
        $record->addXMLBranch($attributionBranch);
    }

    // typeOfResource
    // maps are 'cartographic', software is 'software, multimedia',
    // and everything else is 'text'
    $type = new XMLBranch("typeOfResource");

    if ($row['type'] == "Map") {
        $type->setTagContent("cartographic");
    } elseif ($row['type'] == "Software") {
        $type->setTagContent("software, multimedia");
    } else {
        $type->setTagContent("text");
    }

    if ($row['type'] == "Manuscript") {
        $type->setTagAttribute("manuscript", "yes");
    }

    $record->addXMLBranch($type);

    // location
    //   Physical Location
    //   NOTE: This field is excluded by the default cite SELECT method
    //         This should also be parsed later
    if (!empty($row['location'])) {
        $location = new XMLBranch("location");

        foreach (preg_split("/\\s*;\\s*/", $row['location']) as $singleLocation) {
            $locationBranch = new XMLBranch("physicalLocation");
            $locationBranch->setTagContent(encodeXMLField('location', $singleLocation));
            $location->addXMLBranch($locationBranch);
        }

        $record->addXMLBranch($location);
    }

    //   URL (also an identifier, see below)
    //   NOTE: This field is excluded by the default cite SELECT method
    if (!empty($row['url'])) {
        $location = new XMLBranch("location");
        $location->setTagContent(encodeXMLField('url', $row['url']), "location/url");
        $record->addXMLBranch($location);
    }

    // Include a link to any corresponding FILE if one of the following conditions is met:
    // - the variable '$fileVisibility' (defined in 'ini.inc.php') is set to 'everyone'
    // - the variable '$fileVisibility' is set to 'login' AND the user is logged in
    // - the variable '$fileVisibility' is set to 'user-specific' AND the 'user_permissions' session variable contains 'allow_download'
    // - the array variable '$fileVisibilityException' (defined in 'ini.inc.php') contains a pattern (in array element 1) that matches the contents of the field given (in array element 0)
    if (
        $fileVisibility == "everyone"
        || ($fileVisibility == "login" && isset($_SESSION['loginEmail']))
        || ($fileVisibility == "user-specific" && (isset($_SESSION['user_permissions']) && preg_match("/allow_download/", $_SESSION['user_permissions'])))
        || (!empty($fileVisibilityException) && preg_match($fileVisibilityException[1], $row[$fileVisibilityException[0]]))
    ) {
        //   file
        //   Note that when converting MODS to Endnote or RIS, Bibutils will include the above
        //   URL (if given), otherwise it'll take the URL from the 'file' field. I.e. for
        //   Endnote or RIS, the URL to the PDF is only included if no regular URL is available.
        if (!empty($row['file'])) {
            $location = new XMLBranch("location");

            if (preg_match('#^(https?|ftp|file)://#i', $row['file'])) {
                // the 'file' field contains a full URL (starting with "http://", "https://", "ftp://", or "file://")
                $URLprefix = ""; // we don't alter the URL given in the 'file' field
            } elseif (preg_match('#^/#', $filesBaseURL)) {
                // the 'file' field contains only a partial path (like 'polarbiol/10240001.pdf') or just a file name (like '10240001.pdf')
                // absolute path -> file dir is located outside of refbase root dir
                $URLprefix = 'http://' . $_SERVER['HTTP_HOST'] . $filesBaseURL;
            } else {
                // relative path -> file dir is located within refbase root dir
                $URLprefix = $databaseBaseURL . $filesBaseURL;
            }

            $location->setTagContent(encodeXMLField('file', $URLprefix . $row['file']), "location/url");
            $location->setTagAttribute("displayLabel", "Electronic full text", "location/url");
            // the 'access' attribute requires MODS v3.2 or greater:
            $location->setTagAttribute("access", "raw object", "location/url");
            $record->addXMLBranch($location);
        }
    }

    // identifier
    //   url
    if (!empty($row['url'])) {
        $record->addXMLBranch(modsIdentifierBranch("uri", encodeXMLField('url', $row['url'])));
    }

    //   doi
    if (!empty($row['doi'])) {
        $record->addXMLBranch(modsIdentifierBranch("doi", encodeXMLField('doi', $row['doi'])));
    }

    //   pubmed
    //   NOTE: Until refbase stores PubMed & arXiv IDs in a better way,
    //         we extract them from the 'notes' field
    //   The ID is taken from a capture group of the first match. (Replacing
    //   ".*?PMID: (\d+).*" by "\1" only strips the line that holds the ID, since
    //   "." does not match line breaks, so all other lines of a multi-line notes
    //   field would end up in the identifier.)
    $notes = isset($row['notes']) ? $row['notes'] : "";

    if (preg_match("/PMID *: *(\\d+)/i", $notes, $pubmedMatch)) {
        $record->addXMLBranch(modsIdentifierBranch("pubmed", $pubmedMatch[1]));
    }

    //   arxiv
    //   NOTE: see note for pubmed; the ID ends at the first whitespace character
    //         (not just at a space) or semicolon, so that it cannot run into the next line
    if (preg_match("/arXiv *: *([^\\s;]+)/i", $notes, $arxivMatch)) {
        $record->addXMLBranch(modsIdentifierBranch("arxiv", $arxivMatch[1]));
    }

    //   cite_key
    if (!empty($citeKey)) {
        $record->addXMLBranch(modsIdentifierBranch("citekey", encodeXMLField('cite_key', $citeKey)));
    }

    //   local--CALL NUMBER
    //   NOTE: This should really be parsed!
    if (!empty($row['call_number'])) {
        foreach (preg_split("/\\s*;\\s*/", $row['call_number']) as $singleIdentifier) {
            // skip entries that end with "@", i.e. that have nothing after the "@"
            if (!preg_match("/@\\s*\$/", $singleIdentifier)) {
                $record->addXMLBranch(modsIdentifierBranch("local", encodeXMLField('call_number', $singleIdentifier)));
            }
        }
    }

    // --- END TYPE * ---

    // -----------------------------------------

    // series info is attached in both of the branches below
    $hasSeriesInfo = !empty($row['series_editor']) || !empty($row['series_title']) || !empty($row['abbrev_series_title']) || !empty($row['series_volume']) || !empty($row['series_issue']);

    // --- BEGIN TYPE != ABSTRACT || BOOK CHAPTER || CONFERENCE ARTICLE || JOURNAL ARTICLE || MAGAZINE ARTICLE || NEWSPAPER ARTICLE ---
    //   |
    //   | BOOK WHOLE, CONFERENCE VOLUME, JOURNAL, MANUAL, MANUSCRIPT, MAP, MISCELLANEOUS, PATENT,
    //   | REPORT, and SOFTWARE have some info as a branch off the root, whereas ABSTRACT, BOOK CHAPTER,
    //   | CONFERENCE ARTICLE, JOURNAL ARTICLE, MAGAZINE ARTICLE and NEWSPAPER ARTICLE place it in the relatedItem branch.
    if (!preg_match("/^(Abstract|Book Chapter|Conference Article|Journal Article|Magazine Article|Newspaper Article)\$/", $row['type'])) {
        // name
        //   editor
        if (!empty($row['editor'])) {
            $editor = modsStripEditorMarker($row['editor']);
            $author = modsStripEditorMarker($row['author']);

            // don't repeat the editor(s) if they were already written as author(s)
            if ($editor != $author) {
                $nameArray = separateNames("editor", $namesDelimiter, $namePartsDelimiter, $initialsPattern, $editor, "personal", "editor");

                foreach ($nameArray as $singleName) {
                    $record->addXMLBranch($singleName);
                }
            }
        }

        //   corporate
        if (!empty($row['corporate_author'])) {
            $record->addXMLBranch(modsCorporateNameBranch($row));
        }

        //   conference
        if (!empty($row['conference'])) {
            $record->addXMLBranch(modsConferenceNameBranch($row['conference']));
        }

        // genre
        //   type
        //      NOTE: Is there a better MARC genre[1] for 'manuscript?'
        //            [1]<http://www.loc.gov/marc/sourcecode/genre/genrelist.html>
        if (empty($row['thesis'])) { // theses will get their own genre (see below)
            $genremarc = new XMLBranch("genre");
            $genre = new XMLBranch("genre");

            // NOTE: According to the MARC "Source Codes for Genre"[1]
            //       the MARC authority should be 'marcgt', not 'marc'.
            //       [1]<http://www.loc.gov/marc/sourcecode/genre/genresource.html>
            $genremarc->setTagAttribute("authority", "marcgt");

            if ($row['type'] == "Book Whole") {
                $record->setTagContent("monographic", "mods/originInfo/issuance");
                $genremarc->setTagContent("book");
            } elseif ($row['type'] == "Conference Volume") {
                $genremarc->setTagContent("conference publication");
            } elseif ($row['type'] == "Journal") {
                $genremarc->setTagContent("periodical");
                $genre->setTagContent("academic journal");
            } elseif ($row['type'] == "Manual") { // should we set '<issuance>monographic' here (and for the ones below)?
                $genremarc->setTagContent("instruction");
                $genre->setTagContent("manual");
            } elseif ($row['type'] == "Manuscript") {
                $genremarc->setTagContent("loose-leaf");
                $genre->setTagContent("manuscript");
            } elseif ($row['type'] == "Map") {
                $genremarc->setTagContent("map");
            } elseif ($row['type'] == "Miscellaneous") {
                $genre->setTagContent("miscellaneous");
            } elseif ($row['type'] == "Patent") {
                $genremarc->setTagContent("patent");
            } elseif ($row['type'] == "Report") {
                $genremarc->setTagContent("technical report");
                $genre->setTagContent("report");
            } elseif ($row['type'] == "Software") {
                // $genremarc->setTagContent("programmed text"); // would this be correct?
                $genre->setTagContent("software");
            } elseif (!empty($row['type'])) { // catch-all: don't use a MARC genre
                $genre->setTagContent(encodeXMLField('type', $row['type']));
            }

            // only write the genre elements that actually received content
            if ($genremarc->hasLeaf()) {
                $record->addXMLBranch($genremarc);
            }

            if ($genre->hasLeaf()) {
                $record->addXMLBranch($genre);
            }
        } else { // if (!empty($row['thesis']))
            $record->setTagContent("monographic", "mods/originInfo/issuance");

            foreach (modsThesisGenreBranches($row['thesis']) as $thesisGenreBranch) {
                $record->addXMLBranch($thesisGenreBranch);
            }
        }

        // physicalDescription
        //   pages
        if (!empty($row['pages'])) {
            $description = new XMLBranch("physicalDescription");
            $pages = new XMLBranch("extent");
            $pages->setTagAttribute("unit", "pages");

            if (preg_match("/[0-9] *- *[0-9]/", $row['pages'])) { // if a page range
                // split the page range into start and end pages
                list($pagestart, $pageend) = preg_split('/\\s*[-]\\s*/', $row['pages']);

                if ($pagestart < $pageend) { // extents MUST span multiple pages
                    $pages->setTagContent(encodeXMLField('pages', $pagestart), "extent/start");
                    $pages->setTagContent(encodeXMLField('pages', $pageend), "extent/end");
                } else {
                    $pages->setTagContent(encodeXMLField('pages', $row['pages']));
                }
            } elseif (preg_match("/^\\d\\d*\\s*pp?.?\$/", $row['pages'])) { // total number of pages (like "123 pp")
                list($pagetotal) = preg_split('/\\s*pp?/', $row['pages']);
                $pages->setTagContent(encodeXMLField('pages', $pagetotal), "extent/total");
            } else {
                $pages->setTagContent(encodeXMLField('pages', $row['pages']));
            }

            $description->addXMLBranch($pages);
            $record->addXMLBranch($description);
        }

        // identifier
        //   isbn
        if (!empty($row['isbn'])) {
            $record->addXMLBranch(modsIdentifierBranch("isbn", encodeXMLField('isbn', $row['isbn'])));
        }

        //   issn
        if (!empty($row['issn'])) {
            $record->addXMLBranch(modsIdentifierBranch("issn", encodeXMLField('issn', $row['issn'])));
        }

        // series
        if ($hasSeriesInfo) {
            $record->addXMLBranch(serialBranch($row['series_editor'], $row['series_title'], $row['abbrev_series_title'], $row['series_volume'], $row['series_issue']));
        }
    } else { // if (preg_match("/^(Abstract|Book Chapter|Conference Article|Journal Article|Magazine Article|Newspaper Article)$/", $row['type']))
        // relatedItem
        $related = new XMLBranch("relatedItem");
        $related->setTagAttribute("type", "host");

        // title (Publication)
        if (!empty($row['publication'])) {
            $related->setTagContent(encodeXMLField('publication', $row['publication']), "relatedItem/titleInfo/title");
        }

        // title (Abbreviated Journal)
        if (!empty($row['abbrev_journal'])) {
            $titleabbrev = new XMLBranch("titleInfo");
            $titleabbrev->setTagAttribute("type", "abbreviated");
            $titleabbrev->setTagContent(encodeXMLField('abbrev_journal', $row['abbrev_journal']), "titleInfo/title");
            $related->addXMLBranch($titleabbrev);
        }

        // name
        //   editor
        if (!empty($row['editor'])) {
            $editor = modsStripEditorMarker($row['editor']);
            $nameArray = separateNames("editor", $namesDelimiter, $namePartsDelimiter, $initialsPattern, $editor, "personal", "editor");

            foreach ($nameArray as $singleName) {
                $related->addXMLBranch($singleName);
            }
        }

        //   corporate
        if (!empty($row['corporate_author'])) {
            $related->addXMLBranch(modsCorporateNameBranch($row));
        }

        //   conference
        if (!empty($row['conference'])) {
            $related->addXMLBranch(modsConferenceNameBranch($row['conference']));
        }

        // originInfo
        $relorigin = new XMLBranch("originInfo");

        // dateIssued
        if (!empty($row['year'])) {
            $relorigin->setTagContent(encodeXMLField('year', $row['year']), "originInfo/dateIssued");
        }

        // publisher
        if (!empty($row['publisher'])) {
            $relorigin->setTagContent(encodeXMLField('publisher', $row['publisher']), "originInfo/publisher");
        }

        // place
        if (!empty($row['place'])) {
            $relorigin->setTagContent(encodeXMLField('place', $row['place']), "originInfo/place/placeTerm");
            $relorigin->setTagAttribute("type", "text", "originInfo/place/placeTerm");
        }

        // edition
        if (!empty($row['edition'])) {
            $relorigin->setTagContent(encodeXMLField('edition', $row['edition']), "originInfo/edition");
        }

        if ($relorigin->hasBranch()) {
            $related->addXMLBranch($relorigin);
        }

        // genre (and originInfo/issuance)
        if (empty($row['thesis'])) { // theses will get their own genre (see below)
            if (preg_match("/^(Journal Article|Magazine Article)\$/", $row['type'])) {
                $related->setTagContent("continuing", "relatedItem/originInfo/issuance");

                $genremarc = new XMLBranch("genre");
                $genre = new XMLBranch("genre");

                $genremarc->setTagContent("periodical");
                $genremarc->setTagAttribute("authority", "marcgt");

                if ($row['type'] == "Magazine Article") {
                    $genre->setTagContent("magazine");
                } else {
                    $genre->setTagContent("academic journal");
                }

                $related->addXMLBranch($genremarc);
                $related->addXMLBranch($genre);
            } elseif ($row['type'] == "Abstract") {
                // the genre of an abstract describes the record itself, not its host
                $record->setTagContent("abstract or summary", "mods/genre");
                $record->setTagAttribute("authority", "marcgt", "mods/genre");
            } elseif ($row['type'] == "Conference Article") {
                $related->setTagContent("conference publication", "relatedItem/genre");
                $related->setTagAttribute("authority", "marcgt", "relatedItem/genre");
            } elseif ($row['type'] == "Newspaper Article") {
                $related->setTagContent("continuing", "relatedItem/originInfo/issuance");
                $related->setTagContent("newspaper", "relatedItem/genre");
                $related->setTagAttribute("authority", "marcgt", "relatedItem/genre");
            } else { // if ($row['type'] == "Book Chapter")
                $related->setTagContent("monographic", "relatedItem/originInfo/issuance");
                $related->setTagContent("book", "relatedItem/genre");
                $related->setTagAttribute("authority", "marcgt", "relatedItem/genre");
            }
        } else { // if (!empty($row['thesis']))
            foreach (modsThesisGenreBranches($row['thesis']) as $thesisGenreBranch) {
                $related->addXMLBranch($thesisGenreBranch);
            }
        }

        // part
        if (!empty($row['year']) || !empty($row['volume']) || !empty($row['issue']) || !empty($row['pages'])) {
            $part = new XMLBranch("part");

            if (!empty($row['year'])) {
                // The tag path is rooted at the branch's own tag name ("part/..."), as is
                // every other tag path in this function (previously the bare path "date"
                // was given here).
                // NOTE(review): assumes the XML class only resolves rooted paths -- confirm
                $part->setTagContent(encodeXMLField('year', $row['year']), "part/date");
            }

            if (!empty($row['volume'])) {
                $detailvolume = new XMLBranch("detail");
                $detailvolume->setTagContent(encodeXMLField('volume', $row['volume']), "detail/number");
                $detailvolume->setTagAttribute("type", "volume");
                $part->addXMLBranch($detailvolume);
            }

            if (!empty($row['issue'])) {
                $detailnumber = new XMLBranch("detail");
                $detailnumber->setTagContent(encodeXMLField('issue', $row['issue']), "detail/number");
                $detailnumber->setTagAttribute("type", "issue");
                $part->addXMLBranch($detailnumber);
            }

            if (!empty($row['pages'])) {
                if (preg_match("/[0-9] *- *[0-9]/", $row['pages'])) { // if a page range
                    // split the page range into start and end pages
                    list($pagestart, $pageend) = preg_split('/\\s*[-]\\s*/', $row['pages']);

                    if ($pagestart < $pageend) { // extents MUST span multiple pages
                        $pages = new XMLBranch("extent");
                        $pages->setTagContent(encodeXMLField('pages', $pagestart), "extent/start");
                        $pages->setTagContent(encodeXMLField('pages', $pageend), "extent/end");
                        $pages->setTagAttribute("unit", "page");
                    } else {
                        $pages = new XMLBranch("detail");

                        if ($pagestart == $pageend) { // single-page item
                            $pages->setTagContent(encodeXMLField('pages', $pagestart), "detail/number");
                        } else {
                            $pages->setTagContent(encodeXMLField('pages', $row['pages']), "detail/number");
                        }

                        $pages->setTagAttribute("type", "page");
                    }
                } else {
                    $pages = new XMLBranch("detail");
                    $pages->setTagContent(encodeXMLField('pages', $row['pages']), "detail/number");
                    $pages->setTagAttribute("type", "page");
                }

                $part->addXMLBranch($pages);
            }

            $related->addXMLBranch($part);
        }

        // identifier
        //   isbn
        if (!empty($row['isbn'])) {
            $related->addXMLBranch(modsIdentifierBranch("isbn", encodeXMLField('isbn', $row['isbn'])));
        }

        //   issn
        if (!empty($row['issn'])) {
            $related->addXMLBranch(modsIdentifierBranch("issn", encodeXMLField('issn', $row['issn'])));
        }

        // series
        if ($hasSeriesInfo) {
            $related->addXMLBranch(serialBranch($row['series_editor'], $row['series_title'], $row['abbrev_series_title'], $row['series_volume'], $row['series_issue']));
        }

        $record->addXMLBranch($related);
    }
    // --- END TYPE == ABSTRACT || BOOK CHAPTER || CONFERENCE ARTICLE || JOURNAL ARTICLE || MAGAZINE ARTICLE || NEWSPAPER ARTICLE ---

    return $record;
}
/**
 * Creates the root element for an OpenSearch response.
 *
 * - "Error":       the Atom feed root as returned by 'atomGenerateBaseTags()'
 *                  (OpenSearch Atom XML is used for diagnostics)
 * - "Description": an <OpenSearchDescription> root element with its namespace declarations
 *
 * @param string $openSearchOperation  "Error" or "Description"
 * @return XML|null                    the root element, or null for any other operation
 */
function openSearchGenerateBaseTags($openSearchOperation)
{
    // Explicit default: for an unsupported operation the function returns null
    // (without tripping over an undefined variable at the 'return' statement).
    $atomCollection = null;

    if ($openSearchOperation == "Error") { // OpenSearch Atom XML is used for diagnostics
        $atomCollection = atomGenerateBaseTags($openSearchOperation);
    } elseif ($openSearchOperation == "Description") { // OpenSearch Description XML
        $atomCollection = new XML("OpenSearchDescription");
        $atomCollection->setTagAttribute("xmlns", "http://a9.com/-/spec/opensearch/1.1/");
        $atomCollection->setTagAttribute("xmlns:opensearch", "http://a9.com/-/spec/opensearch/1.1/");
        $atomCollection->setTagAttribute("xmlns:mozilla", "http://www.mozilla.org/2006/browser/search/");
    }

    return $atomCollection;
}
// Builds a single ODF spreadsheet row ('table:table-row') from an array.
//
// $recordExportArray: array whose keys are used as cell text for a heading
//                     row and whose values are used as cell text otherwise
// $rowType:           "heading" produces a row with style "ro1" that contains
//                     the array keys; any other value produces a row with
//                     style "ro2" that contains the array values
//
// Returns the XML object representing the row. One 'table:table-cell' branch
// is added per array element, so the column count always equals the number of
// elements in '$recordExportArray'.
function odfSpreadsheetTableRow($recordExportArray, $rowType)
{
    // create an XML object for a single record
    $record = new XML("table:table-row");

    if ($rowType == "heading") {
        $record->setTagAttribute("table:style-name", "ro1");

        foreach ($recordExportArray as $odfIndex => $indexValue) {
            $tableCell = new XMLBranch("table:table-cell");
            $tableCell->setTagAttribute("office:value-type", "string");
            $tableCell->setTagContent($odfIndex, "table:table-cell/text:p");
            $record->addXMLBranch($tableCell);
        }
    } else {
        $record->setTagAttribute("table:style-name", "ro2");

        foreach ($recordExportArray as $odfIndex => $indexValue) {
            $tableCell = new XMLBranch("table:table-cell");

            // 'empty()' on its own also treats "0" (and 0) as empty, which would
            // silently blank a cell whose value is zero; numeric values are
            // therefore always written. Truly empty values still yield an
            // empty cell so that the columns stay aligned.
            if (!empty($indexValue) || is_numeric($indexValue)) {
                $tableCell->setTagAttribute("office:value-type", "string");
                $tableCell->setTagContent($indexValue, "table:table-cell/text:p");
            }

            $record->addXMLBranch($tableCell);
        }
    }

    return $record;
}
// Creates the Atom '<feed>' root element and populates it with the feed-level
// elements: title, subtitle, updated, author, generator, icon, logo and the
// links to the OpenSearch description and the unAPI server.
//
// $atomOperation: if "Error", the subtitle reads "Search error!" and the
//                 database name is not converted to UTF-8; for any other
//                 value the subtitle describes the 'WHERE' clause of the
//                 global '$query'
//
// Returns the XML object for the feed.
function atomGenerateBaseTags($atomOperation)
{
    // these variables are specified in 'ini.inc.php'
    global $officialDatabaseName;
    global $databaseBaseURL;
    global $feedbackEmail;
    global $contentTypeCharset;
    global $convertExportDataToUTF8;
    global $logoImageURL;
    global $faviconImageURL;

    global $query;

    $isErrorFeed = ($atomOperation == "Error");

    $feed = new XML("feed");

    // Namespace declarations, applied in the listed order:
    // NOTE: is the unAPI namespace ok? Or should we use "http://unapi.info/specs/",
    //       or maybe something like "http://purl.org/unapi/ns/" ?
    $namespaceAttributes = array(
        "xmlns"            => "http://www.w3.org/2005/Atom",
        "xmlns:opensearch" => "http://a9.com/-/spec/opensearch/1.1/",
        "xmlns:unapi"      => "http://unapi.info/",
        "xmlns:dc"         => "http://purl.org/dc/elements/1.1/",
        "xmlns:dcterms"    => "http://purl.org/dc/terms/",
        "xmlns:prism"      => "http://prismstandard.org/namespaces/1.2/basic/"
    );

    foreach ($namespaceAttributes as $attributeName => $namespaceURI) {
        $feed->setTagAttribute($attributeName, $namespaceURI);
    }

    // function 'encodeHTMLspecialchars()' is defined in 'include.inc.php'
    $feedName = encodeHTMLspecialchars($officialDatabaseName);

    // Convert the database name to UTF-8 (not done for error feeds):
    // (if '$convertExportDataToUTF8' is set to "yes" in 'ini.inc.php' and character encoding is not UTF-8 already)
    if (!$isErrorFeed && $convertExportDataToUTF8 == "yes" && $contentTypeCharset != "UTF-8") {
        // function 'convertToCharacterEncoding()' is defined in 'include.inc.php'
        $feedName = convertToCharacterEncoding("UTF-8", "IGNORE", $feedName);
    }

    // ----------------------------------------------------------

    // Add feed-level tags:
    // (not yet used: category, contributor, rights)

    // - 'title':
    addNewBranch($feed, "title", array("type" => "text"), $feedName);

    // - 'subtitle':
    if ($isErrorFeed) {
        $subtitleText = "Search error!";
    } else {
        // ...extract the 'WHERE' clause from the SQL query to include a natural-language
        // version (well, sort of) within the 'subtitle' element:
        // function 'extractWHEREclause()' is defined in 'include.inc.php'
        $whereClause = extractWHEREclause($query);

        // construct a meaningful feed description based on the actual 'WHERE' clause:
        // TODO: For Atom XML, the query string should not get HTML encoded!
        // functions 'encodeHTML()' and 'explainSQLQuery()' are defined in 'include.inc.php'
        $explainedQuery = explainSQLQuery($whereClause);
        $subtitleText = "Displays records where " . encodeHTML($explainedQuery);
    }

    addNewBranch($feed, "subtitle", array(), $subtitleText);

    // - 'updated':
    //   (TODO: the timestamp in the 'updated' element should really only get updated
    //          if any of the matching records was updated, right?)
    // function 'generateISO8601TimeStamp()' is defined in 'include.inc.php'
    $timeStamp = generateISO8601TimeStamp();
    addNewBranch($feed, "updated", array(), $timeStamp);

    // - 'author':
    $author = new XMLBranch("author");
    $authorDetails = array(
        "author/name"  => $feedName,
        "author/email" => $feedbackEmail,
        "author/uri"   => $databaseBaseURL
    );

    foreach ($authorDetails as $tagPath => $tagContent) {
        $author->setTagContent($tagContent, $tagPath);
    }

    $feed->addXMLBranch($author);

    // - 'generator', 'icon', 'logo':
    $generatorAttributes = array("uri" => "http://www.refbase.net/", "version" => "0.9.5");
    addNewBranch($feed, "generator", $generatorAttributes, "Web Reference Database (http://refbase.sourceforge.net)");

    $iconURL = $databaseBaseURL . $faviconImageURL;
    addNewBranch($feed, "icon", array(), $iconURL);

    $logoURL = $databaseBaseURL . $logoImageURL;
    addNewBranch($feed, "logo", array(), $logoURL);

    // - 'link' (more links will be added in function 'atomCollection()'):
    //   - link to OpenSearch Description file:
    $openSearchURL = $databaseBaseURL . "opensearch.php?operation=explain";
    atomLink($feed, $openSearchURL, "search", "OpenSearch", $feedName);

    //   - link to unAPI server:
    $unapiURL = $databaseBaseURL . "unapi.php";
    atomLink($feed, $unapiURL, "unapi:unapi-server", "unAPI", "unAPI");

    return $feed;
}
// Generates a Dublin Core XML record ('oai_dc:dc' or 'srw_dc:dc') for a
// single database record.
//
// $row:              associative array holding the fields of one record (the
//                    function reads, among others, 'title', 'author', 'editor',
//                    'abstract', 'doi', 'notes', 'isbn', 'issn', 'serial',
//                    'publication', 'abbrev_journal', 'volume', 'issue',
//                    'pages', 'series_*', 'year', 'type', 'thesis', 'medium',
//                    'keywords', 'url', 'file', 'publisher' and 'language')
// $metadataPrefix:   namespace prefix of the root element; "oai_dc" adds
//                    'dc:source' publication/ISSN elements, "srw_dc" adds
//                    PRISM elements instead. If empty, the root element is
//                    named just "dc".
// $addNameSpaceInfo: if true, namespace attributes are set on the root element
//
// Returns the XML object for the record.
//
// Side effects: if no style file is found for the global '$citeStyle', that
// global is overwritten with '$defaultCiteStyle'; the matching citation style
// file is included (once) from the 'cite/' directory.
function oaidcRecord($row, $metadataPrefix = "oai_dc", $addNameSpaceInfo = true)
{
    global $databaseBaseURL; // these variables are defined in 'ini.inc.php'
    global $contentTypeCharset;
    global $fileVisibility;
    global $fileVisibilityException;
    global $filesBaseURL;
    global $convertExportDataToUTF8;
    global $defaultCiteStyle;

    global $citeStyle;

    // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'
    // (all of these stay declared since the citation style file is included in this function's scope)
    global $alnum, $alpha, $cntrl, $dash, $digit, $graph, $lower, $print, $punct, $space, $upper, $word, $patternModifiers;

    // The array '$transtab_refbase_unicode' contains search & replace patterns for conversion from refbase markup to Unicode entities.
    global $transtab_refbase_unicode; // defined in 'transtab_refbase_unicode.inc.php'

    // The array '$transtab_refbase_ascii' contains search & replace patterns for conversion from refbase markup to plain text.
    global $transtab_refbase_ascii; // defined in 'transtab_refbase_ascii.inc.php'

    // Define inline text markup to generate a plain text citation string:
    // (to be included within a 'dcterms:bibliographicCitation' element)
    $markupPatternsArrayPlain = array(
        "bold-prefix"        => "",
        "bold-suffix"        => "",
        "italic-prefix"      => "",
        "italic-suffix"      => "",
        "underline-prefix"   => "",
        "underline-suffix"   => "",
        "endash"             => "-",
        "emdash"             => "-",
        "ampersand"          => "&",
        "double-quote"       => '"',
        "double-quote-left"  => '"',
        "double-quote-right" => '"',
        "single-quote"       => "'",
        "single-quote-left"  => "'",
        "single-quote-right" => "'",
        "less-than"          => "<",
        "greater-than"       => ">",
        "newline"            => "\n"
    );

    // This is a stupid hack that maps the names of the '$row' array keys to those used
    // by the '$formVars' array (which is required by function 'generateCiteKey()')
    // (eventually, the '$formVars' array should use the MySQL field names as names for its array keys)
    $formVars = buildFormVarsArray($row); // function 'buildFormVarsArray()' is defined in 'include.inc.php'

    // Generate or extract the cite key for this record:
    // (to be included within a 'dc:identifier' element)
    $citeKey = generateCiteKey($formVars); // function 'generateCiteKey()' is defined in 'include.inc.php'

    // Generate OpenURL data:
    // (to be included within a 'dc:identifier' element)
    $openURL = openURL($row, "openurl:"); // function 'openURL()' is defined in 'openurl.inc.php'

    // Encode special chars and perform charset conversions:
    foreach ($row as $rowFieldName => $rowFieldValue) {
        // We only convert those special chars to entities which are supported by XML:
        // function 'encodeHTMLspecialchars()' is defined in 'include.inc.php'
        $row[$rowFieldName] = encodeHTMLspecialchars($row[$rowFieldName]);

        // Convert field data to UTF-8:
        // (if '$convertExportDataToUTF8' is set to "yes" in 'ini.inc.php' and character encoding is not UTF-8 already)
        // (Note that charset conversion can only be done *after* the cite key has been generated, otherwise cite key
        //  generation will produce garbled text!)
        // function 'convertToCharacterEncoding()' is defined in 'include.inc.php'
        if ($convertExportDataToUTF8 == "yes" and $contentTypeCharset != "UTF-8") {
            $row[$rowFieldName] = convertToCharacterEncoding("UTF-8", "IGNORE", $row[$rowFieldName]);
        }
    }

    // Defines field-specific search & replace 'actions' that will be applied to all those refbase fields that are listed in the corresponding 'fields' element:
    // (If you don't want to perform any search and replace actions, specify an empty array, like: '$fieldSpecificSearchReplaceActionsArray = array();'.
    //  Note that the search patterns MUST include the leading & trailing slashes -- which is done to allow for mode modifiers such as 'imsxU'.)
    //   "/Search Pattern/" => "Replace Pattern"
    $fieldSpecificSearchReplaceActionsArray = array();

    if ($convertExportDataToUTF8 == "yes") {
        $fieldSpecificSearchReplaceActionsArray[] = array(
            'fields'  => array("title", "publication", "abbrev_journal", "address", "keywords", "abstract", "orig_title", "series_title", "abbrev_series_title", "notes"),
            'actions' => $transtab_refbase_unicode
        );
    }

    // Apply field-specific search & replace 'actions' to all fields that are listed in the 'fields' element of the arrays contained in '$fieldSpecificSearchReplaceActionsArray':
    foreach ($fieldSpecificSearchReplaceActionsArray as $fieldActionsArray) {
        foreach ($row as $rowFieldName => $rowFieldValue) {
            // Strict comparison: with loose comparison on PHP < 8, an integer key such as 0
            // compares equal to every field name, so numerically indexed columns would
            // wrongly be treated as listed fields.
            if (in_array($rowFieldName, $fieldActionsArray['fields'], true)) {
                $row[$rowFieldName] = searchReplaceText($fieldActionsArray['actions'], $rowFieldValue, true); // function 'searchReplaceText()' is defined in 'include.inc.php'
            }
        }
    }

    // Fetch the name of the citation style file that's associated with the style given in '$citeStyle':
    $citeStyleFile = getStyleFile($citeStyle); // function 'getStyleFile()' is defined in 'include.inc.php'

    if (empty($citeStyleFile)) {
        // if the given cite style could not be found, we'll use the default cite style
        // which is defined by the '$defaultCiteStyle' variable in 'ini.inc.php'
        // (note that this overwrites the global '$citeStyle')
        $citeStyle = $defaultCiteStyle;
        $citeStyleFile = getStyleFile($citeStyle);
    }

    // Include the found citation style file *once*:
    include_once "cite/" . $citeStyleFile;

    // Generate a proper citation for this record, ordering attributes according to the chosen output style & record type:
    // - Plain text version of citation string:
    //   (function 'citeRecord()' is defined in the citation style file given in '$citeStyleFile')
    $recordCitationPlain = citeRecord($row, $citeStyle, "", $markupPatternsArrayPlain, false);

    //   Convert any refbase markup that remains in the citation string (such as _italic_ or **bold**) to plain text:
    $recordCitationPlain = searchReplaceText($transtab_refbase_ascii, $recordCitationPlain, true);

    // Convert any remaining refbase markup in the 'title', 'keywords' & 'abstract' fields to plain text:
    $row['title'] = searchReplaceText($transtab_refbase_ascii, $row['title'], true);
    $row['keywords'] = searchReplaceText($transtab_refbase_ascii, $row['keywords'], true);
    $row['abstract'] = searchReplaceText($transtab_refbase_ascii, $row['abstract'], true);

    // Strip any " (ed)" or " (eds)" suffix from author/editor string:
    if (preg_match("/ *\\(eds?\\)\$/", $row['author'])) {
        $row['author'] = preg_replace("/[ \r\n]*\\(eds?\\)/i", "", $row['author']);
    }

    if (preg_match("/ *\\(eds?\\)\$/", $row['editor'])) {
        $row['editor'] = preg_replace("/[ \r\n]*\\(eds?\\)/i", "", $row['editor']);
    }

    // Include a link to any corresponding file if one of the following conditions is met:
    // - the variable '$fileVisibility' (defined in 'ini.inc.php') is set to 'everyone'
    // - the variable '$fileVisibility' is set to 'login' AND the user is logged in
    // - the variable '$fileVisibility' is set to 'user-specific' AND the 'user_permissions' session variable contains 'allow_download'
    // - the array variable '$fileVisibilityException' (defined in 'ini.inc.php') contains a pattern (in array element 1) that matches the contents of the field given (in array element 0)
    //
    // TODO: - the URL-generating code should be made into a dedicated function (since it's shared with 'modsxml.inc.php' and 'atomxml.inc.php')
    $printURL = false;

    // (the parentheses only spell out the grouping that 'and' binding tighter than 'or' already implies)
    if (
        $fileVisibility == "everyone"
        or ($fileVisibility == "login" and isset($_SESSION['loginEmail']))
        or ($fileVisibility == "user-specific" and (isset($_SESSION['user_permissions']) and preg_match("/allow_download/", $_SESSION['user_permissions'])))
        or (!empty($fileVisibilityException) and preg_match($fileVisibilityException[1], $row[$fileVisibilityException[0]]))
    ) {
        if (!empty($row['file'])) {
            if (preg_match('#^(https?|ftp|file)://#i', $row['file'])) {
                $URLprefix = ""; // we don't alter the URL given in the 'file' field
            } else {
                // use the base URL of the standard files directory as prefix:
                if (preg_match('#^/#', $filesBaseURL)) {
                    // absolute path -> file dir is located outside of refbase root dir
                    $URLprefix = 'http://' . $_SERVER['HTTP_HOST'] . $filesBaseURL;
                } else {
                    // relative path -> file dir is located within refbase root dir
                    $URLprefix = $databaseBaseURL . $filesBaseURL;
                }
            }

            $printURL = true;
        }
    }

    // ----------------------------------------------------------

    // Start OAI_DC XML record:

    // Default to no prefix so that an empty '$metadataPrefix' produces a plain "dc"
    // root element without reading an unassigned variable:
    $recordPrefix = "";

    if (!empty($metadataPrefix)) {
        $recordPrefix = $metadataPrefix . ":";
    }

    $record = new XML($recordPrefix . "dc"); // create an XML object for a single record

    if ($addNameSpaceInfo) {
        if ($metadataPrefix == "oai_dc") {
            $record->setTagAttribute("xmlns:oai_dc", "http://www.openarchives.org/OAI/2.0/oai_dc/");
        } elseif ($metadataPrefix == "srw_dc") {
            $record->setTagAttribute("xmlns:srw_dc", "info:srw/schema/1/dc-v1.1");
        }

        $record->setTagAttribute("xmlns:dc", "http://purl.org/dc/elements/1.1/");

        if ($metadataPrefix == "oai_dc") {
            $record->setTagAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
            $record->setTagAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd");
        } elseif ($metadataPrefix == "srw_dc") {
            $record->setTagAttribute("xmlns:prism", "http://prismstandard.org/namespaces/1.2/basic/");
        }
    }

    // Add Dublin Core elements:
    // NOTE: With a few exceptions, we try to adhere to the guidelines given at
    //       "Using simple Dublin Core to describe eprints" by Andy Powell et al.
    //       See: <http://eprints-uk.rdn.ac.uk/project/docs/simpledc-guidelines/>

    // - 'dc:title':
    if (!empty($row['title'])) {
        addMetaElement($record, "dc", "title", array(), $row['title']); // function 'addMetaElement()' is defined in 'webservice.inc.php'
    }

    // - 'dc:creator':
    //   (omitted if the author string equals the editor string)
    if (!empty($row['author']) and $row['author'] != $row['editor']) {
        addMetaElement($record, "dc", "creator", array(), $row['author']);
    }

    // - 'dc:creator':
    //   TODO: add refbase corporate author(s) as 'dc:creator'

    // - 'dc:contributor':
    if (!empty($row['editor'])) {
        addMetaElement($record, "dc", "contributor", array(), $row['editor']);
    }

    // - 'dc:description':
    if (!empty($row['abstract'])) {
        addMetaElement($record, "dc", "description", array(), $row['abstract']);
    }

    // - 'dc:identifier':

    //   - DOI:
    if (!empty($row['doi'])) {
        addMetaElement($record, "dc", "identifier", array(), $row['doi'], "doi");
    }

    //   - PMID:
    //     (the complete 'notes' field is handed to 'addMetaElement()' together with the "pmid" type)
    if (!empty($row['notes']) and preg_match("/PMID *: *\\d+/i", $row['notes'])) {
        addMetaElement($record, "dc", "identifier", array(), $row['notes'], "pmid");
    }

    //   - arXiv:
    if (!empty($row['notes']) and preg_match("/arXiv *: *[^ ;]+/i", $row['notes'])) {
        addMetaElement($record, "dc", "identifier", array(), $row['notes'], "arxiv");
    }

    //   - ISBN:
    if (!empty($row['isbn'])) {
        addMetaElement($record, "dc", "identifier", array(), $row['isbn'], "isbn");
    }

    //   - OpenURL:
    addMetaElement($record, "dc", "identifier", array(), $openURL, "openurl");

    //   - refbase ID:
    addMetaElement($record, "dc", "identifier", array(), $databaseBaseURL . generateURL("show.php", "html", array("record" => $row['serial']), true), "url");

    //   - Cite key:
    addMetaElement($record, "dc", "identifier", array(), $citeKey, "citekey");

    //   - Bibliographic citation:
    //     NOTE: In 'atomxml.inc.php', the bibliographic citation is put into a
    //           'dcterms:bibliographicCitation' element so that it can be uniquely
    //           identified and extracted easily. However, in case of simple Dublin
    //           Core output, we just put it into a 'dc:identifier' element and
    //           use a "citation:" prefix.
    addMetaElement($record, "dc", "identifier", array(), encodeHTMLspecialchars($recordCitationPlain), "citation");

    // - 'dc:source':
    //   NOTE: - In <http://eprints-uk.rdn.ac.uk/project/docs/simpledc-guidelines/>,
    //           Andy Powell et al. recommend that this element should NOT be used!
    //           However, we use 'dc:source' elements for publication & series info
    //           (publication/series title plus volume & issue) to provide a dedicated
    //           source string that's easily readable and parsable.
    //           Example: <dc:source>Polar Biology, Vol. 25, No. 10</dc:source>
    //         - While we could also append the page info to the publication
    //           'dc:source' element, this info is more pertinent to the article
    //           itself and is thus not included. For 'srw_dc:dc' output, page info is
    //           included in PRISM elements (see below).
    //         - All metadata (including the page info) are also provided as a machine
    //           parsable citation in form of an OpenURL ContextObject (see above).

    //   - Publication info:
    //     NOTE: We only include the 'dc:source' element for 'oai_dc:dc' output. In case of 'srw_dc:dc'
    //           output, we use the more fine-grained PRISM elements instead (see below)
    if ($metadataPrefix == "oai_dc" and (!empty($row['publication']) or !empty($row['abbrev_journal']))) {
        if (!empty($row['publication'])) {
            $source = $row['publication'];
        } elseif (!empty($row['abbrev_journal'])) {
            $source = $row['abbrev_journal'];
        }

        if (!empty($row['volume'])) {
            $source .= ", Vol. " . $row['volume'];
        }

        if (!empty($row['issue'])) {
            $source .= ", No. " . $row['issue'];
        }

        if (!empty($source)) {
            addMetaElement($record, "dc", "source", array(), $source);
        }
    }

    //   - Series info:
    if (!empty($row['series_title']) or !empty($row['abbrev_series_title'])) {
        if (!empty($row['series_title'])) {
            $series = $row['series_title'];
        } elseif (!empty($row['abbrev_series_title'])) {
            $series = $row['abbrev_series_title'];
        }

        if (!empty($row['series_volume'])) {
            $series .= ", Vol. " . $row['series_volume'];
        }

        if (!empty($row['series_issue'])) {
            $series .= ", No. " . $row['series_issue'];
        }

        if (!empty($series)) {
            addMetaElement($record, "dc", "source", array(), $series);
        }

        // NOTE: To distinguish between regular publication & series info,
        //       should we better use a "series:" prefix here? If so, use:
        //       addMetaElement($record, "dc", "source", array(), $series, "series");
    }

    //   - ISSN:
    //     NOTE: for 'srw_dc:dc' output, we put the ISSN into the 'prism:issn' element
    if ($metadataPrefix == "oai_dc" and !empty($row['issn'])) {
        addMetaElement($record, "dc", "source", array(), $row['issn'], "issn");
    }

    // - 'dc:date':
    if (!empty($row['year'])) {
        addMetaElement($record, "dc", "date", array(), $row['year']);
    }

    // - 'dc:type':
    //   (the 'thesis' field is passed along as the element type argument)
    if (!empty($row['type'])) {
        addMetaElement($record, "dc", "type", array(), $row['type'], $row['thesis']);
    }

    //   In case of a thesis, we add another 'dc:type' element with the actual thesis type:
    if (!empty($row['thesis'])) {
        addMetaElement($record, "dc", "type", array(), $row['thesis']);
    }

    // - 'dc:format':
    //   TODO: ideally, we should parse the content of the refbase 'medium' field and map it
    //         to a media-type term from <http://www.iana.org/assignments/media-types/>
    if (!empty($row['medium'])) {
        $mediaType = $row['medium'];
    } else {
        $mediaType = "text";
    }

    addMetaElement($record, "dc", "format", array(), $mediaType);

    // - 'dc:subject':
    //   TODO: add user-specific keywords (from field 'user_keys') if the user is logged in
    if (!empty($row['keywords'])) {
        addMetaElement($record, "dc", "subject", array(), $row['keywords']);
    }

    // - 'dc:coverage':
    //   TODO: should we add contents from the refbase 'area' field as 'dc:coverage' element(s)?

    // - 'dc:relation':

    //   - Related URL:
    if (!empty($row['url'])) {
        addMetaElement($record, "dc", "relation", array(), $row['url'], "url");
    }

    //   - Related FILE:
    //     ('$URLprefix' is always assigned whenever '$printURL' has been set to true above)
    if ($printURL) {
        addMetaElement($record, "dc", "relation", array(), $URLprefix . $row['file'], "file");
    }

    // - 'dc:publisher':
    if (!empty($row['publisher'])) {
        addMetaElement($record, "dc", "publisher", array(), $row['publisher']);
    }

    // - 'dc:language':
    //   TODO: convert to ISO notation (i.e. "en" instead of "English", etc)
    if (!empty($row['language'])) {
        addMetaElement($record, "dc", "language", array(), $row['language']);
    }

    // ----------------------------------------------------------

    // Add PRISM elements:
    // NOTE: When using the 'srw_dc' namespace (i.e. 'info:srw/schema/1/dc-v1.1' as detailed at
    //       <http://www.loc.gov/standards/sru/resources/dc-schema.html>), I don't think it's allowed
    //       to include anything but the fifteen elements from simple Dublin Core. Is this correct?
    //       If so, then:
    //
    // TODO: Do we need to put the PRISM elements in <extraRecordData> instead? Or can we put them within
    //       a separate branch outside of (and next to) the '<srw_dc:dc>' element? Or shall we better omit
    //       them entirely?
    //       More info on SRU Extra Data: <http://www.loc.gov/standards/sru/specs/extra-data.html>
    //
    //       See also "Mixing DC metadata with other metadata schemas" in "Guidelines for implementing
    //       Dublin Core in XML" <http://dublincore.org/documents/dc-xml-guidelines/>
    if ($metadataPrefix == "srw_dc") {
        // - 'prism:issn':
        if (!empty($row['issn'])) {
            addMetaElement($record, "prism", "issn", array(), $row['issn']);
        }

        // - 'prism:publicationName':
        if (!empty($row['publication'])) {
            addMetaElement($record, "prism", "publicationName", array(), $row['publication']);
        } elseif (!empty($row['abbrev_journal'])) {
            addMetaElement($record, "prism", "publicationName", array(), $row['abbrev_journal']);
        }

        // - 'prism:publicationDate':
        if (!empty($row['year'])) {
            addMetaElement($record, "prism", "publicationDate", array(), $row['year']);
        }

        // - 'prism:volume':
        if (!empty($row['volume'])) {
            addMetaElement($record, "prism", "volume", array(), $row['volume']);
        }

        // - 'prism:number':
        if (!empty($row['issue'])) {
            addMetaElement($record, "prism", "number", array(), $row['issue']);
        }

        // - 'prism:startingPage', 'prism:endingPage':
        //   TODO: Similar code is used in 'include.inc.php', 'modsxml.inc.php' and 'openurl.inc.php',
        //         so this should be made into a dedicated function!
        if (!empty($row['pages']) and preg_match("/\\d+/i", $row['pages'])) {
            // extract page range (if there's any), otherwise just the first number
            $pages = preg_replace("/^\\D*(\\d+)( *[{$dash}]+ *\\d+)?.*/i{$patternModifiers}", "\\1\\2", $row['pages']);

            // extract starting page
            $startPage = preg_replace("/^\\D*(\\d+).*/i", "\\1", $row['pages']);

            // extract ending page (function 'extractDetailsFromField()' is defined in 'include.inc.php')
            // NOTE: To extract the ending page, we'll use function 'extractDetailsFromField()'
            //       instead of just grabbing a matched regex pattern since it'll also work
            //       when just a number but no range is given (e.g. when startPage = endPage)
            $endPage = extractDetailsFromField("pages", $pages, "/\\D+/", "[-1]");

            // - 'prism:startingPage':
            //   NOTE(review): the starting page is only emitted for page ranges, while the ending page
            //   is always emitted, so a single-page item yields just 'prism:endingPage' -- confirm
            //   that this is intended.
            if (preg_match("/\\d+ *[{$dash}]+ *\\d+/i{$patternModifiers}", $row['pages'])) { // if there's a page range
                addMetaElement($record, "prism", "startingPage", array(), $startPage);
            }

            // - 'prism:endingPage':
            addMetaElement($record, "prism", "endingPage", array(), $endPage);
        }
    }

    return $record;
}