Esempio n. 1
0
/**
 * Exports the records in '$result' using the given export format and sends the
 * exported data (together with matching HTTP headers) to the client.
 *
 * Depending on '$exportType' the data are returned as plain text, as an HTML page,
 * as XML/RSS, as a file download, or are additionally sent by email to the address
 * stored in '$_SESSION['loginEmail']'.
 *
 * @param mixed  $result           result set that gets passed on to 'exportRecords()'
 * @param mixed  $rowOffset        passed on to 'exportRecords()'
 * @param mixed  $showRows         passed on to 'exportRecords()'
 * @param string $exportFormat     name of the export format; falls back to '$defaultExportFormat' if no format file is found
 * @param string $exportType       key that selects mime type & delivery ("text", "html", "email", "xml", "rss", "file")
 * @param string $exportStylesheet stylesheet passed on to 'exportRecords()'; if non-empty, Atom exports are served as 'application/xml'
 * @param mixed  $displayType      passed on to 'exportRecords()'
 * @param mixed  $viewType         passed on to 'displayHTMLhead()'
 * @param mixed  $userID           user whose options get loaded into the global '$userOptionsArray'
 *
 * @return void  the exported data are echoed
 */
function generateExport($result, $rowOffset, $showRows, $exportFormat, $exportType, $exportStylesheet, $displayType, $viewType, $userID)
{
    // these variables are defined in 'ini.inc.php':
    global $officialDatabaseName;
    global $contentTypeCharset;
    global $convertExportDataToUTF8;
    global $defaultExportFormat;
    global $userOptionsArray;
    // '$loc' is made globally available in 'core.php':
    global $loc;

    // get all user options for the current user:
    // (note that '$userOptionsArray' is made globally available)
    $userOptionsArray = getUserOptions($userID); // function 'getUserOptions()' is defined in 'include.inc.php'

    // fetch the path/name of the export format file that's associated with the export format given in '$exportFormat':
    $exportFormatFile = getFormatFile($exportFormat, "export"); // function 'getFormatFile()' is defined in 'include.inc.php'

    if (empty($exportFormatFile)) {
        // if the given export format could not be found, we'll use the default export format
        // which is defined by the '$defaultExportFormat' variable in 'ini.inc.php'
        $exportFormat = $defaultExportFormat;
        $exportFormatFile = getFormatFile($exportFormat, "export");
    }

    // include the found export format file *once*:
    include_once "export/" . $exportFormatFile;

    // export found records using the specified export format:
    // (function 'exportRecords()' is defined in the export format file given in '$exportFormatFile'
    //  which, in turn, must reside in the 'export' directory of the refbase root directory)
    $exportText = exportRecords($result, $rowOffset, $showRows, $exportStylesheet, $displayType);

    // classify the requested export type & format once:
    $isFileExport = (bool) preg_match("/file/i", $exportType);
    $isHtmlOrEmailExport = (bool) preg_match("/^(html|email)\$/i", $exportType);
    $isEmailExport = (bool) preg_match("/^email\$/i", $exportType);
    $isAtomFormat = (bool) preg_match("/Atom/i", $exportFormat);
    $isOdfFormat = (bool) preg_match("/ODF|OpenDocument/i", $exportFormat);

    // defaults, so that the header calls below never receive an undefined variable
    // (e.g. if '$exportType' matches none of the known keys):
    $exportContentType = "text/plain"; // the default mime type
    $exportFileName = "exported_records.txt"; // the default download file name

    // adjust the mime type based on the key given in '$exportType':
    if (preg_match("/text/i", $exportType)) {
        $exportContentType = "text/plain";
    } elseif ($isHtmlOrEmailExport) {
        $exportContentType = "text/html";
    } elseif (preg_match("/xml/i", $exportType)) {
        // NOTE: Firefox >=2.x, Safari >=2.x and IE >=7.x break client-side XSL for RSS and Atom feeds!
        //       See e.g.: <http://decafbad.com/blog/2006/11/02/firefox-20-breaks-client-side-xsl-for-rss-and-atom-feeds>
        // TODO: Re-evaluate: As a consequence, a VERY dirty hack in 'atomxml.inc.php' prevents the feed sniffing
        //       (and the subsequent browser-applied default XSLT stylesheet) by inserting a comment larger than
        //       512 bytes before the feed element. See: <http://feedme.mind-it.info/pivot/entry.php?id=9>
        //
        //       For some browsers (such as Camino <http://caminobrowser.org/>) it's possible to set the content type
        //       to 'application/xml' which (while incorrect for Atom/RSS) will cause the browser to trigger their
        //       XML+XSLT renderer if the Atom/RSS feed was requested together with a stylesheet.
        //
        //       If the content type is set to 'application/atom+xml', Firefox 2 and Safari 2 will always apply
        //       their own default XSLT stylesheet and ignore any client-side XSL transformation!
        if ($isAtomFormat and empty($exportStylesheet)) {
            $exportContentType = "application/atom+xml";
        } else {
            $exportContentType = "application/xml";
        }
    } elseif (preg_match("/rss/i", $exportType)) {
        $exportContentType = "application/rss+xml";
    } elseif ($isFileExport) {
        // Note that we do some "quick'n dirty" guessing for some export formats here (e.g., we assume/require that an
        // XML export format name contains 'XML' within its name!). This is in NO way fool proof and should be handled
        // in a better way!
        if (preg_match("/XML/i", $exportFormat)) {
            $exportContentType = $isAtomFormat ? "application/atom+xml" : "application/xml";

            if ($isAtomFormat) {
                // the export format name contains 'Atom'
                $exportFileName = "atom_export.xml";
            } elseif (preg_match("/SRW_DC/i", $exportFormat)) {
                // the export format name contains 'SRW_DC'
                $exportFileName = "srw_dc_export.xml";
            } elseif (preg_match("/SRW_MODS/i", $exportFormat)) {
                // the export format name contains 'SRW_MODS'
                $exportFileName = "srw_mods_export.xml";
            } elseif (preg_match("/SRW/i", $exportFormat)) {
                // the export format name contains 'SRW' (fallback)
                $exportFileName = "srw_export.xml";
            } elseif (preg_match("/^MODS/i", $exportFormat)) {
                // the export format name starts with 'MODS' (NOTE: the regex pattern must not match "SRW_MODS XML")
                $exportFileName = "mods_export.xml";
            } elseif (preg_match("/^(OAI_)?DC/i", $exportFormat)) {
                // the export format name starts with 'OAI_DC' or 'DC' (NOTE: the regex pattern must not match "SRW_DC XML")
                $exportFileName = "oaidc_export.xml";
            } elseif ($isOdfFormat) {
                // we are inside the "file" branch already, so the ODF data are always delivered as a spreadsheet download
                $exportContentType = "application/vnd.oasis.opendocument.spreadsheet";
                $exportFileName = "odf_export.ods";
            } elseif (preg_match("/Word/i", $exportFormat)) {
                // the export format name contains 'Word'
                $exportFileName = "msword_export.xml";
            } else {
                $exportFileName = "export.xml";
            }
        } elseif (preg_match("/ADS/i", $exportFormat)) {
            $exportFileName = "ads_export.txt";
        } elseif (preg_match("/BibTeX/i", $exportFormat)) {
            $exportFileName = "bibtex_export.bib";
        } elseif (preg_match("/Endnote/i", $exportFormat)) {
            $exportFileName = "endnote_export.enw";
        } elseif (preg_match("/ISI/i", $exportFormat)) {
            $exportFileName = "isi_export.txt";
        } elseif (preg_match("/RIS/i", $exportFormat)) {
            $exportFileName = "ris_export.ris";
        }
        // otherwise the default download file name (set above) is kept
    }

    // if variable '$convertExportDataToUTF8' is set to "yes" in 'ini.inc.php', we'll convert latin1 data to UTF-8
    // when exporting to XML; therefore, we'll need to temporarily set the value of the global '$contentTypeCharset'
    // variable to UTF-8 which will ensure proper HTML output
    $charsetOverridden = false;

    if ($convertExportDataToUTF8 == "yes" and $contentTypeCharset != "UTF-8") {
        $oldContentTypeCharset = $contentTypeCharset; // remember the actual database charset
        $oldOfficialDatabaseName = $officialDatabaseName; // remember the database name as originally encoded

        // if the database charset is not "UTF-8" then we'll also need to temporarily convert any higher ASCII chars
        // in variables which get included within the HTML output
        $officialDatabaseName = convertToCharacterEncoding("UTF-8", "IGNORE", $officialDatabaseName); // function 'convertToCharacterEncoding()' is defined in 'include.inc.php'
        $contentTypeCharset = "UTF-8";

        // Remember that we changed the globals. The restore step must NOT re-test '$contentTypeCharset != "UTF-8"'
        // since that condition is false by now and the globals would never get restored.
        $charsetOverridden = true;
    }

    // set the appropriate mimetype & set the character encoding to the one given in '$contentTypeCharset':
    setHeaderContentType($exportContentType, $contentTypeCharset); // function 'setHeaderContentType()' is defined in 'include.inc.php'

    if ($isFileExport) {
        // instruct the browser to download the resulting file:
        header('Content-Disposition: attachment; filename="' . $exportFileName . '"');
    } elseif ($isHtmlOrEmailExport) {
        if ($isEmailExport) {
            $emailRecipient = $_SESSION['loginEmail'];
            $emailSubject = "Your records from the " . $officialDatabaseName . " (exported to " . $exportFormat . " format)";
            $emailBody = $exportText;
            sendEmail($emailRecipient, $emailSubject, $emailBody); // function 'sendEmail()' is defined in 'include.inc.php'
        }

        // call the 'displayHTMLhead()' function (defined in 'header.inc.php'):
        displayHTMLhead(encodeHTML($officialDatabaseName) . " -- Exported Data", "index,follow", "Data exported from the " . encodeHTML($officialDatabaseName), "", false, "", $viewType, array());

        $exportText = "\n\t<pre>\n" . encodeHTML($exportText) . "\n\t</pre>\n</body>\n</html>\n";

        if ($isEmailExport) {
            // NOTE(review): the login email address is written into the page without HTML encoding -- confirm that
            //               it is validated when it gets stored in the session
            $exportText = "\n\t<p>" . "\n\t\t<a href=\"javascript:history.back()\" title=\"" . $loc["LinkTitle_GoBackToResults"] . "\">" . $loc["Go Back"] . "</a>" . "\n\t</p>" . "\n\t<p>" . "\n\t\t<b>The data below have been sent to <a href=\"mailto:" . $_SESSION['loginEmail'] . "\">" . $_SESSION['loginEmail'] . "</a>:</b>" . "\n\t</p>" . $exportText;
        }
    }

    if ($isOdfFormat && $isFileExport) {
        // This is a dirty hack to zip and return an ODF file.
        // It may be desired to return other non-textual formats in the future & to return these as attachments by email.
        // If this becomes needed, we should refactor the output.
        $zipfile = zipODF($exportText); // function 'zipODF()' is defined in 'odfxml.inc.php'
        echo $zipfile->file();
    } else {
        // we'll present the output within the _same_ browser window:
        // (note that we don't use a popup window here, since this may be blocked by particular browsers)
        echo $exportText;
    }

    // Restore the globals only after all output has been produced, so that everything above runs
    // with the same charset settings as before:
    if ($charsetOverridden) {
        $contentTypeCharset = $oldContentTypeCharset; // restore the actual database charset
        $officialDatabaseName = $oldOfficialDatabaseName; // restore the database name as originally encoded
    }
}
Esempio n. 2
0
        $sourceIDs = preg_replace("#(?<=^|\\s)(openurl:|http://.+?(?=\\?))#", "", $sourceIDs);
        // Split on any whitespace between DOIs/OpenURLs:
        $idArray = preg_split("/\\s+/", $sourceIDs, -1, PREG_SPLIT_NO_EMPTY);
        // Try to retrieve information from PubMed.gov before querying CrossRef.org:
        // TODO: Test with $sourceIDs containing a mixture of DOIs and OpenURLs, as well as with $sourceIDs containing DOIs for articles listed in PubMed AND NOT listed in PubMed!
        if (preg_match("#10\\.\\d{4}/\\S+?(?=\$|\\s)#i", $sourceIDs)) {
            list($errors, $sourceText, $idArray) = fetchDOIsFromPubMed($idArray);
            // function 'fetchDOIsFromPubMed()' is defined in 'import.inc.php'
        }
        if (!empty($idArray)) {
            // Fetch record metadata from CrossRef.org for all given DOIs/OpenURLs:
            list($errors, $sourceText) = fetchDataFromCrossRef($idArray, $sourceFormat);
            // function 'fetchDataFromCrossRef()' is defined in 'import.inc.php'
            // In case of a latin1-based database, attempt to convert UTF-8 data to refbase markup & latin1:
            if ($contentTypeCharset == "ISO-8859-1" and detectCharacterEncoding($sourceText) == "UTF-8") {
                $sourceText = convertToCharacterEncoding("ISO-8859-1", "TRANSLIT", $sourceText, "UTF-8");
            }
        } else {
            $sourceFormat = "Pubmed Medline";
        }
    }
}
// --------------------------------------------------------------------
// PARSE SOURCE TEXT:
if (!empty($sourceText) and !empty($sourceFormat)) {
    // fetch the path/name of the import format file that's associated with the import format given in '$sourceFormat':
    $importFormatFile = getFormatFile($sourceFormat, "import");
    // function 'getFormatFile()' is defined in 'include.inc.php'
    if (!empty($importFormatFile)) {
        // Get all cite keys specified by the current user and build an array of uniquified cite keys ('$citeKeysArray')
        // which is used to ensure uniqueness of generated cite keys among all imported records as well as the user's existing records:
Esempio n. 3
0
/**
 * Builds an RTF document that contains the citations of all records in '$result'.
 *
 * Each fetched row is converted from refbase markup to RTF markup, formatted by
 * 'citeRecord()' and appended as a paragraph; for a '$citeOrder' containing "year"
 * or "type", section headings are inserted via 'generateSectionHeading()'.
 *
 * Only '$result', '$rowsFound', '$showRows', '$citeStyle', '$citeOrder', '$citeType'
 * and '$headerMsg' are used by this function; the remaining parameters are accepted
 * but not read here.
 *
 * @param mixed  $result    MySQL result set that is read with 'mysql_fetch_array()'
 * @param int    $rowsFound total number of rows; used as row limit for non-CLI clients
 * @param int    $showRows  row limit that is honoured for CLI clients
 * @param string $citeStyle passed on to 'citeRecord()'
 * @param string $citeOrder sort order; controls whether section headings get printed
 * @param string $citeType  passed on to 'citeRecord()'
 * @param string $headerMsg optional text that is placed into the RTF page header
 *
 * @return string the complete RTF data
 */
function citeRecords($result, $rowsFound, $query, $queryURL, $showQuery, $showLinks, $rowOffset, $showRows, $previousOffset, $nextOffset, $wrapResults, $citeStyle, $citeOrder, $citeType, $orderBy, $headerMsg, $userID, $viewType)
{
    global $contentTypeCharset; // defined in 'ini.inc.php'
    global $client;

    // The array '$transtab_refbase_rtf' contains search & replace patterns for conversion from refbase markup to RTF markup & entities
    global $transtab_refbase_rtf; // defined in 'transtab_refbase_rtf.inc.php'

    // Initialize array variables:
    $yearsArray = array();
    $typeTitlesArray = array();

    // Define inline text markup to be used by the 'citeRecord()' function:
    $markupPatternsArray = array(
        "bold-prefix"        => "{\\b ",
        "bold-suffix"        => "}",
        "italic-prefix"      => "{\\i ",
        "italic-suffix"      => "}",
        "underline-prefix"   => "{\\ul ",
        "underline-suffix"   => "}",
        "endash"             => "\\endash ",
        "emdash"             => "\\emdash ",
        "ampersand"          => "&",
        "double-quote"       => '"',
        "double-quote-left"  => "\\ldblquote ",
        "double-quote-right" => "\\rdblquote ",
        "single-quote"       => "'",
        "single-quote-left"  => "\\lquote ",
        "single-quote-right" => "\\rquote ",
        "less-than"          => "<",
        "greater-than"       => ">",
        "newline"            => "\n{\\f1\\fs24 \\par}\n"
    );

    // Defines search & replace 'actions' that will be applied upon RTF output to all those refbase fields that are listed
    // in the corresponding 'fields' element:
    $rtfSearchReplaceActionsArray = array(
        array(
            'fields'  => array("title", "publication", "abbrev_journal", "address", "keywords", "abstract", "orig_title", "series_title", "abbrev_series_title", "notes"),
            'actions' => $transtab_refbase_rtf
        )
    );

    // For CLI queries, we'll allow paging thru the result set, i.e. we honour the values of the CLI options '-S|--start' ('$rowOffset')
    // and '-R|--rows' ('$showRows') ('$rowOffset' and '$showRows' are re-assigned in function 'seekInMySQLResultsToOffset()' in 'include.inc.php')
    if (preg_match("/^cli/i", $client)) {
        // the query originated from a command line client such as the "refbase" CLI client ("cli-refbase-1.0")
        $showMaxRows = $showRows;
    } else {
        // otherwise show all rows
        $showMaxRows = $rowsFound;
    }

    // Setup the basic RTF document structure (RTF functions defined in 'MINIMALRTF.php'):
    $rtf = new MINIMALRTF(); // initialize RTF object
    $rtfData = $rtf->openRtf(); // create RTF opening tag
    $rtf->createFontBlock(0, "Arial"); // create & set RTF font blocks
    $rtf->createFontBlock(1, "Times New Roman");
    $rtfData .= $rtf->setFontBlock();

    // Header:
    if (!empty($headerMsg)) {
        // Remove any colon (":") from end of header message:
        $headerMsg = trimTextPattern($headerMsg, ":", false, true); // function 'trimTextPattern()' is defined in 'include.inc.php'

        // Decode any HTML entities:
        // (these may occur in the header message e.g. if the user's preferred display language is not English but German or French, etc)
        $headerMsg = decodeHTML($contentTypeCharset, $headerMsg); // function 'decodeHTML()' is defined in 'include.inc.php'

        // Convert refbase markup in the header message into appropriate RTF markup & entities:
        $headerMsg = searchReplaceText($transtab_refbase_rtf, $headerMsg, true); // function 'searchReplaceText()' is defined in 'include.inc.php'

        $rtfData .= "{\\header\\pard\\qc {$headerMsg}\\par}\n";
    }

    // by default, we'll justify text and set a hanging indent (left indent: 0.5, right indent: 0, first-line indent: -0.5)
    $rtfData .= $rtf->justify("full", 0.5, 0, -0.5);

    // LOOP OVER EACH RECORD:
    // Fetch one page of results (or less if on the last page)
    // (i.e., upto the limit specified in $showMaxRows) fetch a row into the $row array and ...
    for ($rowCounter = 0; $rowCounter < $showMaxRows && ($row = @mysql_fetch_array($result)); $rowCounter++) {
        // Apply search & replace 'actions' to all fields that are listed in the 'fields' element of the arrays
        // contained in '$rtfSearchReplaceActionsArray'.
        // NOTE: By default, 'mysql_fetch_array()' returns each column under a numeric AND an associative key. We
        //       therefore look up each listed field name by its exact key instead of testing every row key with a
        //       non-strict 'in_array()': on PHP < 8, the numeric key 0 compares equal to any non-numeric string,
        //       which caused the first column to get converted regardless of its name.
        foreach ($rtfSearchReplaceActionsArray as $fieldActionsArray) {
            foreach ($fieldActionsArray['fields'] as $actionFieldName) {
                if (array_key_exists($actionFieldName, $row)) {
                    $row[$actionFieldName] = searchReplaceText($fieldActionsArray['actions'], $row[$actionFieldName], true); // function 'searchReplaceText()' is defined in 'include.inc.php'
                }
            }
        }

        // Order attributes according to the chosen output style & record type:
        // (function 'citeRecord()' is defined in the citation style file given in '$citeStyleFile' which, in turn,
        //  must reside in the 'cite' directory of the refbase root directory, see function 'generateCitations()')
        $record = citeRecord($row, $citeStyle, $citeType, $markupPatternsArray, false);

        if (empty($record)) {
            continue; // nothing to print for this record
        }

        // Print any section heading(s):
        if (preg_match("/year|type/i", $citeOrder)) {
            // create empty paragraph in front of heading using "Arial" (font block 0) and a font size of 12pt
            $headingPrefix = $rtf->justify("left", 0, 0, 0) . $rtf->paragraph(0, 12);

            // justify any following text and set a hanging indent (left indent: 0.5, right indent: 0, first-line indent: -0.5)
            $headingSuffix = $rtf->justify("full", 0.5, 0, -0.5);

            if ($citeOrder == "type") {
                // for 'citeOrder=type' we'll always print an empty paragraph after the heading
                // (using "Arial" (font block 0) and a font size of 12pt)
                $headingSuffix .= $rtf->paragraph(0, 12);
            }

            // Note that we pass raw RTF commands to 'generateSectionHeading()' (defined in 'cite.inc.php') instead of using the
            // 'textBlock()' function from 'MINIMALRTF.php'. This is due to a current limitation of the 'generateSectionHeading()' function.
            // For 'citeOrder=year', the appropriate call to the 'textBlock()' function would look like this:
            // $rtfData .= $rtf->textBlock(0, 14, $rtf->bold($row['year'])); // major heading with the current year using "Arial" (font block 0), 14pt, bold
            list($yearsArray, $typeTitlesArray, $sectionHeading) = generateSectionHeading($yearsArray, $typeTitlesArray, $row, $citeOrder, $headingPrefix, $headingSuffix, "{\\f0\\fs28 {\\b ", "}\\par}\n", "{\\f0\\fs24 {\\b ", "}\\par}\n");

            $rtfData .= $sectionHeading;
        }

        // If character encoding is not UTF-8 already, convert record text to UTF-8:
        if ($contentTypeCharset != "UTF-8") {
            $record = convertToCharacterEncoding("UTF-8", "IGNORE", $record); // function 'convertToCharacterEncoding()' is defined in 'include.inc.php'
        }

        // Encode characters with an ASCII value of >= 128 in RTF 1.16 unicode format:
        $recordUnicodeCharEncoded = $rtf->utf8_2_unicode($record); // converts UTF-8 chars to unicode character codes

        // Write RTF paragraph:
        // (text block with encoded record text using "Times New Roman" (font block 1) and a font size of 12pt)
        $rtfData .= $rtf->textBlock(1, 12, $recordUnicodeCharEncoded);
    }

    $rtfData .= $rtf->closeRtf(); // create RTF closing tag

    return $rtfData;
}
Esempio n. 4
0
/**
 * Encodes a single field value for HTML or XML output and applies search & replace actions to it.
 *
 * Processing order:
 *  1. If '$encode' is true and '$fieldName' is not listed in '$encodingExceptionsArray', the value is encoded:
 *     - "HTML": via 'encodeHTML()'
 *     - "XML":  via 'encodeHTMLspecialchars()', followed by a conversion to UTF-8 if '$convertExportDataToUTF8'
 *               is "yes" and '$contentTypeCharset' isn't "UTF-8" already
 *  2. The locally defined actions in '$localSearchReplaceActionsArray' are applied.
 *  3. For "HTML" only, the globally defined actions in '$searchReplaceActionsArray' are applied.
 *
 * Field names are compared strictly. Callers may pass numeric column indexes as '$fieldName', and on PHP < 8
 * a non-strict comparison treats index 0 as equal to every non-numeric field name.
 *
 * @param mixed  $fieldName                      name (key) of the field
 * @param mixed  $fieldValue                     value of the field
 * @param array  $localSearchReplaceActionsArray list of arrays with a 'fields' and an 'actions' element
 * @param array  $encodingExceptionsArray        names of fields that must not be encoded
 * @param bool   $encode                         whether the value shall be encoded at all
 * @param string $targetFormat                   "HTML" or "XML"
 *
 * @return mixed the processed field value
 */
function encodeField($fieldName, $fieldValue, $localSearchReplaceActionsArray = array(), $encodingExceptionsArray = array(), $encode = true, $targetFormat = "HTML")
{
    // these variables are defined in 'ini.inc.php':
    global $contentTypeCharset;
    global $convertExportDataToUTF8;
    global $searchReplaceActionsArray;

    if ($encode and !in_array($fieldName, $encodingExceptionsArray, true)) {
        if ($targetFormat == "HTML") {
            // Encode non-ASCII chars as HTML entities:
            $fieldValue = encodeHTML($fieldValue);
        } elseif ($targetFormat == "XML") {
            // Only convert those special chars to entities which are supported by XML:
            $fieldValue = encodeHTMLspecialchars($fieldValue);

            // Convert field data to UTF-8:
            if ($convertExportDataToUTF8 == "yes" and $contentTypeCharset != "UTF-8") {
                $fieldValue = convertToCharacterEncoding("UTF-8", "IGNORE", $fieldValue);
            }
        }
    }

    // Apply *locally* defined search & replace 'actions' to all fields that are listed
    // in the 'fields' element of the arrays contained in '$localSearchReplaceActionsArray':
    foreach ($localSearchReplaceActionsArray as $fieldActionsArray) {
        if (in_array($fieldName, $fieldActionsArray['fields'], true)) {
            $fieldValue = searchReplaceText($fieldActionsArray['actions'], $fieldValue, true);
        }
    }

    // Apply *globally* defined search & replace 'actions' to all fields that are listed
    // in the 'fields' element of the arrays contained in '$searchReplaceActionsArray':
    // (HTML output only; skipped if the global array hasn't been set up)
    if ($targetFormat == "HTML" and is_array($searchReplaceActionsArray)) {
        foreach ($searchReplaceActionsArray as $fieldActionsArray) {
            if (in_array($fieldName, $fieldActionsArray['fields'], true)) {
                $fieldValue = searchReplaceText($fieldActionsArray['actions'], $fieldValue, true);
            }
        }
    }

    return $fieldValue;
}
Esempio n. 5
0
function arxivToRefbase(&$feed, $importRecordsRadio, $importRecordNumbersArray)
{
    global $alnum, $alpha, $cntrl, $dash, $digit, $graph, $lower, $print, $punct, $space, $upper, $word, $patternModifiers;
    // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'
    global $contentTypeCharset;
    // defined in 'ini.inc.php'
    global $errors;
    global $showSource;
    // Pattern by which multiple persons are separated within the author, editor or series editor fields of the source data:
    // (Notes: - name standardization occurs after multiple author fields have been merged by '; '
    //         - the split pattern must be specified as perl-style regular expression (including the leading & trailing
    //           slashes) and may include mode modifiers (such as '/.../i' to perform a case insensitive match))
    $personDelimiter = "/ *; */";
    // Pattern by which a person's family name is separated from the given name (or initials):
    // (the split pattern must be specified as perl-style regular expression (including the leading & trailing
    //  slashes) and may include mode modifiers (such as '/.../i' to perform a case insensitive match))
    $familyNameGivenNameDelimiter = "/ (?=([{$upper}]+[-{$alpha}]+)( *;|\$))/{$patternModifiers}";
    // Specifies whether the person's family name comes first within a person's name
    // ('true' means that the family name is followed by the given name (or initials), 'false' means that the person's family name comes *after* the given name (or initials))
    $familyNameFirst = false;
    // Specifies whether a person's full given name(s) shall be shortened to initial(s):
    // (Notes: - if set to 'true', given names will be abbreviated and initials will get normalized (meaning removal of extra whitespace, adding of dots between initials, etc)
    //         - if set to 'false', given names (and any initials) are taken as is
    //         - in your database, you should stick to either fully written given names OR initials; if you mix these, records won't get sorted correctly on citation output)
    $shortenGivenNames = true;
    // Specifies whether fields whose contents are entirely in upper case shall be transformed to title case ('true') or not ('false'):
    $transformCase = true;
    // Postprocessor actions:
    // Defines search & replace 'actions' that will be applied to all those refbase fields that are listed in the corresponding 'fields' element:
    // (If you don't want to perform any search and replace actions, specify an empty array, like: '$postprocessorActionsArray = array();'.
    //  Note that, in this case, the search patterns MUST include the leading & trailing slashes -- which is done to allow for mode modifiers such as 'imsxU'.)
    //                              "/Search Pattern/" => "Replace Pattern"
    $postprocessorActionsArray = array(array('fields' => array("title", "abstract", "notes"), 'actions' => array("/ *[\n\r]+ */" => " ")), array('fields' => array("title"), 'actions' => array("/[,.;:!] *\$/" => "")));
    // -----------------------------------------
    // PROCESS SOURCE DATA:
    // Initialize array variables:
    $parsedRecordsArray = array();
    // initialize array variable which will hold parsed data of all records that shall be imported
    // NOTE: We do NOT validate records yet, i.e. we assume that they are perfect and attempt to import all of them:
    $importRecordNumbersRecognizedFormatArray = array();
    // initialize array variable which will hold all record numbers of those records that shall be imported AND which were of a recognized format
    $importRecordNumbersNotRecognizedFormatArray = array();
    // same for all records that shall be imported BUT which had an UNrecognized format
    // Use these namespaces to retrieve tags:
    $atomNamespace = 'http://www.w3.org/2005/Atom';
    $opensearchNamespace = 'http://a9.com/-/spec/opensearch/1.1/';
    $arxivNamespace = 'http://arxiv.org/schemas/atom';
    // Get feed data:
    $recordArray = $feed->get_items();
    // fetch all feed items into an array
    $recordsCount = count($recordArray);
    // count how many records are available
    // -----------------------------------------
    // LOOP OVER EACH RECORD:
    for ($i = 0; $i < $recordsCount; $i++) {
        $fieldParametersArray = array();
        // setup an empty array (it will hold all fields that were extracted for a given record)
        $record = $recordArray[$i];
        // this will make things a bit more readable
        // Check for any errors:
        if ($record->get_title() == "Error") {
            $importRecordNumbersNotRecognizedFormatArray[] = $i + 1;
            // append this record number to the list of numbers whose record format is NOT recognized
            $arXivError = $record->get_description();
            // e.g. "incorrect id format for 1234.12345"
            // Prepare an appropriate error message:
            $errorMessage = "Record " . ($i + 1) . ": " . $arXivError . "!";
            if (!isset($errors["sourceText"])) {
                $errors["sourceText"] = $errorMessage;
            } else {
                $errors["sourceText"] = $errors["sourceText"] . "<br>" . $errorMessage;
            }
        } elseif (!$record->get_permalink()) {
            $importRecordNumbersNotRecognizedFormatArray[] = $i + 1;
            // append this record number to the list of numbers whose record format is NOT recognized
            // Prepare an appropriate error message:
            $errorMessage = "Record " . ($i + 1) . ": nothing found!";
            if (!isset($errors["sourceText"])) {
                $errors["sourceText"] = $errorMessage;
            } else {
                $errors["sourceText"] = $errors["sourceText"] . "<br>" . $errorMessage;
            }
        } else {
            // NOTE: We do NOT yet validate any found records, i.e. for now, we'll just assume that they are ok:
            $importRecordNumbersRecognizedFormatArray[] = $i + 1;
            // append this record number to the list of numbers whose record format IS recognized ('$i' starts with 0 so we have to add 1 to point to the correct record number)
            // Extract elements of the current Atom XML entry:
            // - type:
            $fieldParametersArray['type'] = 'Journal Article';
            // NOTE: Are all arXiv records journal articles? TODO: find what isn't & fix the type
            // - id:
            $fieldParametersArray['notes'] = str_replace("http://arxiv.org/abs/", "arXiv:", $record->get_permalink());
            // extract the arXiv ID from the abstract URL in the 'id' element & prefix it with "arXiv:"
            // - title:
            $fieldParametersArray['title'] = $record->get_title();
            // - summary:
            if ($abstract = $record->get_description()) {
                $fieldParametersArray['abstract'] = $abstract;
            }
            // - author:
            // NOTE: If we didn't want to extract author affiliation info, we could just use standard SimplePie functions ('get_authors()' and 'get_name()')
            $authorsArray = array();
            $addressArray = array();
            $authors = $record->get_item_tags($atomNamespace, 'author');
            foreach ($authors as $author) {
                $authorName = "";
                $authorLastName = "";
                $authorAddressArray = "";
                if (isset($author['child'][$atomNamespace]['name']) and $authorName = $author['child'][$atomNamespace]['name'][0]['data']) {
                    // -- name:
                    // In case of a latin1-based database, attempt to convert UTF-8 data to refbase markup & latin1:
                    // NOTE: For authors, we need to perform charset conversion up here (and not further down below, as is done for all the other fields),
                    //       since otherwise the below '$upper' and '$alpha' character class elements would fail to match!
                    if ($contentTypeCharset == "ISO-8859-1" and detectCharacterEncoding($authorName) == "UTF-8") {
                        // function 'detectCharacterEncoding()' is defined in 'include.inc.php'
                        $authorName = convertToCharacterEncoding("ISO-8859-1", "TRANSLIT", $authorName, "UTF-8");
                    }
                    // function 'convertToCharacterEncoding()' is defined in 'include.inc.php'
                    // Change the formatting of author names to the one used by refbase, i.e. the family name comes first, and a comma separates family name & initials:
                    // (further standardisation of person names is done in function 'standardizeFieldData()'; see also note for '$familyNameGivenNameDelimiter' above)
                    // NOTE: With the above settings for '$familyNameGivenNameDelimiter' and '$familyNameFirst' this isn't necessary anymore
                    //						$authorName = preg_replace("/^(.+?) +([$upper]+[-$alpha]+)$/$patternModifiers", "\\2, \\1", $authorName);
                    $authorsArray[] = $authorName;
                    // -- arxiv:affiliation:
                    if (isset($author['child'][$arxivNamespace]) and $authorAffiliations = $author['child'][$arxivNamespace]['affiliation']) {
                        foreach ($authorAffiliations as $authorAffiliation) {
                            $authorAddressArray[] = $authorAffiliation['data'];
                        }
                        $authorAddresses = implode(", ", $authorAddressArray);
                        // In case of a latin1-based database, attempt to convert UTF-8 data to refbase markup & latin1:
                        if ($contentTypeCharset == "ISO-8859-1" and detectCharacterEncoding($authorAddresses) == "UTF-8") {
                            $authorAddresses = convertToCharacterEncoding("ISO-8859-1", "TRANSLIT", $authorAddresses, "UTF-8");
                        }
                        $authorLastName = preg_replace("/^([{$upper}]+[-{$alpha}]+).+\$/{$patternModifiers}", "\\1", $authorName);
                        // extract authors last name
                        $addressArray[] = $authorLastName . ": " . $authorAddresses;
                    }
                }
            }
            if (!empty($authorsArray)) {
                $fieldParametersArray['author'] = implode("; ", $authorsArray);
            }
            // merge multiple authors
            if (!empty($addressArray)) {
                $fieldParametersArray['address'] = implode("; ", $addressArray);
            }
            // merge multiple author affiliations
            // - links:
            //
            // TODO: Currently, we just copy a link to the PDF to the 'file' field. It might be desirable to fetch the actual PDF and store it on the refbase server.
            //
            // NOTE: - In order to extract any links, we access the raw SimplePie object here; This is done since, in SimplePie v1.1.1, the standard SimplePie functions
            //         'get_link()' and 'get_links()' only support checking for the 'rel' attribute, but don't allow to filter on the 'type' or 'title' attribute. However,
            //         we need to check the 'type' & 'title' attributes in order to assign PDF & DOI links to the 'file' & 'doi' fields, respectively. Alternatively, we
            //         could also get this information from the URL itself, but that may fail if arXiv changes its URL pattern.
            //       - More info on how to grab custom tags or attributes: <http://simplepie.org/wiki/tutorial/grab_custom_tags_or_attributes>
            $links = $record->get_item_tags($atomNamespace, 'link');
            foreach ($links as $link) {
                if (isset($link['attribs']['']['href'])) {
                    // -- file:
                    if (!isset($fieldParametersArray['file']) and isset($link['attribs']['']['title']) and $link['attribs']['']['title'] == "pdf") {
                        // we could also check for 'type="application/pdf"'
                        $fieldParametersArray['file'] = $link['attribs']['']['href'];
                    } elseif (!isset($fieldParametersArray['url']) and isset($link['attribs']['']['type']) and $link['attribs']['']['type'] == "text/html") {
                        // we could also check for 'title' being unset
                        $fieldParametersArray['url'] = $link['attribs']['']['href'];
                    } elseif (!isset($fieldParametersArray['doi']) and isset($link['attribs']['']['title']) and $link['attribs']['']['title'] == "doi") {
                        $fieldParametersArray['doi'] = str_replace("http://dx.doi.org/", "", $link['attribs']['']['href']);
                    }
                }
            }
            // - arxiv:comment:
            if ($comment = $record->get_item_tags($arxivNamespace, 'comment')) {
                $fieldParametersArray['notes'] .= "; " . $comment[0]['data'];
            }
            // TODO: if arXiv records can include multiple comments, we'd need to loop over all of them
            // - arxiv:primary_category:
            // TODO: Should we copy the term given in the 'arxiv:primary_category' element to the 'area' field?
            // - arxiv:category:
            $categoriesArray = array();
            foreach ($record->get_categories() as $category) {
                $categoriesArray[] = $category->get_label();
            }
            if (!empty($categoriesArray)) {
                $fieldParametersArray['keywords'] = implode("; ", $categoriesArray);
            }
            // merge multiple categories
            // - arxiv:journal_ref:
            if ($journalRef = $record->get_item_tags($arxivNamespace, 'journal_ref')) {
                // We extract the full 'journal_ref' string into its own variable since we're going to mess with it:
                $journalRefData = preg_replace("/ *[\n\r]+ */", " ", $journalRef[0]['data']);
                // transform whitespace: replace any run of whitespace that includes newline/return character(s) with a space
                // NOTE: The formatting of the 'journal_ref' string can vary heavily, so
                //       the below parsing efforts may fail. Therefore, we'll also copy the
                //       original 'journal_ref' string to the 'notes' field, and display it
                //       in the header message when importing single records.
                $fieldParametersArray['source'] = $journalRefData;
                $fieldParametersArray['notes'] .= "; Journal Ref: " . $journalRefData;
                // Extract source info from the 'journal_ref' string into the different fields:
                // NOTE: We try to use reasonably liberal (and thus rather ugly!) regex patterns
                //       which should catch most of the commonly used formatting styles. However,
                //       as noted above, due to the varying formatting of the 'journal_ref' string,
                //       this may not be always entirely successful.
                // TODO: Extract ISSN from the 'journal_ref' string (see e.g. 'arXiv:cond-mat/0506611v1')
                // -- journal:
                $journalName = preg_replace("/^(.+?)(?= *(\\(?\\d+|[,;]|(v(ol)?\\.?|volume) *\\d+|\$)).*/i", "\\1", $journalRefData);
                // extract journal name
                $journalRefData = preg_replace("/^(.+?)(?= *(\\(?\\d+|[,;]|(v(ol)?\\.?|volume) *\\d+|\$))[,; ]*/i", "", $journalRefData);
                // remove journal name from 'journal_ref' string
                if (preg_match("/\\./", $journalName)) {
                    $fieldParametersArray['abbrev_journal'] = preg_replace("/(?<=\\.)(?![ )]|\$)/", " ", $journalName);
                } else {
                    $fieldParametersArray['publication'] = $journalName;
                }
                // -- volume:
                // NOTE: The volume is assumed to be the first number that follows the journal name, and
                //       which is followed by another four-digit number (which is asssumed to be the year).
                if (preg_match("/^(?:(?:v(?:ol)?\\.?|volume) *)?(\\w*\\d+\\w*)(?= *.*?\\d{4})/i", $journalRefData)) {
                    $fieldParametersArray['volume'] = preg_replace("/^(?:(?:v(?:ol)?\\.?|volume) *)?(\\w*\\d+\\w*)(?= *.*?\\d{4}).*/i", "\\1", $journalRefData);
                    // extract volume
                    $journalRefData = preg_replace("/^(?:(?:v(?:ol)?\\.?|volume) *)?(\\w*\\d+\\w*)(?= *.*?\\d{4})[,; ]*/i", "", $journalRefData);
                    // remove volume from 'journal_ref' string
                }
                // -- year (take 1):
                // NOTE: For the first take, we assume the year to be the first occurrence of a four-digit number
                //       that's wrapped in parentheses.
                if (preg_match("/\\(\\d{4}\\)/i", $journalRefData)) {
                    $fieldParametersArray['year'] = preg_replace("/^.*?\\((\\d{4})\\).*?\$/i", "\\1", $journalRefData);
                    // extract year
                    $journalRefData = preg_replace("/[,; ]*\\(\\d{4}\\)[,; ]*/i", " ", $journalRefData);
                    // remove year from 'journal_ref' string
                }
                // -- issue:
                // NOTE: The issue is only recognized if it is preceded with a "n/no/number" prefix, or if it is a
                //       number with less than four digits that is enclosed in parentheses (we can check for the latter
                //       case since four-digit years that are wrapped in parens have already been removed). The regex
                //       patterns below also try to account for some non-digit characters in issue numbers.
                // TODO: Support issue extraction from "Journal Vol:No ..." format (see e.g. 'arXiv:cond-mat/0703452v2')
                if (preg_match("/(?:(?:n\\.|no\\.?|number) *)(\\w*[\\d\\/-]+\\w*)|\\((\\w*(?:\\d{1,3}|\\d{1,2}[\\/-]+\\d{1,2})\\w*)\\)/i", $journalRefData)) {
                    $fieldParametersArray['issue'] = preg_replace("/^.*?(?:(?:(?:n\\.|no\\.?|number) *)(\\w*[\\d\\/-]+\\w*)|\\((\\w*(?:\\d{1,3}|\\d{1,2}[\\/-]+\\d{1,2})\\w*)\\)).*?\$/i", "\\1\\2", $journalRefData);
                    // extract issue
                    $journalRefData = preg_replace("/[,; ]*(?:(?:(?:n\\.|no\\.?|number) *)(\\w*[\\d\\/-]+\\w*)|\\((\\w*(?:\\d{1,3}|\\d{1,2}[\\/-]+\\d{1,2})\\w*)\\))[,; ]*/i", "", $journalRefData);
                    // remove issue from 'journal_ref' string
                }
                // -- pages (take 1):
                // NOTE: For the first take, we assume the pages to be either preceded with a "p/pp" prefix, or to
                //       be a page range.
                if (preg_match("/(?:p(?:p)?\\.? *)(\\w*\\d+\\w*)(?: *-+ *(\\w*\\d+\\w*))?|(?:p(?:p)?\\.? *)?(\\w*\\d+\\w*) *-+ *(\\w*\\d+\\w*)/i", $journalRefData)) {
                    $fieldParametersArray['startPage'] = preg_replace("/^.*?(?:(?:p(?:p)?\\.? *)(\\w*\\d+\\w*)(?: *-+ *(\\w*\\d+\\w*))?|(?:p(?:p)?\\.? *)?(\\w*\\d+\\w*) *-+ *(\\w*\\d+\\w*)).*?\$/i", "\\1\\3", $journalRefData);
                    // extract starting page
                    $fieldParametersArray['endPage'] = preg_replace("/^.*?(?:(?:p(?:p)?\\.? *)(\\w*\\d+\\w*)(?: *-+ *(\\w*\\d+\\w*))?|(?:p(?:p)?\\.? *)?(\\w*\\d+\\w*) *-+ *(\\w*\\d+\\w*)).*?\$/i", "\\2\\4", $journalRefData);
                    // extract ending page
                    $journalRefData = preg_replace("/[,; ]*(?:(?:p(?:p)?\\.? *)(\\w*\\d+\\w*)(?: *-+ *(\\w*\\d+\\w*))?|(?:p(?:p)?\\.? *)?(\\w*\\d+\\w*) *-+ *(\\w*\\d+\\w*))[,; ]*/i", "", $journalRefData);
                    // remove page info from 'journal_ref' string
                }
                // -- year (take 2):
                // NOTE: For the second take, we assume the year to be the first occurrence of any four-digit number
                //       in the remaining 'journal_ref' string.
                if (!isset($fieldParametersArray['year']) and preg_match("/\\b\\d{4}\\b/i", $journalRefData)) {
                    $fieldParametersArray['year'] = preg_replace("/^.*?\\b(\\d{4})\\b.*?\$/i", "\\1", $journalRefData);
                    // extract year
                    $journalRefData = preg_replace("/[,; ]*\\b\\d{4}\\b[,; ]*/i", " ", $journalRefData);
                    // remove year from 'journal_ref' string
                }
                // -- pages (take 2):
                // NOTE: For the second take, we assume the page info to be any number that is at the beginning of
                //       the remaining 'journal_ref' string.
                if (!isset($fieldParametersArray['startPage']) and preg_match("/^[,; ]*\\w*\\d+\\w*/i", $journalRefData)) {
                    $fieldParametersArray['startPage'] = preg_replace("/^[,; ]*(\\w*\\d+\\w*).*?\$/i", "\\1", $journalRefData);
                    // extract page info
                }
            }
            // Standardize field data contained in '$fieldParametersArray':
            foreach ($fieldParametersArray as $fieldKey => $fieldData) {
                // In case of a latin1-based database, attempt to convert UTF-8 data to refbase markup & latin1:
                // (we exclude the 'author' and 'address' fields here since they have already been dealt with above)
                if (!preg_match("/^(author|address)\$/", $fieldKey) and $contentTypeCharset == "ISO-8859-1" and detectCharacterEncoding($fieldData) == "UTF-8") {
                    $fieldData = convertToCharacterEncoding("ISO-8859-1", "TRANSLIT", $fieldData, "UTF-8");
                }
                // Decode HTML special chars:
                if ($fieldKey != "url" and preg_match('/&(amp|quot|#0?39|lt|gt);/', $fieldData)) {
                    $fieldParametersArray[$fieldKey] = decodeHTMLspecialchars($fieldData);
                } elseif ($fieldKey == "url" and preg_match('/&amp;/', $fieldData)) {
                    // in case of the 'url' field, we just decode any ampersand characters
                    $fieldParametersArray[$fieldKey] = str_replace('&amp;', '&', $fieldData);
                }
            }
            // Function 'standardizeFieldData()' e.g. performs case transformation, standardizes thesis names, normalizes page ranges, and reformats person names according to preference:
            $fieldParametersArray = standardizeFieldData($fieldParametersArray, "arXiv XML", $personDelimiter, $familyNameGivenNameDelimiter, $familyNameFirst, $shortenGivenNames, $transformCase, $postprocessorActionsArray);
            // Append the array of extracted field data to the main data array which holds all records to import:
            $parsedRecordsArray[] = $fieldParametersArray;
        }
    }
    // -----------------------------------------
    // Build refbase import array:
    $importDataArray = buildImportArray("refbase", "1.0", "http://refbase.net/import/arxiv/", "Matthias Steffens", "*****@*****.**", array('prefix_call_number' => "true"), $parsedRecordsArray);
    // 'records' - array of record(s) (with each record being a sub-array of fields)
    return array($importDataArray, $recordsCount, $importRecordNumbersRecognizedFormatArray, $importRecordNumbersNotRecognizedFormatArray, $errors);
}
Esempio n. 6
0
/**
 * Returns the first non-empty value of '$row' among the given refbase field name(s).
 *
 * Private helper of function 'parseRecord()'.
 *
 * @param array        $row           record data, keyed by refbase field name
 * @param string|array $refbaseFields a single refbase field name, or a list of field names that are tried in order
 * @return mixed the first '$row' value that is not empty, or NULL if no candidate field holds any data
 */
function parseRecordFirstNonEmptyValue($row, $refbaseFields)
{
    if (!is_array($refbaseFields)) {
        $refbaseFields = array($refbaseFields);
    }
    foreach ($refbaseFields as $refbaseField) {
        if (!empty($refbaseField) and !empty($row[$refbaseField])) {
            return $row[$refbaseField];
        }
    }
    return null;
}

/**
 * Maps the data of a single record ('$row') onto ODF index names and post-processes the resulting field values.
 *
 * Every key of '$odfIndexesToRefbaseFieldsArray' ends up as a key in the returned array; ODF indexes
 * for which no data could be found are set to an empty string.
 *
 * @param array $row                                    record data, keyed by refbase field name
 * @param array $odfIndexesToRefbaseFieldsArray         maps each ODF index name either to a refbase field name, or to an
 *                                                      array that maps reference types (plus the special keys 'Other' and
 *                                                      'Any') to a refbase field name or a list of field names
 * @param array $referenceTypesToRefbaseTypesArray      maps refbase type names to ODF type numbers
 * @param array $universalSearchReplaceActionsArray     search & replace actions that are applied to every non-empty field
 * @param array $fieldSpecificSearchReplaceActionsArray list of arrays, each with a 'fields' element (ODF index names)
 *                                                      and an 'actions' element (search & replace actions)
 * @return array field data, keyed by ODF index name
 */
function parseRecord($row, $odfIndexesToRefbaseFieldsArray, $referenceTypesToRefbaseTypesArray, $universalSearchReplaceActionsArray, $fieldSpecificSearchReplaceActionsArray)
{
    global $officialDatabaseName;
    // these variables are defined in 'ini.inc.php'
    global $databaseBaseURL;
    global $contentTypeCharset;
    global $convertExportDataToUTF8;
    $fieldParametersArray = array();
    // this is a hack that maps the names of the '$row' array keys to those used
    // by the '$formVars' array (which is required by function 'generateCiteKey()')
    $formVars = buildFormVarsArray($row);
    // function 'buildFormVarsArray()' is defined in 'include.inc.php'
    // generate or extract the cite key for this record
    $citeKey = generateCiteKey($formVars);
    // function 'generateCiteKey()' is defined in 'include.inc.php'
    // the record's reference type (NULL if the row doesn't provide one, which avoids an "undefined index" warning below)
    $recordType = isset($row['type']) ? $row['type'] : null;
    // PARSE RECORD:
    // walk the mapping of ODF indexes to refbase field names and assign fields accordingly:
    foreach ($odfIndexesToRefbaseFieldsArray as $odfIndex => $fieldMapping) {
        $fieldValue = null;
        if (empty($fieldMapping)) {
            // unsupported ODF index: nothing to look up, an empty string gets inserted below
        } elseif (!is_array($fieldMapping)) {
            // plain mapping: copy the data of the given refbase field
            $fieldValue = parseRecordFirstNonEmptyValue($row, $fieldMapping);
        } else {
            // type-specific mapping: the refbase field(s) to use depend on the current record's reference type
            $useDefault = true;
            foreach ($fieldMapping as $referenceType => $refbaseFields) {
                if ($recordType == $referenceType) {
                    $useDefault = false;
                    $fieldValue = parseRecordFirstNonEmptyValue($row, $refbaseFields);
                    break;
                }
            }
            // 'Other' is used as default for all refbase types that were NOT explicitly specified:
            if ($useDefault and isset($fieldMapping['Other'])) {
                $fieldValue = parseRecordFirstNonEmptyValue($row, $fieldMapping['Other']);
            }
            // if this ODF field is still not set, 'Any' is used as default, no matter whether any refbase types were specified explicitly or not:
            if ($fieldValue === null and isset($fieldMapping['Any'])) {
                $fieldValue = parseRecordFirstNonEmptyValue($row, $fieldMapping['Any']);
            }
        }
        // if no data were found for this ODF field, provide an empty string:
        $fieldParametersArray[$odfIndex] = $fieldValue === null ? "" : $fieldValue;
    }
    // POST-PROCESS FIELD DATA:
    // currently, we'll always overwrite the record serial in the 'Identifier' field with the generated cite key:
    // (this means that NO identifier will be exported if you've unchecked the export option "Include cite keys on export")
    $fieldParametersArray['Identifier'] = $citeKey;
    // convert refbase type names into ODF type numbers:
    // (a missing or unmapped type yields an empty string instead of triggering an "undefined index" warning)
    $bibliographyType = isset($fieldParametersArray['BibliographyType']) ? $fieldParametersArray['BibliographyType'] : "";
    if (isset($referenceTypesToRefbaseTypesArray[$bibliographyType])) {
        $fieldParametersArray['BibliographyType'] = $referenceTypesToRefbaseTypesArray[$bibliographyType];
    } else {
        $fieldParametersArray['BibliographyType'] = "";
    }
    // for theses, set the ODF type explicitly and append the thesis kind to the 'Annote' field:
    if (!empty($row['thesis'])) {
        if ($row['thesis'] == "Ph.D. thesis" or $row['thesis'] == "Doctoral thesis") {
            $fieldParametersArray['BibliographyType'] = "11";
        } else {
            $fieldParametersArray['BibliographyType'] = "9";
        }
        if (isset($fieldParametersArray['Annote'])) {
            $fieldParametersArray['Annote'] .= "; " . $row['thesis'];
        } else {
            $fieldParametersArray['Annote'] = $row['thesis'];
        }
    }
    // if a DOI was copied to the URL field, we'll need to add the DOI resolver:
    // NOTE: The registrant code of a DOI prefix isn't limited to four digits and may be further subdivided
    //       (e.g. "10.12345/..." or "10.1000.10/..."), so the pattern accepts four or more digits plus optional sub-codes.
    if (!empty($row['doi']) and isset($fieldParametersArray['URL']) and preg_match("/^\\d{2}\\.\\d{4,}(?:\\.\\d+)*\\//", $fieldParametersArray['URL'])) {
        $fieldParametersArray['URL'] = "http://dx.doi.org/" . $fieldParametersArray['URL'];
    }
    // use the series volume as volume if 'series_volume' contains some info, but 'volume' doesn't:
    if (empty($row['volume']) and !empty($row['series_volume'])) {
        $fieldParametersArray['Volume'] = $row['series_volume'];
    }
    // set the fourth ODF custom field to a refbase database attribution string and the database URL:
    $fieldParametersArray['Custom4'] = "exported from " . $officialDatabaseName . " (" . $databaseBaseURL . ")";
    // set the fifth ODF custom field to the record's permanent database URL:
    $fieldParametersArray['Custom5'] = $databaseBaseURL . "show.php?record=" . $row['serial'];
    // apply universal search & replace actions, encode special chars and charset conversions to every field that shall be exported:
    foreach ($fieldParametersArray as $fieldName => $fieldValue) {
        if (empty($fieldValue)) {
            continue;
            // empty fields are exported untouched
        }
        // perform universal search & replace actions:
        if (!empty($universalSearchReplaceActionsArray)) {
            $fieldValue = searchReplaceText($universalSearchReplaceActionsArray, $fieldValue, true);
        }
        // function 'searchReplaceText()' is defined in 'include.inc.php'
        // we only convert those special chars to entities which are supported by XML:
        $fieldValue = encodeHTMLspecialchars($fieldValue);
        // function 'encodeHTMLspecialchars()' is defined in 'include.inc.php'
        // convert field data to UTF-8 (if '$convertExportDataToUTF8' is set to "yes" in 'ini.inc.php' and character encoding is not UTF-8 already):
        // (note that charset conversion can only be done *after* the cite key has been generated, otherwise cite key generation will produce garbled text!)
        if ($convertExportDataToUTF8 == "yes" and $contentTypeCharset != "UTF-8") {
            $fieldValue = convertToCharacterEncoding("UTF-8", "IGNORE", $fieldValue);
        }
        // function 'convertToCharacterEncoding()' is defined in 'include.inc.php'
        $fieldParametersArray[$fieldName] = $fieldValue;
    }
    // apply field-specific search & replace 'actions' to all fields that are listed in the 'fields' element of the arrays contained in '$fieldSpecificSearchReplaceActionsArray':
    foreach ($fieldSpecificSearchReplaceActionsArray as $fieldActionsArray) {
        foreach ($fieldParametersArray as $fieldName => $fieldValue) {
            if (in_array($fieldName, $fieldActionsArray['fields'])) {
                $fieldParametersArray[$fieldName] = searchReplaceText($fieldActionsArray['actions'], $fieldValue, true);
            }
        }
    }
    return $fieldParametersArray;
}
Esempio n. 7
0
/**
 * Creates the Atom 'feed' root element and fills it with the feed-level tags
 * (title, subtitle, updated, author, generator, icon, logo and two links).
 *
 * @param string $atomOperation if this equals "Error", the subtitle reads "Search error!" and the
 *                              database name is not converted to UTF-8; otherwise the subtitle
 *                              describes the 'WHERE' clause of the global '$query'
 * @return XML the feed object (further links get added in function 'atomCollection()')
 */
function atomGenerateBaseTags($atomOperation)
{
    // settings (according to the original notes, these are specified in 'ini.inc.php'):
    global $officialDatabaseName, $databaseBaseURL, $feedbackEmail;
    global $contentTypeCharset, $convertExportDataToUTF8;
    global $logoImageURL, $faviconImageURL;
    // the SQL query whose 'WHERE' clause gets explained in the feed's subtitle:
    global $query;

    $isErrorFeed = $atomOperation == "Error";

    // ----------------------------------------------------------
    // Root element with its namespace declarations:
    // NOTE: is the unAPI namespace ok? Or should we use "http://unapi.info/specs/", or maybe something like "http://purl.org/unapi/ns/" ?
    $namespaceDeclarations = array(
        "xmlns" => "http://www.w3.org/2005/Atom",
        "xmlns:opensearch" => "http://a9.com/-/spec/opensearch/1.1/",
        "xmlns:unapi" => "http://unapi.info/",
        "xmlns:dc" => "http://purl.org/dc/elements/1.1/",
        "xmlns:dcterms" => "http://purl.org/dc/terms/",
        "xmlns:prism" => "http://prismstandard.org/namespaces/1.2/basic/"
    );
    $feed = new XML("feed");
    foreach ($namespaceDeclarations as $attributeName => $namespaceURI) {
        $feed->setTagAttribute($attributeName, $namespaceURI);
    }

    // The database name serves as feed title, author name and title of the OpenSearch link:
    $feedTitle = encodeHTMLspecialchars($officialDatabaseName);
    // function 'encodeHTMLspecialchars()' is defined in 'include.inc.php'
    // Except for error feeds, convert the name to UTF-8
    // (if '$convertExportDataToUTF8' is set to "yes" and the character encoding is not UTF-8 already):
    if (!$isErrorFeed and $convertExportDataToUTF8 == "yes" and $contentTypeCharset != "UTF-8") {
        $feedTitle = convertToCharacterEncoding("UTF-8", "IGNORE", $feedTitle);
        // function 'convertToCharacterEncoding()' is defined in 'include.inc.php'
    }

    // ----------------------------------------------------------
    // Feed-level tags:
    // (not yet used: category, contributor, rights)

    // - 'title':
    addNewBranch($feed, "title", array("type" => "text"), $feedTitle);

    // - 'subtitle':
    if ($isErrorFeed) {
        $feedSubtitle = "Search error!";
    } else {
        // include a natural-language version (well, sort of) of the SQL query's 'WHERE' clause:
        // TODO: For Atom XML, the query string should not get HTML encoded!
        $whereClause = extractWHEREclause($query);
        // functions 'extractWHEREclause()', 'encodeHTML()' and 'explainSQLQuery()' are defined in 'include.inc.php'
        $feedSubtitle = "Displays records where " . encodeHTML(explainSQLQuery($whereClause));
    }
    addNewBranch($feed, "subtitle", array(), $feedSubtitle);

    // - 'updated':
    //    (TODO: the timestamp in the 'updated' element should really only get updated if any of the matching records was updated, right?)
    $timeStamp = generateISO8601TimeStamp();
    // function 'generateISO8601TimeStamp()' is defined in 'include.inc.php'
    addNewBranch($feed, "updated", array(), $timeStamp);

    // - 'author':
    $feedAuthor = new XMLBranch("author");
    $feedAuthor->setTagContent($feedTitle, "author/name");
    $feedAuthor->setTagContent($feedbackEmail, "author/email");
    $feedAuthor->setTagContent($databaseBaseURL, "author/uri");
    $feed->addXMLBranch($feedAuthor);

    // - 'generator', 'icon', 'logo':
    $generatorAttributes = array("uri" => "http://www.refbase.net/", "version" => "0.9.5");
    addNewBranch($feed, "generator", $generatorAttributes, "Web Reference Database (http://refbase.sourceforge.net)");
    addNewBranch($feed, "icon", array(), $databaseBaseURL . $faviconImageURL);
    addNewBranch($feed, "logo", array(), $databaseBaseURL . $logoImageURL);

    // - 'link' (more links will be added in function 'atomCollection()'):
    //   - link to OpenSearch Description file:
    $openSearchURL = $databaseBaseURL . "opensearch.php?operation=explain";
    atomLink($feed, $openSearchURL, "search", "OpenSearch", $feedTitle);
    //   - link to unAPI server:
    $unapiURL = $databaseBaseURL . "unapi.php";
    atomLink($feed, $unapiURL, "unapi:unapi-server", "unAPI", "unAPI");

    return $feed;
}
Esempio n. 8
0
/**
 * Builds a single Dublin Core XML record ('oai_dc:dc' or 'srw_dc:dc') from one refbase record.
 *
 * The function generates the record's cite key, OpenURL and plain-text citation, XML-encodes
 * (and optionally UTF-8 converts) all field values, and then adds one 'dc:*' element per
 * available field. For 'srw_dc' output, PRISM elements are added in addition.
 *
 * NOTE: If no citation style file is found for the global '$citeStyle', that global is
 *       overwritten with '$defaultCiteStyle'.
 *
 * @param array   $row               record data, keyed by field name ('title', 'author', 'serial', ...)
 * @param string  $metadataPrefix    namespace prefix of the root element; the code distinguishes
 *                                   "oai_dc" (default) and "srw_dc"; if empty, the root element is
 *                                   named just "dc"
 * @param boolean $addNameSpaceInfo  whether namespace (and schema location) attributes are added
 *                                   to the root element
 * @return XML                       the XML object that represents the record
 */
function oaidcRecord($row, $metadataPrefix = "oai_dc", $addNameSpaceInfo = true)
{
    global $databaseBaseURL;
    // these variables are defined in 'ini.inc.php'
    global $contentTypeCharset;
    global $fileVisibility;
    global $fileVisibilityException;
    global $filesBaseURL;
    global $convertExportDataToUTF8;
    global $defaultCiteStyle;
    global $citeStyle;
    global $alnum, $alpha, $cntrl, $dash, $digit, $graph, $lower, $print, $punct, $space, $upper, $word, $patternModifiers;
    // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'
    // The array '$transtab_refbase_unicode' contains search & replace patterns for conversion from refbase markup to Unicode entities.
    global $transtab_refbase_unicode;
    // defined in 'transtab_refbase_unicode.inc.php'
    // The array '$transtab_refbase_ascii' contains search & replace patterns for conversion from refbase markup to plain text.
    global $transtab_refbase_ascii;
    // defined in 'transtab_refbase_ascii.inc.php'
    // Define inline text markup to generate a plain text citation string:
    // (this citation string gets included within a 'dc:identifier' element, see below)
    $markupPatternsArrayPlain = array("bold-prefix" => "", "bold-suffix" => "", "italic-prefix" => "", "italic-suffix" => "", "underline-prefix" => "", "underline-suffix" => "", "endash" => "-", "emdash" => "-", "ampersand" => "&", "double-quote" => '"', "double-quote-left" => '"', "double-quote-right" => '"', "single-quote" => "'", "single-quote-left" => "'", "single-quote-right" => "'", "less-than" => "<", "greater-than" => ">", "newline" => "\n");
    // This is a stupid hack that maps the names of the '$row' array keys to those used
    // by the '$formVars' array (which is required by function 'generateCiteKey()')
    // (eventually, the '$formVars' array should use the MySQL field names as names for its array keys)
    $formVars = buildFormVarsArray($row);
    // function 'buildFormVarsArray()' is defined in 'include.inc.php'
    // Generate or extract the cite key for this record:
    // (to be included within a 'dc:identifier' element)
    $citeKey = generateCiteKey($formVars);
    // function 'generateCiteKey()' is defined in 'include.inc.php'
    // Generate OpenURL data:
    // (to be included within a 'dc:identifier' element)
    $openURL = openURL($row, "openurl:");
    // function 'openURL()' is defined in 'openurl.inc.php'
    // Encode special chars and perform charset conversions:
    // (this must happen AFTER the cite key & OpenURL have been generated from the raw field data)
    foreach ($row as $rowFieldName => $rowFieldValue) {
        // We only convert those special chars to entities which are supported by XML:
        // function 'encodeHTMLspecialchars()' is defined in 'include.inc.php'
        $row[$rowFieldName] = encodeHTMLspecialchars($row[$rowFieldName]);
        // Convert field data to UTF-8:
        // (if '$convertExportDataToUTF8' is set to "yes" in 'ini.inc.php' and character encoding is not UTF-8 already)
        // (Note that charset conversion can only be done *after* the cite key has been generated, otherwise cite key
        //  generation will produce garbled text!)
        // function 'convertToCharacterEncoding()' is defined in 'include.inc.php'
        if ($convertExportDataToUTF8 == "yes" and $contentTypeCharset != "UTF-8") {
            $row[$rowFieldName] = convertToCharacterEncoding("UTF-8", "IGNORE", $row[$rowFieldName]);
        }
    }
    // Defines field-specific search & replace 'actions' that will be applied to all those refbase fields that are listed in the corresponding 'fields' element:
    // (If you don't want to perform any search and replace actions, specify an empty array, like: '$fieldSpecificSearchReplaceActionsArray = array();'.
    //  Note that the search patterns MUST include the leading & trailing slashes -- which is done to allow for mode modifiers such as 'imsxU'.)
    //                                          "/Search Pattern/"  =>  "Replace Pattern"
    $fieldSpecificSearchReplaceActionsArray = array();
    if ($convertExportDataToUTF8 == "yes") {
        $fieldSpecificSearchReplaceActionsArray[] = array('fields' => array("title", "publication", "abbrev_journal", "address", "keywords", "abstract", "orig_title", "series_title", "abbrev_series_title", "notes"), 'actions' => $transtab_refbase_unicode);
    }
    // Apply field-specific search & replace 'actions' to all fields that are listed in the 'fields' element of the arrays contained in '$fieldSpecificSearchReplaceActionsArray':
    foreach ($fieldSpecificSearchReplaceActionsArray as $fieldActionsArray) {
        foreach ($row as $rowFieldName => $rowFieldValue) {
            // Strict comparison is required here: with a loose comparison, an integer array key
            // (e.g. 0) would be considered equal to any non-numeric field name on PHP < 8
            if (in_array($rowFieldName, $fieldActionsArray['fields'], true)) {
                $row[$rowFieldName] = searchReplaceText($fieldActionsArray['actions'], $rowFieldValue, true);
            }
        }
    }
    // function 'searchReplaceText()' is defined in 'include.inc.php'
    // Fetch the name of the citation style file that's associated with the style given in '$citeStyle':
    $citeStyleFile = getStyleFile($citeStyle);
    // function 'getStyleFile()' is defined in 'include.inc.php'
    if (empty($citeStyleFile)) {
        $citeStyle = $defaultCiteStyle;
        // if the given cite style could not be found, we'll use the default cite style which is defined by the '$defaultCiteStyle' variable in 'ini.inc.php'
        // (note that this overwrites the global '$citeStyle' variable)
        $citeStyleFile = getStyleFile($citeStyle);
    }
    // Include the found citation style file *once*:
    include_once "cite/" . $citeStyleFile;
    // Generate a proper citation for this record, ordering attributes according to the chosen output style & record type:
    // - Plain text version of citation string:
    $recordCitationPlain = citeRecord($row, $citeStyle, "", $markupPatternsArrayPlain, false);
    // function 'citeRecord()' is defined in the citation style file given in '$citeStyleFile' (which is included from the 'cite' directory, see above)
    //   Convert any refbase markup that remains in the citation string (such as _italic_ or **bold**) to plain text:
    $recordCitationPlain = searchReplaceText($transtab_refbase_ascii, $recordCitationPlain, true);
    // Convert any remaining refbase markup in the 'title', 'keywords' & 'abstract' fields to plain text:
    $row['title'] = searchReplaceText($transtab_refbase_ascii, $row['title'], true);
    $row['keywords'] = searchReplaceText($transtab_refbase_ascii, $row['keywords'], true);
    $row['abstract'] = searchReplaceText($transtab_refbase_ascii, $row['abstract'], true);
    // Strip any " (ed)" or " (eds)" suffix from author/editor string:
    // (the replacement is only attempted if the string actually ends with such a suffix)
    if (preg_match("/ *\\(eds?\\)\$/", $row['author'])) {
        $row['author'] = preg_replace("/[ \r\n]*\\(eds?\\)/i", "", $row['author']);
    }
    if (preg_match("/ *\\(eds?\\)\$/", $row['editor'])) {
        $row['editor'] = preg_replace("/[ \r\n]*\\(eds?\\)/i", "", $row['editor']);
    }
    // Include a link to any corresponding file if one of the following conditions is met:
    // - the variable '$fileVisibility' (defined in 'ini.inc.php') is set to 'everyone'
    // - the variable '$fileVisibility' is set to 'login' AND the user is logged in
    // - the variable '$fileVisibility' is set to 'user-specific' AND the 'user_permissions' session variable contains 'allow_download'
    // - the array variable '$fileVisibilityException' (defined in 'ini.inc.php') contains a pattern (in array element 1) that matches the contents of the field given (in array element 0)
    //
    // TODO: - the URL-generating code should be made into a dedicated function (since it's shared with 'modsxml.inc.php' and 'atomxml.inc.php')
    $printURL = false;
    // NOTE: the parentheses below only make the operator precedence explicit ('and' binds tighter than 'or')
    if (
        $fileVisibility == "everyone"
        or ($fileVisibility == "login" and isset($_SESSION['loginEmail']))
        or ($fileVisibility == "user-specific" and (isset($_SESSION['user_permissions']) and preg_match("/allow_download/", $_SESSION['user_permissions'])))
        or (!empty($fileVisibilityException) and preg_match($fileVisibilityException[1], $row[$fileVisibilityException[0]]))
    ) {
        if (!empty($row['file'])) {
            if (preg_match('#^(https?|ftp|file)://#i', $row['file'])) {
                $URLprefix = "";
                // we don't alter the URL given in the 'file' field
            } else {
                // use the base URL of the standard files directory as prefix:
                if (preg_match('#^/#', $filesBaseURL)) {
                    // absolute path -> file dir is located outside of refbase root dir
                    // NOTE(review): the scheme is hard-coded to 'http://' here -- confirm whether HTTPS installations need a different prefix
                    $URLprefix = 'http://' . $_SERVER['HTTP_HOST'] . $filesBaseURL;
                } else {
                    // relative path -> file dir is located within refbase root dir
                    $URLprefix = $databaseBaseURL . $filesBaseURL;
                }
            }
            // '$URLprefix' is guaranteed to be set whenever '$printURL' is true
            $printURL = true;
        }
    }
    // ----------------------------------------------------------
    // Start OAI_DC XML record:
    // (initialize the prefix so that an empty '$metadataPrefix' yields a plain "dc" root element
    //  without triggering an "undefined variable" notice/warning)
    $recordPrefix = "";
    if (!empty($metadataPrefix)) {
        $recordPrefix = $metadataPrefix . ":";
    }
    $record = new XML($recordPrefix . "dc");
    // create an XML object for a single record
    if ($addNameSpaceInfo) {
        if ($metadataPrefix == "oai_dc") {
            $record->setTagAttribute("xmlns:oai_dc", "http://www.openarchives.org/OAI/2.0/oai_dc/");
        } elseif ($metadataPrefix == "srw_dc") {
            $record->setTagAttribute("xmlns:srw_dc", "info:srw/schema/1/dc-v1.1");
        }
        $record->setTagAttribute("xmlns:dc", "http://purl.org/dc/elements/1.1/");
        if ($metadataPrefix == "oai_dc") {
            $record->setTagAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
            $record->setTagAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd");
        } elseif ($metadataPrefix == "srw_dc") {
            $record->setTagAttribute("xmlns:prism", "http://prismstandard.org/namespaces/1.2/basic/");
        }
    }
    // Add Dublin Core elements:
    // NOTE: With a few exceptions, we try to adhere to the guidelines given at
    //       "Using simple Dublin Core to describe eprints" by Andy Powell et al.
    //       See: <http://eprints-uk.rdn.ac.uk/project/docs/simpledc-guidelines/>
    // - 'dc:title':
    if (!empty($row['title'])) {
        addMetaElement($record, "dc", "title", array(), $row['title']);
    }
    // function 'addMetaElement()' is defined in 'webservice.inc.php'
    // - 'dc:creator':
    //   (omitted if the author string equals the editor string, in which case only 'dc:contributor' is added)
    //   TODO: add refbase corporate author(s) as 'dc:creator'
    if (!empty($row['author']) and $row['author'] != $row['editor']) {
        addMetaElement($record, "dc", "creator", array(), $row['author']);
    }
    // - 'dc:contributor':
    if (!empty($row['editor'])) {
        addMetaElement($record, "dc", "contributor", array(), $row['editor']);
    }
    // - 'dc:description':
    if (!empty($row['abstract'])) {
        addMetaElement($record, "dc", "description", array(), $row['abstract']);
    }
    // - 'dc:identifier':
    //   - DOI:
    if (!empty($row['doi'])) {
        addMetaElement($record, "dc", "identifier", array(), $row['doi'], "doi");
    }
    //   - PMID:
    //     (the complete 'notes' field is passed on; presumably 'addMetaElement()' extracts the ID for the given content type -- verify in 'webservice.inc.php')
    if (!empty($row['notes']) and preg_match("/PMID *: *\\d+/i", $row['notes'])) {
        addMetaElement($record, "dc", "identifier", array(), $row['notes'], "pmid");
    }
    //   - arXiv:
    if (!empty($row['notes']) and preg_match("/arXiv *: *[^ ;]+/i", $row['notes'])) {
        addMetaElement($record, "dc", "identifier", array(), $row['notes'], "arxiv");
    }
    //   - ISBN:
    if (!empty($row['isbn'])) {
        addMetaElement($record, "dc", "identifier", array(), $row['isbn'], "isbn");
    }
    //   - OpenURL:
    addMetaElement($record, "dc", "identifier", array(), $openURL, "openurl");
    //   - refbase ID:
    addMetaElement($record, "dc", "identifier", array(), $databaseBaseURL . generateURL("show.php", "html", array("record" => $row['serial']), true), "url");
    //   - Cite key:
    addMetaElement($record, "dc", "identifier", array(), $citeKey, "citekey");
    //   - Bibliographic citation:
    //     NOTE: In 'atomxml.inc.php', the bibliographic citation is put into a
    //           'dcterms:bibliographicCitation' element so that it can be uniquely
    //           identified and extracted easily. However, in case of simple Dublin
    //           Core output, we just put it into a 'dc:identifier' element and
    //           use a "citation:" prefix.
    addMetaElement($record, "dc", "identifier", array(), encodeHTMLspecialchars($recordCitationPlain), "citation");
    // - 'dc:source':
    //   NOTE: - In <http://eprints-uk.rdn.ac.uk/project/docs/simpledc-guidelines/>,
    //           Andy Powell et al. recommend that this element should NOT be used!
    //           However, we use 'dc:source' elements for publication & series info
    //           (publication/series title plus volume & issue) to provide a dedicated
    //           source string that's easily readable and parsable.
    //           Example: <dc:source>Polar Biology, Vol. 25, No. 10</dc:source>
    //         - While we could also append the page info to the publication
    //           'dc:source' element, this info is more pertinent to the article
    //           itself and is thus not included. For 'srw_dc:dc' output, page info is
    //           included in PRISM elements (see below).
    //         - All metadata (including the page info) are also provided as a machine
    //           parsable citation in form of an OpenURL ContextObject (see above).
    //   - Publication info:
    //     NOTE: We only include the 'dc:source' element for 'oai_dc:dc' output. In case of 'srw_dc:dc'
    //           output, we use the more fine-grained PRISM elements instead (see below)
    if ($metadataPrefix == "oai_dc" and (!empty($row['publication']) or !empty($row['abbrev_journal']))) {
        // prefer the full publication name over the abbreviated journal name:
        if (!empty($row['publication'])) {
            $source = $row['publication'];
        } elseif (!empty($row['abbrev_journal'])) {
            $source = $row['abbrev_journal'];
        }
        if (!empty($row['volume'])) {
            $source .= ", Vol. " . $row['volume'];
        }
        if (!empty($row['issue'])) {
            $source .= ", No. " . $row['issue'];
        }
        if (!empty($source)) {
            addMetaElement($record, "dc", "source", array(), $source);
        }
    }
    //   - Series info:
    //     (unlike the publication info above, series info is added regardless of the metadata prefix)
    if (!empty($row['series_title']) or !empty($row['abbrev_series_title'])) {
        // prefer the full series title over the abbreviated series title:
        if (!empty($row['series_title'])) {
            $series = $row['series_title'];
        } elseif (!empty($row['abbrev_series_title'])) {
            $series = $row['abbrev_series_title'];
        }
        if (!empty($row['series_volume'])) {
            $series .= ", Vol. " . $row['series_volume'];
        }
        if (!empty($row['series_issue'])) {
            $series .= ", No. " . $row['series_issue'];
        }
        if (!empty($series)) {
            addMetaElement($record, "dc", "source", array(), $series);
        }
        // NOTE: To distinguish between regular publication & series info,
        //       should we better use a "series:" prefix here? If so, use:
        //				addMetaElement($record, "dc", "source", array(), $series, "series");
    }
    //   - ISSN:
    //     NOTE: for 'srw_dc:dc' output, we put the ISSN into the 'prism:issn' element
    if ($metadataPrefix == "oai_dc" and !empty($row['issn'])) {
        addMetaElement($record, "dc", "source", array(), $row['issn'], "issn");
    }
    // - 'dc:date':
    if (!empty($row['year'])) {
        addMetaElement($record, "dc", "date", array(), $row['year']);
    }
    // - 'dc:type':
    //   (the contents of the 'thesis' field are passed as the element's content type)
    if (!empty($row['type'])) {
        addMetaElement($record, "dc", "type", array(), $row['type'], $row['thesis']);
    }
    //   In case of a thesis, we add another 'dc:type' element with the actual thesis type:
    if (!empty($row['thesis'])) {
        addMetaElement($record, "dc", "type", array(), $row['thesis']);
    }
    // - 'dc:format':
    //   (falls back to "text" if the 'medium' field is empty)
    //   TODO: ideally, we should parse the content of the refbase 'medium' field and map it
    //         to a media-type term from <http://www.iana.org/assignments/media-types/>
    if (!empty($row['medium'])) {
        $mediaType = $row['medium'];
    } else {
        $mediaType = "text";
    }
    addMetaElement($record, "dc", "format", array(), $mediaType);
    // - 'dc:subject':
    //   TODO: add user-specific keywords (from field 'user_keys') if the user is logged in
    if (!empty($row['keywords'])) {
        addMetaElement($record, "dc", "subject", array(), $row['keywords']);
    }
    // - 'dc:coverage':
    //   TODO: should we add contents from the refbase 'area' field as 'dc:coverage' element(s)?
    // - 'dc:relation':
    //   - Related URL:
    if (!empty($row['url'])) {
        addMetaElement($record, "dc", "relation", array(), $row['url'], "url");
    }
    //   - Related FILE:
    if ($printURL) {
        addMetaElement($record, "dc", "relation", array(), $URLprefix . $row['file'], "file");
    }
    // - 'dc:publisher':
    if (!empty($row['publisher'])) {
        addMetaElement($record, "dc", "publisher", array(), $row['publisher']);
    }
    // - 'dc:language':
    //   TODO: convert to ISO notation (i.e. "en" instead of "English", etc)
    if (!empty($row['language'])) {
        addMetaElement($record, "dc", "language", array(), $row['language']);
    }
    // ----------------------------------------------------------
    // Add PRISM elements:
    // NOTE: When using the 'srw_dc' namespace (i.e. 'info:srw/schema/1/dc-v1.1' as detailed at
    //       <http://www.loc.gov/standards/sru/resources/dc-schema.html>), I don't think it's allowed
    //       to include anything but the fifteen elements from simple Dublin Core. Is this correct?
    //       If so, then:
    //
    // TODO: Do we need to put the PRISM elements in <extraRecordData> instead? Or can we put them within
    //       a separate branch outside of (and next to) the '<srw_dc:dc>' element? Or shall we better omit
    //       them entirely?
    //       More info on SRU Extra Data: <http://www.loc.gov/standards/sru/specs/extra-data.html>
    //
    //       See also "Mixing DC metadata with other metadata schemas" in "Guidelines for implementing
    //       Dublin Core in XML" <http://dublincore.org/documents/dc-xml-guidelines/>
    if ($metadataPrefix == "srw_dc") {
        // - 'prism:issn':
        if (!empty($row['issn'])) {
            addMetaElement($record, "prism", "issn", array(), $row['issn']);
        }
        // - 'prism:publicationName':
        if (!empty($row['publication'])) {
            addMetaElement($record, "prism", "publicationName", array(), $row['publication']);
        } elseif (!empty($row['abbrev_journal'])) {
            addMetaElement($record, "prism", "publicationName", array(), $row['abbrev_journal']);
        }
        // - 'prism:publicationDate':
        if (!empty($row['year'])) {
            addMetaElement($record, "prism", "publicationDate", array(), $row['year']);
        }
        // - 'prism:volume':
        if (!empty($row['volume'])) {
            addMetaElement($record, "prism", "volume", array(), $row['volume']);
        }
        // - 'prism:number':
        if (!empty($row['issue'])) {
            addMetaElement($record, "prism", "number", array(), $row['issue']);
        }
        // - 'prism:startingPage', 'prism:endingPage':
        //   TODO: Similar code is used in 'include.inc.php', 'modsxml.inc.php' and 'openurl.inc.php',
        //         so this should be made into a dedicated function!
        if (!empty($row['pages']) and preg_match("/\\d+/i", $row['pages'])) {
            $pages = preg_replace("/^\\D*(\\d+)( *[{$dash}]+ *\\d+)?.*/i{$patternModifiers}", "\\1\\2", $row['pages']);
            // extract page range (if there's any), otherwise just the first number
            $startPage = preg_replace("/^\\D*(\\d+).*/i", "\\1", $row['pages']);
            // extract starting page
            $endPage = extractDetailsFromField("pages", $pages, "/\\D+/", "[-1]");
            // extract ending page (function 'extractDetailsFromField()' is defined in 'include.inc.php')
            // NOTE: To extract the ending page, we'll use function 'extractDetailsFromField()'
            //       instead of just grabbing a matched regex pattern since it'll also work
            //       when just a number but no range is given (e.g. when startPage = endPage)
            // - 'prism:startingPage':
            //   (only added if the 'pages' field contains a page range)
            if (preg_match("/\\d+ *[{$dash}]+ *\\d+/i{$patternModifiers}", $row['pages'])) {
                // if there's a page range
                addMetaElement($record, "prism", "startingPage", array(), $startPage);
            }
            // - 'prism:endingPage':
            //   (always added, i.e. also if just a single page number is given)
            addMetaElement($record, "prism", "endingPage", array(), $endPage);
        }
    }
    return $record;
}