$sourceFormat = identifySourceFormat($sourceText); } elseif ($formType == "importID") { $sourceFormat = identifySourceID($sourceIDs); } // function 'identifySourceID()' is defined in 'import.inc.php' // -------------------------------------------------------------------- // FETCH DATA FROM URL: // In case of import via ID: // TODO: Modify the code so that '$sourceIDs' can contain a mixture of any supported IDs. if ($formType == "importID" and !empty($sourceIDs) and !empty($sourceFormat)) { // - PubMed IDs: if (preg_match("/^Pubmed (Medline|XML)\$/i", $sourceFormat) and preg_match("/[0-9]/", $sourceIDs)) { // Split on any whitespace between PubMed IDs: $idArray = preg_split("/\\s+/", $sourceIDs, -1, PREG_SPLIT_NO_EMPTY); // Fetch source data from PubMed.gov for all given PubMed IDs: list($errors, $sourceText) = fetchDataFromPubMed($idArray, $sourceFormat); // function 'fetchDataFromPubMed()' is defined in 'import.inc.php' } elseif (preg_match("/^arXiv XML\$/i", $sourceFormat) and preg_match("#(arXiv:|http://arxiv\\.org/abs/)?([\\w.-]+/\\d{7}|\\d{4}\\.\\d{4,})(v\\d+)?#i", $sourceIDs)) { // Remove any "arXiv:" or "http://arxiv.org/abs/" prefixes from the ID string: $sourceIDs = preg_replace("#(?<=^|\\s)(arXiv:|http://arxiv\\.org/abs/)#", "", $sourceIDs); // Split on any whitespace between arXiv IDs: $idArray = preg_split("/\\s+/", $sourceIDs, -1, PREG_SPLIT_NO_EMPTY); // Fetch source data from arXiv.org for all given arXiv IDs: list($errors, $sourceText) = fetchDataFromArXiv($idArray, $sourceFormat); // function 'fetchDataFromArXiv()' is defined in 'import.inc.php' // NOTE: In case of function 'fetchDataFromArXiv()', variable '$sourceText' contains the SimplePie object with the parsed Atom XML feed // TODO: This is inconsistent with the behaviour of the other 'fetchData*()' functions and we should do something about it! 
// NOTE: Since, for arXiv IDs, '$sourceText' contains the SimplePie object (and not just text), handling of any encoding issues is done // within function 'arxivToRefbase()' } elseif (preg_match("/^CrossRef XML\$/i", $sourceFormat) and (preg_match("#(?<=^|\\s)(doi:|http://dx\\.doi\\.org/)?10\\.\\d{4}/\\S+?(?=\$|\\s)#i", $sourceIDs) or preg_match("#(?<=^|\\s)(openurl:|http://.+?(?=\\?))?.*?(?<=[?&])ctx_ver=Z39\\.88-2004(?=&|\$).*?(?=\$|\\s)#i", $sourceIDs))) { // Remove any prefixes (like "doi:", "openurl:", "http://dx.doi.org/" or "http://...?") from the ID string:
/**
 * Looks up the PubMed ID for each given DOI via the NCBI ESearch service and,
 * if ALL DOIs could be resolved, fetches the matching records from PubMed.
 *
 * The lookup is all-or-nothing: as soon as one DOI cannot be resolved, no
 * PubMed records are fetched and every ID is handed back in the third return
 * element (unresolved DOIs first, followed by the PubMed IDs that were found).
 * NOTE(review): this means the returned list may mix DOIs and PubMed IDs --
 * confirm that callers expect this.
 *
 * @param array  $doiArray     DOIs to look up (duplicates are ignored).
 * @param string $sourceFormat Not used by this function; kept so that the
 *                             signature stays compatible with existing callers.
 *
 * @return array Three elements: the errors value (the global '$errors', or
 *               whatever 'fetchDataFromPubMed()' returned as its first element),
 *               the fetched source text ("" if nothing was fetched), and the
 *               array of IDs that still need to be handled by the caller.
 */
function fetchDOIsFromPubMed($doiArray, $sourceFormat = "CrossRef XML")
{
    global $errors;

    $sourceText = "";
    $pmidArray = array();
    $failedIDs = array();

    if (!empty($doiArray)) {
        // Remove any duplicate IDs:
        $doiArray = array_unique($doiArray);

        // Collect XML parse problems internally instead of emitting PHP warnings
        // for every malformed response; the previous setting is restored below.
        $previousLibxmlSetting = libxml_use_internal_errors(true);

        foreach ($doiArray as $doi) {
            // DOIs may contain characters that are reserved in URLs ('&', '#', '+', ...),
            // so percent-encode the DOI before putting it into the query string.
            // A DOI that already carries percent-escapes is passed through as is
            // to avoid double-encoding it.
            $encodedDOI = preg_match('/%[0-9A-Fa-f]{2}/', $doi) ? $doi : rawurlencode($doi);

            // Build query URL:
            $sourceURL = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
                       . "?db=pubmed"
                       . "&retmax=1"
                       . "&field=doi"
                       . "&term=" . $encodedDOI;

            // Perform query:
            $esearchText = fetchDataFromURL($sourceURL);

            // Parse the response; an empty or malformed response yields 'false'
            // (instead of an uncaught exception) and is treated as a failed lookup.
            $xml = (is_string($esearchText) && trim($esearchText) !== "")
                 ? simplexml_load_string($esearchText)
                 : false;

            $lookupFailed = $xml === false
                         || (int) $xml->Count !== 1                  // need exactly one hit
                         || !empty($xml->ErrorList->PhraseNotFound)  // ESearch reported an unknown phrase
                         || !isset($xml->IdList->Id[0]);             // hit count without an actual ID

            if ($lookupFailed) {
                $failedIDs[] = $doi;
            } else {
                // Extract PubMed ID (as a plain string, not as a SimpleXMLElement object):
                $pmidArray[] = (string) $xml->IdList->Id[0];
            }
        }

        libxml_clear_errors();
        libxml_use_internal_errors($previousLibxmlSetting);
    }

    if (!empty($failedIDs)) {
        // At least one DOI could not be resolved: fetch nothing and hand all IDs back.
        $failedIDs = array_merge($failedIDs, $pmidArray);
    } else {
        // Fetch source data from PubMed.gov for all found PubMed IDs:
        list($errors, $sourceText) = fetchDataFromPubMed($pmidArray);
    }

    return array($errors, $sourceText, $failedIDs);
}