Пример #1
0
    $sourceFormat = identifySourceFormat($sourceText);
} elseif ($formType == "importID") {
    $sourceFormat = identifySourceID($sourceIDs);
}
// function 'identifySourceID()' is defined in 'import.inc.php'
// --------------------------------------------------------------------
// FETCH DATA FROM URL:
// In case of import via ID:
// TODO: Modify the code so that '$sourceIDs' can contain a mixture of any supported IDs.
if ($formType == "importID" and !empty($sourceIDs) and !empty($sourceFormat)) {
    // - PubMed IDs:
    if (preg_match("/^Pubmed (Medline|XML)\$/i", $sourceFormat) and preg_match("/[0-9]/", $sourceIDs)) {
        // Split on any whitespace between PubMed IDs:
        $idArray = preg_split("/\\s+/", $sourceIDs, -1, PREG_SPLIT_NO_EMPTY);
        // Fetch source data from PubMed.gov for all given PubMed IDs:
        list($errors, $sourceText) = fetchDataFromPubMed($idArray, $sourceFormat);
        // function 'fetchDataFromPubMed()' is defined in 'import.inc.php'
    } elseif (preg_match("/^arXiv XML\$/i", $sourceFormat) and preg_match("#(arXiv:|http://arxiv\\.org/abs/)?([\\w.-]+/\\d{7}|\\d{4}\\.\\d{4,})(v\\d+)?#i", $sourceIDs)) {
        // Remove any "arXiv:" or "http://arxiv.org/abs/" prefixes from the ID string:
        $sourceIDs = preg_replace("#(?<=^|\\s)(arXiv:|http://arxiv\\.org/abs/)#", "", $sourceIDs);
        // Split on any whitespace between arXiv IDs:
        $idArray = preg_split("/\\s+/", $sourceIDs, -1, PREG_SPLIT_NO_EMPTY);
        // Fetch source data from arXiv.org for all given arXiv IDs:
        list($errors, $sourceText) = fetchDataFromArXiv($idArray, $sourceFormat);
        // function 'fetchDataFromArXiv()' is defined in 'import.inc.php'
        // NOTE: In case of function 'fetchDataFromArXiv()', variable '$sourceText' contains the SimplePie object with the parsed Atom XML feed
        // TODO: This is inconsistent with the behaviour of the other 'fetchData*()' functions and we should do something about it!
        // NOTE: Since, for arXiv IDs, '$sourceText' contains the SimplePie object (and not just text), handling of any encoding issues is done
        //       within function 'arxivToRefbase()'
    } elseif (preg_match("/^CrossRef XML\$/i", $sourceFormat) and (preg_match("#(?<=^|\\s)(doi:|http://dx\\.doi\\.org/)?10\\.\\d{4}/\\S+?(?=\$|\\s)#i", $sourceIDs) or preg_match("#(?<=^|\\s)(openurl:|http://.+?(?=\\?))?.*?(?<=[?&])ctx_ver=Z39\\.88-2004(?=&|\$).*?(?=\$|\\s)#i", $sourceIDs))) {
        // Remove any prefixes (like "doi:", "openurl:", "http://dx.doi.org/" or "http://...?") from the ID string:
Пример #2
0
// Resolves each given DOI to a PubMed ID via the NCBI ESearch service and,
// if ALL of them could be resolved, fetches the matching records from PubMed.
//
// Parameters:
//   $doiArray     - array of DOI strings (duplicates are ignored)
//   $sourceFormat - not used by this function; kept so that existing callers
//                   which pass it continue to work
//
// Returns an array with three elements:
//   [0] $errors     - the error data returned by 'fetchDataFromPubMed()', or the
//                     current value of the global '$errors' if nothing was fetched
//   [1] $sourceText - the data returned by 'fetchDataFromPubMed()', or an empty
//                     string if at least one DOI could not be resolved
//   [2] $failedIDs  - empty if every DOI was resolved; otherwise the unresolved
//                     DOIs followed by the PubMed IDs that were found
function fetchDOIsFromPubMed($doiArray, $sourceFormat = "CrossRef XML")
{
    global $errors;

    $sourceText = "";
    $pmidArray = array();
    $failedIDs = array();

    if (!empty($doiArray)) {
        // Remove any duplicate IDs:
        $doiArray = array_unique($doiArray);

        foreach ($doiArray as $doi) {
            // Build query URL:
            // (the DOI must be percent-encoded, since DOIs may legally contain characters
            //  such as '&', '#', '+' or '<' which would otherwise corrupt the query string)
            $sourceURL = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi" . "?db=pubmed" . "&retmax=1" . "&field=doi" . "&term=" . rawurlencode($doi);

            // Perform query:
            $esearchText = fetchDataFromURL($sourceURL);

            // Parse the response (requires PHP 5 with --enable-libxml):
            // An empty or malformed response must not abort the whole import, so parse
            // failures are caught and the DOI is simply treated as "not found".
            $xml = null;
            if (is_string($esearchText) && trim($esearchText) != "") {
                // Collect libxml parse errors internally instead of emitting PHP warnings:
                $previousLibxmlSetting = libxml_use_internal_errors(true);
                try {
                    $xml = new SimpleXMLElement($esearchText);
                } catch (Exception $e) {
                    $xml = null;
                }
                if (!$previousLibxmlSetting) {
                    libxml_clear_errors();
                }
                libxml_use_internal_errors($previousLibxmlSetting);
            }

            // Extract PubMed ID:
            // (cast to a plain string so that no SimpleXMLElement objects leak to the caller)
            $pmid = "";
            if ($xml !== null) {
                $phraseNotFound = (isset($xml->ErrorList->PhraseNotFound) && !empty($xml->ErrorList->PhraseNotFound));
                if ((int) $xml->Count === 1 && !$phraseNotFound) {
                    $pmid = trim((string) $xml->IdList->Id[0]);
                }
            }

            if ($pmid == "") {
                $failedIDs[] = $doi;
            } else {
                $pmidArray[] = $pmid;
            }
        }
    }

    if (!empty($failedIDs)) {
        // At least one DOI could not be resolved: fetch nothing and report the
        // unresolved DOIs together with the PubMed IDs that were found.
        // NOTE(review): the found PubMed IDs are appended to '$failedIDs' here, so the
        //               returned list mixes DOIs and PMIDs -- confirm that callers expect this
        $failedIDs = array_merge($failedIDs, $pmidArray);
    } else {
        // Fetch source data from PubMed.gov for all found PubMed IDs:
        // (this is also reached with an empty '$pmidArray' if '$doiArray' was empty)
        list($errors, $sourceText) = fetchDataFromPubMed($pmidArray);
    }

    return array($errors, $sourceText, $failedIDs);
}