示例#1
0
 /**
  * Resolve the identifiers carried by a referent (doi, hdl, sici, pmid, url)
  * to an article record.
  *
  * Every identifier type present in $referent->id is tried in turn: first the
  * local cache (find_in_cache_from_guid / retrieve_from_db), then the matching
  * remote lookup (doi_metadata, jstor_metadata, pubmed_metadata, url2meta).
  * A later identifier may overwrite the $item, $error and $cache_id produced
  * by an earlier one. A freshly discovered record that passes a minimal sanity
  * check is written to the cache with store_in_cache().
  *
  * Side effects: sets the global $error to one of the ERROR_* constants and,
  * when the global $debug is truthy, echoes HTML diagnostics.
  *
  * NOTE(review): $error_msg is assigned in several places but is not declared
  * global here, so as written those assignments are local and never read.
  * Confirm whether callers expect a global $error_msg.
  *
  * @param object $referent Object whose `id` property is an array keyed by identifier type.
  * @param object $item     Receives the article metadata (passed by reference).
  * @return bool True when an article was found, false otherwise.
  */
 function find_article_from_id($referent, &$item)
 {
     global $debug;
     global $error;
     if ($debug) {
         echo '<div style="border: 1px solid #c7cfd5;background: rgb(255,255,153);padding:15px;">';
         echo "<p><b>Resolve at least one of the identifiers</b></p>";
         echo '<pre>';
         // Dump the identifiers we were asked to resolve (previously printed an undefined $id).
         print_r($referent->id);
         echo '</pre>';
         echo '</div>';
     }
     // Resolve identifier
     $found = false;
     $error = ERROR_IDENTIFIER_TYPE_UNKNOWN;
     $cache_id = 0;
     if ($debug) {
         echo '<h3>Resolve identifier</h3>';
     }
     //----------doi---------------
     if (array_key_exists('doi', $referent->id)) {
         $error = ERROR_OK;
         $cache_id = find_in_cache_from_guid('doi', $referent->id['doi']);
         if ($cache_id != 0) {
             $item = retrieve_from_db($cache_id);
             if ($debug) {
                 echo "<h3>Article is in cache</h3>";
                 echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
                 print_r($item);
                 echo "</pre>";
             }
             $found = true;
         } else {
             // Off to CrossRef
             if (doi_metadata($referent->id['doi'], $item)) {
                 // flesh out with other identifiers (do this here as this is a freshly discovered DOI)
                 $pmid = get_pubmed_from_doi($referent->id['doi']);
                 if ($pmid != 0) {
                     $item->pmid = $pmid;
                     // Abstract? Fetch into a scratch object so only the abstract is copied across.
                     $tmp = new stdClass();
                     if (pubmed_metadata($pmid, $tmp)) {
                         if (isset($tmp->abstract)) {
                             $item->abstract = $tmp->abstract;
                         }
                     }
                 }
                 if ($debug) {
                     echo "<h3>Article</h3>";
                     echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
                     print_r($item);
                     echo "</pre>";
                 }
                 $found = true;
             } else {
                 $error = ERROR_DOI_NOT_IN_CROSSREF;
                 $error_msg = $referent->id['doi'];
             }
         }
     }
     //----------hdl---------------
     if (array_key_exists('hdl', $referent->id)) {
         $cache_id = find_in_cache_from_guid('hdl', $referent->id['hdl']);
         if ($cache_id != 0) {
             $item = retrieve_from_db($cache_id);
             if ($debug) {
                 echo "<h3>Article is in cache</h3>";
                 echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
                 print_r($item);
                 echo "</pre>";
             }
             $found = true;
         } else {
             // We don't have this locally, and we have no obvious way of getting metadata without a
             // lookup table
             if ($error == ERROR_OK) {
                 // we've got a DOI from above, so it's OK
             } else {
                 $error = ERROR_FAILED_TO_RESOLVE_IDENTIFIER;
                 if ($debug) {
                     echo '<p>Don\'t know how to get metadata for a handle</p>';
                 }
             }
         }
     }
     //----------sici---------------
     if (array_key_exists('sici', $referent->id)) {
         $error = ERROR_OK;
         $cache_id = find_in_cache_from_guid('sici', $referent->id['sici']);
         if ($cache_id != 0) {
             $item = retrieve_from_db($cache_id);
             $found = true;
         } else {
             if (jstor_metadata($referent->id['sici'], $item)) {
                 $item->sici = $referent->id['sici'];
                 $found = true;
                 if ($debug) {
                     echo "<h3>Article</h3>";
                     echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
                     print_r($item);
                     echo "</pre>";
                 }
             } else {
                 $error = ERROR_SICI_NOT_IN_JSTOR;
                 $error_msg = $referent->id['sici'];
             }
         }
     }
     //----------pmid---------------
     if (array_key_exists('pmid', $referent->id)) {
         $error = ERROR_OK;
         $cache_id = find_in_cache_from_guid('pmid', $referent->id['pmid']);
         if ($cache_id != 0) {
             $item = retrieve_from_db($cache_id);
             $found = true;
         } else {
             if (pubmed_metadata($referent->id['pmid'], $item)) {
                 $found = true;
                 // Do we have a DOI? If not, try to find one via CrossRef.
                 if (!isset($item->doi)) {
                     if (in_crossref($item->issn, $item->year, $item->volume)) {
                         $tmp_item = new stdClass();
                         $doi = search_for_doi($item->issn, $item->volume, $item->spage, 'article', $tmp_item);
                         if ($doi != '') {
                             $item->doi = $doi;
                         }
                     }
                 }
                 if ($debug) {
                     echo "<h3>Article</h3>";
                     echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
                     print_r($item);
                     echo "</pre>";
                 }
             } else {
                 $error = ERROR_PMID_NOT_IN_PUBMED;
                 $error_msg = $referent->id['pmid'];
             }
         }
     }
     //----------url---------------
     if (array_key_exists('url', $referent->id)) {
         $error = ERROR_OK;
         $cache_id = find_in_cache_from_guid('url', 'http://' . urldecode($referent->id['url']));
         if ($cache_id != 0) {
             $item = retrieve_from_db($cache_id);
             $found = true;
         } else {
             // Can we get metadata from the URL?
             $item = url2meta('http://' . $referent->id['url']);
             if ($item->status == 'ok') {
                 $found = true;
                 // Do we need to flesh out the metadata?
                 if (isset($item->doi)) {
                     if (!isset($item->atitle) || !isset($item->title)) {
                         // Stash the URL-derived fields so they can be restored after
                         // doi_metadata() has repopulated $item. The stash must use a
                         // single variable name: the original created $tmp_values but
                         // wrote to $temp_values.
                         $temp_values = new stdClass();
                         if (isset($item->publisher_id)) {
                             $temp_values->publisher_id = $item->publisher_id;
                         }
                         if (isset($item->xml_url)) {
                             $temp_values->xml_url = $item->xml_url;
                         }
                         if (isset($item->url)) {
                             $temp_values->url = $item->url;
                         }
                         if (doi_metadata($item->doi, $item)) {
                             if (isset($temp_values->publisher_id)) {
                                 $item->publisher_id = $temp_values->publisher_id;
                             }
                             if (isset($temp_values->xml_url)) {
                                 $item->xml_url = $temp_values->xml_url;
                             }
                             if (isset($temp_values->url)) {
                                 $item->url = $temp_values->url;
                             }
                         } else {
                             // Bad DOI, bail out...
                             $error = ERROR_DOI_NOT_IN_CROSSREF;
                             $error_msg = $referent->id['url'];
                             return false;
                         }
                     }
                     // Check we haven't found object with this DOI before...
                     // NOTE(review): this result is overwritten by find_in_cache() just below.
                     $cache_id = find_in_cache_from_guid('doi', $item->doi);
                 } else {
                     /*					// It might be worth looking for a DOI (Ingenta, for example, may lack it).
                     					$tmp_item = new stdClass;
                     					$doi = search_for_doi($item->issn, $item->volume, $item->spage, 'article', $tmp_item);
                     					if ($doi != '')
                     					{
                     						$item->doi = $doi;
                     					}*/
                 }
                 // Have we already got this object?
                 $cache_id = find_in_cache($item);
                 if ($cache_id != 0) {
                     // yes, we already have this
                     // Update info
                     update_article_attribute($cache_id, 'url', 'http://' . $referent->id['url']);
                 }
             } else {
                 $error = ERROR_FAILED_TO_RESOLVE_IDENTIFIER;
                 $error_msg = $referent->id['url'];
             }
         }
     }
     // If this is a new reference store it
     if ($found and $cache_id == 0) {
         if (find_in_cache($item) == 0) {
             // Sanity check: need a journal (issn or title) plus either a DOI or
             // both a volume and a starting page.
             $sane = false;
             if ((isset($item->issn) || isset($item->title)) && (isset($item->volume) || isset($item->doi)) && (isset($item->spage) || isset($item->doi))) {
                 $sane = true;
             }
             if ($sane) {
                 store_in_cache($item);
             } else {
                 $found = false;
                 $error = ERROR_FAILED_TO_RESOLVE_IDENTIFIER;
                 // This path is reachable for any identifier type, so 'url' may be absent.
                 $error_msg = isset($referent->id['url']) ? $referent->id['url'] : '';
             }
         }
     }
     return $found;
 }
示例#2
0
/**
 * Report whether doi_metadata() succeeds for a DOI.
 *
 * The metadata itself is collected into a throwaway object and discarded;
 * only doi_metadata()'s return value is passed back.
 *
 * @param string $doi DOI to look up.
 * @return mixed Whatever doi_metadata() returns for this DOI.
 */
function doi_exists($doi)
{
    // Scratch record: doi_metadata() needs somewhere to write, but we ignore the contents.
    $scratch = new stdClass();

    return doi_metadata($doi, $scratch);
}
示例#3
0
文件: jstor.php 项目: rdmpage/bioguid
/**
 * Look up a SICI at JSTOR and populate $item with the metadata found.
 *
 * Steps:
 *  1. Fetch the JSTOR SICI link page with get(); bail out if it is the
 *     "We're Sorry" error page.
 *  2. If $config['proxy_name'] is non-empty, follow the stable/info link found
 *     in that page and use its HTML instead.
 *  3. Extract the dc.* <meta> tags and hand them to parseDcMeta().
 *  4. Fill issn, year, volume, issue and spage from unpack_sici($sici).
 *  5. If a DOI was found, derive the stable URL from it, then validate the DOI
 *     with doi_metadata(); an invalid DOI is removed from $item.
 *  6. If there is still no journal title, look one up from the ISSN.
 *
 * When the global $debug is truthy, the fetched HTML and the matched meta
 * tags are echoed.
 *
 * @param string $sici SICI identifying the article.
 * @param object $item Receives the metadata (passed by reference).
 * @return bool False when JSTOR returned its error page, true otherwise.
 */
function jstor_metadata($sici, &$item)
{
    global $config;
    global $debug;

    $url = 'http://links.jstor.org/sici?sici=' . urlencode($sici);
    $html = get($url);
    if ($debug) {
        echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
        echo $url . "\n";
        echo htmlentities($html);
        echo "</pre>";
    }

    // Check for any error messages
    if (preg_match("/<h2>We're Sorry<\\/h2>/", $html)) {
        return false;
    }
    $found = true;

    // With no proxy configured the first response is used directly. With a
    // proxy (original author's note: inside Glasgow, licensed access) one more
    // request is needed to reach the page carrying the metadata.
    if ('' != $config['proxy_name']) {
        // Extract stable identifier
        if (preg_match('/stable\\/info\\/(?<jstorid>\\d+)\\?/', $html, $match)) {
            $jstor_id = $match['jstorid'];
            $item->url = 'http://www.jstor.org/stable/' . $jstor_id;
            // ok, harvest
            $html = get('http://www.jstor.org/stable/info/' . $jstor_id);
        }
    }

    // Add line feeds so the regular expression works (each <meta> on its own line,
    // since "." does not cross newlines and the pattern's (.*) groups are greedy).
    $html = str_replace('<meta', "\n<meta", $html);
    // Pull out the meta tags
    preg_match_all("|<meta\\s*name=\"(dc.[A-Za-z]*)\"\\s*(scheme=\"(.*)\")?\\s*(content=\"(.*)\")><\\/meta>|", $html, $out, PREG_PATTERN_ORDER);
    parseDcMeta($out, $item);
    if ($debug) {
        echo '<h3>metadata</h3>';
        print_r($out);
    }

    // From here on $out holds the components parsed from the SICI itself.
    $out = unpack_sici($sici);
    if (isset($out['issn'])) {
        $item->issn = $out['issn'];
    }
    if (isset($out['year'])) {
        $item->year = $out['year'];
    }
    // Some JSTOR articles, such as Copeia, have all three elements in the enumeration,
    // so that the volume and issue are the second and third elements
    if (isset($out['locn'])) {
        // Guard on the key actually being read: the real volume lives under 'issue'.
        if (isset($out['issue'])) {
            $item->volume = $out['issue'];
        }
        // The real issue lives under 'locn', which is known to be set in this branch.
        $item->issue = $out['locn'];
    } else {
        if (isset($out['volume'])) {
            $item->volume = $out['volume'];
        }
        if (isset($out['issue'])) {
            $item->issue = $out['issue'];
        }
    }
    if (isset($out['site'])) {
        $item->spage = $out['site'];
    }

    // Handle identifiers
    if (isset($item->doi)) {
        // Make stable URL: strip the "10.2307/" prefix from the DOI to get the stable id.
        $item->url = 'http://www.jstor.org/stable/' . str_replace("10.2307/", "", $item->doi);

        // Is the DOI valid? (not all DOIs in the HTML metadata are valid)
        $crossref_item = new stdClass();
        if (doi_metadata($item->doi, $crossref_item)) {
            // DOI is cool, so add journal name
            if (isset($crossref_item->title)) {
                $item->title = $crossref_item->title;
            }
        } else {
            // Dud DOI, so remove it from the metadata
            unset($item->doi);
        }
    }

    // Might not have journal name; only attempt the lookup when an ISSN is known.
    if (!isset($item->title) && isset($item->issn)) {
        $title = journal_title_from_issn($item->issn);
        if ($title != '') {
            $item->title = $title;
        }
    }
    return $found;
}