/**
 * Resolve the identifiers carried by a referent to article metadata.
 *
 * Each identifier type present in $referent->id is tried in turn, in the
 * order doi, hdl, sici, pmid, url. For every type the local cache is
 * consulted first (find_in_cache_from_guid); on a miss the matching remote
 * lookup is used (doi_metadata, jstor_metadata, pubmed_metadata, url2meta).
 * Handles have no remote lookup. A later identifier overwrites the $item and
 * $cache_id produced by an earlier one.
 *
 * If an article was found and is not already cached, it is stored with
 * store_in_cache() provided it passes a minimal sanity check.
 *
 * The outcome is also reported through the global $error, which is set to
 * one of the ERROR_* constants used below.
 *
 * @param object $referent Object whose `id` property is an array keyed by
 *                         identifier type ('doi', 'hdl', 'sici', 'pmid', 'url').
 * @param object $item     Receives the article metadata (passed by reference).
 *
 * @return bool True if an article was found, false otherwise.
 */
function find_article_from_id($referent, &$item)
{
    global $debug;
    global $error;

    // NOTE(review): $error_msg is assigned in several places below but is not
    // declared global here, so those assignments only affect a local variable.
    // Confirm whether a global $error_msg was intended.

    if ($debug) {
        echo '<div style="border: 1px solid #c7cfd5;background: rgb(255,255,153);padding:15px;">';
        echo "<p><b>Resolve at least one of the identifiers</b></p>";
        echo '<pre>';
        // Dump the identifiers we have been asked to resolve.
        print_r($referent->id);
        echo '</pre>';
        echo '</div>';
    }

    // Resolve identifier
    $found = false;
    $error = ERROR_IDENTIFIER_TYPE_UNKNOWN;
    $cache_id = 0;

    if ($debug) {
        echo '<h3>Resolve identifier</h3>';
    }

    //----------doi---------------
    if (array_key_exists('doi', $referent->id)) {
        $error = ERROR_OK;
        $cache_id = find_in_cache_from_guid('doi', $referent->id['doi']);
        if ($cache_id != 0) {
            $item = retrieve_from_db($cache_id);
            if ($debug) {
                echo "<h3>Article is in cache</h3>";
                echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
                print_r($item);
                echo "</pre>";
            }
            $found = true;
        } else {
            // Off to CrossRef
            if (doi_metadata($referent->id['doi'], $item)) {
                // Flesh out with other identifiers (do this here as this is a
                // freshly discovered DOI)
                $pmid = get_pubmed_from_doi($referent->id['doi']);
                if ($pmid != 0) {
                    $item->pmid = $pmid;

                    // Abstract? Fetch the PubMed record into a scratch object
                    // so that only the abstract is copied across.
                    $tmp = new stdClass();
                    if (pubmed_metadata($pmid, $tmp)) {
                        if (isset($tmp->abstract)) {
                            $item->abstract = $tmp->abstract;
                        }
                    }
                }

                if ($debug) {
                    echo "<h3>Article</h3>";
                    echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
                    print_r($item);
                    echo "</pre>";
                }
                $found = true;
            } else {
                $error = ERROR_DOI_NOT_IN_CROSSREF;
                $error_msg = $referent->id['doi'];
            }
        }
    }

    //----------hdl---------------
    if (array_key_exists('hdl', $referent->id)) {
        $cache_id = find_in_cache_from_guid('hdl', $referent->id['hdl']);
        if ($cache_id != 0) {
            $item = retrieve_from_db($cache_id);
            if ($debug) {
                echo "<h3>Article is in cache</h3>";
                echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
                print_r($item);
                echo "</pre>";
            }
            $found = true;
            // A cache hit is a successful resolution, so report it as such
            // (the other identifier branches reset $error before looking up).
            $error = ERROR_OK;
        } else {
            // We don't have this locally, and we have no obvious way of
            // getting metadata without a lookup table
            if ($error == ERROR_OK) {
                // we've got a DOI from above, so it's OK
            } else {
                $error = ERROR_FAILED_TO_RESOLVE_IDENTIFIER;
                if ($debug) {
                    echo '<p>Don\'t know how to get metadata for a handle</p>';
                }
            }
        }
    }

    //----------sici---------------
    if (array_key_exists('sici', $referent->id)) {
        $error = ERROR_OK;
        $cache_id = find_in_cache_from_guid('sici', $referent->id['sici']);
        if ($cache_id != 0) {
            $item = retrieve_from_db($cache_id);
            $found = true;
        } else {
            if (jstor_metadata($referent->id['sici'], $item)) {
                $item->sici = $referent->id['sici'];
                $found = true;
                if ($debug) {
                    echo "<h3>Article</h3>";
                    echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
                    print_r($item);
                    echo "</pre>";
                }
            } else {
                $error = ERROR_SICI_NOT_IN_JSTOR;
                $error_msg = $referent->id['sici'];
            }
        }
    }

    //----------pmid---------------
    if (array_key_exists('pmid', $referent->id)) {
        $error = ERROR_OK;
        $cache_id = find_in_cache_from_guid('pmid', $referent->id['pmid']);
        if ($cache_id != 0) {
            $item = retrieve_from_db($cache_id);
            $found = true;
        } else {
            if (pubmed_metadata($referent->id['pmid'], $item)) {
                $found = true;

                // Do we have a DOI? If not, try to find one via CrossRef.
                if (!isset($item->doi)) {
                    if (in_crossref($item->issn, $item->year, $item->volume)) {
                        $tmp_item = new stdClass();
                        $doi = search_for_doi($item->issn, $item->volume, $item->spage, 'article', $tmp_item);
                        if ($doi != '') {
                            $item->doi = $doi;
                        }
                    }
                }

                if ($debug) {
                    echo "<h3>Article</h3>";
                    echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
                    print_r($item);
                    echo "</pre>";
                }
            } else {
                $error = ERROR_PMID_NOT_IN_PUBMED;
                $error_msg = $referent->id['pmid'];
            }
        }
    }

    //----------url---------------
    if (array_key_exists('url', $referent->id)) {
        $error = ERROR_OK;
        $cache_id = find_in_cache_from_guid('url', 'http://' . urldecode($referent->id['url']));
        if ($cache_id != 0) {
            $item = retrieve_from_db($cache_id);
            $found = true;
        } else {
            // Can we get metadata from the URL?
            $item = url2meta('http://' . $referent->id['url']);
            if ($item->status == 'ok') {
                $found = true;

                // Do we need to flesh out the metadata?
                if (isset($item->doi)) {
                    if (!isset($item->atitle) || !isset($item->title)) {
                        // Remember any URL-specific metadata so it can be put
                        // back after doi_metadata() has filled in $item.
                        $saved_values = new stdClass();
                        if (isset($item->publisher_id)) {
                            $saved_values->publisher_id = $item->publisher_id;
                        }
                        if (isset($item->xml_url)) {
                            $saved_values->xml_url = $item->xml_url;
                        }
                        if (isset($item->url)) {
                            $saved_values->url = $item->url;
                        }

                        if (doi_metadata($item->doi, $item)) {
                            if (isset($saved_values->publisher_id)) {
                                $item->publisher_id = $saved_values->publisher_id;
                            }
                            if (isset($saved_values->xml_url)) {
                                $item->xml_url = $saved_values->xml_url;
                            }
                            if (isset($saved_values->url)) {
                                $item->url = $saved_values->url;
                            }
                        } else {
                            // Bad DOI, bail out...
                            $error = ERROR_DOI_NOT_IN_CROSSREF;
                            $error_msg = $referent->id['url'];
                            return false;
                        }
                    }

                    // Check we haven't found object with this DOI before...
                    // NOTE(review): this result is overwritten by the
                    // find_in_cache($item) call just below; confirm whether
                    // it was meant to be kept when non-zero.
                    $cache_id = find_in_cache_from_guid('doi', $item->doi);
                } else {
                    /*
                    // It might be worth looking for a DOI (Ingenta, for example, may lack it).
                    $tmp_item = new stdClass;
                    $doi = search_for_doi($item->issn, $item->volume, $item->spage, 'article', $tmp_item);
                    if ($doi != '')
                    {
                        $item->doi = $doi;
                    }
                    */
                }

                // Have we already got this object?
                $cache_id = find_in_cache($item);
                if ($cache_id != 0) {
                    // Yes, we already have this: record the URL against it.
                    update_article_attribute($cache_id, 'url', 'http://' . $referent->id['url']);
                }
            } else {
                $error = ERROR_FAILED_TO_RESOLVE_IDENTIFIER;
                $error_msg = $referent->id['url'];
            }
        }
    }

    // If this is a new reference store it
    if ($found and $cache_id == 0) {
        if (find_in_cache($item) == 0) {
            // Sanity check: need a journal (issn or title), plus either a DOI
            // or both a volume and a starting page.
            $sane = false;
            if ((isset($item->issn) || isset($item->title))
                && (isset($item->volume) || isset($item->doi))
                && (isset($item->spage) || isset($item->doi))) {
                $sane = true;
            }

            if ($sane) {
                store_in_cache($item);
            } else {
                $found = false;
                $error = ERROR_FAILED_TO_RESOLVE_IDENTIFIER;
                // The article may have come from a non-URL identifier, so the
                // 'url' key is not guaranteed to exist here.
                $error_msg = isset($referent->id['url']) ? $referent->id['url'] : '';
            }
        }
    }

    return $found;
}
/**
 * Report whether metadata can be obtained for a DOI.
 *
 * Delegates to doi_metadata() with a throwaway object and returns its
 * result unchanged; the metadata itself is discarded.
 *
 * @param string $doi The DOI to check.
 *
 * @return mixed Whatever doi_metadata() returns for this DOI.
 */
function doi_exists($doi)
{
    // doi_metadata() takes its second argument by reference in this file's
    // other call sites, so a real variable is needed even though it is unused.
    $scratch = new stdClass();

    return doi_metadata($doi, $scratch);
}
/**
 * Look up an article on JSTOR by SICI and merge its metadata into $item.
 *
 * The SICI is resolved through links.jstor.org. Dublin Core <meta> tags in
 * the returned HTML are handed to parseDcMeta(), and the issn, year, volume,
 * issue and starting page are then taken from the SICI itself via
 * unpack_sici(). If the page supplied a DOI it is used to build the stable
 * JSTOR URL and is checked with doi_metadata(); a DOI that does not resolve
 * is removed from $item again.
 *
 * @param string $sici The SICI to resolve.
 * @param object $item Receives the metadata (passed by reference).
 *
 * @return bool False if JSTOR returned its "We're Sorry" page, true otherwise.
 */
function jstor_metadata($sici, &$item)
{
    global $config;
    global $debug;

    $url = 'http://links.jstor.org/sici?sici=' . urlencode($sici);
    $html = get($url);

    if ($debug) {
        echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
        echo $url . "\n";
        echo htmlentities($html);
        echo "</pre>";
    }

    // Check for any error messages
    if (preg_match("/<h2>We're Sorry<\\/h2>/", $html)) {
        return false;
    }

    // With no proxy configured (outside Glasgow) the metadata is taken
    // directly from the page already fetched. With a proxy (inside Glasgow,
    // where we are licensed) one more step is needed.
    if ('' != $config['proxy_name']) {
        // Extract stable identifier
        if (preg_match('/stable\\/info\\/(?<jstorid>\\d+)\\?/', $html, $match)) {
            $item->url = 'http://www.jstor.org/stable/' . $match['jstorid'];

            // ok, harvest
            $html = get('http://www.jstor.org/stable/info/' . $match['jstorid']);
        }
    }

    // Add line feeds so the regular expression works (one <meta> per line)
    $html = str_replace('<meta', "\n<meta", $html);

    // Pull out the meta tags
    preg_match_all("|<meta\\s*name=\"(dc.[A-Za-z]*)\"\\s*(scheme=\"(.*)\")?\\s*(content=\"(.*)\")><\\/meta>|", $html, $out, PREG_PATTERN_ORDER);
    parseDcMeta($out, $item);

    if ($debug) {
        echo '<h3>metadata</h3>';
        print_r($out);
    }

    // Fill in the citation details encoded in the SICI itself
    $sici_parts = unpack_sici($sici);

    if (isset($sici_parts['issn'])) {
        $item->issn = $sici_parts['issn'];
    }
    if (isset($sici_parts['year'])) {
        $item->year = $sici_parts['year'];
    }

    // Some JSTOR articles, such as Copeia, have all three elements in the
    // enumeration, so that the volume and issue are the second and third
    // elements
    if (isset($sici_parts['locn'])) {
        // Only shift the elements along when the one being read is present.
        if (isset($sici_parts['volume']) && isset($sici_parts['issue'])) {
            $item->volume = $sici_parts['issue'];
        }
        if (isset($sici_parts['issue'])) {
            $item->issue = $sici_parts['locn'];
        }
    } else {
        if (isset($sici_parts['volume'])) {
            $item->volume = $sici_parts['volume'];
        }
        if (isset($sici_parts['issue'])) {
            $item->issue = $sici_parts['issue'];
        }
    }

    if (isset($sici_parts['site'])) {
        $item->spage = $sici_parts['site'];
    }

    // Handle identifiers

    // Make stable URL from the DOI by dropping the 10.2307/ prefix
    if (isset($item->doi)) {
        $stable = str_replace("10.2307/", "", $item->doi);
        $item->url = 'http://www.jstor.org/stable/' . $stable;
    }

    // Is the DOI valid? (not all DOIs in the HTML metadata are valid)
    if (isset($item->doi)) {
        $crossref_item = new stdClass();
        if (doi_metadata($item->doi, $crossref_item)) {
            // DOI is cool, so add journal name
            if (isset($crossref_item->title)) {
                $item->title = $crossref_item->title;
            }
        } else {
            // Dud DOI, so remove it from the metadata
            unset($item->doi);
        }
    }

    // Might not have journal name; fall back to a lookup by ISSN when the
    // SICI gave us one.
    if (!isset($item->title) && isset($item->issn)) {
        $title = journal_title_from_issn($item->issn);
        if ($title != '') {
            $item->title = $title;
        }
    }

    return true;
}