/**
 * Tally one reference in the per-year counters and, when it resolves in
 * BioStor, append an HTML snippet for it to that year's report page.
 *
 * Reads and updates the globals $year_actual, $year_found, $year_roman and
 * $html, all keyed by $reference->year. The first reference seen for a year
 * initialises that year's counters to 0 and starts its HTML page.
 *
 * Side effects: dumps $reference with print_r(), and on a BioStor hit sets
 * $reference->url to the BioStor reference URL.
 *
 * @param object $reference Reference with at least ->year; ->spage and ->title
 *                          are used when present.
 * @return void
 */
function import($reference)
{
    global $year_found;
    global $year_actual;
    global $year_roman;
    global $html;

    print_r($reference);

    $year = $reference->year;

    // First reference for this year: start the page and zero the counters.
    // The is_array() guard avoids a TypeError on PHP 8 when the global has
    // not been initialised by the caller yet.
    if (!is_array($year_actual) || !array_key_exists($year, $year_actual)) {
        $html[$year] = '<html><body>';
        $html[$year] .= '<h1>' . $year . '</h1>';

        $year_actual[$year] = 0;
        $year_found[$year] = 0;
        $year_roman[$year] = 0;
    }

    // Articles published this year
    $year_actual[$year]++;

    // References whose start page is missing or not numeric are only counted
    // (presumably roman-numeral pagination, going by the counter name) and
    // never looked up.
    if (!isset($reference->spage) || !is_numeric($reference->spage)) {
        $year_roman[$year]++;
        return;
    }

    $openurl = reference2openurl($reference);
    $biostor_id = import_from_openurl($openurl);

    // A zero id is treated as "not found" (assumed contract of
    // import_from_openurl() -- it is defined outside this chunk).
    if ($biostor_id == 0) {
        return;
    }

    // Articles found this year
    $year_found[$year]++;

    $reference->url = 'http://biostor.org/reference/' . $biostor_id;

    // Fetch the BioStor record to find the first BHL page for the thumbnail.
    // The fetch or decode can fail, so never dereference the result blindly.
    $json = get($reference->url . '.json');
    $j = is_string($json) ? json_decode($json) : null;

    $PageID = null;
    if (is_object($j) && isset($j->bhl_pages) && is_array($j->bhl_pages) && isset($j->bhl_pages[0])) {
        $PageID = $j->bhl_pages[0];
    }

    $title = isset($reference->title) ? $reference->title : '';

    $html[$year] .= '<div>';
    if ($PageID !== null) {
        $html[$year] .= '<a href="' . $reference->url . '" target="_new"><img src="http://biostor.org/bhl_image.php?PageID=' . $PageID . '&thumbnail" /></a><br/>';
        $html[$year] .= '<a href="http://www.biodiversitylibrary.org/page/' . $PageID . '" target="_new">' . $PageID . '</a><br/>';
    } else {
        // No page id available: link to the BioStor record itself instead of
        // emitting a thumbnail and page link with an empty PageID.
        $html[$year] .= '<a href="' . $reference->url . '" target="_new">' . $reference->url . '</a><br/>';
    }
    $html[$year] .= $title . '<br/>';
    $html[$year] .= '</div>';
}
/**
 * Look a reference up via the bioguid.info OpenURL resolver and merge the
 * returned metadata into it.
 *
 * Identifier-style fields (issn, doi, pmid, hdl, url, pdf, abstract) from the
 * resolver always overwrite whatever the reference already has. Descriptive
 * fields (title, issue, spage, epage) are copied only when the reference does
 * not have them yet.
 *
 * @param object $reference Reference to enrich; modified in place.
 * @return bool True when the resolver answered with status "ok", false
 *              otherwise (including a failed fetch or undecodable response).
 */
function bioguid($reference)
{
    $url = 'http://bioguid.info/openurl.php?' . reference2openurl($reference) . '&display=json';

    // The fetch may fail or return something that is not JSON; json_decode()
    // then yields null, which must not be dereferenced.
    $json = get($url);
    $obj = is_string($json) ? json_decode($json) : null;

    if (!is_object($obj) || !isset($obj->status) || $obj->status != 'ok') {
        return false;
    }

    // Fields where the resolver's value always wins.
    $overwrite = array('issn', 'doi', 'pmid', 'hdl', 'url', 'pdf', 'abstract');
    foreach ($overwrite as $field) {
        if (isset($obj->$field)) {
            $reference->$field = $obj->$field;
        }
    }

    // Flesh out: resolver field => reference field, copied only when the
    // reference lacks it. url and pdf are not repeated here because the
    // overwrite pass above already sets them whenever the resolver has them.
    $fill = array(
        'atitle' => 'title',
        'issue'  => 'issue',
        'spage'  => 'spage',
        'epage'  => 'epage',
    );
    foreach ($fill as $from => $to) {
        if (isset($obj->$from) && !isset($reference->$to)) {
            $reference->$to = $obj->$from;
        }
    }

    return true;
}
require_once dirname(__FILE__) . '/utils.php'; $filename = 'notparsed.txt'; $file_handle = fopen($filename, "r"); $failed = array(); while (!feof($file_handle)) { $line = fgets($file_handle); $parts = explode("\t", $line); $parts[1] = trim($parts[1]); if (preg_match('/(?<authorstring>.*)\\s*(?<year>[0-9]{4})\\.\\s+(?<title>.*)\\s+<em>(?<journal>.*)<\\/em>\\s+(?<series>.*)?\\s*<strong>(?<volume>\\d+)<\\/strong>(\\((?<issue>\\d+)\\))?:\\s+(?<spage>\\d+)([–|-](?<epage>\\d+))\\b/Uu', $parts[1], $m)) { //print_r($m); $matched = true; $reference = reference_from_matches($m); //print_r($reference); $reference->id = $parts[0]; $reference->keywords[] = $parts[0]; $openurl = reference2openurl($reference); //echo $openurl . "\n"; bioguid($reference); if (isset($reference->epage)) { $biostor_id = import_from_openurl($openurl); if ($biostor_id != 0) { $found = true; $reference->url = 'http://biostor.org/reference/' . $biostor_id; } } //print_r($reference); echo reference2ris($reference); } else { /* //echo $parts[1]. "\n";