/**
 * @brief Find the article that contains a given page of a journal volume
 *
 * The lookup order is:
 *  -# the local cache (find_in_cache_from_page),
 *  -# CrossRef, if in_crossref() says the ISSN/year/volume is covered,
 *  -# JSTOR, if enough_for_jstor_lookup() and in_jstor() both agree.
 *
 * For the two remote services the function starts at the requested page and
 * steps backwards one page at a time (at most 50 tries for CrossRef, 20 for
 * JSTOR, and never below page 0) until a lookup succeeds. After each step back
 * the cache is consulted again, because the new page may fall inside a record
 * that has already been stored. Whenever a record is found this way, the
 * originally requested page is passed to update_page_upperbound() for it.
 *
 * @param values Associative array describing the request. 'issn', 'volume'
 *               and 'pages' are read directly; 'date' and 'genre' are optional.
 *               The whole array is also handed to enough_for_jstor_lookup()
 *               and (with 'spage' set) to sici_from_meta().
 * @param item   (by reference) Receives the record that was found.
 *
 * @return true if a record was found, false otherwise
 */
function find_article_from_page($values, &$item)
{
    global $debug;

    // Probe object used for every page-based cache lookup below
    $tmp_item = new stdClass();
    $tmp_item->issn = $values['issn'];
    $tmp_item->volume = $values['volume'];
    $tmp_item->pages = $values['pages'];

    // Is it in our cache?
    $cache_id = find_in_cache_from_page($tmp_item);
    if ($cache_id != 0) {
        $item = retrieve_from_db($cache_id);
        return true;
    }

    $found = false;

    // 'date' and 'genre' are optional keys: read them once, guarded, so that a
    // sparse request does not raise undefined-index notices further down.
    $year = '';
    if (array_key_exists('date', $values)) {
        $year = $values['date'];
    }
    $genre = isset($values['genre']) ? $values['genre'] : null;

    // Off to the Cloud...

    // CrossRef first
    if (in_crossref($values['issn'], $year, $values['volume'])) {
        $max_tries = 50;
        $page = $values['pages'];
        $upper_bound = $page; // save the original starting page
        $count = 0;

        while (!$found && $count < $max_tries && $page >= 0) {
            if ($debug) {
                echo $count, '.';
            }

            $doi = search_for_doi($values['issn'], $values['volume'], $page, $genre, $item);

            if ($doi == '') {
                // Nothing at this page: step back one page
                $page--;

                // We might now be in the range of a previously found record
                $tmp_item->issn = $values['issn'];
                $tmp_item->volume = $values['volume'];
                $tmp_item->pages = $page;

                $cache_id = find_in_cache_from_page($tmp_item);
                if ($cache_id != 0) {
                    $item = retrieve_from_db($cache_id);
                    $found = true;

                    // Update upper bound
                    update_page_upperbound($cache_id, $upper_bound);
                }
            } else {
                $found = true;

                // Store the record unless we already have it
                $cache_id = find_in_cache($item);
                if ($cache_id == 0) {
                    $cache_id = store_in_cache($item);
                }

                // Update upper bound
                update_page_upperbound($cache_id, $upper_bound);
            }

            $count++;
        }
    }

    // OK, try JSTOR (gulp)
    if (!$found) {
        if ($debug) {
            echo '<p>Trying JSTOR ' . $values['issn'] . '</p>';
            if (in_jstor($values['issn'], $year)) {
                echo "in JSTOR\n";
            }
        }

        if (enough_for_jstor_lookup($values) && in_jstor($values['issn'], $year)) {
            $max_tries = 20;
            $page = $values['pages'];
            $upper_bound = $page; // save the original starting page
            $temp_values = $values;
            $count = 0;

            while (!$found && $count < $max_tries && $page >= 0) {
                if ($debug) {
                    echo $count, '.';
                }

                // Build a SICI for the candidate starting page
                $temp_values['spage'] = $page;
                $sici = sici_from_meta($temp_values);

                if ($debug) {
                    print_r($temp_values);
                    echo urlencode($sici);
                }

                $found = jstor_metadata($sici, $item);

                if (!$found) {
                    // Nothing at this page: step back one page
                    $page--;
                    $temp_values['spage'] = $page;

                    // We might now be in the range of a previously found record
                    $tmp_item->issn = $values['issn'];
                    $tmp_item->volume = $values['volume'];
                    $tmp_item->pages = $page;

                    $cache_id = find_in_cache_from_page($tmp_item);
                    if ($cache_id != 0) {
                        $item = retrieve_from_db($cache_id);
                        $found = true;

                        // Update upper bound
                        update_page_upperbound($cache_id, $upper_bound);
                    }
                } else {
                    // Store the record unless we already have it
                    $cache_id = find_in_cache($item);
                    if ($cache_id == 0) {
                        $cache_id = store_in_cache($item);
                    }

                    // Update upper bound
                    update_page_upperbound($cache_id, $upper_bound);
                }

                $count++;
            }
        } else {
            if ($debug) {
                echo '<p>Not enough for JSTOR lookup, or out of range ' . $values['issn'] . ' ' . $year . '</p>';
            }
        }
    }

    return $found;
}
/**
 * @brief Import bibliographic data from a RIS file
 *
 * The text is read row by row. Each row is divided at the first " - " into a
 * tag and a value (both trimmed). A 'TY' row starts a new record, an 'ER' row
 * finishes it, and every tagged row seen while a record is open is handed to
 * process_ris_key() to populate the record.
 *
 * When a record is finished the function:
 *  - looks up a missing ISSN from the journal title (journal articles only),
 *  - tries to obtain a DOI via search_for_doi() if the record has none and
 *    in_crossref() reports coverage, also filling in a missing end page,
 *  - swaps forename/lastname for journals whose author names need reversing,
 *  - stores the record with store_in_cache() if find_in_cache() does not
 *    already know it and it has a non-empty ISSN.
 *
 * @param ris Contents of the RIS file as a single string
 */
function import_ris($ris)
{
    global $debug;

    // split() no longer exists in PHP 7+; the separators used here are plain
    // strings, so explode() is an exact replacement.
    $rows = explode("\n", $ris);

    if ($debug) {
        echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
        print_r($rows);
        echo "</pre>";
    }

    $state = 1;
    $genre = '';
    $obj = null; // record currently being assembled (none until the first TY)

    foreach ($rows as $r) {
        // Reset both per row so a row without a separator cannot reuse the
        // value of the row before it.
        $key = '';
        $value = null;

        // Limit of 2: only the first separator divides tag from value, so a
        // value that itself contains " - " (e.g. a title) is kept whole.
        $parts = explode(" - ", $r, 2);
        if (isset($parts[1])) {
            // Trim the tag as well: RIS pads it ("TY  - "), which would
            // otherwise leave a trailing space and defeat the comparisons below.
            $key = trim($parts[0]);
            $value = trim($parts[1]); // clean up any leading and trailing spaces
        }

        if ($key == 'TY') {
            $state = 1;
            $obj = new stdClass();
            $obj->authors = array();

            // Decide the genre afresh for every record so that a non-journal
            // record following a journal article is not treated as an article.
            $genre = ('JOUR' == $value) ? 'article' : '';
        }

        if ($key == 'ER') {
            $state = 0;

            // An ER with no preceding TY has nothing to import
            if ($obj !== null) {
                if ($debug) {
                    echo 'Line: ' . __LINE__ . "\n";
                    echo "\n=== Import this object ==\n";
                }

                // ISSN lookup
                if (!isset($obj->issn) && 'article' == $genre && isset($obj->title)) {
                    $issn = issn_from_journal_title($obj->title);
                    if ('' != $issn) {
                        $obj->issn = $issn;
                    }
                }

                // to do: we might want to do a DOI lookup here to get more GUIDs...
                if (!isset($obj->doi)) {
                    // Any of these may be absent from the record; pass null
                    // explicitly instead of reading an undefined property.
                    $lookup_issn = isset($obj->issn) ? $obj->issn : null;
                    $lookup_year = isset($obj->year) ? $obj->year : null;
                    $lookup_volume = isset($obj->volume) ? $obj->volume : null;
                    $lookup_spage = isset($obj->spage) ? $obj->spage : null;

                    if (in_crossref($lookup_issn, $lookup_year, $lookup_volume)) {
                        $item = new stdClass();
                        $doi = search_for_doi($lookup_issn, $lookup_volume, $lookup_spage, 'article', $item);
                        if ($doi != '') {
                            $obj->doi = $doi;

                            // Fix missing metadata
                            if (!isset($obj->epage) && isset($item->epage)) {
                                $obj->epage = $item->epage;
                            }
                        }
                    }
                }

                // http://en.wikipedia.org/wiki/Chinese_name
                // For some journals (e.g., Chinese) we need to reverse the name parts returned
                // by parse_name
                switch (isset($obj->issn) ? $obj->issn : '') {
                    case '0529-1526': // Acta Phytotaxonomica Sinica
                        for ($i = 0; $i < count($obj->authors); $i++) {
                            $tmp = $obj->authors[$i]->forename;
                            $obj->authors[$i]->forename = $obj->authors[$i]->lastname;
                            $obj->authors[$i]->lastname = $tmp;
                        }
                        break;

                    default:
                        break;
                }

                // Cleaning...
                if ($debug) {
                    echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
                    print_r($obj);
                    echo "</pre>";
                }

                // Store reference here...
                if (find_in_cache($obj) == 0) {
                    // Only records with a non-empty ISSN are stored
                    if (isset($obj->issn) && $obj->issn != '') {
                        store_in_cache($obj);
                    }

                    /* for Pac Sci
                    if (isset($obj->volume)) {
                        if ($obj->volume != '') {
                            store_in_cache($obj);
                        }
                    }
                    */
                }
            }
        }

        // Feed tagged rows of the open record to the field parser. Rows with
        // no tag, and rows that appear before any TY, are skipped.
        if ($state == 1 && $key != '' && $obj !== null) {
            process_ris_key($key, $value, $obj);
        }
    }
}