} */ } return $found; } // test if (0) { $item = new stdClass(); //search_cinii('', '0003-5092', 18, 185, 0, 1939, $item); //search_cinii('','1343-8786', 2, 439, 445, 1999, $item); // 1(2), June 25 1998: 233-239. // Entomological Science //search_cinii('','1343-8786', 1, 233, 239, 1998, $item); // ANNOTATIONES ZOOLOGICAE JAPONENSES //search_cinii('', '0003-5092', 56, 338, 350, 1983, $item); // Entomological REview //search_cinii('', '0286-9810', 58, 1, 6, 2003, $item); //search_cinii('', '0286-9810', 58, 1, 6, 2003, $item); //search_cinii('', '0286-9810', 61, 119, 126, 2006, $item); // new //search_cinii('', '0286-9810', 58, 1, $item); //search_cinii('', '0003-5092', 56, 338, $item); search_cinii('', '1343-8786', 2, 439, $item); print_r($item); if (find_in_cache($item)) { echo "already exists\n"; } else { echo "store\n"; store_in_cache($item); } }
} if ($formatted_date != '') { $obj->date = $formatted_date; } if ($year != '') { $obj->year = $year; } } } // cleanup if (!isset($obj->url) && !isset($obj->doi)) { $obj->url = $obj->zotero; } // ISSN lookup if (!isset($obj->issn) && 'article' == $obj->genre) { $issn = issn_from_journal_title($obj->title); if ('' != $issn) { $obj->issn = $issn; } } print_r($obj); // Store if (find_in_cache($obj) == 0) { if (isset($obj->issn)) { if ($obj->issn != '') { store_in_cache($obj); } } } } }
/**
 * @brief Scrape article references from a Nuytsia contents page and cache them
 *
 * The page is fetched with get(), flattened onto a single line and cut into
 * paragraphs at each '</p>'. Every paragraph carrying class="article" is parsed
 * with regular expressions into a reference object (authors, year, PDF link,
 * article title, volume, issue, pages). Each object is dumped with print_r()
 * and handed to store_in_cache() unless find_in_cache() reports a match.
 *
 * @param url Address of the HTML page to parse
 */
function parse_nuytsia($url)
{
    $text = get($url);

    // Flatten the markup so that one regular expression can span a whole paragraph
    $text = str_replace(array("\n", "\r", "\t"), '', $text);

    $paras = explode('</p>', $text);

    foreach ($paras as $pp) {
        // explode() ate the closing tag; the journal pattern below needs it
        $pp .= '</p>';

        if (!preg_match('/class="article"/', $pp)) {
            continue;
        }

        $item = new stdClass();
        $item->authors = array();

        // Authors sit between the closing anchor and the "(YYYY)" year
        if (preg_match('/<\\/a>(?<authors>(.*))\\((?<year>[0-9]{4})\\)./', $pp, $match)) {
            $item->year = $match['year'];

            $authors = $match['authors'];
            $authors = preg_replace('/(.*)<\\/a>/', '', $authors);
            $authors = str_replace(' AND ', ', ', $authors);

            // Names are separated by '.,' so each piece loses its trailing full stop
            foreach (explode('.,', trim($authors)) as $value) {
                if ($value == '') {
                    continue;
                }

                $value = trim($value);
                $value .= ".";

                // Space initials nicely
                $value = preg_replace("/\\.([A-Z])/", ". \$1", $value);

                // Make nice. mb_detect_encoding() returns false when it cannot
                // decide, and false is not a valid encoding for
                // mb_convert_case(), so fall back to UTF-8 in that case.
                $encoding = mb_detect_encoding($value);
                if ($encoding === false) {
                    $encoding = 'UTF-8';
                }
                $value = mb_convert_case($value, MB_CASE_TITLE, $encoding);

                // Get parts of name; only the keys read below are used
                $parts = parse_name($value);

                $author = new stdClass();
                if (isset($parts['last'])) {
                    $author->lastname = $parts['last'];
                }
                if (isset($parts['suffix'])) {
                    $author->suffix = $parts['suffix'];
                }
                if (isset($parts['first'])) {
                    $author->forename = $parts['first'];
                    if (array_key_exists('middle', $parts)) {
                        $author->forename .= ' ' . $parts['middle'];
                    }
                }

                array_push($item->authors, $author);
            }
        }

        // PDF link: dots in the host name are literal, and the match may not run
        // past the end of the href value (a greedy '.*' could swallow the rest of
        // the paragraph up to a later '.pdf')
        if (preg_match('/<a href="(?<pdf>http:\\/\\/www\\.dec\\.wa\\.gov\\.au[^"]*\\.pdf)/', $pp, $match)) {
            $item->pdf = $match['pdf'];
        }

        // Article title lies between the year and the journal name
        if (preg_match('/\\([0-9]{4}\\).(?<atitle>(.*))<i>Nuytsia<\\/i>/', $pp, $match)) {
            $item->atitle = strip_tags($match['atitle']);
        }

        // <i>Nuytsia</i> <u>19</u> (1) : 191–196
        // page separator is an en dash (U+2013)
        if (preg_match('/<i>(Nuytsia)<\\/i> <u>(?<volume>(.*))<\\/u>\\s*\\((?<issue>(.*))\\)\\s*:\\s*(?<spage>[0-9]+)–(?<epage>(.*))\\.<\\/p>/', $pp, $match)) {
            $item->title = 'Nuytsia';
            $item->volume = $match['volume'];
            $item->issue = $match['issue'];
            $item->spage = $match['spage'];
            $item->epage = $match['epage'];
            $item->issn = '0085-4417';
        }

        print_r($item);

        // Store reference here...
        if (find_in_cache($item) == 0) {
            store_in_cache($item);
        }
    }
}
/**
 * @brief Find the article that contains a given page
 *
 * Lookup order:
 *  -# the local cache, via find_in_cache_from_page();
 *  -# CrossRef, when in_crossref() accepts the ISSN/year/volume, by calling
 *     search_for_doi() for the requested page and then for each preceding page
 *     (at most 50 attempts);
 *  -# JSTOR, when enough_for_jstor_lookup() and in_jstor() both accept the
 *     request, by building a SICI for the page and each preceding page (at most
 *     20 attempts).
 *
 * While walking backwards the cache is consulted again at every step, because
 * the lower page may fall inside an article that is already stored. Whenever an
 * article is found this way, update_page_upperbound() is called with the page
 * originally asked for.
 *
 * @param values Associative array describing the page. Keys read here: 'issn',
 *               'volume', 'pages', 'genre' and, optionally, 'date'.
 * @param item   (by reference) receives the article when one is found
 *
 * @return true if an article was found, otherwise false
 */
function find_article_from_page($values, &$item)
{
    global $debug;

    // Is it in our cache?
    $tmp_item = new stdClass();
    $tmp_item->issn = $values['issn'];
    $tmp_item->volume = $values['volume'];
    $tmp_item->pages = $values['pages'];

    $cache_id = find_in_cache_from_page($tmp_item);
    if ($cache_id != 0) {
        $item = retrieve_from_db($cache_id);
        return true;
    }

    $found = false;

    // 'date' is optional. Resolve it once so that every lookup below, CrossRef
    // and JSTOR alike, reads the same guarded value instead of indexing
    // $values['date'] directly.
    $year = '';
    if (array_key_exists('date', $values)) {
        $year = $values['date'];
    }

    // Off to the Cloud... CrossRef first
    if (in_crossref($values['issn'], $year, $values['volume'])) {
        $max_tries = 50;
        $page = $values['pages'];
        $upper_bound = $page; // save the original starting page
        $count = 0;

        while (!$found && $count < $max_tries && $page >= 0) {
            if ($debug) {
                echo $count, '.';
            }

            $doi = search_for_doi($values['issn'], $values['volume'], $page, $values['genre'], $item);

            if ($doi == '') {
                // Decrease page
                $page--;

                // We might now be in the range of a previously found article
                $tmp_item->issn = $values['issn'];
                $tmp_item->volume = $values['volume'];
                $tmp_item->pages = $page;

                $cache_id = find_in_cache_from_page($tmp_item);
                if ($cache_id != 0) {
                    $item = retrieve_from_db($cache_id);
                    $found = true;

                    // Update upper bound
                    update_page_upperbound($cache_id, $upper_bound);
                }
            } else {
                $found = true;

                $cache_id = find_in_cache($item);
                if ($cache_id == 0) {
                    $cache_id = store_in_cache($item);
                }

                // Update upper bound
                update_page_upperbound($cache_id, $upper_bound);
            }

            $count++;
        }
    }

    // OK, try JSTOR (gulp)
    if (!$found) {
        if ($debug) {
            echo '<p>Trying JSTOR ' . $values['issn'] . '</p>';
            if (in_jstor($values['issn'], $year)) {
                echo "in JSTOR\n";
            }
        }

        if (enough_for_jstor_lookup($values) && in_jstor($values['issn'], $year)) {
            $max_tries = 20;
            $page = $values['pages'];
            $upper_bound = $page; // save the original starting page
            $temp_values = $values;
            $count = 0;

            while (!$found && $count < $max_tries && $page >= 0) {
                if ($debug) {
                    echo $count, '.';
                }

                // The SICI is built for the page currently being tried
                $temp_values['spage'] = $page;
                $sici = sici_from_meta($temp_values);

                if ($debug) {
                    print_r($temp_values);
                    echo urlencode($sici);
                }

                $found = jstor_metadata($sici, $item);

                if (!$found) {
                    // Decrease page
                    $page--;

                    // We might now be in the range of a previously found article
                    $tmp_item->issn = $values['issn'];
                    $tmp_item->volume = $values['volume'];
                    $tmp_item->pages = $page;

                    $cache_id = find_in_cache_from_page($tmp_item);
                    if ($cache_id != 0) {
                        $item = retrieve_from_db($cache_id);
                        $found = true;

                        // Update upper bound
                        update_page_upperbound($cache_id, $upper_bound);
                    }
                } else {
                    $cache_id = find_in_cache($item);
                    if ($cache_id == 0) {
                        $cache_id = store_in_cache($item);
                    }

                    // Update upper bound
                    update_page_upperbound($cache_id, $upper_bound);
                }

                $count++;
            }
        } else {
            if ($debug) {
                echo '<p>Not enough for JSTOR lookup, or out of range ' . $values['issn'] . ' ' . $year . '</p>';
            }
        }
    }

    return $found;
}
/**
 * @brief Import bibliographic data from a RIS file
 *
 * The text is read line by line. A 'TY' line opens a new reference object, an
 * 'ER' line closes it, and every tagged line in between (the 'TY' line
 * included) is handed to process_ris_key(). When a record is closed:
 *  - a missing ISSN is looked up from the journal title (journal articles only);
 *  - a missing DOI is looked up with search_for_doi() if in_crossref() accepts
 *    the record, which may also supply a missing end page;
 *  - author name parts are swapped for Acta Phytotaxonomica Sinica;
 *  - the record is stored with store_in_cache() if find_in_cache() does not
 *    know it and it has a non-empty ISSN.
 *
 * Lines that do not contain the " - " separator carry no tag and are ignored.
 *
 * @param ris Contents of the RIS file as one string
 */
function import_ris($ris)
{
    global $debug;

    // split() was removed in PHP 7; explode() is equivalent for a literal delimiter
    $rows = explode("\n", $ris);

    if ($debug) {
        echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
        print_r($rows);
        echo "</pre>";
    }

    $state = 1;
    $genre = '';
    $obj = null; // no record is open until the first TY line

    foreach ($rows as $r) {
        // Limit of 2 keeps a value such as "Part 1 - Introduction" in one piece
        $parts = explode(" - ", $r, 2);

        if (!isset($parts[1])) {
            // No tag on this line; never reuse the value of the previous line
            continue;
        }

        // The tag may carry padding (RIS writes "TY  - "), the value may carry a trailing "\r"
        $key = trim($parts[0]);
        $value = trim($parts[1]);

        if ($key === 'TY') {
            $state = 1;
            $obj = new stdClass();
            $obj->authors = array();

            // Reset per record so a journal article does not taint the next record
            $genre = ('JOUR' == $value) ? 'article' : '';
        }

        if ($key === 'ER') {
            $state = 0;

            if ($obj !== null) {
                if ($debug) {
                    echo 'Line: ' . __LINE__ . "\n";
                    echo "\n=== Import this object ==\n";
                }

                // ISSN lookup
                if (!isset($obj->issn) && 'article' == $genre && isset($obj->title)) {
                    $issn = issn_from_journal_title($obj->title);
                    if ('' != $issn) {
                        $obj->issn = $issn;
                    }
                }

                // to do: we might want to do a DOI lookup here to get more GUIDs...
                if (!isset($obj->doi) && isset($obj->issn)) {
                    // Missing fields are passed as null, without reading undefined properties
                    $year = isset($obj->year) ? $obj->year : null;
                    $volume = isset($obj->volume) ? $obj->volume : null;
                    $spage = isset($obj->spage) ? $obj->spage : null;

                    if (in_crossref($obj->issn, $year, $volume)) {
                        $item = new stdClass();
                        $doi = search_for_doi($obj->issn, $volume, $spage, 'article', $item);
                        if ($doi != '') {
                            $obj->doi = $doi;

                            // Fix missing metadata
                            if (!isset($obj->epage) && isset($item->epage)) {
                                $obj->epage = $item->epage;
                            }
                        }
                    }
                }

                // http://en.wikipedia.org/wiki/Chinese_name
                // For some journals (e.g., Chinese) we need to reverse the name parts
                // returned by parse_name
                if (isset($obj->issn) && $obj->issn == '0529-1526') {
                    // Acta Phytotaxonomica Sinica
                    for ($i = 0; $i < count($obj->authors); $i++) {
                        $tmp = $obj->authors[$i]->forename;
                        $obj->authors[$i]->forename = $obj->authors[$i]->lastname;
                        $obj->authors[$i]->lastname = $tmp;
                    }
                }

                // Cleaning...
                if ($debug) {
                    echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
                    print_r($obj);
                    echo "</pre>";
                }

                // Store reference here...
                if (find_in_cache($obj) == 0) {
                    if (isset($obj->issn) && $obj->issn != '') {
                        store_in_cache($obj);
                    }
                    // for Pac Sci
                    // if (isset($obj->volume)) {
                    //     if ($obj->volume != '') {
                    //         store_in_cache($obj);
                    //     }
                    // }
                }

                // The record is closed; a stray ER must not import it twice
                $obj = null;
            }
        }

        if ($state == 1 && $obj !== null) {
            process_ris_key($key, $value, $obj);
        }
    }
}