/**
 * Parse a free-text citation string and look up candidate BHL pages for it.
 *
 * The citation is matched against a list of regular expressions (see
 * matching_pages_parse_citation()). If one matches, the captures are turned
 * into a reference object by reference_from_matches(), an ISSN (or failing
 * that an OCLC number) is looked up from the journal title, and BHL is
 * searched via bhl_find_article(), falling back to
 * bhl_find_article_from_article_title() when the first search has no hits.
 *
 * Diagnostic output (the regex captures, the citation and the resulting page
 * list) is only written when the global $debug flag is truthy.
 *
 * @param string $publication Citation string, e.g. "Spixiana 7 (2): 125."
 * @param mixed  $year        Currently not used by this function; kept so
 *                            existing callers continue to work.
 *
 * @return array List of PageID values taken from the search hits; empty when
 *               the citation cannot be parsed or nothing is found.
 */
function matching_pages($publication, $year)
{
    global $debug;

    $pages = array();

    $matches = matching_pages_parse_citation($publication);

    if ($debug) {
        print_r($matches);
        echo $publication;
    }

    if (count($matches) > 0) {
        $reference = reference_from_matches($matches);

        if (!isset($reference->issn)) {
            // Try and get ISSN from bioGUID
            $issn = issn_from_title($reference->secondary_title);
            if ($issn != '') {
                $reference->issn = $issn;
            } elseif (!isset($reference->oclc)) {
                // No luck with ISSN, look for OCLC
                $oclc = oclc_for_title($reference->secondary_title);
                if ($oclc != 0) {
                    $reference->oclc = $oclc;
                }
            }
        }

        $atitle = isset($reference->title) ? $reference->title : '';
        $series = isset($reference->series) ? $reference->series : '';

        $search_hits = bhl_find_article(
            $atitle,
            $reference->secondary_title,
            $reference->volume,
            $reference->spage,
            $series,
            isset($reference->year) ? $reference->year : ''
        );

        if (count($search_hits) == 0) {
            // try alternative way of searching using article title
            $search_hits = bhl_find_article_from_article_title(
                $atitle,
                $reference->secondary_title,
                $reference->volume,
                $reference->spage,
                $series
            );
        }

        foreach ($search_hits as $hit) {
            $pages[] = $hit->PageID;
        }
    }

    if ($debug) {
        print_r($pages);
    }

    return $pages;
}

/**
 * Try each known citation layout in turn and return the captures of the
 * first one that matches.
 *
 * All patterns use the U (ungreedy) modifier, so a trailing quantifier that
 * is not followed by an anchor or a literal matches as little as possible.
 *
 * @param string $publication Citation string to parse.
 *
 * @return array preg_match() captures (named groups such as journal, volume,
 *               page and, depending on the layout, series, issue, year), or
 *               an empty array when no pattern matches.
 */
function matching_pages_parse_citation($publication)
{
    $patterns = array(
        // <journal> <volume>: <page>[.]
        '/(?<journal>.*)\\s+(?<volume>\\d+):\\s*(?<page>\\d+)\\.?$/Uu',

        // <journal>, [(<series>) ]<volume>,[ (<issue>)] <page>.
        '/(?<journal>.*),\\s+(\\((?<series>.*)\\)\\s+)?(?<volume>\\d+),(\\s+\\((?<issue>\\d+)\\))?\\s+(?<page>\\d+)\\.$/Uu',

        // <journal> <volume>: <page> <year>
        '/(?<journal>.*)\\s+(?<volume>\\d+):\\s*(?<page>\\d+)\\s+(?<year>[0-9]{4})/Uu',

        // <journal>, <volume>, <page>.
        '/(?<journal>.*),\\s+(?<volume>\\d+),\\s+(?<page>\\d+)\\.$/Uu',

        // <journal>, <volume>, <page> followed by anything (or nothing).
        // The lookahead forces the ungreedy \d+ to take the whole page
        // number; without it only the first digit would be captured.
        // A separate "<page>," variant is not needed: this pattern already
        // matches every string that variant would.
        '/(?<journal>.*),\\s+(?<volume>\\d+),\\s+(?<page>\\d+)(?!\\d)/Uu',

        // Proc. U.S. nat. Mus., 99, no. 3247, 475.
        '/(?<journal>.*),\\s+(?<volume>\\d+), no. (?<issue>\\d+),\\s+(?<page>\\d+)\\.$/Uu',

        // <journal>, <volume> (<issue>), <page>.
        '/(?<journal>.*),\\s+(?<volume>\\d+) \\((?<issue>\\d+)\\),\\s+(?<page>\\d+)\\.$/Uu',

        // Spixiana 7 (2): 125.
        '/(?<journal>.*)\\s+(?<volume>\\d+) \\((?<issue>\\d+)\\):\\s+(?<page>\\d+)\\.$/Uu',

        // Ann. Mag. Nat. Hist. , ser. 8 vol. 13 p. 436 <year>
        '/(?<journal>.*)\\s*,\\s+ser.\\s+(?<series>\\d+)\\s+vol.\\s*(?<volume>\\d+)\\s+p.\\s+(?<page>\\d+)\\s+(?<year>[0-9]{4})/Uu',
    );

    foreach ($patterns as $pattern) {
        $captures = array();
        if (preg_match($pattern, $publication, $captures)) {
            return $captures;
        }
    }

    return array();
}
/**
 * @brief Handle OpenURL request
 *
 * We may have more than one parameter with same name, so need to access QUERY_STRING, not _GET
 * http://stackoverflow.com/questions/353379/how-to-get-multiple-parameters-with-same-name-from-a-url-in-php
 *
 * Flow:
 *  1. With no query parameters, show the form and exit.
 *  2. Parse the raw query string into $params (key => list of values) and
 *     hand it to parse_openurl() to build the referent.
 *  3. If the referent has no ISSN, try to find one (or an OCLC number) from
 *     the journal title.
 *  4. If the referent carries a BHL page URL, reuse or store the article for
 *     that PageID and respond with it.
 *  5. Otherwise look the referent up in the local database; if found,
 *     respond with it (optionally logging the lookup to Twitter).
 *  6. Otherwise search BHL; if a hit is already a stored reference respond
 *     with that, else display the search results.
 *
 * "Respond" means JSON (optionally JSONP-wrapped) when $format is 'json',
 * and an HTTP redirect to the reference page otherwise; the script exits
 * after responding in steps 4-5 and for an existing reference in step 6.
 *
 * The JSONP callback is only honoured when it is a plain (optionally dotted)
 * JavaScript identifier, because it is echoed into the response.
 */
function main()
{
    global $config;
    global $debug;
    global $format;

    $id = 0;
    $callback = '';

    // If no query parameters
    if (count($_GET) == 0) {
        display_form();
        exit(0);
    }

    // Only 'json' selects JSON; any other supplied value means HTML.
    if (isset($_GET['format'])) {
        $format = ($_GET['format'] === 'json') ? 'json' : 'html';
    }

    // The callback is written straight into the output, so accept nothing
    // but identifier characters (e.g. "cb", "jQuery123_456", "app.handle").
    if (isset($_GET['callback'])
        && is_string($_GET['callback'])
        && preg_match('/^[A-Za-z_$][A-Za-z0-9_$]*(?:\\.[A-Za-z_$][A-Za-z0-9_$]*)*$/D', $_GET['callback'])
    ) {
        $callback = $_GET['callback'];
    }

    $debug = isset($_GET['debug']);

    // Handle query and display results.
    $query = explode('&', html_entity_decode($_SERVER['QUERY_STRING']));
    $params = array();
    foreach ($query as $param) {
        // Split on the first '=' only so values containing '=' survive, and
        // pad so a bare key (no '=') yields an empty value.
        list($key, $value) = array_pad(explode('=', $param, 2), 2, '');
        $key = preg_replace('/^\\?/', '', urldecode($key));
        $params[$key][] = trim(urldecode($value));
    }

    if ($debug) {
        echo '<h1>Params</h1>';
        echo '<pre>';
        print_r($params);
        echo '</pre>';
    }

    // This is what we got from user
    $referent = new stdclass();
    parse_openurl($params, $referent);

    // Flesh it out
    // If we are looking for an article we need an ISSN, or at least an OCLC
    // Ask whether have this in our database (assumes we have ISSN)
    if (!isset($referent->issn)) {
        // Try and get ISSN from bioGUID
        $issn = issn_from_title($referent->secondary_title);
        if ($issn != '') {
            $referent->issn = $issn;
        } elseif (!isset($referent->oclc)) {
            // No luck with ISSN, look for OCLC
            $oclc = oclc_for_title($referent->secondary_title);
            if ($oclc != 0) {
                $referent->oclc = $oclc;
            }
        }
    }

    if ($debug) {
        echo '<h1>Referent</h1>';
        echo '<pre>';
        print_r($referent);
        echo '</pre>';
    }

    // Handle identifiers
    if (isset($referent->url)) {
        // BHL URL, for example if we have already mapped article to BHL
        // in Zotero. Both http and https forms are accepted.
        if (preg_match('/^https?:\\/\\/(www\\.)?biodiversitylibrary\\.org\\/page\\/(?<pageid>[0-9]+)/', $referent->url, $matches)) {
            $PageID = $matches['pageid'];

            $references = bhl_reference_from_pageid($PageID);
            if (count($references) == 0) {
                // We don't have an article for this PageID
                $search_hit = bhl_score_page($PageID, $referent->title);

                // Store
                $id = db_store_article($referent, $PageID);
            } else {
                // Have a reference with this PageID already
                // Will need to handle case where > 1 article on same page, e.g.
                // http://www.biodiversitylibrary.org/page/3336598
                $id = $references[0];
            }

            // Did we get a hit?
            if ($id != 0) {
                // We have this reference in our database
                if ($format == 'json') {
                    openurl_send_reference_json($id, $callback);
                } else {
                    // Redirect to reference display
                    header('Location: ' . $config['web_root'] . 'reference/' . $id);
                }
                exit;
            }
        }
    }

    // OK, we're not forcing a match to BHL, so do we have this article?
    $id = db_find_article($referent);

    if ($id != 0) {
        // We have this reference in our database
        if ($format == 'json') {
            openurl_send_reference_json($id, $callback);
        } else {
            // Twitter as log
            if (!empty($config['twitter']) && isset($_GET['rfr_id'])) {
                $url = $config['web_root'] . 'reference/' . $id . ' ';
                // NOTE(review): the full URL above is immediately replaced by
                // the bare id, as in the original code — confirm intent.
                $url = $id;
                $url_len = strlen($url);

                $referer = isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '';
                $status = '#openurl ' . $referer;
                $status_len = strlen($status);

                // Keep status + ' ' + url within 140, truncating the status
                // and marking the cut with an ellipsis.
                $extra = 140 - $status_len - $url_len - 1;
                if ($extra < 0) {
                    $status_len += $extra;
                    $status_len -= 1;
                    $status = substr($status, 0, $status_len);
                    $status .= '…';
                }
                $status .= ' ' . $url;

                tweet($status);
            }

            // Redirect to reference display
            header('Location: reference/' . $id);
        }
        exit;
    }

    // OK, not found, so let's go look for it...

    // Search BHL
    $atitle = isset($referent->title) ? $referent->title : '';
    if (isset($referent->spage)) {
        $start_page = $referent->spage;
    } else {
        $start_page = isset($referent->pages) ? $referent->pages : null;
    }
    $series = isset($referent->series) ? $referent->series : '';
    $issn = isset($referent->issn) ? $referent->issn : '';

    $search_hits = bhl_find_article(
        $atitle,
        $referent->secondary_title,
        $referent->volume,
        $start_page,
        $series,
        isset($referent->date) ? $referent->date : '',
        $issn
    );

    if (count($search_hits) == 0) {
        // try alternative way of searching using article title
        // ($atitle rather than $referent->title, which may be unset)
        $search_hits = bhl_find_article_from_article_title(
            $atitle,
            $referent->secondary_title,
            $referent->volume,
            $start_page,
            $series,
            $issn
        );
    }

    // At this point if we haven't found it in BHL we could go elsewhere, e.g. bioGUID,
    // in which case we'd need to take this into account when displaying HTML and JSON

    if ($debug) {
        echo '<h3>Search hits</h3>';
        echo '<pre>';
        print_r($search_hits);
        echo '</pre>';
    }

    // Check whether we already have an article that starts on this page
    foreach ($search_hits as $hit) {
        $references = bhl_reference_from_pageid($hit->PageID);
        if (count($references) != 0) {
            // We have this reference in our database
            if ($format == 'json') {
                openurl_send_reference_json($references[0], $callback);
            } else {
                // Redirect to reference display
                header('Location: reference/' . $references[0]);
            }
            exit;
        }
    }

    // Output search results in various formats...
    if ($format == 'json') {
        display_bhl_result_json($referent, $search_hits, $callback);
    } else {
        display_bhl_result_html($referent, $search_hits);
    }
}

/**
 * Write a stored reference to the response as JSON, wrapped in a JSONP
 * callback when one is given.
 *
 * @param mixed  $id       Reference id passed to db_retrieve_reference().
 * @param string $callback JSONP callback name, or '' for plain JSON. The
 *                         caller is responsible for having validated it.
 */
function openurl_send_reference_json($id, $callback)
{
    $reference = db_retrieve_reference($id);

    header('Content-type: text/plain; charset=utf-8');

    if ($callback != '') {
        echo $callback . '(';
    }
    echo json_format(json_encode($reference));
    if ($callback != '') {
        echo ')';
    }
}
/**
 * Ad-hoc regression check for bhl_find_article().
 *
 * Runs a fixed list of (journal title, volume, start page[, series]) lookups
 * and checks that the expected BHL PageID is among the PageIDs of the
 * returned hits. Progress, a summary line and the failed cases are echoed
 * inside a <pre> block; nothing is returned.
 *
 * bhl_find_article() is called the same way as elsewhere in this file:
 * article title first (empty here), then journal title, volume, start page
 * and series, and its result is treated as a list of hit objects with a
 * PageID property.
 */
function test_bhl_find()
{
    $tests = array();

    //----------------------------------------------------------------------------------------------
    // Journal of Hymenoptera Research
    array_push($tests, array('title' => 'Journal of Hymenoptera Research', 'volume' => 6, 'spage' => 256, 'PageID' => 4491707));

    // Multiple pages in same item (multiple volumes)
    array_push($tests, array('title' => 'Journal of Hymenoptera Research', 'volume' => 8, 'spage' => 1, 'PageID' => 4491014));

    //----------------------------------------------------------------------------------------------
    // Fieldiana
    array_push($tests, array('title' => 'Fieldiana, Zoology', 'volume' => 31, 'spage' => 149, 'PageID' => 2763486));
    array_push($tests, array('title' => 'Fieldiana, Zoology', 'volume' => 39, 'spage' => 577, 'PageID' => 2866715));

    // Two hits
    array_push($tests, array('title' => 'Fieldiana, Zoology', 'volume' => 73, 'spage' => 49, 'PageID' => 2759622));
    array_push($tests, array('title' => 'Fieldiana, Zoology', 'volume' => 77, 'spage' => 1, 'PageID' => 2866529));

    //----------------------------------------------------------------------------------------------
    // University of Kansas Science Bulletin
    array_push($tests, array('title' => 'University of Kansas Science Bulletin', 'volume' => 35, 'spage' => 577, 'PageID' => 4413503));

    //----------------------------------------------------------------------------------------------
    // Bulletin of Zoological Nomenclature
    array_push($tests, array('title' => 'Bulletin of Zoological Nomenclature', 'volume' => 23, 'spage' => 169, 'PageID' => 12222978));

    //----------------------------------------------------------------------------------------------
    // Proceedings of the California Academy of Sciences
    array_push($tests, array('title' => 'Proceedings of the California Academy of Sciences', 'volume' => 47, 'spage' => 47, 'PageID' => 15776069));

    //----------------------------------------------------------------------------------------------
    // Ann Mag Nat Hist
    array_push($tests, array('title' => 'Ann Mag Nat Hist', 'volume' => 20, 'spage' => 413, 'series' => 8, 'PageID' => 15611435));

    //----------------------------------------------------------------------------------------------
    // Bulletin of the British Museum (Natural History). Zoology
    array_push($tests, array('title' => 'Bulletin of the British Museum (Natural History). Zoology', 'volume' => 34, 'spage' => 65, 'PageID' => 2261841));
    array_push($tests, array('title' => 'Bulletin of the British Museum (Natural History). Zoology', 'volume' => 27, 'spage' => 65, 'PageID' => 2261309));
    array_push($tests, array('title' => 'Bulletin of the British Museum (Natural History). Zoology', 'volume' => 27, 'spage' => 59, 'PageID' => 2261319));

    //----------------------------------------------------------------------------------------------
    // Bulletin of the British Museum (Natural History). Entomology
    array_push($tests, array('title' => 'Bulletin of the British Museum (Natural History): Entomology', 'volume' => 12, 'spage' => 247, 'PageID' => 2298342));

    //----------------------------------------------------------------------------------------------
    // Memoirs of the Museum of Comparative Zoölogy
    // NOTE(review): same expected PageID as the California Academy case
    // above — possibly a copy/paste leftover; confirm.
    array_push($tests, array('title' => 'Memoirs of the Museum of Comparative Zoölogy', 'volume' => 50, 'spage' => 85, 'PageID' => 15776069));

    //----------------------------------------------------------------------------------------------
    // Proc. ent. Soc. Wash.
    // Banks, N. (1899b). Some spiders from northern Louisiana. Proc. ent. Soc. Wash. 4: 188-195.
    array_push($tests, array('title' => 'Proc. ent. Soc. Wash.', 'volume' => 4, 'spage' => 188, 'PageID' => 2299619));

    //----------------------------------------------------------------------------------------------
    // Ann. Soc. ent. Fr.
    // Simon, E. (1885c). Etudes arachnologiques. 17e Mémoire. XXIV. Arachnides recuellis dans la
    // vallée de Tempé et sur le mont Ossa (Thessalie). Ann. Soc. ent. Fr. (6) 5: 209-218.
    array_push($tests, array('title' => 'Ann. Soc. ent. Fr.', 'volume' => 5, 'spage' => 209, 'series' => 6, 'PageID' => 10171703));

    //----------------------------------------------------------------------------------------------
    // Mitteilungen der Schweizerischen Entomologischen Gesellschaft
    // Forel A (1887) Fourmis récoltées à Madagascar par le Dr. Conrad Keller. Mitteilungen der Schweizerischen Entomologischen Gesellschaft 7: 381–389.
    array_push($tests, array('title' => 'Mitteilungen der Schweizerischen Entomologischen Gesellschaft', 'volume' => 7, 'spage' => 381, 'PageID' => 10395996));

    //----------------------------------------------------------------------------------------------
    // Revue zoologique africaine
    // NOTE(review): same expected PageID as the first Journal of Hymenoptera
    // Research case — possibly a placeholder; confirm.
    array_push($tests, array('title' => 'Revue zoologique africaine', 'volume' => 9, 'spage' => 1, 'PageID' => 4491707));

    echo '<pre>';

    $ok = 0;
    $failed = array();

    foreach ($tests as $test) {
        echo $test['title'] . ' ' . $test['volume'] . ' ' . $test['spage'] . ' ...';

        // No article title is known for these cases, so pass '' first.
        $search_hits = bhl_find_article(
            '',
            $test['title'],
            $test['volume'],
            $test['spage'],
            isset($test['series']) ? $test['series'] : ''
        );

        $page_ids = array();
        foreach ($search_hits as $hit) {
            $page_ids[] = $hit->PageID;
        }

        // Loose comparison on purpose: PageIDs may come back as strings.
        if (in_array($test['PageID'], $page_ids)) {
            $ok++;
            echo " [" . count($page_ids) . "] ok\n";
        } else {
            echo " not found\n";
            array_push($failed, array($test, $search_hits));
        }
    }

    // Report
    echo count($tests) . ' references, ' . (count($tests) - $ok) . ' failed' . "\n";
    print_r($failed);

    echo '</pre>';
}