/**
 * @brief Find stored references whose page range contains a cited page
 *
 * Resolves the journal title to an ISSN (or, failing that, an OCLC number) and
 * queries rdmp_reference for rows with the same volume where the cited page
 * lies between spage and epage. Optional series and year narrow the query.
 *
 * @param journal Journal title; a trailing ", ser. X" is split off as the series
 *                and a trailing ", ns" / ", n.s." is dropped
 * @param volume  Volume
 * @param page    Cited page; for values like "123, pl. 4" only the leading number is used
 * @param year    Optional year filter
 *
 * @return Array with one reference_to_bibjson() result per matching reference;
 *         empty if an argument is missing, the page is not a plain number,
 *         the journal cannot be identified, or nothing matches
 */
function find_reference($journal, $volume, $page, $year = '')
{
    global $db;

    $references = array();
    $hits = array();

    if ($journal == '' || $volume == '' || $page == '') {
        return $hits;
    }

    $series = '';

    // "Journal, ser. 2" -> journal + series
    if (preg_match('/^(?<journal>.*),\\s+ser\\.\\s+(?<series>.*)$/', $journal, $m)) {
        $journal = $m['journal'];
        $series = $m['series'];
    }

    // "Journal, ns" / "Journal, n.s." -> journal (dots escaped so that arbitrary
    // four-character suffixes such as ", nest" are no longer stripped)
    if (preg_match('/^(?<journal>.*),\\s+(ns|n\\.s\\.)$/', $journal, $m)) {
        $journal = $m['journal'];
    }

    // "123, pl. 4" -> "123"
    if (preg_match('/^(?<page>\\d+),(.*)$/', $page, $m)) {
        $page = $m['page'];
    }

    // The page is placed unquoted into the SQL below (numeric BETWEEN test), so
    // it must consist of digits only; anything else cannot match and is rejected
    // before any lookup or query is made.
    if (!preg_match('/^\\d+\\z/', (string) $page)) {
        return $hits;
    }

    $issn = issn_from_title($journal);
    $oclc = 0;
    if ($issn == '') {
        $oclc = oclc_for_title($journal);
    }

    // Prefer ISSN; fall back to OCLC only when one was actually found. (The
    // former test "$oclc != ''" is true for the initial 0 on PHP 8 and replaced
    // a valid ISSN query with "oclc='0'".)
    if ($issn != '') {
        $sql = 'SELECT * FROM rdmp_reference WHERE issn=' . $db->qstr($issn);
    } elseif (!empty($oclc)) {
        $sql = 'SELECT * FROM rdmp_reference WHERE oclc=' . $db->qstr($oclc);
    } else {
        return $hits;
    }

    $sql .= ' AND volume=' . $db->qstr($volume)
        . ' AND ' . $page . ' BETWEEN spage AND epage';

    if ($series != '') {
        $sql .= ' AND series=' . $db->qstr($series);
    }
    if ($year != '') {
        $sql .= ' AND year=' . $db->qstr($year);
    }

    $result = $db->Execute($sql);
    if (!$result) {
        die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
    }

    // Collect the ids first, then load each reference once the result set has
    // been fully read.
    while (!$result->EOF) {
        $references[] = $result->fields['reference_id'];
        $result->MoveNext();
    }

    foreach ($references as $reference_id) {
        $hits[] = reference_to_bibjson(db_retrieve_reference($reference_id));
    }

    return $hits;
}
/**
 * @brief Handle OpenURL request
 *
 * We may have more than one parameter with same name, so need to access QUERY_STRING, not _GET
 * http://stackoverflow.com/questions/353379/how-to-get-multiple-parameters-with-same-name-from-a-url-in-php
 *
 * Steps, in order (each one that finds a stored reference answers and exits):
 *  1. No query parameters: show the form.
 *  2. Parse the query string into a referent and try to add an ISSN or OCLC.
 *  3. Referent URL is a BHL page URL: use the reference already linked to that
 *     page, or store the referent against that page.
 *  4. Look the referent up in the local database (optionally tweeting the hit).
 *  5. Search BHL; if any hit's page already has a stored reference, use it.
 *  6. Otherwise display the search hits as JSON or HTML.
 *
 * Reads $_GET['format'] (html|json), $_GET['callback'] (JSONP function name),
 * $_GET['debug'] and $_GET['rfr_id']; sets the globals $format and $debug.
 */
function main()
{
    global $config;
    global $debug;
    global $format;

    $id = 0;
    $callback = '';

    // If no query parameters
    if (count($_GET) == 0) {
        display_form();
        exit(0);
    }

    // Anything other than "json" means HTML. When no format is given the
    // global $format is left as it was.
    if (isset($_GET['format'])) {
        $format = ($_GET['format'] === 'json') ? 'json' : 'html';
    }

    // The callback is echoed verbatim in front of the JSON, so only accept a
    // (possibly dotted) JavaScript identifier; anything else is ignored and
    // plain JSON is returned.
    if (isset($_GET['callback']) && is_string($_GET['callback'])
        && preg_match('/^[A-Za-z_$][A-Za-z0-9_$]*(?:\\.[A-Za-z_$][A-Za-z0-9_$]*)*\\z/', $_GET['callback'])
    ) {
        $callback = $_GET['callback'];
    }

    $debug = isset($_GET['debug']);

    // Handle query and display results.
    $params = array();
    $query_string = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '';
    foreach (explode('&', html_entity_decode($query_string)) as $param) {
        if ($param === '') {
            // e.g. "a=1&&b=2" or a trailing "&"
            continue;
        }

        // Split on the first "=" only so values that contain "=" stay intact;
        // a bare key such as "debug" gets an empty value.
        $pair = explode('=', $param, 2);
        $key = preg_replace('/^\\?/', '', urldecode($pair[0]));
        $value = isset($pair[1]) ? $pair[1] : '';
        $params[$key][] = trim(urldecode($value));
    }

    if ($debug) {
        echo '<h1>Params</h1>';
        echo '<pre>';
        // Escaped: this is user-supplied text written into an HTML page
        echo htmlspecialchars(print_r($params, true), ENT_QUOTES, 'UTF-8');
        echo '</pre>';
    }

    // This is what we got from user
    $referent = new stdClass();
    parse_openurl($params, $referent);

    // Flesh it out
    // If we are looking for an article we need an ISSN, or at least an OCLC
    if (!isset($referent->issn)) {
        $journal = isset($referent->secondary_title) ? $referent->secondary_title : null;

        // Try and get ISSN from bioGUID
        $issn = issn_from_title($journal);
        if ($issn != '') {
            $referent->issn = $issn;
        } elseif (!isset($referent->oclc)) {
            // No luck with ISSN, look for OCLC
            $oclc = oclc_for_title($journal);
            if ($oclc != 0) {
                $referent->oclc = $oclc;
            }
        }
    }

    if ($debug) {
        echo '<h1>Referent</h1>';
        echo '<pre>';
        echo htmlspecialchars(print_r($referent, true), ENT_QUOTES, 'UTF-8');
        echo '</pre>';
    }

    // Handle identifiers
    if (isset($referent->url)) {
        // BHL URL, for example if we have already mapped article to BHL in Zotero.
        // Accepts http and https; the dot in the host name is matched literally.
        if (preg_match('/^https?:\\/\\/(www\\.)?biodiversitylibrary\\.org\\/page\\/(?<pageid>[0-9]+)/', $referent->url, $matches)) {
            $PageID = $matches['pageid'];
            $references = bhl_reference_from_pageid($PageID);

            if (count($references) == 0) {
                // We don't have an article for this PageID.
                // NOTE(review): the score is not used here; the call is kept in
                // case bhl_score_page() has side effects - confirm and remove.
                bhl_score_page($PageID, isset($referent->title) ? $referent->title : null);

                // Store
                $id = db_store_article($referent, $PageID);
            } else {
                // Have a reference with this PageID already
                // Will need to handle case where > 1 article on same page, e.g.
                // http://www.biodiversitylibrary.org/page/3336598
                $id = $references[0];
            }

            // Did we get a hit?
            if ($id != 0) {
                openurl_send_reference($id, $format, $callback, $config['web_root'] . 'reference/' . $id);
                exit;
            }
        }
    }

    // OK, we're not forcing a match to BHL, so do we have this article?
    $id = db_find_article($referent);

    if ($id != 0) {
        // Twitter as log (HTML responses that carry a referrer id only)
        if ($format != 'json' && !empty($config['twitter']) && isset($_GET['rfr_id'])) {
            // The tweet ends with the reference id
            $url = (string) $id;
            $url_len = strlen($url);

            $referer = isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '';
            $status = '#openurl ' . $referer;
            $status_len = strlen($status);

            // Shorten the text so that text + space + id fits in 140 bytes
            $extra = 140 - $status_len - $url_len - 1;
            if ($extra < 0) {
                $status_len += $extra;
                $status_len -= 1;
                $status = substr($status, 0, $status_len);
                $status .= '…';
            }
            $status .= ' ' . $url;

            tweet($status);
        }

        openurl_send_reference($id, $format, $callback, 'reference/' . $id);
        exit;
    }

    // OK, not found, so let's go look for it...

    // Search BHL. Optional referent fields are read defensively; a missing
    // field is passed as null or '' exactly as before, without a notice.
    $atitle = isset($referent->title) ? $referent->title : '';
    $journal = isset($referent->secondary_title) ? $referent->secondary_title : null;
    $volume = isset($referent->volume) ? $referent->volume : null;
    if (isset($referent->spage)) {
        $page = $referent->spage;
    } else {
        $page = isset($referent->pages) ? $referent->pages : null;
    }
    $series = isset($referent->series) ? $referent->series : '';
    $date = isset($referent->date) ? $referent->date : '';
    $issn = isset($referent->issn) ? $referent->issn : '';

    $search_hits = bhl_find_article($atitle, $journal, $volume, $page, $series, $date, $issn);

    if (count($search_hits) == 0) {
        // try alternative way of searching using article title
        $search_hits = bhl_find_article_from_article_title($atitle, $journal, $volume, $page, $series, $issn);
    }

    // At this point if we haven't found it in BHL we could go elsewhere, e.g. bioGUID,
    // in which case we'd need to take this into account when displaying HTML and JSON

    if ($debug) {
        echo '<h3>Search hits</h3>';
        echo '<pre>';
        echo htmlspecialchars(print_r($search_hits, true), ENT_QUOTES, 'UTF-8');
        echo '</pre>';
    }

    // Check whether we already have an article that starts on one of the hit pages
    foreach ($search_hits as $hit) {
        $references = bhl_reference_from_pageid($hit->PageID);
        if (count($references) != 0) {
            openurl_send_reference($references[0], $format, $callback, 'reference/' . $references[0]);
            exit;
        }
    }

    // Output search results in various formats...
    if ($format == 'json') {
        display_bhl_result_json($referent, $search_hits, $callback);
    } else {
        display_bhl_result_html($referent, $search_hits);
    }
}

/**
 * @brief Answer an OpenURL request with a reference we already hold
 *
 * For JSON the stored reference is written as text/plain, wrapped in
 * "callback(...)" when a callback is given; for any other format the client is
 * redirected to $location. The caller is responsible for calling exit.
 *
 * @param reference_id Identifier passed to db_retrieve_reference()
 * @param format       'json' for a JSON/JSONP body, anything else for a redirect
 * @param callback     JSONP function name, or '' for plain JSON
 * @param location     Redirect target used for non-JSON formats
 */
function openurl_send_reference($reference_id, $format, $callback, $location)
{
    if ($format == 'json') {
        // Display object
        $reference = db_retrieve_reference($reference_id);

        header("Content-type: text/plain; charset=utf-8\n\n");
        if ($callback != '') {
            echo $callback . '(';
        }
        echo json_format(json_encode($reference));
        if ($callback != '') {
            echo ')';
        }
        return;
    }

    // Redirect to reference display
    header('Location: ' . $location . "\n\n");
}
/**
 * @brief Find BHL pages for a "journal volume: page" style citation string
 *
 * The citation is tried against a list of patterns in order; the first one
 * that matches supplies the journal, volume, page and (where present) series,
 * issue and year. The parsed reference is then searched for in BHL and the
 * PageID of every hit is returned.
 *
 * Diagnostic output (the regex captures, the citation and the resulting page
 * list) is printed only when the global $debug flag is set.
 *
 * @param publication Citation string, e.g. "Spixiana 7 (2): 125."
 * @param year        Not used by this function
 *
 * @return Array of PageID values from the search hits; empty if the citation
 *         could not be parsed or nothing was found
 */
function matching_pages($publication, $year)
{
    global $debug;

    $pages = array();
    $matches = array();

    // Citation patterns, most specific position first. All use U (ungreedy),
    // so quantifiers match as little as possible unless anchored by what follows.
    $patterns = array(
        // Journal 12: 345.
        '/(?<journal>.*)\\s+(?<volume>\\d+):\\s*(?<page>\\d+)\\.?$/Uu',
        // Journal, (series) 12, (3) 345.
        '/(?<journal>.*),\\s+(\\((?<series>.*)\\)\\s+)?(?<volume>\\d+),(\\s+\\((?<issue>\\d+)\\))?\\s+(?<page>\\d+)\\.$/Uu',
        // Journal 12: 345 1900
        '/(?<journal>.*)\\s+(?<volume>\\d+):\\s*(?<page>\\d+)\\s+(?<year>[0-9]{4})/Uu',
        // Journal, 12, 345.
        '/(?<journal>.*),\\s+(?<volume>\\d+),\\s+(?<page>\\d+)\\.$/Uu',
        // Journal, 12, 345<anything>. Nothing anchors the end of the page
        // number here, so under U it would stop after one digit; the lookahead
        // forces the whole run of digits to be captured. This also covers the
        // "Journal, 12, 345," form, which previously had its own pattern that
        // could never be reached because this one always matched first.
        '/(?<journal>.*),\\s+(?<volume>\\d+),\\s+(?<page>\\d+)(?!\\d)/Uu',
        // Proc. U.S. nat. Mus., 99, no. 3247, 475.
        '/(?<journal>.*),\\s+(?<volume>\\d+), no. (?<issue>\\d+),\\s+(?<page>\\d+)\\.$/Uu',
        // Journal, 12 (3), 345.
        '/(?<journal>.*),\\s+(?<volume>\\d+) \\((?<issue>\\d+)\\),\\s+(?<page>\\d+)\\.$/Uu',
        // Spixiana 7 (2): 125.
        '/(?<journal>.*)\\s+(?<volume>\\d+) \\((?<issue>\\d+)\\):\\s+(?<page>\\d+)\\.$/Uu',
        // Ann. Mag. Nat. Hist. , ser. 8 vol. 13 p. 436 1914
        '/(?<journal>.*)\\s*,\\s+ser.\\s+(?<series>\\d+)\\s+vol.\\s*(?<volume>\\d+)\\s+p.\\s+(?<page>\\d+)\\s+(?<year>[0-9]{4})/Uu',
    );

    // Parse citation: first pattern that matches wins
    $matched = false;
    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $publication, $matches)) {
            $matched = true;
            break;
        }
    }

    if ($debug) {
        print_r($matches);
        echo $publication;
    }

    if ($matched) {
        $reference = reference_from_matches($matches);

        if (!isset($reference->issn)) {
            // Try and get ISSN from bioGUID
            $issn = issn_from_title($reference->secondary_title);
            if ($issn != '') {
                $reference->issn = $issn;
            } elseif (!isset($reference->oclc)) {
                // No luck with ISSN, look for OCLC
                $oclc = oclc_for_title($reference->secondary_title);
                if ($oclc != 0) {
                    $reference->oclc = $oclc;
                }
            }
        }

        $atitle = isset($reference->title) ? $reference->title : '';
        $series = isset($reference->series) ? $reference->series : '';

        // NOTE(review): main() also passes the ISSN to these searches; it is
        // looked up above but not passed here - confirm whether it should be.
        $search_hits = bhl_find_article(
            $atitle,
            $reference->secondary_title,
            $reference->volume,
            $reference->spage,
            $series,
            isset($reference->year) ? $reference->year : ''
        );

        if (count($search_hits) == 0) {
            // try alternative way of searching using article title
            $search_hits = bhl_find_article_from_article_title(
                $atitle,
                $reference->secondary_title,
                $reference->volume,
                $reference->spage,
                $series
            );
        }

        foreach ($search_hits as $hit) {
            $pages[] = $hit->PageID;
        }
    }

    if ($debug) {
        print_r($pages);
    }

    return $pages;
}