/**
 * @brief Handle OpenURL request
 *
 * We may have more than one parameter with the same name, so the raw
 * QUERY_STRING is parsed by hand instead of relying on $_GET.
 * http://stackoverflow.com/questions/353379/how-to-get-multiple-parameters-with-same-name-from-a-url-in-php
 *
 * Flow:
 *  1. With no query parameters, show the search form and stop.
 *  2. Parse the query string into a referent object (parse_openurl).
 *  3. If the referent has no ISSN, try to obtain one (or failing that an OCLC
 *     number) from the journal title.
 *  4. If the referent carries a BHL page URL, resolve or store the article
 *     for that page and answer immediately.
 *  5. Otherwise look the article up locally; if found, answer immediately.
 *  6. Otherwise search BHL, answer with an already-stored reference that
 *     starts on one of the hit pages, or finally display the search hits.
 *
 * Reads/writes the globals $config, $debug and $format. Terminates the
 * request with exit in every branch that answers early.
 */
function main()
{
    global $config;
    global $debug;
    global $format;

    $id = 0;
    $callback = '';

    // If no query parameters
    if (count($_GET) == 0) {
        display_form();
        exit(0);
    }

    // Anything other than "json" falls back to HTML
    if (isset($_GET['format'])) {
        switch ($_GET['format']) {
            case 'json':
                $format = 'json';
                break;
            case 'html':
            default:
                $format = 'html';
                break;
        }
    }

    // The JSONP callback is echoed back verbatim, so only accept something
    // that looks like a (possibly dotted) JavaScript identifier. Anything
    // else is ignored and plain JSON is returned.
    if (isset($_GET['callback'])
        && is_string($_GET['callback'])
        && preg_match('/^[A-Za-z_$][A-Za-z0-9_$.]*\z/', $_GET['callback'])
    ) {
        $callback = $_GET['callback'];
    }

    $debug = false;
    if (isset($_GET['debug'])) {
        $debug = true;
    }

    // Handle query and display results.
    $query = explode('&', html_entity_decode($_SERVER['QUERY_STRING']));
    $params = array();
    foreach ($query as $param) {
        // Skip empty fragments produced by "&&" or a trailing "&"
        if ($param === '') {
            continue;
        }
        // Split on the first "=" only, so values may themselves contain "="
        // and a bare key without "=" yields an empty value instead of a notice
        $pair = explode('=', $param, 2);
        $key = preg_replace('/^\\?/', '', urldecode($pair[0]));
        $value = isset($pair[1]) ? $pair[1] : '';
        $params[$key][] = trim(urldecode($value));
    }

    if ($debug) {
        echo '<h1>Params</h1>';
        echo '<pre>';
        print_r($params);
        echo '</pre>';
    }

    // This is what we got from user
    $referent = new stdclass();
    parse_openurl($params, $referent);

    // Optional fields are read once, with defaults, so that a sparse request
    // does not emit warnings ahead of the header() calls below
    $atitle = isset($referent->title) ? $referent->title : '';
    $journal = isset($referent->secondary_title) ? $referent->secondary_title : '';

    // Flesh it out
    // If we are looking for an article we need an ISSN, or at least an OCLC
    // Ask whether have this in our database (assumes we have ISSN)
    if (!isset($referent->issn)) {
        // Try and get ISSN from bioGUID
        $issn = issn_from_title($journal);
        if ($issn != '') {
            $referent->issn = $issn;
        } elseif (!isset($referent->oclc)) {
            // No luck with ISSN, look for OCLC
            $oclc = oclc_for_title($journal);
            if ($oclc != 0) {
                $referent->oclc = $oclc;
            }
        }
    }

    if ($debug) {
        echo '<h1>Referent</h1>';
        echo '<pre>';
        print_r($referent);
        echo '</pre>';
    }

    // Handle identifiers
    if (isset($referent->url)) {
        // BHL URL, for example if we have already mapped article to BHL
        // in Zotero. Both http and https forms are accepted.
        if (preg_match('/^https?:\/\/(www\.)?biodiversitylibrary\.org\/page\/(?<pageid>[0-9]+)/', $referent->url, $matches)) {
            $PageID = $matches['pageid'];
            $references = bhl_reference_from_pageid($PageID);

            if (count($references) == 0) {
                // We don't have an article for this PageID.
                // The score was never used by the original code; the call is
                // kept in case bhl_score_page has side effects.
                bhl_score_page($PageID, $atitle);

                // Store
                $id = db_store_article($referent, $PageID);
            } else {
                // Have a reference with this PageID already
                // Will need to handle case where > 1 article on same page, e.g.
                // http://www.biodiversitylibrary.org/page/3336598
                $id = $references[0];
            }

            // Did we get a hit?
            if ($id != 0) {
                // We have this reference in our database
                switch ($format) {
                    case 'json':
                        openurl_emit_reference_json($id, $callback);
                        break;
                    case 'html':
                    default:
                        // Redirect to reference display
                        header('Location: ' . $config['web_root'] . 'reference/' . $id . "\n\n");
                        break;
                }
                exit;
            }
        }
    }

    // OK, we're not forcing a match to BHL, so do we have this article?
    $id = db_find_article($referent);

    if ($id != 0) {
        // We have this reference in our database
        switch ($format) {
            case 'json':
                openurl_emit_reference_json($id, $callback);
                break;
            case 'html':
            default:
                // Twitter as log: only when enabled and the request names its referrer id
                if (!empty($config['twitter']) && isset($_GET['rfr_id'])) {
                    // The tweet ends with the bare reference id (the original
                    // code built a full URL and then overwrote it with the id)
                    $url = (string) $id;
                    $url_len = strlen($url);

                    $referer = isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '';
                    $status = '#openurl ' . $referer;
                    $status_len = strlen($status);

                    // Trim so that status + space + url fits in 140
                    $extra = 140 - $status_len - $url_len - 1;
                    if ($extra < 0) {
                        $status_len += $extra;
                        $status_len -= 1; // leave room for the ellipsis
                        $status = substr($status, 0, $status_len);
                        $status .= '…';
                    }
                    $status .= ' ' . $url;
                    tweet($status);
                }

                // Redirect to reference display
                header('Location: reference/' . $id . "\n\n");
                break;
        }
        exit;
    }

    // OK, not found, so let's go look for it...

    // Search BHL
    $volume = isset($referent->volume) ? $referent->volume : '';
    if (isset($referent->spage)) {
        $page = $referent->spage;
    } else {
        $page = isset($referent->pages) ? $referent->pages : '';
    }
    $series = isset($referent->series) ? $referent->series : '';
    $date = isset($referent->date) ? $referent->date : '';
    $issn = isset($referent->issn) ? $referent->issn : '';

    $search_hits = bhl_find_article($atitle, $journal, $volume, $page, $series, $date, $issn);

    if (count($search_hits) == 0) {
        // try alternative way of searching using article title
        $search_hits = bhl_find_article_from_article_title($atitle, $journal, $volume, $page, $series, $issn);
    }

    // At this point if we haven't found it in BHL we could go elsewhere, e.g. bioGUID,
    // in which case we'd need to take this into account when displaying HTML and JSON

    if ($debug) {
        echo '<h3>Search hits</h3>';
        echo '<pre>';
        print_r($search_hits);
        echo '</pre>';
    }

    // Check whether we already have an article that starts on one of the hit pages
    foreach ($search_hits as $hit) {
        $references = bhl_reference_from_pageid($hit->PageID);
        if (count($references) != 0) {
            // We have this reference in our database
            switch ($format) {
                case 'json':
                    openurl_emit_reference_json($references[0], $callback);
                    break;
                case 'html':
                default:
                    // Redirect to reference display
                    header('Location: reference/' . $references[0] . "\n\n");
                    break;
            }
            exit;
        }
    }

    // Output search results in various formats...
    switch ($format) {
        case 'json':
            display_bhl_result_json($referent, $search_hits, $callback);
            break;
        case 'html':
        default:
            display_bhl_result_html($referent, $search_hits);
            break;
    }
}

/**
 * Send a stored reference to the client as JSON, wrapped in a JSONP call
 * when a callback name is supplied.
 *
 * @param mixed  $id       Reference id passed to db_retrieve_reference()
 * @param string $callback JSONP callback name, or '' for plain JSON
 */
function openurl_emit_reference_json($id, $callback)
{
    $reference = db_retrieve_reference($id);

    header("Content-type: text/plain; charset=utf-8\n\n");
    if ($callback != '') {
        echo $callback . '(';
    }
    echo json_format(json_encode($reference));
    if ($callback != '') {
        echo ')';
    }
}
/**
 * Store a new article, or update an existing one, then index it in Solr,
 * save a JSON snapshot in the version table, link its authors and BHL pages,
 * and (for new articles, when enabled) tweet it.
 *
 * @param object $article  Article metadata object. Recognised properties are
 *                         the column names handled in the switch below, plus
 *                         `authors`, `reference_id` and `url`.
 * @param mixed  $PageID   BHL PageID the article starts on, or 0 if unknown.
 * @param bool   $updating When true and the article already exists, its row
 *                         is overwritten; when false an existing article is
 *                         left untouched and its id returned.
 *
 * @return mixed Id of the stored or already-existing reference, or 0 when the
 *               article is new and has no (non-empty) title.
 *
 * Calls die() on any SQL failure and exit when Solr does not answer the ping.
 */
function db_store_article($article, $PageID = 0, $updating = false)
{
    global $db;
    global $config;

    $update = false;
    $id = 0;

    // If we are editing an existing reference then we already know its id
    if (isset($article->reference_id)) {
        $id = $article->reference_id;
    } else {
        $id = db_find_article($article);
    }
    // The id is concatenated into SQL below, so force it to an integer
    $id = (int) $id;

    if ($id != 0) {
        if ($updating) {
            $update = true;
        } else {
            return $id;
        }
    }

    // Try and trap empty references
    if ($id == 0) {
        $ok = false;
        if (isset($article->title)) {
            $ok = $article->title != '';
        }
        if (!$ok) {
            return 0;
        }
    }

    if (!isset($article->genre)) {
        $article->genre = 'article';
    }

    $keys = array();
    $values = array();

    // Article metadata
    foreach ($article as $k => $v) {
        switch ($k) {
            // Ignore as it's an array
            case 'authors':
                break;

            case 'date':
                $keys[] = 'date';
                $values[] = $db->qstr($v);
                // Derive the year column from the date when no explicit year is given
                if (!isset($article->year)) {
                    $keys[] = 'year';
                    $values[] = $db->qstr(year_from_date($v));
                }
                break;

            case 'url':
                // Don't store BHL URL here (http or https)
                if (!preg_match('/^https?:\/\/(www\.)?biodiversitylibrary\.org\/page\/(?<pageid>[0-9]+)/', $v)) {
                    // extract Handle if it exists
                    if (preg_match('/^http:\\/\\/hdl.handle.net\\/(?<hdl>.*)$/', $v, $m)) {
                        $keys[] = 'hdl';
                        $values[] = $db->qstr($m['hdl']);
                    } else {
                        $keys[] = $k;
                        $values[] = $db->qstr($v);
                    }
                }
                break;

            // Things we store as is ('date' is handled above)
            case 'title':
            case 'secondary_title':
            case 'volume':
            case 'series':
            case 'issue':
            case 'spage':
            case 'epage':
            case 'year':
            case 'issn':
            case 'genre':
            case 'doi':
            case 'hdl':
            case 'lsid':
            case 'oclc':
            case 'pdf':
            case 'abstract':
            case 'pmid':
                $keys[] = $k;
                $values[] = $db->qstr($v);
                break;

            // Things we ignore
            default:
                break;
        }
    }

    // Date: fall back to a year-only date when only the year is known
    if (!isset($article->date) && isset($article->year)) {
        $keys[] = 'date';
        $values[] = $db->qstr($article->year . '-00-00');
    }

    // BHL PageID (numeric, written unquoted)
    if ($PageID != 0) {
        $keys[] = 'PageID';
        $values[] = (int) $PageID;
    }

    // SICI
    $s = new Sici();
    $sici = $s->create($article);
    if ($sici != '') {
        $keys[] = 'sici';
        $values[] = $db->qstr($sici);
    }

    if ($update) {
        // Versioning?

        // Delete links (author, pages, etc)
        // Don't delete page range as we may lose plates, etc. outside range
        $sql = 'DELETE FROM rdmp_author_reference_joiner WHERE reference_id = ' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }

        // update (updated timestamp will be automatically updated)
        $assignments = array();
        $num_values = count($keys);
        for ($i = 0; $i < $num_values; $i++) {
            $assignments[] = $keys[$i] . '=' . $values[$i];
        }
        $sql = 'UPDATE rdmp_reference SET ' . implode(', ', $assignments) . ' WHERE reference_id=' . $id;

        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    } else {
        // Adding article for first time so add 'created' and 'updated' timestamp
        $keys[] = 'created';
        $values[] = 'NOW()';

        $keys[] = 'updated';
        $values[] = 'NOW()';

        $sql = 'INSERT INTO rdmp_reference (' . implode(",", $keys) . ') VALUES (' . implode(",", $values) . ')';
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }

        $id = $db->Insert_ID();

        // Store reference_cluster_id which we can use to group duplicates, by default
        // reference_cluster_id = reference_id
        $sql = 'UPDATE rdmp_reference SET reference_cluster_id=' . $id . ' WHERE reference_id=' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    }

    // Indexing-------------------------------------------------------------------------------------
    // Manual switch: Solr is the live index, the SQL text index below is the legacy path
    $use_solr = true;
    if ($use_solr) {
        // solr
        // this code is redundant with code in reference.php but I use different objects
        // here and there (doh!). Also once we've added old stuff to solr this is the only place we
        // should be calling solr
        $solr = new Apache_Solr_Service('localhost', '8983', '/solr');

        if (!$solr->ping()) {
            echo 'Solr service not responding.';
            exit;
        }

        // Optional fields default to null so a sparse article does not raise warnings
        $title = isset($article->title) ? $article->title : null;
        $journal = isset($article->secondary_title) ? $article->secondary_title : null;
        $volume = isset($article->volume) ? $article->volume : null;
        $spage = isset($article->spage) ? $article->spage : null;
        if (isset($article->year)) {
            $year = $article->year;
        } elseif (isset($article->date)) {
            // Same derivation as used for the database year column above
            $year = year_from_date($article->date);
        } else {
            $year = null;
        }

        // Authors are optional (see the isset() check before db_store_authors below)
        $author_list = isset($article->authors) ? (array) $article->authors : array();

        $item = array();
        $item['id'] = 'reference/' . $id;
        $item['title'] = $title;
        $item['publication_outlet'] = $journal;
        $item['year'] = $year;

        $authors = array();
        foreach ($author_list as $a) {
            $authors[] = db_store_article_author_name($a);
        }
        $item['authors'] = $authors;

        $citation = '';
        $citation .= ' ' . $year;
        $citation .= ' ' . $title;
        $citation .= ' ' . $journal;
        $citation .= ' ' . $volume;
        if (isset($article->issue)) {
            $citation .= '(' . $article->issue . ')';
        }
        $citation .= ':';
        $citation .= ' ';
        $citation .= $spage;
        if (isset($article->epage)) {
            $citation .= '-' . $article->epage;
        }

        // Author string: "A, B and C", or "A, B et al." when more than three authors
        $text = '';
        $num_authors = count($author_list);
        $count = 0;
        foreach ($author_list as $author) {
            $text .= db_store_article_author_name($author);
            if (isset($author->suffix)) {
                $text .= ' ' . $author->suffix;
            }
            $count++;
            if ($count == 2 && $num_authors > 3) {
                $text .= ' et al.';
                break;
            }
            if ($count < $num_authors - 1) {
                $text .= ', ';
            } elseif ($count < $num_authors) {
                $text .= ' and ';
            }
        }
        $item['citation'] = $text . ' ' . $citation;

        $parts = array();
        $parts[] = $item;

        // add to solr
        $documents = array();
        foreach ($parts as $fields) {
            $part = new Apache_Solr_Document();
            foreach ($fields as $key => $value) {
                if (is_array($value)) {
                    foreach ($value as $datum) {
                        $part->setMultiValue($key, $datum);
                    }
                } else {
                    $part->{$key} = $value;
                }
            }
            $documents[] = $part;
        }

        // Load the documents into the index
        try {
            $solr->addDocuments($documents);
            $solr->commit();
            $solr->optimize();
        } catch (Exception $e) {
            // Best effort: indexing failure is reported but does not abort the store
            echo $e->getMessage();
        }
    } else {
        $sql = 'DELETE FROM rdmp_text_index WHERE (object_uri=' . $db->qstr($config['web_root'] . 'reference/' . $id) . ')';
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }

        // Only do this if we have a title, as sometimes we don't (e.g. CrossRef lacks metadata)
        if (isset($article->title)) {
            $sql = 'INSERT INTO rdmp_text_index(object_type, object_id, object_uri, object_text) VALUES ("title"' . ', ' . $id . ', ' . $db->qstr($config['web_root'] . 'reference/' . $id) . ', ' . $db->qstr($article->title) . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
    }

    // Versioning-----------------------------------------------------------------------------------
    // Store this object in version table so we can recover it if we overwrite item
    $ip = getip();

    // NOTE(review): getip() is not visible here; its result is quoted through
    // qstr() rather than hand-written quotes in case it can carry client-supplied text
    $sql = 'INSERT INTO rdmp_reference_version(reference_id, ip, json) VALUES(' . $id . ', ' . 'INET_ATON(' . $db->qstr($ip) . ')' . ',' . $db->qstr(json_encode($article)) . ')';
    $result = $db->Execute($sql);
    if ($result == false) {
        die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
    }

    // Author(s)------------------------------------------------------------------------------------
    // Store authors and link them to the article
    if (isset($article->authors)) {
        db_store_authors($id, $article->authors);
    }

    // Store page range (only if not updating, otherwise we may lose plates, etc.
    // that aren't in page range)
    if ($PageID != 0 && !$update) {
        $page_range = array();
        if (isset($article->spage) && isset($article->epage)) {
            $page_range = bhl_page_range($PageID, $article->epage - $article->spage + 1);
        } else {
            // No epage, so just get spage (to do: how do we tell user we don't have page range?)
            $page_range = bhl_page_range($PageID, 0);
        }

        $count = 0;
        foreach ($page_range as $page) {
            $sql = 'INSERT INTO rdmp_reference_page_joiner (reference_id, PageID, page_order) VALUES (' . $id . ',' . $page . ',' . $count++ . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
    }

    // Tweet----------------------------------------------------------------------------------------
    if (!$update) {
        if (!empty($config['twitter'])) {
            $url = $config['web_root'] . 'reference/' . $id . ' ' . '#bhlib'; // url + hashtag
            $url_len = strlen($url);
            $status = '';
            if (isset($article->title)) {
                $status = $article->title;
                $status_len = strlen($status);
                // Trim the title so that title + space + url fits in 140
                $extra = 140 - $status_len - $url_len - 1;
                if ($extra < 0) {
                    $status_len += $extra;
                    $status_len -= 1; // leave room for the ellipsis
                    $status = substr($status, 0, $status_len);
                    $status .= '…';
                }
            }
            $status .= ' ' . $url;
            tweet($status);
        }
    }

    return $id;
}

/**
 * Build "forename familyname" for one author object.
 *
 * The original code read the family name from `surname` in one place and from
 * `lastname` in another; whichever is present is used here (`lastname` first).
 * NOTE(review): confirm which property the author objects actually carry.
 *
 * @param object $author Author object with optional forename/lastname/surname
 * @return string
 */
function db_store_article_author_name($author)
{
    $forename = isset($author->forename) ? $author->forename : '';
    if (isset($author->lastname)) {
        $family = $author->lastname;
    } elseif (isset($author->surname)) {
        $family = $author->surname;
    } else {
        $family = '';
    }
    return $forename . ' ' . $family;
}
/**
 * Record the literature cited by a reference.
 *
 * For every citation: try to resolve it to a stored reference (articles only),
 * store its citation string, link that string to the resolved reference when
 * there is one, and link it to the citing reference in citation order.
 *
 * @param mixed $reference_id Id of the citing reference; when 0 no
 *                            rdmp_reference_cites rows are deleted or written.
 * @param mixed $citations    Iterable of citation objects.
 *
 * Calls die() on any SQL failure.
 */
function postprocess_citations($reference_id, $citations)
{
    global $db;

    $have_source = ($reference_id != 0);
    $position = 0;

    // Avoid duplications: clear any existing "cites" rows for the source reference
    if ($have_source) {
        $query = sprintf('DELETE FROM rdmp_reference_cites WHERE (reference_id=%s)', $reference_id);
        if ($db->Execute($query) == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $query);
        }
    }

    foreach ($citations as $citation) {
        $matched_id = 0;

        // Lookup articles only
        if (isset($citation->genre) && $citation->genre == 'article') {
            // Do we have this already in BioStor?
            $matched_id = db_find_article($citation);

            // Try BioStor OpenURL search
            if ($matched_id == 0) {
                $matched_id = import_from_openurl(reference_to_openurl($citation));
            }

            // Try bioGUID
            if ($matched_id == 0 && bioguid_openurl_search($citation)) {
                $matched_id = db_store_article($citation);
            }
        }

        // At this stage if $matched_id is 0 we haven't found it

        // 1. store citation string (we are building a list of all such strings)
        $string_id = store_citation_string($citation);

        // 2. If we've found it in BioStor, record link between citation string and reference id
        //    (delete first so the pair is never duplicated)
        if ($matched_id != 0) {
            $query = sprintf(
                'DELETE FROM rdmp_reference_citation_string_joiner WHERE(reference_id=%s) AND (citation_string_id=%s)',
                $matched_id,
                $string_id
            );
            if ($db->Execute($query) == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $query);
            }

            $query = sprintf(
                'INSERT INTO rdmp_reference_citation_string_joiner (reference_id,citation_string_id) VALUES(%s,%s)',
                $matched_id,
                $string_id
            );
            if ($db->Execute($query) == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $query);
            }
        }

        // 3. Store link between source reference and citation string
        //    (which we will use to display literature cited)
        if ($have_source) {
            $query = sprintf(
                'INSERT INTO rdmp_reference_cites (reference_id, citation_string_id, citation_order) VALUES(%s,%s,%s)',
                $reference_id,
                $string_id,
                $position
            );
            if ($db->Execute($query) == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $query);
            }
            $position++;
        }
    }
}