Exemple #1
0
/**
 * @brief Handle OpenURL request
 *
 * We may have more than one parameter with same name, so need to access QUERY_STRING, not _GET
 * http://stackoverflow.com/questions/353379/how-to-get-multiple-parameters-with-same-name-from-a-url-in-php
 *
 */
function main()
{
    global $config;
    global $debug;
    global $format;
    $id = 0;
    $callback = '';
    // If no query parameters
    if (count($_GET) == 0) {
        display_form();
        exit(0);
    }
    if (isset($_GET['format'])) {
        switch ($_GET['format']) {
            case 'html':
                $format = 'html';
                break;
            case 'json':
                $format = 'json';
                break;
            default:
                $format = 'html';
                break;
        }
    }
    if (isset($_GET['callback'])) {
        $callback = $_GET['callback'];
    }
    $debug = false;
    if (isset($_GET['debug'])) {
        $debug = true;
    }
    // Handle query and display results.
    $query = explode('&', html_entity_decode($_SERVER['QUERY_STRING']));
    $params = array();
    foreach ($query as $param) {
        list($key, $value) = explode('=', $param);
        $key = preg_replace('/^\\?/', '', urldecode($key));
        $params[$key][] = trim(urldecode($value));
    }
    if ($debug) {
        echo '<h1>Params</h1>';
        echo '<pre>';
        print_r($params);
        echo '</pre>';
    }
    // This is what we got from user
    $referent = new stdclass();
    parse_openurl($params, $referent);
    // Flesh it out
    // If we are looking for an article we need an ISSN, or at least an OCLC
    // Ask whether have this in our database (assumes we have ISSN)
    if (!isset($referent->issn)) {
        // Try and get ISSN from bioGUID
        $issn = issn_from_title($referent->secondary_title);
        if ($issn != '') {
            $referent->issn = $issn;
        } else {
            // No luck with ISSN, look for OCLC
            if (!isset($referent->oclc)) {
                $oclc = oclc_for_title($referent->secondary_title);
                if ($oclc != 0) {
                    $referent->oclc = $oclc;
                }
            }
        }
    }
    if ($debug) {
        echo '<h1>Referent</h1>';
        echo '<pre>';
        print_r($referent);
        echo '</pre>';
    }
    // Handle identifiers
    if (isset($referent->url)) {
        // BHL URL, for example if we have already mapped article to BHL
        // in Zotero,
        if (preg_match('/^http:\\/\\/(www\\.)?biodiversitylibrary.org\\/page\\/(?<pageid>[0-9]+)/', $referent->url, $matches)) {
            //print_r($matches);
            $PageID = $matches['pageid'];
            $references = bhl_reference_from_pageid($PageID);
            //print_r($references);
            if (count($references) == 0) {
                // We don't have an article for this PageID
                $search_hit = bhl_score_page($PageID, $referent->title);
                // Store
                $id = db_store_article($referent, $PageID);
            } else {
                // Have a reference with this PageID already
                // Will need to handle case where > 1 article on same page, e.g.
                // http://www.biodiversitylibrary.org/page/3336598
                $id = $references[0];
            }
            // Did we get a hit?
            if ($id != 0) {
                // We have this reference in our database
                switch ($format) {
                    case 'json':
                        // Display object
                        $reference = db_retrieve_reference($id);
                        header("Content-type: text/plain; charset=utf-8\n\n");
                        if ($callback != '') {
                            echo $callback . '(';
                        }
                        echo json_format(json_encode($reference));
                        if ($callback != '') {
                            echo ')';
                        }
                        break;
                    case 'html':
                    default:
                        // Redirect to reference display
                        header('Location: ' . $config['web_root'] . 'reference/' . $id . "\n\n");
                        break;
                }
                exit;
            }
        }
    }
    // OK, we're not forcing a match to BHL, so do we have this article?
    $id = db_find_article($referent);
    //echo "<b>id=$id</b><br/>";
    if ($id != 0) {
        // We have this reference in our database
        switch ($format) {
            case 'json':
                // Display object
                $reference = db_retrieve_reference($id);
                header("Content-type: text/plain; charset=utf-8\n\n");
                if ($callback != '') {
                    echo $callback . '(';
                }
                echo json_format(json_encode($reference));
                if ($callback != '') {
                    echo ')';
                }
                break;
            case 'html':
            default:
                // Twitter as log
                if ($config['twitter']) {
                    $tweet_this = false;
                    $tweet_this = isset($_GET['rfr_id']);
                    if ($tweet_this) {
                        $url = $config['web_root'] . 'reference/' . $id . ' ';
                        //  . '#openurl'; // url + hashtag
                        $url = $id;
                        $url_len = strlen($url);
                        $status = '';
                        //$text = $_GET['rfr_id'];
                        $text = '#openurl ' . $_SERVER["HTTP_REFERER"];
                        //$text .= ' @rdmpage';
                        if (isset($article->title)) {
                        }
                        $status = $text;
                        $status_len = strlen($status);
                        $extra = 140 - $status_len - $url_len - 1;
                        if ($extra < 0) {
                            $status_len += $extra;
                            $status_len -= 1;
                            $status = substr($status, 0, $status_len);
                            $status .= '…';
                        }
                        $status .= ' ' . $url;
                        tweet($status);
                    }
                }
                // Redirect to reference display
                header('Location: reference/' . $id . "\n\n");
                break;
        }
        exit;
    }
    // OK, not found, so let's go look for it...
    // Search BHL
    $atitle = '';
    if (isset($referent->title)) {
        $atitle = $referent->title;
    }
    $search_hits = bhl_find_article($atitle, $referent->secondary_title, $referent->volume, isset($referent->spage) ? $referent->spage : $referent->pages, isset($referent->series) ? $referent->series : '', isset($referent->date) ? $referent->date : '', isset($referent->issn) ? $referent->issn : '');
    if (count($search_hits) == 0) {
        // try alternative way of searching using article title
        $search_hits = bhl_find_article_from_article_title($referent->title, $referent->secondary_title, $referent->volume, isset($referent->spage) ? $referent->spage : $referent->pages, isset($referent->series) ? $referent->series : '', isset($referent->issn) ? $referent->issn : '');
    }
    // At this point if we haven't found it in BHL we could go elsewhere, e.g. bioGUID,
    // in which case we'd need to take this into account when displaying HTML and JSON
    if ($debug) {
        echo '<h3>Search hits</h3>';
        echo '<pre>';
        print_r($search_hits);
        echo '</pre>';
    }
    if (1) {
        // Check whether we already have an article that starts on this
        foreach ($search_hits as $hit) {
            $references = bhl_reference_from_pageid($hit->PageID);
            //print_r($references);
            if (count($references) != 0) {
                // We have this reference in our database
                switch ($format) {
                    case 'json':
                        // Display object
                        $reference = db_retrieve_reference($references[0]);
                        header("Content-type: text/plain; charset=utf-8\n\n");
                        if ($callback != '') {
                            echo $callback . '(';
                        }
                        echo json_format(json_encode($reference));
                        if ($callback != '') {
                            echo ')';
                        }
                        break;
                    case 'html':
                    default:
                        // Redirect to reference display
                        header('Location: reference/' . $references[0] . "\n\n");
                        break;
                }
                exit;
            }
        }
    }
    // Output search results in various formats...
    switch ($format) {
        case 'json':
            display_bhl_result_json($referent, $search_hits, $callback);
            break;
        case 'html':
        default:
            display_bhl_result_html($referent, $search_hits);
            break;
    }
}
Exemple #2
0
function db_store_article($article, $PageID = 0, $updating = false)
{
    global $db;
    global $config;
    $update = false;
    $id = 0;
    // If we are editing an existing reference then we already know its id
    if (isset($article->reference_id)) {
        $id = $article->reference_id;
    } else {
        $id = db_find_article($article);
    }
    if ($id != 0) {
        if ($updating) {
            $update = true;
        } else {
            return $id;
        }
    }
    // Try and trap empty references
    if ($id == 0) {
        $ok = false;
        if (isset($article->title)) {
            $ok = $article->title != '';
        }
        if (!$ok) {
            return 0;
        }
    }
    if (!isset($article->genre)) {
        $article->genre = 'article';
    }
    $keys = array();
    $values = array();
    // Article metadata
    foreach ($article as $k => $v) {
        switch ($k) {
            // Ignore as it's an array
            case 'authors':
                break;
            case 'date':
                $keys[] = 'date';
                $values[] = $db->qstr($v);
                if (!isset($article->year)) {
                    $keys[] = 'year';
                    $values[] = $db->qstr(year_from_date($v));
                }
                break;
                // Don't store BHL URL here
            // Don't store BHL URL here
            case 'url':
                if (preg_match('/^http:\\/\\/(www\\.)?biodiversitylibrary.org\\/page\\/(?<pageid>[0-9]+)/', $v)) {
                } else {
                    // extract Handle if it exists
                    if (preg_match('/^http:\\/\\/hdl.handle.net\\/(?<hdl>.*)$/', $v, $m)) {
                        $keys[] = 'hdl';
                        $values[] = $db->qstr($m['hdl']);
                    } else {
                        $keys[] = $k;
                        $values[] = $db->qstr($v);
                    }
                }
                break;
                // Things we store as is
            // Things we store as is
            case 'title':
            case 'secondary_title':
            case 'volume':
            case 'series':
            case 'issue':
            case 'spage':
            case 'epage':
            case 'year':
            case 'date':
            case 'issn':
            case 'genre':
            case 'doi':
            case 'hdl':
            case 'lsid':
            case 'oclc':
            case 'pdf':
            case 'abstract':
            case 'pmid':
                $keys[] = $k;
                $values[] = $db->qstr($v);
                break;
                // Things we ignore
            // Things we ignore
            default:
                break;
        }
    }
    // Date
    if (!isset($article->date) && isset($article->year)) {
        $keys[] = 'date';
        $values[] = $db->qstr($article->year . '-00-00');
    }
    // BHL PageID
    if ($PageID != 0) {
        $keys[] = 'PageID';
        $values[] = $PageID;
    }
    // SICI
    $s = new Sici();
    $sici = $s->create($article);
    if ($sici != '') {
        $keys[] = 'sici';
        $values[] = $db->qstr($sici);
    }
    if ($update) {
        // Versioning?
        // Delete links	(author, pages, etc)
        // Don't delete page range as we may loose plates, etc. outside range
        /*
        $sql = 'DELETE FROM rdmp_reference_page_joiner WHERE reference_id=' . $id;
        $result = $db->Execute($sql);
        if ($result == false) die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        */
        $sql = 'DELETE FROM rdmp_author_reference_joiner WHERE reference_id = ' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
        // update (updated timestamp will be automatically updated)
        $sql = 'UPDATE rdmp_reference SET ';
        $num_values = count($keys);
        for ($i = 0; $i < $num_values; $i++) {
            if ($i > 0) {
                $sql .= ', ';
            }
            $sql .= $keys[$i] . '=' . $values[$i];
        }
        $sql .= ' WHERE reference_id=' . $id;
        /*		$cache_file = @fopen('/tmp/update.sql', "w+") or die("could't open file");
        		@fwrite($cache_file, $sql);
        		fclose($cache_file);
        */
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    } else {
        // Adding article for first time so add 'created' and 'updated' timestamp
        $keys[] = 'created';
        $values[] = 'NOW()';
        $keys[] = 'updated';
        $values[] = 'NOW()';
        $sql = 'INSERT INTO rdmp_reference (' . implode(",", $keys) . ') VALUES (' . implode(",", $values) . ')';
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
        $id = $db->Insert_ID();
        // Store reference_cluster_id which we can use to group duplicates, by default
        // reference_cluster_id = reference_id
        $sql = 'UPDATE rdmp_reference SET reference_cluster_id=' . $id . ' WHERE reference_id=' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    }
    // Indexing-------------------------------------------------------------------------------------
    if (1) {
        // solr
        // this code is redundant with code in reference.php but I use different objects
        // here and there (doh!). Also once we've added old stuff to solr this is the only place we
        // should be calling solr
        $solr = new Apache_Solr_Service('localhost', '8983', '/solr');
        if (!$solr->ping()) {
            echo 'Solr service not responding.';
            exit;
        }
        $item = array();
        $item['id'] = 'reference/' . $id;
        $item['title'] = $article->title;
        $item['publication_outlet'] = $article->secondary_title;
        $item['year'] = $article->year;
        $authors = array();
        foreach ($article->authors as $a) {
            $authors[] = $a->forename . ' ' . $a->surname;
        }
        $item['authors'] = $authors;
        $citation = '';
        $citation .= ' ' . $article->year;
        $citation .= ' ' . $article->title;
        $citation .= ' ' . $article->secondary_title;
        $citation .= ' ' . $article->volume;
        if (isset($article->issue)) {
            $citation .= '(' . $article->issue . ')';
        }
        $citation .= ':';
        $citation .= ' ';
        $citation .= $article->spage;
        if (isset($article->epage)) {
            $citation .= '-' . $article->epage;
        }
        $item['citation'] = $citation;
        $text = '';
        $num_authors = count($article->authors);
        $count = 0;
        if ($num_authors > 0) {
            foreach ($article->authors as $author) {
                $text .= $author->forename . ' ' . $author->lastname;
                if (isset($author->suffix)) {
                    $text .= ' ' . $author->suffix;
                }
                $count++;
                if ($count == 2 && $num_authors > 3) {
                    $text .= ' et al.';
                    break;
                }
                if ($count < $num_authors - 1) {
                    $text .= ', ';
                } else {
                    if ($count < $num_authors) {
                        $text .= ' and ';
                    }
                }
            }
        }
        $item['citation'] = $text . ' ' . $citation;
        $parts = array();
        $parts[] = $item;
        //print_r($parts);
        // add to solr
        $documents = array();
        foreach ($parts as $item => $fields) {
            $part = new Apache_Solr_Document();
            foreach ($fields as $key => $value) {
                if (is_array($value)) {
                    foreach ($value as $datum) {
                        $part->setMultiValue($key, $datum);
                    }
                } else {
                    $part->{$key} = $value;
                }
            }
            $documents[] = $part;
        }
        //
        //
        // Load the documents into the index
        //
        try {
            $solr->addDocuments($documents);
            $solr->commit();
            $solr->optimize();
        } catch (Exception $e) {
            echo $e->getMessage();
        }
    } else {
        $sql = 'DELETE FROM rdmp_text_index WHERE (object_uri=' . $db->qstr($config['web_root'] . 'reference/' . $id) . ')';
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
        // Only do this if we have a title, as sometimes we don't (e.g. CrossRef lacks metadata)
        if (isset($article->title)) {
            $sql = 'INSERT INTO rdmp_text_index(object_type, object_id, object_uri, object_text)
			VALUES ("title"' . ', ' . $id . ', ' . $db->qstr($config['web_root'] . 'reference/' . $id) . ', ' . $db->qstr($article->title) . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
    }
    // Versioning-----------------------------------------------------------------------------------
    // Store this object in version table so we can recover it if we overwrite item
    $ip = getip();
    $sql = 'INSERT INTO rdmp_reference_version(reference_id, ip, json) VALUES(' . $id . ', ' . 'INET_ATON(\'' . $ip . '\')' . ',' . $db->qstr(json_encode($article)) . ')';
    $result = $db->Execute($sql);
    if ($result == false) {
        die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
    }
    // Author(s)------------------------------------------------------------------------------------
    // Store author as and link to the article
    if (isset($article->authors)) {
        db_store_authors($id, $article->authors);
    }
    // Store page range (only if not updating, otherwise we may loose plates, etc.
    // that aren't in page range)
    if ($PageID != 0 && !$update) {
        $page_range = array();
        if (isset($article->spage) && isset($article->epage)) {
            $page_range = bhl_page_range($PageID, $article->epage - $article->spage + 1);
        } else {
            // No epage, so just get spage (to do: how do we tell user we don't have page range?)
            $page_range = bhl_page_range($PageID, 0);
        }
        //print_r($page_range);
        $count = 0;
        foreach ($page_range as $page) {
            $sql = 'INSERT INTO rdmp_reference_page_joiner (reference_id, PageID, page_order) 
			VALUES (' . $id . ',' . $page . ',' . $count++ . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
    }
    // Tweet----------------------------------------------------------------------------------------
    if (!$update) {
        if ($config['twitter']) {
            $url = $config['web_root'] . 'reference/' . $id . ' ' . '#bhlib';
            // url + hashtag
            $url_len = strlen($url);
            $status = '';
            if (isset($article->title)) {
                $status = $article->title;
                $status_len = strlen($status);
                $extra = 140 - $status_len - $url_len - 1;
                if ($extra < 0) {
                    $status_len += $extra;
                    $status_len -= 1;
                    $status = substr($status, 0, $status_len);
                    $status .= '…';
                }
            }
            $status .= ' ' . $url;
            tweet($status);
        }
    }
    return $id;
}
Exemple #3
0
function postprocess_citations($reference_id, $citations)
{
    global $db;
    $citation_count = 0;
    // Avoid duplications
    if ($reference_id != 0) {
        $sql = 'DELETE FROM rdmp_reference_cites WHERE (reference_id=' . $reference_id . ')';
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    }
    foreach ($citations as $citation) {
        $citation_reference_id = 0;
        if (isset($citation->genre)) {
            // Lookup articles
            if ($citation->genre == 'article') {
                $citation_reference_id = 0;
                // Do we have this already in BioStor?
                $citation_reference_id = db_find_article($citation);
                if ($citation_reference_id == 0) {
                    // Try BioStor OpenURL search
                    $citation_reference_id = import_from_openurl(reference_to_openurl($citation));
                }
                if ($citation_reference_id == 0) {
                    // Try bioGUID
                    if (bioguid_openurl_search($citation)) {
                        $citation_reference_id = db_store_article($citation);
                    }
                }
            }
        }
        // At this stage if citation_reference_id is 0 we haven't found it
        // 1. store citation string (we are building a list of all such strings)
        $citation_string_id = store_citation_string($citation);
        // 2. If we've found it in BioStor, record link between citation string and reference id
        if ($citation_reference_id != 0) {
            $sql = 'DELETE FROM rdmp_reference_citation_string_joiner WHERE(reference_id=' . $citation_reference_id . ')
			AND (citation_string_id=' . $citation_string_id . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
            $sql = 'INSERT INTO rdmp_reference_citation_string_joiner (reference_id,citation_string_id)
			VALUES(' . $citation_reference_id . ',' . $citation_string_id . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
        // 3. Store link between source reference and citation string (which we will use to display literature cited)
        if ($reference_id != 0) {
            $sql = 'INSERT INTO rdmp_reference_cites (reference_id, citation_string_id, citation_order)
			VALUES(' . $reference_id . ',' . $citation_string_id . ',' . $citation_count . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
            $citation_count++;
        }
    }
}