Ejemplo n.º 1
0
/**
 * Resolve a free-text journal citation to candidate BHL PageIDs.
 *
 * The citation is tested against an ordered list of regular expressions; the first one
 * that matches supplies the named groups (journal, volume, page and, depending on the
 * pattern, series, issue or year) that are handed to reference_from_matches(). If the
 * resulting reference has no ISSN, issn_from_title() is consulted, and failing that
 * oclc_for_title(). The reference is then searched for with bhl_find_article(), falling
 * back to bhl_find_article_from_article_title() when the first search returns nothing.
 *
 * Diagnostic dumps of the regex matches, the citation and the result are only written
 * when the global $debug flag is truthy.
 *
 * @param string $publication Citation string, e.g. "Spixiana 7 (2): 125."
 * @param mixed  $year        Not used by this function at present.
 *
 * @return array PageID of every search hit; empty when the citation cannot be parsed
 *               or the searches return no hits.
 */
function matching_pages($publication, $year)
{
    global $debug;

    // Tried in order, first match wins. Every pattern uses the U (ungreedy) modifier,
    // so a trailing \d+ must be pinned by whatever follows it, otherwise it captures
    // a single digit only.
    $patterns = array(
        // <journal> <volume>: <page>[.]
        '/(?<journal>.*)\\s+(?<volume>\\d+):\\s*(?<page>\\d+)\\.?$/Uu',
        // <journal>, [(<series>) ]<volume>,[ (<issue>)] <page>.
        '/(?<journal>.*),\\s+(\\((?<series>.*)\\)\\s+)?(?<volume>\\d+),(\\s+\\((?<issue>\\d+)\\))?\\s+(?<page>\\d+)\\.$/Uu',
        // <journal> <volume>: <page> <year>
        '/(?<journal>.*)\\s+(?<volume>\\d+):\\s*(?<page>\\d+)\\s+(?<year>[0-9]{4})/Uu',
        // <journal>, <volume>, <page>.
        '/(?<journal>.*),\\s+(?<volume>\\d+),\\s+(?<page>\\d+)\\.$/Uu',
        // <journal>, <volume>, <page> followed by anything (including a comma).
        // The (?!\d) lookahead forces the lazy page group to take the whole digit run.
        // This also covers the former "<page>," pattern, which could never be reached.
        '/(?<journal>.*),\\s+(?<volume>\\d+),\\s+(?<page>\\d+)(?!\\d)/Uu',
        // Proc. U.S. nat. Mus., 99, no. 3247, 475.
        '/(?<journal>.*),\\s+(?<volume>\\d+), no. (?<issue>\\d+),\\s+(?<page>\\d+)\\.$/Uu',
        // <journal>, <volume> (<issue>), <page>.
        '/(?<journal>.*),\\s+(?<volume>\\d+) \\((?<issue>\\d+)\\),\\s+(?<page>\\d+)\\.$/Uu',
        // Spixiana 7 (2): 125.
        '/(?<journal>.*)\\s+(?<volume>\\d+) \\((?<issue>\\d+)\\):\\s+(?<page>\\d+)\\.$/Uu',
        // Ann. Mag. Nat. Hist. , ser. 8 vol. 13 p. 436 <year>
        '/(?<journal>.*)\\s*,\\s+ser.\\s+(?<series>\\d+)\\s+vol.\\s*(?<volume>\\d+)\\s+p.\\s+(?<page>\\d+)\\s+(?<year>[0-9]{4})/Uu',
    );

    // Parse citation
    $matches = array();
    $matched = false;
    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $publication, $matches)) {
            $matched = true;
            break;
        }
    }

    if (!empty($debug)) {
        print_r($matches);
        echo $publication;
    }

    $pages = array();
    if ($matched) {
        $reference = reference_from_matches($matches);

        if (!isset($reference->issn)) {
            // Try and get ISSN from bioGUID
            $issn = issn_from_title($reference->secondary_title);
            if ($issn != '') {
                $reference->issn = $issn;
            } elseif (!isset($reference->oclc)) {
                // No luck with ISSN, look for OCLC
                $oclc = oclc_for_title($reference->secondary_title);
                if ($oclc != 0) {
                    $reference->oclc = $oclc;
                }
            }
        }

        $atitle = isset($reference->title) ? $reference->title : '';
        $series = isset($reference->series) ? $reference->series : '';
        $ref_year = isset($reference->year) ? $reference->year : '';

        $search_hits = bhl_find_article($atitle, $reference->secondary_title, $reference->volume, $reference->spage, $series, $ref_year);
        if (count($search_hits) == 0) {
            // try alternative way of searching using article title
            $search_hits = bhl_find_article_from_article_title($atitle, $reference->secondary_title, $reference->volume, $reference->spage, $series);
        }

        foreach ($search_hits as $hit) {
            $pages[] = $hit->PageID;
        }
    }

    if (!empty($debug)) {
        print_r($pages);
    }
    return $pages;
}
Ejemplo n.º 2
0
/**
 * @brief Handle OpenURL request
 *
 * We may have more than one parameter with same name, so need to access QUERY_STRING, not _GET
 * http://stackoverflow.com/questions/353379/how-to-get-multiple-parameters-with-same-name-from-a-url-in-php
 *
 * Flow:
 *  1. With no query parameters, show the form and stop.
 *  2. Read format / callback / debug switches and parse the raw query string into
 *     $params (key => list of values), which parse_openurl() turns into $referent.
 *  3. If the referent carries a BHL page URL, use (or store) the article for that PageID.
 *  4. Otherwise look the article up locally with db_find_article().
 *  5. Otherwise search BHL; if a hit starts on a page we already hold a reference for,
 *     answer with that reference, else display the search results.
 *
 * Whenever a stored reference is found the response is either JSON/JSONP or a redirect
 * to the reference page, and the script exits.
 */
function main()
{
    global $config;
    global $debug;
    global $format;

    $id = 0;
    $callback = '';

    // If no query parameters
    if (count($_GET) == 0) {
        display_form();
        exit(0);
    }

    // Anything other than "json" is treated as "html"; $format is left alone when
    // the parameter is absent.
    if (isset($_GET['format'])) {
        $format = ($_GET['format'] === 'json') ? 'json' : 'html';
    }

    // The callback is echoed back verbatim as a JSONP wrapper, so only accept
    // something that looks like a (possibly dotted) JavaScript identifier.
    if (isset($_GET['callback'])
        && is_string($_GET['callback'])
        && preg_match('/^[A-Za-z_$][A-Za-z0-9_$]*(\\.[A-Za-z_$][A-Za-z0-9_$]*)*$/D', $_GET['callback'])
    ) {
        $callback = $_GET['callback'];
    }

    $debug = isset($_GET['debug']);

    // Handle query and display results.
    $params = array();
    foreach (explode('&', html_entity_decode($_SERVER['QUERY_STRING'])) as $param) {
        if ($param === '') {
            // Stray "&" (e.g. "a=1&&b=2"), nothing to record
            continue;
        }
        // Split on the first "=" only so values that themselves contain "=" survive,
        // and tolerate bare keys that have no "=" at all.
        $pair = explode('=', $param, 2);
        $key = preg_replace('/^\\?/', '', urldecode($pair[0]));
        $value = isset($pair[1]) ? $pair[1] : '';
        $params[$key][] = trim(urldecode($value));
    }

    if ($debug) {
        echo '<h1>Params</h1>';
        echo '<pre>';
        print_r($params);
        echo '</pre>';
    }

    // This is what we got from user
    $referent = new stdclass();
    parse_openurl($params, $referent);

    // Flesh it out
    // If we are looking for an article we need an ISSN, or at least an OCLC
    // Ask whether have this in our database (assumes we have ISSN)
    if (!isset($referent->issn)) {
        // Try and get ISSN from bioGUID
        $issn = issn_from_title($referent->secondary_title);
        if ($issn != '') {
            $referent->issn = $issn;
        } elseif (!isset($referent->oclc)) {
            // No luck with ISSN, look for OCLC
            $oclc = oclc_for_title($referent->secondary_title);
            if ($oclc != 0) {
                $referent->oclc = $oclc;
            }
        }
    }

    if ($debug) {
        echo '<h1>Referent</h1>';
        echo '<pre>';
        print_r($referent);
        echo '</pre>';
    }

    // Handle identifiers
    // BHL URL, for example if we have already mapped article to BHL in Zotero.
    // Both http and https forms of the page URL are accepted.
    if (isset($referent->url)
        && preg_match('/^https?:\\/\\/(www\\.)?biodiversitylibrary\\.org\\/page\\/(?<pageid>[0-9]+)/', $referent->url, $matches)
    ) {
        $PageID = $matches['pageid'];
        $references = bhl_reference_from_pageid($PageID);

        if (count($references) == 0) {
            // We don't have an article for this PageID
            bhl_score_page($PageID, isset($referent->title) ? $referent->title : '');
            // Store
            $id = db_store_article($referent, $PageID);
        } else {
            // Have a reference with this PageID already
            // Will need to handle case where > 1 article on same page, e.g.
            // http://www.biodiversitylibrary.org/page/3336598
            $id = $references[0];
        }

        // Did we get a hit?
        if ($id != 0) {
            // We have this reference in our database
            if ($format == 'json') {
                openurl_output_reference_json($id, $callback);
            } else {
                // Redirect to reference display
                header('Location: ' . $config['web_root'] . 'reference/' . $id . "\n\n");
            }
            exit;
        }
    }

    // OK, we're not forcing a match to BHL, so do we have this article?
    $id = db_find_article($referent);
    if ($id != 0) {
        // We have this reference in our database
        if ($format == 'json') {
            openurl_output_reference_json($id, $callback);
        } else {
            // Twitter as log: only requests that identify their referrer (rfr_id) are tweeted
            if (!empty($config['twitter']) && isset($_GET['rfr_id'])) {
                $url = $id;
                $url_len = strlen($url);

                $referer = isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '';
                $status = '#openurl ' . $referer;
                $status_len = strlen($status);

                // Keep "<status> <url>" within 140 bytes, marking truncation with an ellipsis
                $extra = 140 - $status_len - $url_len - 1;
                if ($extra < 0) {
                    $status_len += $extra;
                    $status_len -= 1;
                    $status = substr($status, 0, $status_len);
                    $status .= '…';
                }
                $status .= ' ' . $url;
                tweet($status);
            }
            // Redirect to reference display
            header('Location: reference/' . $id . "\n\n");
        }
        exit;
    }

    // OK, not found, so let's go look for it...
    // Search BHL
    $atitle = isset($referent->title) ? $referent->title : '';
    $spage = isset($referent->spage) ? $referent->spage : $referent->pages;
    $series = isset($referent->series) ? $referent->series : '';
    $date = isset($referent->date) ? $referent->date : '';
    $issn = isset($referent->issn) ? $referent->issn : '';

    $search_hits = bhl_find_article($atitle, $referent->secondary_title, $referent->volume, $spage, $series, $date, $issn);
    if (count($search_hits) == 0) {
        // try alternative way of searching using article title
        // ($atitle rather than $referent->title, which may not be set)
        $search_hits = bhl_find_article_from_article_title($atitle, $referent->secondary_title, $referent->volume, $spage, $series, $issn);
    }

    // At this point if we haven't found it in BHL we could go elsewhere, e.g. bioGUID,
    // in which case we'd need to take this into account when displaying HTML and JSON
    if ($debug) {
        echo '<h3>Search hits</h3>';
        echo '<pre>';
        print_r($search_hits);
        echo '</pre>';
    }

    // Check whether we already have an article that starts on this page
    foreach ($search_hits as $hit) {
        $references = bhl_reference_from_pageid($hit->PageID);
        if (count($references) != 0) {
            // We have this reference in our database
            if ($format == 'json') {
                openurl_output_reference_json($references[0], $callback);
            } else {
                // Redirect to reference display
                header('Location: reference/' . $references[0] . "\n\n");
            }
            exit;
        }
    }

    // Output search results in various formats...
    if ($format == 'json') {
        display_bhl_result_json($referent, $search_hits, $callback);
    } else {
        display_bhl_result_html($referent, $search_hits);
    }
}

/**
 * Write a stored reference to the response as JSON, wrapped in a JSONP call
 * when a callback name is given.
 *
 * @param mixed  $id       Identifier passed to db_retrieve_reference().
 * @param string $callback JSONP function name, or '' for plain JSON.
 */
function openurl_output_reference_json($id, $callback)
{
    $reference = db_retrieve_reference($id);
    header("Content-type: text/plain; charset=utf-8\n\n");
    if ($callback != '') {
        echo $callback . '(';
    }
    echo json_format(json_encode($reference));
    if ($callback != '') {
        echo ')';
    }
}
Ejemplo n.º 3
0
/**
 * Smoke-test bhl_find_article() against citations whose BHL PageID is known.
 *
 * Each fixture gives a journal title, volume and start page (plus an optional series)
 * together with the PageID the search is expected to return. For every fixture one line
 * is printed ("<title> <volume> <spage> ... [n] ok" or "... not found"), followed by a
 * summary count and a dump of the failed fixtures with the hits they produced. Output
 * is wrapped in <pre> for viewing in a browser.
 *
 * bhl_find_article() is called the same way as elsewhere in this file: article title
 * first (empty here), then journal title, volume, start page, series and year, and its
 * result is treated as a list of hit objects carrying a PageID property.
 * NOTE(review): confirm this matches the current bhl_find_article() signature.
 */
function test_bhl_find()
{
    $tests = array(
        //------------------------------------------------------------------------------------------
        // Journal of Hymenoptera Research
        array('title' => 'Journal of Hymenoptera Research', 'volume' => 6, 'spage' => 256, 'PageID' => 4491707),
        // Multiple pages in same item (multiple volumes)
        array('title' => 'Journal of Hymenoptera Research', 'volume' => 8, 'spage' => 1, 'PageID' => 4491014),
        //------------------------------------------------------------------------------------------
        // Fieldiana
        array('title' => 'Fieldiana, Zoology', 'volume' => 31, 'spage' => 149, 'PageID' => 2763486),
        array('title' => 'Fieldiana, Zoology', 'volume' => 39, 'spage' => 577, 'PageID' => 2866715),
        // Two hits
        array('title' => 'Fieldiana, Zoology', 'volume' => 73, 'spage' => 49, 'PageID' => 2759622),
        array('title' => 'Fieldiana, Zoology', 'volume' => 77, 'spage' => 1, 'PageID' => 2866529),
        //------------------------------------------------------------------------------------------
        // University of Kansas Science Bulletin
        array('title' => 'University of Kansas Science Bulletin', 'volume' => 35, 'spage' => 577, 'PageID' => 4413503),
        //------------------------------------------------------------------------------------------
        // Bulletin of Zoological Nomenclature
        array('title' => 'Bulletin of Zoological Nomenclature', 'volume' => 23, 'spage' => 169, 'PageID' => 12222978),
        //------------------------------------------------------------------------------------------
        // Proceedings of the California Academy of Sciences
        array('title' => 'Proceedings of the California Academy of Sciences', 'volume' => 47, 'spage' => 47, 'PageID' => 15776069),
        //------------------------------------------------------------------------------------------
        // Ann Mag Nat Hist
        array('title' => 'Ann Mag Nat Hist', 'volume' => 20, 'spage' => 413, 'series' => 8, 'PageID' => 15611435),
        //------------------------------------------------------------------------------------------
        // Bulletin of the British Museum (Natural History). Zoology
        array('title' => 'Bulletin of the British Museum (Natural History). Zoology', 'volume' => 34, 'spage' => 65, 'PageID' => 2261841),
        array('title' => 'Bulletin of the British Museum (Natural History). Zoology', 'volume' => 27, 'spage' => 65, 'PageID' => 2261309),
        array('title' => 'Bulletin of the British Museum (Natural History). Zoology', 'volume' => 27, 'spage' => 59, 'PageID' => 2261319),
        //------------------------------------------------------------------------------------------
        // Bulletin of the British Museum (Natural History). Entomology
        array('title' => 'Bulletin of the British Museum (Natural History): Entomology', 'volume' => 12, 'spage' => 247, 'PageID' => 2298342),
        //------------------------------------------------------------------------------------------
        // Memoirs of the Museum of Comparative Zoölogy
        array('title' => 'Memoirs of the Museum of Comparative Zoölogy', 'volume' => 50, 'spage' => 85, 'PageID' => 15776069),
        //------------------------------------------------------------------------------------------
        // Proc. ent. Soc. Wash.
        // Banks, N. (1899b). Some spiders from northern Louisiana. Proc. ent. Soc. Wash. 4: 188-195.
        array('title' => 'Proc. ent. Soc. Wash.', 'volume' => 4, 'spage' => 188, 'PageID' => 2299619),
        //------------------------------------------------------------------------------------------
        // Ann. Soc. ent. Fr.
        // Simon, E. (1885c). Etudes arachnologiques. 17e Mémoire. XXIV. Arachnides recuellis dans la
        // vallée de Tempé et sur le mont Ossa (Thessalie). Ann. Soc. ent. Fr. (6) 5: 209-218.
        array('title' => 'Ann. Soc. ent. Fr.', 'volume' => 5, 'spage' => 209, 'series' => 6, 'PageID' => 10171703),
        //------------------------------------------------------------------------------------------
        // Mitteilungen der Schweizerischen Entomologischen Gesellschaft
        // Forel A (1887) Fourmis récoltées à Madagascar par le Dr. Conrad Keller. Mitteilungen der Schweizerischen Entomologischen Gesellschaft 7: 381–389.
        array('title' => 'Mitteilungen der Schweizerischen Entomologischen Gesellschaft', 'volume' => 7, 'spage' => 381, 'PageID' => 10395996),
        //------------------------------------------------------------------------------------------
        // Revue zoologique africaine
        array('title' => 'Revue zoologique africaine', 'volume' => 9, 'spage' => 1, 'PageID' => 4491707),
    );

    echo '<pre>';
    $ok = 0;
    $failed = array();
    foreach ($tests as $test) {
        echo $test['title'] . ' ' . $test['volume'] . ' ' . $test['spage'] . ' ...';

        $series = isset($test['series']) ? $test['series'] : '';
        // The fixtures carry no article title or year, so those arguments are empty.
        $search_hits = bhl_find_article('', $test['title'], $test['volume'], $test['spage'], $series, '');

        // Normalise PageIDs to int so the strict comparison below is not defeated
        // by the search returning them as numeric strings.
        $page_ids = array();
        foreach ($search_hits as $hit) {
            $page_ids[] = (int) $hit->PageID;
        }

        if (in_array($test['PageID'], $page_ids, true)) {
            $ok++;
            echo " [" . count($page_ids) . "] ok\n";
        } else {
            echo " not found\n";
            $failed[] = array($test, $search_hits);
        }
    }

    // Report
    echo count($tests) . ' references, ' . (count($tests) - $ok) . ' failed' . "\n";
    print_r($failed);
    echo '</pre>';
}