示例#1
0
文件: cinii.php 项目: rdmpage/bioguid
        		}
        		*/
    }
    return $found;
}
// Manual smoke test, disabled by default. Flip the condition to run a single
// CiNii lookup and then exercise the cache (find, and store when absent).
if (false) {
    $item = new stdClass();

    // Sample calls using the seven-argument form, kept for reference:
    //search_cinii('', '0003-5092', 18, 185, 0, 1939, $item);
    //search_cinii('','1343-8786', 2, 439, 445, 1999, $item);
    // Entomological Science, 1(2), June 25 1998: 233-239.
    //search_cinii('','1343-8786', 1, 233, 239, 1998, $item);
    // Annotationes Zoologicae Japonenses
    //search_cinii('', '0003-5092', 56, 338, 350, 1983, $item);
    // Entomological Review
    //search_cinii('', '0286-9810', 58, 1, 6, 2003, $item);
    //search_cinii('', '0286-9810', 61, 119, 126, 2006, $item);

    // Sample calls using the newer five-argument form:
    //search_cinii('', '0286-9810', 58, 1, $item);
    //search_cinii('', '0003-5092', 56, 338, $item);
    search_cinii('', '1343-8786', 2, 439, $item);
    print_r($item);

    // Only store the record when the cache does not already hold it.
    if (!find_in_cache($item)) {
        echo "store\n";
        store_in_cache($item);
    } else {
        echo "already exists\n";
    }
}
示例#2
0
                }
                if ($formatted_date != '') {
                    $obj->date = $formatted_date;
                }
                if ($year != '') {
                    $obj->year = $year;
                }
            }
        }
        // cleanup
        if (!isset($obj->url) && !isset($obj->doi)) {
            $obj->url = $obj->zotero;
        }
        // ISSN lookup
        if (!isset($obj->issn) && 'article' == $obj->genre) {
            $issn = issn_from_journal_title($obj->title);
            if ('' != $issn) {
                $obj->issn = $issn;
            }
        }
        print_r($obj);
        // Store
        if (find_in_cache($obj) == 0) {
            if (isset($obj->issn)) {
                if ($obj->issn != '') {
                    store_in_cache($obj);
                }
            }
        }
    }
}
示例#3
0
/**
 * Scrape article references from a Nuytsia listing page and cache them.
 *
 * The page at $url is fetched with get(), flattened onto one line, and cut
 * into paragraphs at each closing </p> tag. Every paragraph carrying
 * class="article" is turned into a reference object (authors, year, PDF link,
 * article title, volume, issue, pages, ISSN), dumped with print_r(), and
 * stored via store_in_cache() unless find_in_cache() already reports it.
 *
 * @param string $url Address of the listing page to parse.
 * @return void
 */
function parse_nuytsia($url)
{
    // Remove line breaks and tabs so that each paragraph is a single line.
    $html = str_replace(array("\n", "\r", "\t"), "", get($url));

    foreach (explode('</p>', $html) as $chunk) {
        // explode() consumed the closing tag; the patterns below rely on it.
        $para = $chunk . '</p>';
        if (!preg_match('/class="article"/', $para)) {
            continue;
        }

        $item = new stdclass();
        $item->authors = array();

        // Author list and publication year: "...</a>AUTHORS (YYYY)."
        if (preg_match('/<\\/a>(?<authors>(.*))\\((?<year>[0-9]{4})\\)./', $para, $byline)) {
            $item->year = $byline['year'];

            // Drop everything up to the last closing anchor, then normalise
            // the " AND " joiner so all names are separated the same way.
            $author_list = preg_replace('/(.*)<\\/a>/', '', $byline['authors']);
            $author_list = str_replace(' AND ', ', ', $author_list);

            foreach (explode('.,', trim($author_list)) as $raw_name) {
                if ($raw_name == '') {
                    continue;
                }
                // Restore the full stop that the split removed.
                $name = trim($raw_name) . ".";
                // Space initials nicely
                $name = preg_replace("/\\.([A-Z])/", ". \$1", $name);
                // Title-case the name in whatever encoding it appears to use.
                $name = mb_convert_case($name, MB_CASE_TITLE, mb_detect_encoding($name));

                // Break the name into its parts and copy over those present.
                $parts = parse_name($name);
                $author = new stdClass();
                if (isset($parts['last'])) {
                    $author->lastname = $parts['last'];
                }
                if (isset($parts['suffix'])) {
                    $author->suffix = $parts['suffix'];
                }
                if (isset($parts['first'])) {
                    $forename = $parts['first'];
                    if (array_key_exists('middle', $parts)) {
                        $forename .= ' ' . $parts['middle'];
                    }
                    $author->forename = $forename;
                }
                $item->authors[] = $author;
            }
        }

        // Link to the PDF.
        if (preg_match('/<a href="(?<pdf>(http:\\/\\/www.dec.wa.gov.au(.*)\\.pdf))/', $para, $link)) {
            $item->pdf = $link['pdf'];
        }

        // Article title sits between the year and the journal name.
        if (preg_match('/\\([0-9]{4}\\).(?<atitle>(.*))<i>Nuytsia<\\/i>/', $para, $heading)) {
            $item->atitle = strip_tags($heading['atitle']);
        }

        // Citation, e.g. <i>Nuytsia</i> <u>19</u> (1) : 191–196
        // The page separator is an en dash (U+2013).
        if (preg_match('/<i>(Nuytsia)<\\/i> <u>(?<volume>(.*))<\\/u>\\s*\\((?<issue>(.*))\\)\\s*:\\s*(?<spage>[0-9]+)–(?<epage>(.*))\\.<\\/p>/', $para, $citation)) {
            $item->title = 'Nuytsia';
            $item->volume = $citation['volume'];
            $item->issue = $citation['issue'];
            $item->spage = $citation['spage'];
            $item->epage = $citation['epage'];
            $item->issn = '0085-4417';
        }

        print_r($item);

        // Store the reference unless the cache already has it.
        if (find_in_cache($item) == 0) {
            store_in_cache($item);
        }
    }
}
示例#4
0
 /**
  * Find the article that contains a given page of a journal volume.
  *
  * The local cache is consulted first. On a miss, CrossRef is tried (when
  * in_crossref() says so) and then JSTOR, each time walking backwards one
  * page at a time from the requested page until a lookup succeeds, a cached
  * record is hit, the attempt limit is reached, or the page drops below zero.
  *
  * @param array  $values Lookup values. Keys read here: 'issn', 'volume',
  *                       'pages', 'genre', and 'date' (only guarded with
  *                       array_key_exists() for the CrossRef step).
  * @param object $item   Passed by reference. Replaced with the record from
  *                       retrieve_from_db() on a cache hit; otherwise handed
  *                       to search_for_doi() / jstor_metadata(), which
  *                       presumably populate it -- confirm against those helpers.
  * @return bool True when an article was found, false otherwise.
  */
 function find_article_from_page($values, &$item)
 {
     global $debug;
     $found = false;
     // Is it in our cache?
     $tmp_item = new stdClass();
     $tmp_item->issn = $values['issn'];
     $tmp_item->volume = $values['volume'];
     $tmp_item->pages = $values['pages'];
     $cache_id = find_in_cache_from_page($tmp_item);
     // A non-zero id is treated as a cache hit.
     if ($cache_id != 0) {
         $item = retrieve_from_db($cache_id);
         $found = true;
     } else {
         // Off to the Cloud...
         // For now limit ourselves to CrossRef
         $year = '';
         if (array_key_exists('date', $values)) {
             $year = $values['date'];
         }
         if (in_crossref($values['issn'], $year, $values['volume'])) {
             //echo 'Should be in CrossRef';
             $max_tries = 50;
             $doi = '';
             $page = $values['pages'];
             $upper_bound = $page;
             // save the original starting page
             $count = 0;
             // Step backwards from the requested page, at most $max_tries times.
             while (!$found && $count < $max_tries && $page >= 0) {
                 if ($debug) {
                     echo $count, '.';
                 }
                 $doi = search_for_doi($values['issn'], $values['volume'], $page, $values['genre'], $item);
                 // An empty string is treated as "no DOI for this page".
                 if ($doi == '') {
                     // Decrease page
                     $page--;
                     // We might now be in the range of a previously found range
                     $tmp_item->issn = $values['issn'];
                     $tmp_item->volume = $values['volume'];
                     $tmp_item->pages = $page;
                     $cache_id = find_in_cache_from_page($tmp_item);
                     if ($cache_id != 0) {
                         $item = retrieve_from_db($cache_id);
                         $found = true;
                         // Update upper bound
                         update_page_upperbound($cache_id, $upper_bound);
                     }
                 } else {
                     $found = true;
                     // Reuse the cached record if there is one, otherwise store the new item.
                     $cache_id = find_in_cache($item);
                     if ($cache_id == 0) {
                         $cache_id = store_in_cache($item);
                     }
                     // Update upper bound
                     update_page_upperbound($cache_id, $upper_bound);
                     //echo 'got it!';
                 }
                 $count++;
             }
         }
         // OK, try JSTOR (gulp)
         if (!$found) {
             // NOTE(review): $values['date'] is read below without the
             // array_key_exists() guard used for $year above -- confirm that
             // callers always supply 'date' when this branch can be reached.
             if ($debug) {
                 echo '<p>Trying JSTOR ' . $values['issn'] . '</p>';
                 // Debug-only extra call to in_jstor(), used just to report the result.
                 if (in_jstor($values['issn'], $values['date'])) {
                     echo "in JSTOR\n";
                 }
             }
             if (enough_for_jstor_lookup($values) && in_jstor($values['issn'], $values['date'])) {
                 $max_tries = 20;
                 $page = $values['pages'];
                 $upper_bound = $page;
                 // save the original starting page
                 // Work on a copy so the caller's values are left unchanged.
                 $temp_values = $values;
                 $count = 0;
                 // Same backwards walk as for CrossRef, looking each page up by SICI.
                 while (!$found && $count < $max_tries && $page >= 0) {
                     if ($debug) {
                         echo $count, '.';
                     }
                     $temp_values['spage'] = $page;
                     $sici = sici_from_meta($temp_values);
                     if ($debug) {
                         print_r($temp_values);
                         echo urlencode($sici);
                     }
                     $found = jstor_metadata($sici, $item);
                     if (!$found) {
                         // Decrease page
                         $page--;
                         $temp_values['spage'] = $page;
                         // We might now be in the range of a previously found range
                         $tmp_item->issn = $values['issn'];
                         $tmp_item->volume = $values['volume'];
                         $tmp_item->pages = $page;
                         $cache_id = find_in_cache_from_page($tmp_item);
                         if ($cache_id != 0) {
                             $item = retrieve_from_db($cache_id);
                             $found = true;
                             // Update upper bound
                             update_page_upperbound($cache_id, $upper_bound);
                         }
                     } else {
                         // Reuse the cached record if there is one, otherwise store the new item.
                         $cache_id = find_in_cache($item);
                         if ($cache_id == 0) {
                             $cache_id = store_in_cache($item);
                         }
                         // Update upper bound
                         update_page_upperbound($cache_id, $upper_bound);
                         //echo 'got it!';
                     }
                     $count++;
                 }
             } else {
                 if ($debug) {
                     echo '<p>Not enough for JSTOR lookup, or out of range ' . $values['issn'] . ' ' . $values['date'] . '</p>';
                 }
             }
         }
     }
     return $found;
 }
示例#5
0
文件: ris.php 项目: rdmpage/bioguid
/**
 * @brief Import bibliographic data from a RIS file
 *
 * The text is read line by line. Each "KEY  - value" line is handed to
 * process_ris_key() to build up the current reference object. A "TY" line
 * starts a new reference and an "ER" line finishes it, at which point the
 * record is enriched (ISSN lookup by journal title, DOI lookup via CrossRef),
 * author name parts are swapped for journals that need it, and the record is
 * stored in the cache when it is not already there and has an ISSN.
 *
 * @param string $ris Complete contents of a RIS file.
 * @return void
 */
function import_ris($ris)
{
    global $debug;
    // split() was removed in PHP 7.0. Both separators used here are literal
    // strings, so explode() is a drop-in replacement.
    $rows = explode("\n", $ris);
    if ($debug) {
        echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
        print_r($rows);
        echo "</pre>";
    }
    $state = 1;
    $genre = '';
    // Start with an empty record so that input which does not begin with a
    // TY line never touches an undefined variable.
    $obj = new stdClass();
    $obj->authors = array();
    foreach ($rows as $r) {
        // Limit of 2: only the first separator divides tag from value, so a
        // value that itself contains "  - " is kept whole.
        $parts = explode("  - ", $r, 2);
        $key = '';
        if (isset($parts[1])) {
            $key = $parts[0];
            // clean up any leading and trailing spaces
            $value = trim($parts[1]);
        }
        if ($key == 'TY') {
            $state = 1;
            $obj = new stdClass();
            $obj->authors = array();
            // Reset the genre for every record so a non-journal entry does
            // not inherit 'article' from the record before it.
            $genre = ('JOUR' == $value) ? 'article' : '';
        }
        if ($key == 'ER') {
            $state = 0;
            if ($debug) {
                echo 'Line: ' . __LINE__ . "\n";
                echo "\n=== Import this object ==\n";
            }
            // ISSN lookup
            if (!isset($obj->issn) && 'article' == $genre && isset($obj->title)) {
                $issn = issn_from_journal_title($obj->title);
                if ('' != $issn) {
                    $obj->issn = $issn;
                }
            }
            // to do: we might want to do a DOI lookup here to get more GUIDs...
            if (!isset($obj->doi)) {
                // Missing fields are passed as '' rather than read while undefined.
                $lookup_issn = isset($obj->issn) ? $obj->issn : '';
                $lookup_year = isset($obj->year) ? $obj->year : '';
                $lookup_volume = isset($obj->volume) ? $obj->volume : '';
                $lookup_spage = isset($obj->spage) ? $obj->spage : '';
                if (in_crossref($lookup_issn, $lookup_year, $lookup_volume)) {
                    $item = new stdClass();
                    $doi = search_for_doi($lookup_issn, $lookup_volume, $lookup_spage, 'article', $item);
                    if ($doi != '') {
                        $obj->doi = $doi;
                        // Fix missing metadata
                        if (!isset($obj->epage) && isset($item->epage)) {
                            $obj->epage = $item->epage;
                        }
                    }
                }
            }
            // http://en.wikipedia.org/wiki/Chinese_name
            // For some journals (e.g., Chinese) we need to reverse the name parts returned
            // by parse_name
            switch (isset($obj->issn) ? $obj->issn : '') {
                case '0529-1526':
                    // Acta Phytotaxonomica Sinica
                    for ($i = 0; $i < count($obj->authors); $i++) {
                        $tmp = $obj->authors[$i]->forename;
                        $obj->authors[$i]->forename = $obj->authors[$i]->lastname;
                        $obj->authors[$i]->lastname = $tmp;
                    }
                    break;
                default:
                    break;
            }
            // Cleaning...
            if ($debug) {
                echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
                print_r($obj);
                echo "</pre>";
            }
            // Store reference here...
            // Only records with a non-empty ISSN are stored. (An alternative
            // check on a non-empty volume was used for Pac Sci.)
            if (find_in_cache($obj) == 0) {
                if (isset($obj->issn) && $obj->issn != '') {
                    store_in_cache($obj);
                }
            }
        }
        if ($state == 1) {
            if (isset($value)) {
                process_ris_key($key, $value, $obj);
            }
        }
    }
}