/**
 * Extract specimen codes from a piece of text and print the result.
 *
 * The result is written straight to the output with echo; nothing is returned.
 *
 * @param string $text   Text passed to extract_specimen_codes().
 * @param string $format 'json' prints the result object (text + codes) as
 *                       formatted JSON; 'html' or any other value prints a
 *                       small HTML report page.
 */
function find_specimens($text, $format = 'json')
{
    $obj = new stdClass();
    $obj->text = $text;
    $obj->codes = extract_specimen_codes($text);

    switch ($format) {
        case 'json':
            // The JSON variant carries the text with HTML entities decoded.
            $obj->text = html_entity_decode($obj->text, ENT_QUOTES, 'UTF-8');
            echo json_format(json_encode($obj));
            break;

        case 'html':
        default:
            // The "Home" link belongs in <body>; an <a> inside <head> is invalid markup.
            echo '<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<style type="text/css" title="text/css">
	body {
		font-family: sans-serif;
		margin:20px;
		}
</style>
<title>Specimen parser</title>
</head>
<body>
<a href="specimenparser.php">Home</a>
<h1>Specimen parser results</h1>
<h2>Specimen codes</h2>
<ul>';
            foreach ($obj->codes as $code) {
                // Codes are derived from caller-supplied text, so escape them
                // the same way the input text is escaped below.
                echo '<li>' . htmlentities($code, ENT_QUOTES, 'UTF-8') . '</li>';
            }
            echo '</ul>
<h2>Input</h2>
<p>' . htmlentities($obj->text, ENT_QUOTES, 'UTF-8') . '</p>
</body>
</html>';
            break;
    }
}
Example #2
0
/**
 * Rebuild the list of specimen codes stored for a reference.
 *
 * Steps:
 *  1. delete the reference's rows from rdmp_reference_specimen_joiner;
 *  2. fetch the text of the reference's pages and extract specimen codes,
 *     adding whatever extend_specimens() returns for each code;
 *  3. insert one row per unique code, or a single row with a NULL code when
 *     nothing was found, so the reference is flagged as processed.
 *
 * Any failed query terminates the script via die().
 *
 * @param int $reference_id Reference identifier (used unquoted in SQL, so it
 *                          is forced to an integer for that purpose).
 *
 * @return array Sorted, de-duplicated specimen codes (empty if none found).
 */
function specimens_from_reference($reference_id)
{
    global $db;

    // The id is spliced into SQL without quoting; make sure it really is a number.
    $reference_sql = (int) $reference_id;

    // delete any existing specimens
    $sql = 'DELETE FROM rdmp_reference_specimen_joiner WHERE reference_id=' . $reference_sql;
    $result = $db->Execute($sql);
    if (!$result) {
        die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
    }

    $pages = bhl_retrieve_reference_pages($reference_id);
    $page_ids = array();
    foreach ($pages as $p) {
        $page_ids[] = $p->PageID;
    }

    $text = bhl_fetch_text_for_pages($page_ids);
    // Turn literal backslash-n sequences into real newlines, then drop the
    // single space that may follow a newline.
    $text = str_replace('\\n', "\n", $text);
    $text = str_replace("\n ", "\n", $text);

    $specimens = extract_specimen_codes($text);
    $extra = array();
    foreach ($specimens as $code) {
        $extra = array_merge($extra, extend_specimens($code, $text));
    }
    $specimens = array_unique(array_merge($specimens, $extra));
    sort($specimens);

    if (count($specimens) == 0) {
        // none found, insert NULL entry to flag that we've processed this reference
        $sql = 'INSERT INTO rdmp_reference_specimen_joiner(reference_id,code) VALUES(' . $reference_sql . ',NULL)';
        // Previously this statement was built but never run.
        $result = $db->Execute($sql);
        if (!$result) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    } else {
        foreach ($specimens as $code) {
            $sql = 'INSERT INTO rdmp_reference_specimen_joiner(reference_id,code) VALUES(' . $reference_sql . ',' . $db->qstr($code) . ')';
            $result = $db->Execute($sql);
            if (!$result) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
    }
    /*
    $nm = bhl_names_in_reference_by_page($reference_id);
    $nm->names;
    
    // Get majority rule taxon (what paper is about)
    $tags = array();
    foreach ($nm->names as $name)
    {
    	$tags[] = $name->namestring;
    }
    
    $paths = get_paths($tags);
    $majority_rule = majority_rule_path($paths);
    $expanded = expand_path($majority_rule);
    
    print_r($expanded);
    */
    return $specimens;
}
Example #3
0
    array_push($samples, 'TNHC63518');
    array_push($samples, 'FIGURES 1≠6. Adults and male genitalia. 1, Schinia immaculata, male, Arizona, Coconino Co.
	Colorado River, Grand Canyon, river mile 166.5 L, USNMENT 00229965; 2, S biundulata, female,
	Nevada, Humboldt Co. Sulphur, USNMENT 00220807; 3, S. immaculata, male genitalia; 4, S.
	immaculata, aedoeagus; 5, S. biundulata, male genitalia; 6, S. biundulata, aedoeagus.
	Material Examined. PARATYPES (3∞): U.S.A.: ARIZONA: COCONINO CO. 1∞
	same data as holotype except: USNM ENT 00210120 (NAU); river mile 166.5 L, old high
	water, 36.2542 N, 112.8996 W, 14 Apr. 2003 (1∞), R. J. Delph, USNM ENT 00219965
	(USNM); river mile 202 R, new high water, 36.0526 N, 113.3489 W, 15 May 2001 (1∞), J.
	Rundall, USNM ENT 00210119 (NAU). Paratypes deposited in the National Museum of
	Natural History, Washington, DC (USNM) and Northern Arizona University, Flagstaff,
AZ (NAU).');
    array_push($samples, 'Material examined. ≠ Holotype - male, 30.3 mm SVL, WHT 5862, Hunnasgiriya (Knuckles), elevation 1,100 m (07∫23\' N, 80∫41\' E), coll. 17 Oct.2003. Paratypes - females, 35.0 mm SVL, WHT 2477, Corbett\'s Gap (Knuckles), 1,245 m (07∫ 22\' N, 80∫ 51\' E) coll. 6 Jun.1999; 33.8 mm SVL, WHT 6124, Corbett\'s Gap (Knuckles), 1,245 m (07∫ 22\' N, 80∫ 51\' E) coll. 16 Jun.2004; males, 30.3 mm SVL, WHT 5868, Hunnasgiriya, same data as holotype, coll. 16 Oct.2003; 31.3 mm');
    $ok = 0;
    foreach ($samples as $str) {
        $s = extract_specimen_codes($str, $matches);
        $matched = count($s);
        if ($matched > 0) {
            $specimens = array_merge($specimens, $s);
            $ok++;
        } else {
            array_push($failed, $str);
        }
    }
    // report
    echo "--------------------------\n";
    echo count($samples) . ' samples, ' . (count($samples) - $ok) . ' failed' . "\n";
    print_r($failed);
    print_r($specimens);
    // Post process specimens
}
Example #4
0
/**
 * Determine a specimen (voucher) code for a record and attach it to $data.
 *
 * Reads $data->source->specimen_voucher and $data->source->isolate:
 *  - a value starting with "CASENT" is used verbatim (isolate is only
 *    consulted for this when specimen_voucher is not set);
 *  - otherwise extract_specimen_codes() is run on the voucher (with ":" and
 *    "-" replaced by spaces) and then on the isolate; a result is accepted
 *    only when exactly one code is found.
 *
 * When a code is found it is stored in $data->source->specimen_code. If
 * $data->taxonomic_group is set, the specimen is looked up at bioguid.info
 * and, when the response has a title, stored in $data->source->specimen.
 * CASENT codes always end up with a specimen object holding an "antweb:" guid.
 *
 * @param object $data Record to inspect; modified in place.
 *
 * @return void
 */
function gb_specimen_code(&$data)
{
    $voucher_code = '';

    // CASENT codes are taken as-is, without going through the code extractor.
    if (isset($data->source->specimen_voucher)) {
        if (preg_match('/^CASENT/', $data->source->specimen_voucher)) {
            $voucher_code = $data->source->specimen_voucher;
        }
    } elseif (isset($data->source->isolate)) {
        // Try isolate field
        if (preg_match('/^CASENT/', $data->source->isolate)) {
            $voucher_code = $data->source->isolate;
        }
    }

    if ('' == $voucher_code && isset($data->source->specimen_voucher)) {
        // Cases such as EF629441 have colons in the specimen name,
        // AY193412 has - in name: treat both as separators.
        $v = str_replace(array(":", "-"), " ", $data->source->specimen_voucher);
        $ids = extract_specimen_codes($v);
        if (count($ids) == 1) {
            $voucher_code = $ids[0];
        }
    }

    if ('' == $voucher_code && isset($data->source->isolate)) {
        // Try isolate field
        $ids = extract_specimen_codes($data->source->isolate);
        if (count($ids) == 1) {
            $voucher_code = $ids[0];
        }
    }

    if ('' == $voucher_code) {
        // Nothing usable found; leave $data untouched.
        return;
    }

    $data->source->specimen_code = $voucher_code;

    // split() was removed in PHP 7; explode() is equivalent for a literal space.
    $parts = explode(" ", $voucher_code);
    $institutionCode = $parts[0];
    // Codes without a space (e.g. "CASENT0106022") have no catalogue part.
    $catalogNumber = isset($parts[1]) ? $parts[1] : '';

    // Can we get linked data?
    if (isset($data->taxonomic_group)) {
        switch ($data->taxonomic_group) {
            case 'Amphibia':
            case 'Reptiles':
                // Default collection for herpetological records
                $collectionCode = 'Herps';
                switch ($institutionCode) {
                    case 'MCZ':
                        // MCZ keeps amphibians and reptiles in separate collections
                        $collectionCode = ($data->taxonomic_group == 'Amphibia') ? 'Amph' : 'Rept';
                        break;
                    case 'LACM':
                        $collectionCode = '';
                        break;
                    // Australian stuff
                    case 'AM':
                    case 'SAMA':
                    case 'ANWC':
                    case 'AMS':
                    case 'WAM':
                    case 'NT':
                        $collectionCode = '';
                        break;
                    default:
                        break;
                }
                break;
            default:
                $collectionCode = $data->taxonomic_group;
                break;
        }

        // Encode each component so spaces or reserved characters cannot break the query string.
        $url = 'http://bioguid.info/openurl?genre=specimen&institutionCode=' . urlencode($institutionCode)
            . '&collectionCode=' . urlencode($collectionCode)
            . '&catalogNumber=' . urlencode($catalogNumber)
            . '&display=json';

        // fetch
        $json = get($url);
        $j = json_decode($json);
        if (isset($j->title)) {
            $data->source->specimen = $j;
        }
    }

    // Special handling of CASENT (tapir): always replaces any looked-up specimen
    if (preg_match('/^CASENT/', $voucher_code)) {
        $data->source->specimen = new stdClass();
        $data->source->specimen->guid = 'antweb:' . str_replace(' ', '', strtolower($voucher_code));
    }
}
	Nevada, Humboldt Co. Sulphur, USNMENT 00220807; 3, S. immaculata, male genitalia; 4, S.
	immaculata, aedoeagus; 5, S. biundulata, male genitalia; 6, S. biundulata, aedoeagus.
	Material Examined. PARATYPES (3∞): U.S.A.: ARIZONA: COCONINO CO. 1∞
	same data as holotype except: USNM ENT 00210120 (NAU); river mile 166.5 L, old high
	water, 36.2542 N, 112.8996 W, 14 Apr. 2003 (1∞), R. J. Delph, USNM ENT 00219965
	(USNM); river mile 202 R, new high water, 36.0526 N, 113.3489 W, 15 May 2001 (1∞), J.
	Rundall, USNM ENT 00210119 (NAU). Paratypes deposited in the National Museum of
	Natural History, Washington, DC (USNM) and Northern Arizona University, Flagstaff,
AZ (NAU).');
    //array_push($samples, 'Material examined. ≠ Holotype - male, 30.3 mm SVL, WHT 5862, Hunnasgiriya (Knuckles), elevation 1,100 m (07∫23\' N, 80∫41\' E), coll. 17 Oct.2003. Paratypes - females, 35.0 mm SVL, WHT 2477, Corbett\'s Gap (Knuckles), 1,245 m (07∫ 22\' N, 80∫ 51\' E) coll. 6 Jun.1999; 33.8 mm SVL, WHT 6124, Corbett\'s Gap (Knuckles), 1,245 m (07∫ 22\' N, 80∫ 51\' E) coll. 16 Jun.2004; males, 30.3 mm SVL, WHT 5868, Hunnasgiriya, same data as holotype, coll. 16 Oct.2003; 31.3 mm');
    $samples = array();
    //$samples[] ="SÃO PAULO: 1. Teodoro Sampaio, (−22.52, −52.17), MZUSP 8885, 25819; 2. Estação Biológica de Boracéia, Salesópolis, (−23.65, −45.9), USNM 460569; 3. Parque Estadual da Serra do Mar, Núcleo Santa Virgínia, 10 km NW Ubatuba, (−23.36, −45.13), 850 m, NSV 160599. PARANÁ: 4. Parque Barigüi, Bairro Mercês, Curitiba, (−25.42, −49.30), 861 m, MHNCI 2599. SANTA CATARINA: 5. Ilha de Santa Catarina, (−27.6, −48.5), BMNH 50.7.8.24, 50.7.8.25, 7.1.1.174; 6. Serra do Tabuleiro, (−27.83, −48.78), JCV 28. RIO GRANDE DO SUL: 7. Parque Nacional dos Aparados da Serra, Cambará do Sul, (−29.25, −49.83), 800 m, MCNU 829; 8. Aratiba, (−27.27, −52.32), 420 m, MZUSP 33474 (holotype), MZUSP 33475, MCNU 826, 827, 831, 833–838, 840 (paratypes), MCNU 829. UNKNOWN LOCALITY: probably from the state of Minas Gerais, UFMG 3015.";
    $samples[] = "We employed DNA barcoding as a third source of standardized data for species identification. We sequenced two mitochondrial DNA barcode markers for amphibians, the 5’ end of the cytochrome oxidase I (COI) gene and a fragment of the ribosomal 16S gene, using published primers and protocols (Vences et al. 2005; Smith et al. 2008; Crawford et al. 2010). GenBank accession numbers for each gene (COI, 16S) for each Panamanian specimen are as follows: MVUP 2042 (JF769001, JF769004) and AJC 2067 (JF769000, JF769003). We also obtained sequence data from one E. planirostris from Havana, Cuba, deposited in the Museum of Natural History “Felipe Poey”, Havana, with specimen number MFP.11512 (JF769002, JF769005). Gene sequences and metadata were also deposited at Barcode of Life Data Systems (Ratnasingham & Hebert 2007) under project code “BSINV”. Species identification utilized character-based phylogenetic inference and genetic distances (Goldstein & DeSalle 2011), as well as qualitative observations of morphology and advertisement call.\nWe compared the 16S DNA data with 16 closely related sequences (Frost et al. 2006; Heinicke et al. 2007) from GenBank (Fig. 1). Note, specimen USNM 564984 is currently identified as P. casparii in GenBank EF493599, but was re-identified as P. planirostris in Heinicke et al. (2011). Excluding gapped sites, the alignment contained 518 base pairs (bp), of which 57 were parsimony-informative and 37 were singletons. Phylogenetic analysis of 16S data followed protocols in Crawford et al. (2010). Parsimony inference resulted in 4 shortest trees of 148 steps (not shown), with support measured by 2,000 boostrap pseudoreplicates. A maximum likelihood-based tree (-Ln score = 1520.37670) is shown in Fig 1.";
    $ok = 0;
    foreach ($samples as $str) {
        $s = extract_specimen_codes($str);
        $matched = count($s);
        if ($matched > 0) {
            $specimens = array_merge($specimens, $s);
            $ok++;
        } else {
            array_push($failed, $str);
        }
    }
    // report
    echo "--------------------------\n";
    echo count($samples) . ' samples, ' . (count($samples) - $ok) . ' failed' . "\n";
    print_r($failed);
    print_r($specimens);
    // Post process specimens
}