/**
 * Re-links names in one id window to a freshly computed canonical form.
 *
 * Selects the rows of `names` with id in [$start, $start + $limit - 1] whose
 * string matches the REGEXP embedded in the query, computes the canonical form
 * of each string, and stores the resulting id in both canonical_form_id and
 * ranked_canonical_form_id. Each update is also echoed (abbreviated) as progress output.
 *
 * @param int $start first names.id of the window
 * @param int $limit number of ids in the window
 */
public function check_st_john($start, $limit)
 {
     $last_id = $start + $limit - 1;
     $query = "SELECT id, string FROM names WHERE string REGEXP BINARY 'st\\\\\\.-[a-z]'\n            AND id BETWEEN {$start} AND " . $last_id;
     foreach ($this->mysqli->iterate_file($query) as $name_row) {
         list($name_id, $name_string) = $name_row;
         $canonical_form = CanonicalForm::find_or_create_by_string(Functions::canonical_form($name_string));
         if (!$canonical_form) {
             // nothing could be found or created for this string; leave the row as it is
             continue;
         }
         echo "UPDATE names SET canonical_form_id={$canonical_form->id} WHERE id={$name_id}\n";
         $this->mysqli->update("UPDATE names SET canonical_form_id={$canonical_form->id}, ranked_canonical_form_id={$canonical_form->id} WHERE id={$name_id}");
     }
     $this->mysqli->commit();
 }
 /**
  * Builds the list of taxon info records for the entries of a search response.
  *
  * When $strict is truthy an entry is kept only if the canonical form of its
  * title equals $orig_sciname (both trimmed and lower-cased); otherwise every
  * entry is kept.
  *
  * @param object $xml          parsed response exposing ->entry items with ->id and ->title
  * @param string $orig_sciname the name that was searched for
  * @param mixed  $strict       truthy to require a canonical-form match
  * @return array one get_objects_info() result per accepted entry
  */
 private function get_details($xml, $orig_sciname, $strict)
 {
     $taxa = array();
     foreach ($xml->entry as $entry) {
         // the comparison is only evaluated in strict mode (short-circuit)
         $accepted = !$strict
             || strtolower(trim($orig_sciname)) == strtolower(trim(Functions::canonical_form(trim($entry->title))));
         if (!$accepted) {
             continue;
         }
         $taxa[] = self::get_objects_info($entry->id, $entry->title, $orig_sciname);
     }
     return $taxa;
 }
예제 #3
0
 /**
  * Rebuilds the canonical_forms table and re-links rows of `names` to it.
  *
  * Inside one transaction: empties canonical_forms, resets names.canonical_form_id
  * to 0, then walks `names` in id batches. For each selected row it resolves a
  * canonical form string, finds or inserts the matching canonical_forms row
  * (remembering the id in a local string => id cache) and stores that id on the name.
  *
  * Fixes relative to the previous version:
  *  - the batch upper bound was start+interval-2, so one id per batch was never read;
  *  - the loop stopped at $start < $max, skipping the last id when it fell on a batch start;
  *  - canonical_form_id / canonical_form_verified were both read from the "id" column;
  *  - the final UPDATE was not valid SQL and never used the resolved canonical form id.
  *
  * NOTE(review): the SELECT only picks rows with canonical_form_verified!=0, and
  * canonical_form_id has just been reset to 0 for every name, so the "reuse the
  * verified canonical form" lookup cannot currently find anything. Confirm whether
  * the filter and the reset are both intended.
  */
 function canonical_forms()
 {
     $mysqli =& $GLOBALS['mysqli_connection'];
     $mysqli->begin_transaction();
     $mysqli->delete("DELETE FROM canonical_forms");
     $mysqli->update("UPDATE names SET canonical_form_id=0");
     $result = $mysqli->query("SELECT MAX(id) as max FROM names");
     $row = $result->fetch_assoc();
     // MAX(id) is NULL on an empty table; treat that as "nothing to do"
     $max = (int) $row["max"];
     $start = 1;
     $interval = 50000;
     // canonical form string => canonical_forms.id, filled as forms are found or inserted
     $canonical_form_ids = array();
     while ($start <= $max) {
         debug($start);
         // BETWEEN is inclusive, so the batch is [start, start + interval - 1]
         $result = $mysqli->query("SELECT id, string, canonical_form_id, canonical_form_verified FROM names WHERE id BETWEEN {$start} AND " . ($start + $interval - 1) . " AND canonical_form_verified!=0");
         while ($result && ($row = $result->fetch_assoc())) {
             $id = $row["id"];
             $string = $row["string"];
             $canonical_form_id = $row["canonical_form_id"];
             $canonical_form_verified = $row["canonical_form_verified"];
             $canonical_form = "";
             if ($canonical_form_verified && $canonical_form_id) {
                 // keep the already verified canonical form when it still exists
                 $result2 = $mysqli->query("SELECT string FROM canonical_forms WHERE id=" . (int) $canonical_form_id);
                 if ($result2 && ($row2 = $result2->fetch_assoc())) {
                     $canonical_form = $row2["string"];
                 }
             }
             if (!$canonical_form) {
                 $canonical_form = Functions::canonical_form($string);
             }
             if (empty($canonical_form_ids[$canonical_form])) {
                 $result2 = $mysqli->query("SELECT id FROM canonical_forms WHERE string='" . $mysqli->escape($canonical_form) . "'");
                 if ($result2 && ($row2 = $result2->fetch_assoc())) {
                     $canonical_form_ids[$canonical_form] = $row2["id"];
                 } else {
                     $mysqli->insert("INSERT INTO canonical_forms VALUES (NULL,'" . $mysqli->escape($canonical_form) . "')");
                     $canonical_form_ids[$canonical_form] = $mysqli->insert_id;
                 }
             }
             $query = "UPDATE names SET canonical_form_id=" . (int) $canonical_form_ids[$canonical_form] . " WHERE id=" . (int) $id;
             $mysqli->update($query);
         }
         flush();
         $start += $interval;
     }
     $mysqli->end_transaction();
 }
예제 #4
0
 /**
  * Builds the list of taxon info records for the entries of a search response,
  * filtered according to the matching mode in $strict.
  *
  *  - 'canonical_match': keep an entry when the canonical form of its title equals
  *    $orig_sciname (trimmed, lower-cased);
  *  - 'exact_string': keep an entry when its title itself equals $orig_sciname
  *    (trimmed, lower-cased);
  *  - anything else: keep every entry.
  *
  * Each accepted match in the two filtering modes is also printed as an HTML debug line.
  *
  * @param object $xml          parsed response exposing ->entry items with ->id and ->title
  * @param string $orig_sciname the name that was searched for
  * @param mixed  $strict       matching mode (compared loosely, as before)
  * @return array one get_objects_info() result per accepted entry
  */
 function get_details($xml, $orig_sciname, $strict)
 {
     $taxa = array();
     foreach ($xml->entry as $entry) {
         $keep = true;
         switch ($strict) {
             case 'canonical_match':
                 $keep = strtolower(trim($orig_sciname)) == strtolower(trim(Functions::canonical_form(trim($entry->title))));
                 if ($keep) {
                     print "<br>" . strtolower(trim($orig_sciname)) . " == " . strtolower(trim(Functions::canonical_form(trim($entry->title)))) . " == " . $entry->title . "<br>";
                 }
                 break;
             case 'exact_string':
                 $keep = strtolower(trim($orig_sciname)) == strtolower(trim($entry->title));
                 if ($keep) {
                     print "<br>" . strtolower(trim($orig_sciname)) . " == " . $entry->title . "<br>";
                 }
                 break;
         }
         if ($keep) {
             $taxa[] = self::get_objects_info($entry->id, $entry->title, $orig_sciname);
         }
     }
     return $taxa;
 }
예제 #5
0
 /**
  * Tells whether $sciname looks like a synonym according to the EOL search API.
  *
  * The name is searched (non-exact); if its canonical form equals the canonical
  * form of any returned entry title the name is treated as "not a synonym".
  * When the first entry does not match, its id is echoed as debug output.
  *
  * Returns false when the lookup fails or the response cannot be parsed, so a
  * transport/parse problem is never reported as "synonym" (previously an
  * unparseable response fell through to `true` after dereferencing `false`).
  *
  * NOTE(review): $sciname is appended to the query string as-is; confirm that
  * Functions::lookup_with_cache() (or the caller) takes care of URL-encoding.
  *
  * @param string $sciname scientific name to check
  * @return bool true when none of the returned titles matches the name
  */
 private function is_sciname_synonym($sciname)
 {
     /*
     Squatarola squatarola
     http://eol.org/api/search/1.0.xml?q=Xanthopsar+flavus&page=1&exact=false&filter_by_taxon_concept_id=&filter_by_hierarchy_entry_id=&filter_by_string=&cache_ttl=
     http://eol.org/api/pages/1.0.xml?batch=false&id=686274&images_per_page=0&images_page=0&videos_per_page=0&videos_page=0&sounds_per_page=0&sounds_page=0&maps_per_page=0&maps_page=0&texts_per_page=0&texts_page=0&iucn=false&subjects=overview&licenses=all&details=false&common_names=false&synonyms=true&references=false&taxonomy=false&vetted=0&cache_ttl=&language=en
     */
     $search_call = "http://eol.org/api/search/1.0.xml?q=" . $sciname . "&page=1&exact=false&filter_by_taxon_concept_id=&filter_by_hierarchy_entry_id=&filter_by_string=&cache_ttl=";
     $response = Functions::lookup_with_cache($search_call, array('timeout' => 30, 'expire_seconds' => false, 'resource_id' => 'eol_api'));
     if (!$response) {
         return false;
     }
     $xml = simplexml_load_string($response);
     if ($xml === false) {
         // unparseable response: same answer as a failed lookup
         return false;
     }
     $sciname = Functions::canonical_form($sciname);
     $first_entry = isset($xml->entry[0]) ? $xml->entry[0] : null;
     if ($first_entry !== null && $sciname == Functions::canonical_form($first_entry->title)) {
         return false;
     }
     echo "\n" . ($first_entry !== null ? $first_entry->id : "") . "\n";
     $titles = array();
     foreach ($xml->entry as $entry) {
         $titles[] = Functions::canonical_form($entry->title);
     }
     // loose comparison kept on purpose, matching the == used for the first entry
     return !in_array($sciname, $titles);
 }
 /**
  * Walks the taxa of the Mushroom Observer resource file and hands the
  * Wikipedia URL derived from each canonical name to get_triple().
  *
  * Progress ("<n> of <total>: <name>") is echoed for every taxon. Nothing is
  * done when the resource file cannot be fetched.
  *
  * @param mixed $wrong_urls passed through unchanged to get_triple()
  */
 private function process_mushroom_observer_list($wrong_urls)
 {
     $file = Functions::lookup_with_cache($this->mushroom_observer_eol, $this->download_options);
     if (!$file) {
         return;
     }
     $xml = simplexml_load_string($file);
     $total = count($xml->taxon);
     $position = 0;
     foreach ($xml->taxon as $taxon) {
         $position++;
         $dwc_fields = $taxon->children("http://rs.tdwg.org/dwc/dwcore/");
         // Dublin Core children are fetched as before but not used below
         $dc_fields = $taxon->children("http://purl.org/dc/elements/1.1/");
         $sciname = Functions::canonical_form(Functions::import_decode($dwc_fields->ScientificName));
         echo "\n{$position} of {$total}: {$sciname}";
         $wikipedia_url = "http://en.wikipedia.org/wiki/" . str_replace(" ", "_", $sciname);
         self::get_triple($wikipedia_url, $wrong_urls);
     }
 }
 /**
  * Reads the tab-separated vernacular names file and writes one VernacularName
  * per distinct (taxon, name, language) combination to the archive.
  *
  * Column 0 is the scientific name (reduced to its canonical form), column 1 the
  * common name and column 3 the language, which is mapped through get_language().
  * Lines whose common name, canonical name or taxon identifier (looked up in
  * $this->taxa_all) is empty are skipped.
  */
 private function get_vernacular_names()
 {
     $download_options = array('timeout' => 4800, 'download_attempts' => 5);
     $local_copy = Functions::save_remote_file_to_local($this->vernacular_path, $download_options);
     foreach (new FileIterator($local_copy, true) as $line) {
         if (!$line) {
             continue;
         }
         // split into columns and trim each of them
         $columns = array_map('trim', explode("\t", trim($line)));
         $common_name = @$columns[1];
         $sciname = Functions::canonical_form(trim(@$columns[0]));
         $taxon_id = @$this->taxa_all[$sciname]['Identifier'];
         if ($common_name == '' || $taxon_id == '' || $sciname == '') {
             continue;
         }
         $language = self::get_language(@$columns[3]);
         $vernacular = new \eol_schema\VernacularName();
         $vernacular->taxonID = $taxon_id;
         $vernacular->vernacularName = (string) $common_name;
         $vernacular->language = $language;
         // de-duplicate on the taxon / name / language triple
         $dedupe_key = md5("{$vernacular->taxonID}|{$vernacular->vernacularName}|{$vernacular->language}");
         if (isset($this->vernacular_name_ids[$dedupe_key])) {
             continue;
         }
         $this->archive_builder->write_object_to_file($vernacular);
         $this->vernacular_name_ids[$dedupe_key] = 1;
     }
 }
예제 #8
0
 /**
  * Appends one tab-separated taxon row for $rec to $this->taxon_tab_file.
  *
  * The scientific name is taken from $rec['sciname2'] when set, else from
  * $rec['sciname'], else left empty (previously the variable was undefined in
  * that case). The whole row is assembled first and written with a single
  * fwrite(), so a failure while building it can no longer leave a partial line
  * or an open handle behind. The bytes written are the same as before.
  *
  * @param array $rec taxon record; reads 'taxon_id', 'rank', 'sciname2'/'sciname'
  *                   and the 'name' of the 'ancestry' ranks listed below
  */
 private function generate_taxon_extension_for_dwca($rec)
 {
     /*
     <field index="0" term="http://rs.tdwg.org/dwc/terms/taxonID"/>
     <field index="2" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
     <field index="3" term="http://rs.tdwg.org/dwc/terms/taxonRank"/>
     http://rs.tdwg.org/ontology/voc/TaxonRank#Subfamily
     http://rs.tdwg.org/ontology/voc/TaxonRank#Tribe
     http://rs.tdwg.org/ontology/voc/TaxonRank#Subtribe
     <field index="4" term="http://rs.tdwg.org/dwc/terms/genus"/>
     <field index="5" term="http://rs.tdwg.org/dwc/terms/subgenus"/>
     http://rs.tdwg.org/ontology/voc/TaxonRank#Species
     */
     // taxonID  scientificName  taxonRank   Subfamily   Tribe   Subtribe    genus   subgenus    Species
     $scientificName = "";
     if ($val = @$rec['sciname2']) {
         $scientificName = $val;
     } elseif ($val = @$rec['sciname']) {
         $scientificName = $val;
     }
     $columns = array($rec['taxon_id'], $scientificName, $rec['rank']);
     foreach (array('subfamily', 'tribe', 'subtribe', 'genus', 'subgenus') as $rank) {
         // missing ranks become empty columns
         $columns[] = @$rec['ancestry'][$rank]['name'];
     }
     // Species column: "Genus (Subgenus) epithet", built only when a species name is present
     $final_species = "";
     if ($species = @$rec['ancestry']['species']['name']) {
         $final_species = Functions::canonical_form($rec['ancestry']['genus']['name']);
         if ($val = @$rec['ancestry']['subgenus']['name']) {
             $final_species .= " (" . Functions::canonical_form($val) . ")";
         }
         $final_species .= " {$species}";
     }
     $columns[] = $final_species;
     if ($OUT = Functions::file_open($this->taxon_tab_file, "a")) {
         fwrite($OUT, implode("\t", $columns) . "\n");
         fclose($OUT);
     }
 }
예제 #9
0
 /**
  * Blanks every ancestry field of $taxon that merely repeats the taxon's own name.
  *
  * A taxon's scientific name should not also appear as one of its ancestors, so
  * kingdom, phylum, class, order, family and genus are each set to '' when they
  * equal the canonical form of $taxon->scientificName.
  *
  * @param object $taxon taxon object with scientificName and the six rank properties
  * @return object the same $taxon, possibly modified
  */
 private function check_sciname_ancestry_values($taxon)
 {
     $canonical = Functions::canonical_form($taxon->scientificName);
     foreach (array('kingdom', 'phylum', 'class', 'order', 'family', 'genus') as $rank) {
         if ($taxon->{$rank} == $canonical) {
             $taxon->{$rank} = '';
         }
     }
     return $taxon;
 }
 /**
  * Returns a taxon identifier for a Darwin Core record.
  *
  * The record's own taxonID wins. Without one, the identifier is derived from
  * the scientific name (canonical form, spaces replaced by underscores) or,
  * failing that, the md5 of the first non-empty value among genus, family,
  * order, class, phylum and kingdom. If none of these is available the script
  * terminates via exit().
  *
  * @param array $rec record keyed by Darwin Core term URIs
  * @return string
  */
 private function get_taxon_id($rec)
 {
     $taxon_id = trim((string) $rec["http://rs.tdwg.org/dwc/terms/taxonID"]);
     if ($taxon_id) {
         return $taxon_id;
     }
     $sciname = trim((string) $rec["http://rs.tdwg.org/dwc/terms/scientificName"]);
     if ($sciname) {
         return str_replace(" ", "_", Functions::canonical_form($sciname));
     }
     // fall back to the lowest available rank, hashed
     $rank_terms = array(
         "http://rs.tdwg.org/dwc/terms/genus",
         "http://rs.tdwg.org/dwc/terms/family",
         "http://rs.tdwg.org/dwc/terms/order",
         "http://rs.tdwg.org/dwc/terms/class",
         "http://rs.tdwg.org/dwc/terms/phylum",
         "http://rs.tdwg.org/dwc/terms/kingdom",
     );
     foreach ($rank_terms as $term) {
         $rank_value = trim((string) $rec[$term]);
         if ($rank_value) {
             return md5($rank_value);
         }
     }
     exit("\n got it \n");
 }
예제 #11
0
 /**
  * Adds a fixed set of taxa, with their body-length ranges, that are entered by hand.
  *
  * For every hard-coded record a taxon is created through
  * create_instances_from_taxon_object() and its length is then passed to
  * prepare_length_structured_data().
  */
 private function manuall_add_taxon()
 {
     $records = array(
         array("sciname" => "Cordulegaster diastatops", "length" => "60-65", "url" => "http://www.americaninsects.net/d/cordulegaster-diastatops.html"),
         array("sciname" => "Cordulegaster bilineata", "length" => "60-65", "url" => "http://www.americaninsects.net/d/cordulegaster-diastatops.html"),
         array("sciname" => "Enallagma cyathigerum", "length" => "29-40", "url" => "http://americaninsects.net//d/enallagma-cyathigerum.html"),
         array("sciname" => "Enallagma boreale", "length" => "28-36", "url" => "http://americaninsects.net//d/enallagma-cyathigerum.html"),
     );
     foreach ($records as $record) {
         $sciname = Functions::canonical_form($record["sciname"]);
         $taxon = array(
             "sciname" => $sciname,
             "taxon_id" => str_replace(" ", "_", $sciname),
             "source" => $record["url"],
         );
         self::create_instances_from_taxon_object($taxon);
         // the length is attached only after the taxon itself has been created
         $taxon["lengths"] = array($record["length"]);
         self::prepare_length_structured_data($taxon);
     }
 }
예제 #12
0
 /**
  * Maps a gallery caption to the scientific name it should be filed under.
  *
  * Resolution order, after remove_parenthesis() has been applied:
  *  1. hand-curated captions with a fixed answer (exact match);
  *  2. captions mentioning "Sargasso Seaweed" or " Algae" (case-insensitive);
  *  3. listed captions whose name comes BEFORE the first ';' (or ','): the leading
  *     part is reduced to its canonical form;
  *  4. everything else: " family" is removed and the part AFTER the first ';'
  *     (or ',') is used when there is one, else the whole caption, again reduced
  *     to its canonical form.
  *
  * Behaviour change: exact captions are now checked before the substring
  * heuristics. Previously "Branching Red Algae Amphiroa species" and
  * "Branching Red Algae - Amphiroa valonioides" could never reach their own
  * branches because the " Algae" test caught them first and returned "Algae".
  * No other caption in the table contains either substring, so all other
  * results are unchanged.
  *
  * @param string $string caption text
  * @return string scientific name, or "" when the caption is deliberately unmapped
  */
 private function get_sciname($string)
 {
     $string = self::remove_parenthesis($string);

     // caption => scientific name ("" means: known caption, intentionally no taxon)
     $exact = array(
         "Crustaceans; phyllum Arthropoda." => "Arthropoda",
         "Sea Cucumbers, Urchins and Stars: Echinodermata" => "Echinodermata",
         "Bryozoans, aka Ectoprocta" => "Ectoprocta",
         "Pocillopora species close" => "Pocillopora",
         "Zigzag Oyster sex" => "Lopha folium",
         "Unidentified Seaweed" => "Algae",
         "&quot;Phallic Coral&quot;, Psammocora species" => "Psammocora",
         "Spikeweed, Actinotrichia fragilis maybe..." => "Actinotrichia fragilis",
         "Branching Red Algae Amphiroa species" => "Amphiroa",
         "Avrainvillea species on Koh Phangan, Thailand" => "Avrainvillea",
         "Branching Red Algae - Amphiroa valonioides" => "Amphiroa valonioides",
         "Bryopsis minor with communal sea squirt" => "Bryopsis minor",
         "Bryopsis on soft coral" => "Bryopsis",
         "Other Monocle Bream" => "Scolopsis bilineata",
         "Crimson Soldierfish Myripristis murdjan" => "Myripristis murdjan",
         "Jans&#8217;s Pipefish; Doryhamphus janssi" => "Doryhamphus janssi",
         "Soldier &amp; Squirrelfish; Holocentridae" => "Holocentridae",
         "Other Carangidae" => "Carangidae",
         "Reef Cuttlefish mating behaviour" => "Sepiida",
         "Other Wrasse" => "Labridae",
         "Longnose Parrotfish; Hipposcarus-harid" => "Hipposcarus harid",
         "Blue spotted Flathead; Platycephalus-caeruleopunctatus" => "Platycephalus caeruleopunctatus",
         "Other Rhizostome Jellyfish" => "Rhizostomae",
         "Other Annelid Worms" => "Annelida",
         "Other Octocorals" => "Octocorallia",
         "Other Sea Cucumbers" => "Echinodermata",
         "Checkered Snapper Lutjanus decussatus" => "Lutjanus decussatus",
         "Other Sea Anemones" => "Actiniaria",
         "Blotched Goby Coryphopterus inframaculatus" => "Coryphopterus inframaculatus",
         "One-spot Snapper Lutjanus monostigma" => "Lutjanus monostigma",
         "Lined Fang Blenny; Meiacanthus-lineatus" => "Meiacanthus lineatus",
         "Reef Octopus encounter May 2010" => "Octopus",
         "Comb Jellys; Phyllum Ctenophora" => "Ctenophora",
         "Stony Coral Heteropsammia eupammides" => "Heteropsammia eupammides",
         "Stony Coral Balanophyllia elegans" => "Balanophyllia elegans",
         "Giant clam with Montipora coral surrounding." => "Acropora",
         "bakaruda-n-bigeyes" => "Sphyraena",
         "Bandfish Acanthocepola species" => "Acanthocepola",
         "Snakefish Trachinocephalus myops" => "Trachinocephalus myops",
         "Deep bodied Silverbelly Gerres erythrourus" => "Gerres erythrourus",
         "Ponyfish Leiognathus fasciatus" => "Leiognathus fasciatus",
         "Parapercis species we think" => "Parapercis",
         "Moorish Idol Zanclus cornutus" => "Zanclus cornutus",
         "Large Shrimp Goby Cryptocentrus species" => "Cryptocentrus",
         "Green Shrimp Goby Cryptocentrus octafasciatus" => "Cryptocentrus octafasciatus",
         "Flagfin Shrimp Goby Mahidolia mystacina with Shrimp" => "Mahidolia mystacina",
         "Flagfin Shrimp Goby Mahidolia mystacina" => "Mahidolia mystacina",
         "blenny-head" => "Blenniidae",
         "blenny-head-2" => "Blenniidae",
         "blenny-head-3" => "Blenniidae",
         "blenny-upper-body" => "Blenniidae",
         "false-cleanerfish-2" => "Aspidontus taeniatus",
         "false-cleanerfish-aspidontus-taeniatus" => "Aspidontus taeniatus",
         "blenny-omobranchus-sp-uuh" => "Omobranchus",
         "rabbitfish-juve-demise" => "Siganus",
         "rabbitfish-juves" => "Siganus",
         "rabbitfish-juves-2" => "Siganus",
         "rabbitfish-juves-en-masse" => "Siganus",
         "Unidentified Goby" => "Gobiidae",
         "Unidentified Goby tail" => "Gobiidae",
         "Shrimp Gobies unidentified" => "Gobiidae",
         "Shrimp Goby unidentified" => "Gobiidae",
         "Drab Shrimp Goby with Shrimp" => "Gobiidae",
         "pallid-pipefish-uuh" => "Syngnathinae",
         "slender-pipefish-2" => "Syngnathinae",
         "squirrelfish-red-1" => "Holocentrus",
         "squirrelfish-red-3" => "Holocentrus",
         "giant-trevally-caranx-ignobilis" => "Caranx ignobilis",
         "trevally-fringe-finned-pantolobus-radiatus" => "Pantolobus radiatus",
         "trevally-golden-juv-gnathanodon-speciosus" => "Gnathanodon speciosus",
         "trevally-fringe-finned-2" => "Caranx",
         "slender-pipefish-trachyrhampus-longirostris-1" => "Trachyrhampus longirostris",
         "Platycephalus-caeruleopunctatus" => "Platycephalus caeruleopunctatus",
         "Unidentified Juvenile" => "",
         "Small juvenile fish with Sea Pen" => "",
         "Unidentified Hard Coral" => "Scleractinia",
         "Unidentified bivalve Mollusc" => "Bivalvia",
         "Bivalve with encrusting Sponge and Barnacles" => "Bivalvia",
         "Large Mussel, free standing out on the sand." => "Bivalvia",
         "Unidentified Clam; small" => "Bivalvia",
         "Unidentified Clam" => "Bivalvia",
     );
     if (isset($exact[$string])) {
         return $exact[$string];
     }

     // broad substring heuristics, only for captions without a curated answer
     if (stripos($string, "Sargasso Seaweed") !== false) {
         return "Sargassum";
     }
     if (stripos($string, " Algae") !== false) {
         return "Algae";
     }

     // captions written as "<name>; <description>" or "<name>, <description>"
     $name_comes_first = array(
         "Scythozoa; other Jellyfish",
         "Pelagidae; Sea nettles",
         "Ellisellidae fam; Wire Coral",
         "Pennatulacea; Sea Pens",
         "Gorgonacea; Sea Fans/Whip Corals",
         "Ellisellidae; Sea Fans.",
         "Echinigorgia; Sea Fans",
         "Nephtheidae; more Soft corals",
         "Alcyoniidae; some Soft Corals",
         "Heteractis Anemone, unknown species",
         "Anthopleura species, perhaps",
         "Dardanus lagapodes; Hermit Crab",
         "Dardanus megistos; Hermit Crab",
         "Oceanapia sagittaria; Porifera.",
     );
     if (in_array($string, $name_comes_first)) {
         $temp = explode(";", $string);
         if (count($temp) == 1) {
             $temp = explode(",", $string);
         }
         if ($val = @$temp[0]) {
             return Functions::canonical_form($val);
         }
         return "";
     }

     // default: "<description>; <name>" (or comma), else the caption itself
     $string = trim(str_ireplace(" family", "", $string));
     $temp = explode(";", $string);
     if (count($temp) == 1) {
         $temp = explode(",", $string);
     }
     if ($val = @$temp[1]) {
         return Functions::canonical_form($val);
     }
     return Functions::canonical_form($temp[0]);
 }
예제 #13
0
 /**
  * Derives the display name, canonical form and taxon id from a scientific name
  * that may carry <i>…</i> markup.
  *
  * When italic segments are present, the first three (at most) are trimmed and
  * joined with spaces to form the canonical form ("ssp. " removed), and the
  * markup, double spaces and &rsquo; entities are cleaned out of the name.
  * Without a usable italic-derived value the canonical form comes from
  * Functions::canonical_form(). The taxon id is the lower-cased canonical form
  * with spaces replaced by underscores.
  *
  * @param string $scientific_name raw name, possibly containing <i> tags
  * @return array array($scientific_name, $canonical_form, $taxon_id)
  */
 private static function evaluate_scientific_name($scientific_name)
 {
     $canonical_form = "";
     if (preg_match_all("/<i>(.*?)<\\/i>/i", trim($scientific_name), $italics, PREG_SET_ORDER)) {
         $parts = array();
         foreach (array_slice($italics, 0, 3) as $italic) {
             $parts[] = trim($italic[1]);
         }
         $canonical_form = str_replace("ssp. ", "", implode(" ", $parts));
         // strip the markup from the name itself
         $scientific_name = str_ireplace(array("<i>", "</i>"), "", $scientific_name);
         $scientific_name = str_replace("  ", " ", $scientific_name);
         $scientific_name = str_ireplace("&rsquo;", "'", $scientific_name);
     }
     if (!$canonical_form) {
         $canonical_form = Functions::canonical_form($scientific_name);
     }
     $taxon_id = str_replace(" ", "_", strtolower($canonical_form));
     return array($scientific_name, $canonical_form, $taxon_id);
 }
 /**
  * Turns a name into a taxon id by replacing spaces with underscores.
  *
  * Names containing " sp" (case-insensitive) are used verbatim; all others are
  * first reduced to their canonical form.
  *
  * @param string $name
  * @return string
  */
 private function get_taxon_id($name)
 {
     $label = is_numeric(stripos($name, " sp")) ? $name : Functions::canonical_form($name);
     return str_ireplace(" ", "_", $label);
 }
예제 #15
0
 /**
  * Looks a name up through the EOL exact-match search API.
  *
  * @param string $name name to search for; appended to the query string as given
  * @return string|false|null canonical form of the first result's title,
  *                           false when the search reports no results,
  *                           null when the lookup itself returns nothing
  */
 private function name_exists_in_eol($name)
 {
     $search_url = "http://eol.org/api/search/1.0.json?exact=true&q=" . $name;
     $json = Functions::lookup_with_cache($search_url, $this->download_options);
     if (!$json) {
         return null;
     }
     $taxon = json_decode($json, true);
     if (intval($taxon["totalResults"]) <= 0) {
         return false;
     }
     return Functions::canonical_form($taxon["results"][0]["title"]);
 }
예제 #16
0
 /**
  * Splits a species page into its topic sections and writes each section as a
  * text MediaResource to the archive.
  *
  * Steps:
  *  1. locate the topic menu (the run of "#anchor" links that starts with
  *     "#intro" or "#dist" and ends at the closing </h3>);
  *  2. split the menu on "-", drop language/cross-page links, and read the anchor
  *     name and title of every remaining link;
  *  3. for each anchor, cut the page text between its <a name=...> and the next
  *     anchor's <a name=...> (or "</ul>" for the last one), trying quoted and
  *     unquoted attribute spellings;
  *  4. resolve a subject for each title and write the cleaned-up text.
  *
  * Progress and "alert: investigate NN" diagnostics are echoed throughout.
  *
  * Fixes relative to the previous version:
  *  - language links were nulled by positional counter although array_filter()
  *    had preserved the original keys, so the wrong element could be hit;
  *  - a menu item whose anchor name or title could not be parsed silently reused
  *    the values of the previous item; such items are now skipped;
  *  - for the last section the unquoted end marker was left stale/undefined;
  *    it is now "</ul>" like the quoted one;
  *  - all anchor patterns are escaped with preg_quote() (the unquoted variants
  *    were not escaped at all), and the stored description starts with the raw
  *    anchor text rather than its regex-escaped form.
  *
  * @param array  $rec       page record; reads 'url', 'taxon_id' and 'sciname'
  * @param string $html      page source
  * @param array  $agent_ids agent ids attached to every written object
  */
 private function get_texts($rec, $html, $agent_ids)
 {
     $match = false;
     /* 2 possible start hyperlinks (topic) */
     if (preg_match("/<a href=\"#intro\">(.*?)<\\/h3>/ims", $html, $arr) || preg_match("/<a href=#intro>(.*?)<\\/h3>/ims", $html, $arr)) {
         $match = $arr[1];
         $term = "intro";
     } elseif (preg_match("/<a href=\"#dist\">(.*?)<\\/h3>/ims", $html, $arr) || preg_match("/<a href=\\#dist\\>(.*?)<\\/h3>/ims", $html, $arr)) {
         $match = $arr[1];
         $term = "dist";
     } else {
         echo "\n alert: investigate 30: -- {$rec['url']}\n";
     }
     if (!$match) {
         echo "\n alert: investigate 01: {$rec['url']}\n";
         return;
     }

     // rebuild the first link (consumed by the regex) and split the menu into items
     $string = '<a href="#' . $term . '">' . $match;
     echo "\n" . $string . "\n";
     $items = explode("-", $string);
     $items = array_filter(array_map('trim', $items));
     // trimmed, empty entries removed (original keys are preserved)
     print_r($items);

     // remove language links
     $unwanted_link_markers = array(
         "Versi&oacute;n en Espa&ntilde;ol",
         "Version en Espa&ntilde;ol",
         "en Espa&ntilde;ol",
         'href="Mahogany_borer',
         'href="mahogany_webworm',
         'Traduction Fran&ccedil;aise',
     );
     foreach ($items as $key => $item) {
         foreach ($unwanted_link_markers as $marker) {
             if (stripos($item, $marker) !== false) {
                 $items[$key] = NULL;
                 break;
             }
         }
     }
     $items = array_values(array_filter($items));

     // strip tags
     foreach ($items as $key => $item) {
         $items[$key] = strip_tags($item, "<a>");
     }
     $items = array_values(array_filter($items));
     print_r($items);

     // manual adjustment
     $items = self::topic_order_adjustment($items, $rec["url"]);

     // read anchor name and title of every menu link
     $connections = array();
     foreach ($items as $item) {
         $name = null;
         $title = null;
         if (preg_match("/<a href=\"#(.*?)\"/ims", $item, $arr) || preg_match("/<a href=#(.*?)>/ims", $item, $arr)) {
             $name = $arr[1];
         } else {
             echo "\n alert: investigate 02: [{$item}] -- {$rec['url']}\n";
         }
         if (preg_match("/>(.*?)</ims", $item, $arr)) {
             $title = $arr[1];
         } else {
             echo "\n alert: investigate 03: [{$item}] -- {$rec['url']}\n";
         }
         if ($name === null || $title === null) {
             // unparseable item: already reported above, do not reuse the previous item's values
             continue;
         }
         $connections[] = array("name" => $name, "title" => $title);
     }

     // cut the page text belonging to each anchor
     echo "\n connections:\n";
     $count = count($connections);
     foreach ($connections as $i => $conn) {
         $start_quoted = '<a name="' . $conn["name"] . '"';
         $start_bare = '<a name=' . $conn["name"];
         if ($i + 1 == $count) {
             // the last section runs to the end of the list
             $end_quoted = "</ul>";
             $end_bare = "</ul>";
         } else {
             $next_name = $connections[$i + 1]["name"];
             $end_quoted = '<a name="' . $next_name . '"';
             $end_bare = '<a name=' . $next_name;
         }
         echo "\n {$start_quoted} -- {$end_quoted} \n";
         // same order of attempts as before: quoted/quoted, bare/bare, quoted/bare, bare/quoted
         $attempts = array(
             array($start_quoted, $end_quoted),
             array($start_bare, $end_bare),
             array($start_quoted, $end_bare),
             array($start_bare, $end_quoted),
         );
         $found = false;
         foreach ($attempts as $pair) {
             $pattern = "/" . preg_quote($pair[0], "/") . "(.*?)" . preg_quote($pair[1], "/") . "/ims";
             if (preg_match($pattern, $html, $arr)) {
                 $connections[$i]["desc"] = $start_quoted . $arr[1];
                 $found = true;
                 break;
             }
         }
         if (!$found) {
             echo "\n alert: investigate 04: [{$start_quoted}][{$end_quoted}]\n";
         }
     }

     $this->text_count += count($connections);
     echo "\n article count per taxon: " . count($connections);
     $reference_ids = self::prepare_object_refs($connections);
     foreach ($connections as $conn) {
         $title = trim($conn["title"]);
         if (is_numeric(stripos($title, "References"))) {
             continue;
         }
         // sections that could not be cut out have no "desc" and are dropped further down
         $description = isset($conn["desc"]) ? $conn["desc"] : "";
         $description = str_ireplace('<a href="#top" class="backtop">(Back to Top)</a>', '', $description);
         $description = strip_tags($description, "<p><br><i><ul><li><table><tr><td><a><img>");
         // make image and cross-page links absolute
         $path_parts = pathinfo($rec["url"]);
         $description = str_ireplace('<img src="', '<img src="' . $path_parts["dirname"] . '/', $description);
         $description = str_ireplace('<a href="../../', '<a href="http://entnemdept.ufl.edu/creatures/', $description);
         if (!($subject = @$this->subject[$title])) {
             if (!($subject = self::other_subject_assignment($title))) {
                 // two pages use the taxon name itself as a section title
                 if (in_array($rec["url"], array("http://entnemdept.ufl.edu/creatures/misc/gastro/snail_eating_snails.htm"))) {
                     if ($title == Functions::canonical_form($rec["sciname"])) {
                         echo "\n [{$title}] EXACT taxon for the page \n";
                         $subject = $this->SPM . "#Morphology";
                         // hasn't divided the diff topics yet
                     } else {
                         echo "\n [{$title}] not exact taxon for the page \n";
                         echo "\n undefined subject 01: [{$title}][{$description}]\n";
                         continue;
                     }
                 } elseif (in_array($rec["url"], array("http://entnemdept.ufl.edu/creatures/misc/jumping_spiders.htm"))) {
                     if ($title == Functions::canonical_form($rec["sciname"])) {
                         echo "\n [{$title}] EXACT taxon for the page \n";
                         $subject = $this->SPM . "#Description";
                         if (is_numeric(stripos($description, "Synonym"))) {
                             $subject = $this->EOL . "#Taxonomy";
                         }
                     } else {
                         echo "\n [{$title}] not exact taxon for the page \n";
                         echo "\n undefined subject 02: [{$title}][{$description}]\n";
                         continue;
                     }
                 } else {
                     echo " --- will continue...[{$title}][{$subject}]";
                     continue;
                 }
             }
             echo "\n final subject: [{$title}][{$subject}]\n";
         }
         // remove row before <p>
         $pos = stripos($description, "<p>");
         if (is_numeric($pos) && $pos < 100) {
             $description = trim(substr($description, $pos + 3, strlen($description)));
         }
         echo "\n {$title}: [{$description}] \n";
         $identifier = (string) $rec["taxon_id"] . "_" . str_replace(" ", "_", $title);
         if (in_array($identifier, $this->do_ids) || !$description) {
             continue;
         } else {
             $this->do_ids[] = $identifier;
         }
         $mr = new \eol_schema\MediaResource();
         if ($reference_ids) {
             $mr->referenceID = implode("; ", $reference_ids);
         }
         if ($agent_ids) {
             $mr->agentID = implode("; ", $agent_ids);
         }
         $mr->taxonID = (string) $rec["taxon_id"];
         $mr->identifier = $identifier;
         $mr->type = "http://purl.org/dc/dcmitype/Text";
         $mr->language = 'en';
         $mr->format = "text/html";
         $mr->furtherInformationURL = (string) $rec['url'];
         $mr->CVterm = (string) $subject;
         $mr->Owner = "";
         $mr->title = (string) $title;
         $mr->UsageTerms = "http://creativecommons.org/licenses/by-nc-sa/3.0/";
         $mr->description = (string) $description;
         $this->archive_builder->write_object_to_file($mr);
     }
 }
 /**
  * Picks the record from $tcs whose 'scientificName' best matches $sciname.
  *
  * Three passes are tried in order; the first hit wins:
  *   1. the record name equals $sciname as given,
  *   2. the record name equals the canonical form of $sciname,
  *   3. the canonical form of the record name equals the canonical form of $sciname.
  *
  * @param mixed  $tcs     iterable of records readable via ['scientificName']; anything falsy yields false
  * @param string $sciname name to look for
  * @return mixed the matching record, or false when nothing matches
  */
 private function get_the_right_tc_record($tcs, $sciname)
 {
     if (!$tcs) {
         return false;
     }
     // pass 1: exact match on the name as given
     foreach ($tcs as $tc) {
         if ($tc['scientificName'] == $sciname) {
             return $tc;
         }
     }
     // The canonical form of the searched name does not depend on the record,
     // so compute it once instead of once per record per pass.
     $canonical_sciname = Functions::canonical_form($sciname);
     // pass 2: record name against the canonical searched name
     foreach ($tcs as $tc) {
         if ($tc['scientificName'] == $canonical_sciname) {
             return $tc;
         }
     }
     // pass 3: canonical record name against the canonical searched name
     foreach ($tcs as $tc) {
         if (Functions::canonical_form($tc['scientificName']) == $canonical_sciname) {
             return $tc;
         }
     }
     return false;
 }
예제 #18
0
 /**
  * Groups species names under their genus from a list of HTML line fragments.
  *
  * A line that still contains "strong" after stripping every other tag (and is
  * not a nomen dubium) starts a new genus; each following non-empty line is
  * treated as a species of that genus. Lines wrapped in a size "-2" or size "1"
  * <font> tag are ignored
  * (e.g. http://www.ntnu.no/ub/scorpion-files/buthidae.php - Buthoscorpio Werner, 1936).
  *
  * @param array  $items HTML fragments, one per line; empty entries are dropped
  * @param string $url   not used by this method
  * @return array genus => list of species names
  */
 private function process_line_items($items, $url)
 {
     $species_by_genus = array();
     // array_filter() without a callback removes the empty/null entries
     foreach (array_filter($items) as $line) {
         $has_small_font = preg_match_all("/<font size=\"-2\">(.*?)<\\/font>/ims", $line, $ignored)
             || preg_match_all("/<font size=\"1\">(.*?)<\\/font>/ims", $line, $ignored);
         if ($has_small_font) {
             continue;
         }
         $line = strip_tags($line, "<strong>");
         $starts_genus = is_numeric(stripos($line, "strong")) && !self::is_nomen_dubium($line);
         if ($starts_genus) {
             $genus = self::format_utf8(trim(strip_tags($line)));
             continue;
         }
         // species lines only make sense once a genus has been seen
         if (!isset($genus) || !trim($line)) {
             continue;
         }
         // the species line abbreviates the genus as "<first letter>." - drop that abbreviation
         $abbreviation = substr($genus, 0, 1) . ".";
         $remainder = trim(str_replace($abbreviation, "", $line));
         $species = strip_tags(Functions::canonical_form($genus) . " " . $remainder);
         // nothing left besides the genus means there is no species on this line
         if ($species != Functions::canonical_form($genus) . " ") {
             $species_by_genus[$genus][] = self::format_utf8($species);
         }
     }
     return $species_by_genus;
 }
예제 #19
0
 /**
  * Builds the taxon XML for a resource and writes it to
  * CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml".
  *
  * Every name from prepare_taxa_list() is processed unless it has no EOL page.
  * Names listed in the synonymy map are replaced by their 'EOL NAME'; names in
  * the "to be added" list are taken as-is; every other name must be found in
  * EOL either directly, by its canonical form, or without parenthesis.
  * Names that fail the first lookup are logged to the partner text file.
  *
  * @param mixed $resource_id used to name the output XML file
  * @return void
  */
 public static function get_all_taxa_keys($resource_id)
 {
     require_library('CheckIfNameHasAnEOLPage');
     $func = new CheckIfNameHasAnEOLPage();
     $GLOBALS['animal_plant_list'] = self::prepare_animal_plant_list();
     list($taxa_objects, $synonymy, $names_to_be_added) = self::prepare_taxa_list();
     $all_taxa = array();
     $used_collection_ids = array();
     //initialize text file for USFWS
     self::initialize_text_file(DOC_ROOT . self::TEXT_FILE_FOR_PARTNER);
     $total = 0;
     $without_page = 0;
     foreach ($taxa_objects as $name => $taxon) {
         $total++;
         if (!empty($synonymy[$name])) {
             $name = trim($synonymy[$name]['EOL NAME']);
             $taxon['NAME'] = $name;
         } elseif (empty($names_to_be_added[$name])) {
             //filter names. Process only those who already have a page in EOL. Report back to USFWS names not found in EOL
             $name = str_replace(".", "", $name);
             if (!self::name_in_eol($name, $func)) {
                 self::store_name_to_text_file($name);
                 // try the canonical form
                 $canonical = Functions::canonical_form($name);
                 if ($name == $canonical) {
                     $without_page++;
                     continue;
                 }
                 if (self::name_in_eol($canonical, $func)) {
                     echo "\n OK canonical name in EOL: [{$canonical}]\n";
                     $taxon['NAME'] = $canonical;
                     self::store_name_to_text_file($name . " - canonical form of the name found in EOL: {$canonical}");
                 } else {
                     // try name without parenthesis
                     $bare = self::remove_parenthesis($name);
                     if ($name == $bare || !self::name_in_eol($bare, $func)) {
                         $without_page++;
                         continue;
                     }
                     echo "\n OK name without parenthesis in EOL: [{$bare}]\n";
                     $taxon['NAME'] = $bare;
                     self::store_name_to_text_file($name . " - name without parenthesis found in EOL: {$bare}");
                 }
             }
         }
         echo "\n {$total} -- ";
         echo $taxon['NAME'] . " -- ";
         list($page_taxa, $used_collection_ids) = self::get_usfws_taxa($taxon, $used_collection_ids);
         if ($page_taxa) {
             $all_taxa = array_merge($all_taxa, $page_taxa);
         }
     }
     $xml = \SchemaDocument::get_taxon_xml($all_taxa);
     $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
     $OUT = fopen($resource_path, "w");
     if (!$OUT) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
         return;
     }
     fwrite($OUT, $xml);
     fclose($OUT);
     $with_page = $total - $without_page;
     echo "\n\n total = {$total} \n With EOL page = {$with_page} \n No EOL page = {$without_page} \n\n";
 }
예제 #20
0
 /**
  * Extracts taxon names from a free-text association description.
  *
  * The text is split on a type-specific list of separators, each piece is
  * cleaned (boilerplate phrases removed, hard-coded special cases resolved)
  * and pieces that do not look like a name are dropped. The resulting names
  * are also merged into $this->list_of_taxa (kept unique).
  *
  * @param string $str  the description text
  * @param string $type "animals" or "plants"; any other value runs only the generic clean-up
  * @return array list of extracted names (unique)
  */
 private function get_taxa_from_description($str, $type)
 {
     // Defaults so both lists exist even when $type is neither "animals" nor "plants".
     $excluded = array();
     $to_be_removed = array();
     if ($type == "animals") {
         // exclude name with these substrings
         $excluded = array("America", "adult", "European", "record", "prey ", "female", " are ", "English", "kinds ", "possibly", "complex", "cherry ", " spiders", "small ");
         // remove these substrings in names
         $to_be_removed = array("Dead arthropods belonging to", "Leaf-mining larvae of ", "Larvae of leaf-mining ", "are the preferred prey", " from Galactica.", "Larvae of ", "young nymphs", "(?)", "Leaf-mining ", "?", "REY.", "Badgers", "Dead ", "in europe", " larva", " larvae", " nymphs", " nymph", "most commonly", "in N. Amer.", "workers.", "  in", "laboratory.", "group", "Primarily", "juveniles.", "juvenile", " probably ");
     } elseif ($type == "plants") {
         // exclude name with these substrings
         $excluded = array("Unknown", "may apply to nesting site", "may apply to a nesting site");
         // remove these substrings in names
         $to_be_removed = array("Collects pollen and nectar primarily from summer and fall flowering", "and some composites were found in small amounts but possibly came from nectar plants", "Examined nest provisions consisted exclusively of nectar and pollen from flowers of", "Probably collects pollen mainly from the flowers of", "Evidently collects pollen from the flowers of", "Apparently collects pollen only from the flowers of", "Evidently depends mostly upon the pollen and nectar of", "on which it may be an oligolege owing to its slender form and long tongue.", "Apparently collects pollen only from vernal and autumnal flowering", "Apparently an oligolege of summer and fall flowering", "Oligolege of summer and fall flowering", "Presumably an oligolege of the", "Evidently an oligolege of", "Apparently an oligolege of", "Polylege with some preference for flowers of", "Apparently collects pollen from the flowers of", "Apparently strictly oligolectic on", "Apparently may collect pollen from the flowers of", "Collects pollen principally from", "Appears to collect pollen principally from the", "Analyzed pollen stores indicates reliance on", "Apparently mainly dependent upon the pollens of", "Apparently prefers pollen from", "Collects pollen from flowers of", "has been listed as collecting pollen from flowers of", "but visits flowers of", "Recorded from flowers of", "Visits flowers of", "Possibly autumnal flowering", "Presumably autumnal flowering", "Presumably an vernal and autumnal flowering", "Collects pollen from the flowers of", "Possibly an autumnal flowering", "Collects pollen of", "Oligolectic on flowers of", "Polylege with apparent preference for flowers of", "Collects pollen regularly only from", "Collects pollen only from the flowers of", "Primarily associated with flowers of", "Collects pollen and nectar chiefly from", "Collects pollen from early morning opening ligulate", "Collects pollen almost exclusively from ligulate", "Collects pollen from ligulate", 
"Based upon the mouth parts of the female", "Collects pollen primarily from microseridine", "Principal source of pollen is", "Principally gathers pollen from", "Principal pollen source is", "Collects pollen from stephanomerine", "Apparently collects pollen primarily from", "Collects pollen primarily from ligulate", "Most polylectic of all the species of the subgenus", "Presumably an late summer and fall flowering", "Probably oligolectic on a wide range of", "Apparently a polylege with preferences for flowers of", "Polylege with some preference for the flowers of", "mesophytic", "xerophytic", "Oligolectic on uncultivated", "Oligolectic on ligulate", "Possibly oligolectic on", "Polylectic with some preference for the pollens of the", "Presumably gathers pollen from", "Presumably an fall flowering", "which it apparently prefers", "but visits other flowers for nectar", "Apparently oligolectic on", "these and other flowers", "in approximately that order", " in that order", " in the early morning", "Oligolege of ", "for nectar.", "for pollen and nectar", "as the primary source of pollen", "in the fall", " groups", "also present", "presumably for nectar", "Collects pollen from", "Collects pollen");
     }
     $to_be_removed[] = "sp.";
     $to_be_removed[] = "spp.";
     $scinames = array();
     $separators = array(",", ";");
     if ($type == "plants") {
         $separators[] = "including";
         $separators[] = "although";
         $separators[] = "especially";
         $separators[] = "and a secondary preference for";
         $separators[] = "with some preference for flowers of the genus";
         $separators[] = "as well as";
         $separators[] = "and possibly";
         $separators[] = "pollens and one cell was provisioned entirely with pollen from";
         $separators[] = "and secondarily";
         $separators[] = "and various legumes";
         $separators[] = "and most";
         $separators[] = "and is probably an";
         $separators[] = "and small amounts of";
         $separators[] = "Stores pollen of ";
         $separators[] = "and to a lesser extent those of the";
         $separators[] = "and also from";
     } elseif ($type == "animals") {
         $separators[] = "Other predators include";
     }
     // presumably splits $str on every separator - see get_words_from_string()
     $names = self::get_words_from_string($str, $separators);
     foreach ($names as $name) {
         if ($type == "animals") {
             // these two must be caught before " probably " is stripped out below
             if (is_numeric(stripos($name, "Ceresini  probably Stictocephala"))) {
                 $scinames["Ceresini"] = '';
                 $scinames["Stictocephala"] = '';
                 continue;
             }
             if (is_numeric(stripos($name, "Tachiinae probably Sibinia"))) {
                 $scinames["Tachiinae"] = '';
                 $scinames["Sibinia"] = '';
                 continue;
             }
         }
         $name = trim(str_ireplace($to_be_removed, "", $name));
         // skip the piece when it contains any excluded substring
         $is_excluded = false;
         foreach ($excluded as $string) {
             if (is_numeric(stripos($name, $string))) {
                 $is_excluded = true;
                 break;
             }
         }
         if ($is_excluded) {
             continue;
         }
         // ignore if it starts with small char
         if (ctype_lower(substr($name, 0, 1))) {
             continue;
         }
         if (strlen($name) <= 3) {
             continue;
         }
         // if last two chars is ")." - remove "."
         if (substr($name, -2) == ").") {
             $name = substr($name, 0, strlen($name) - 1);
         }
         if ($type == "animals") {
             // expand the abbreviated genus of these known names
             if (is_numeric(strpos($name, "C. "))) {
                 $name = str_replace("C. rosaceana Harr.", "Choristoneura rosaceana Harr.", $name);
                 $name = str_replace("C. fumiferana (Clem.)", "Choristoneura fumiferana (Clem.)", $name);
                 $name = str_replace("C. pinus Free.", "Choristoneura pinus Free.", $name);
             }
         }
         // hard-coded resolutions for specific phrases; a branch that fills $scinames itself ends with "continue"
         if ($type == "plants") {
             if (is_numeric(stripos($name, "Oligolectic on Cucurbita foetidissima"))) {
                 $name = "Cucurbita foetidissima";
             } elseif (is_numeric(stripos($name, "Hemizonia paniculata. Males and females"))) {
                 $name = "Hemizonia paniculata";
             } elseif (is_numeric(stripos($name, "collected pollen mainly from Vaccinium stramineum"))) {
                 $name = "Vaccinium stramineum";
             } elseif (is_numeric(stripos($name, "96 per cent Faboideae"))) {
                 $name = "Faboideae";
             } elseif (is_numeric(stripos($name, "per cent from Compositae"))) {
                 $name = "Compositae";
             } elseif (is_numeric(stripos($name, "Principally Camissonia cheiranthifolia cheiranthifolia"))) {
                 $name = "Camissonia cheiranthifolia cheiranthifolia";
             } elseif (is_numeric(stripos($name, "Malvaceous genus Callirhoe"))) {
                 $name = "Callirhoe";
             } elseif (is_numeric(stripos($name, "Malacothrix) and desert shrubs"))) {
                 $name = "Malacothrix";
             } elseif (is_numeric(stripos($name, "Phacelia (collecting pollen in one instance) Raphanus sativus"))) {
                 $scinames["Phacelia"] = '';
                 $scinames["Raphanus sativus"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "primarily from summer and fall flowering Astereae (Compositae)"))) {
                 $scinames["Astereae"] = '';
                 $scinames["Compositae"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Solidago with some preference for flowers of Solidago and Aster"))) {
                 $scinames["Solidago"] = '';
                 $scinames["Aster"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Mimosa and also has been collected at the flowers of Melilotus."))) {
                 $scinames["Mimosa"] = '';
                 $scinames["Melilotus"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Gutierrezia microcephala. Haplopappus heterophyllus."))) {
                 $scinames["Haplopappus heterophyllus"] = '';
                 $scinames["Gutierrezia microcephala"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Cucurbita digitata and Cucurbita palmata of the Digitata group"))) {
                 $scinames["Cucurbita digitata"] = '';
                 $scinames["Cucurbita palmata"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Thysanella. Males have been taken while they were visiting honeydew of a Phylloxera infesting Quercus falcata"))) {
                 $scinames["Thysanella"] = '';
                 $scinames["Quercus falcata"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Solidago. It has also been collected at honey dew of Phylloxera on Quercus alba"))) {
                 $scinames["Quercus alba"] = '';
                 $scinames["Solidago"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Kallstroemia grandiflora. Kallstroemia grandiflora."))) {
                 $scinames["Kallstroemia grandiflora"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "from flowers of Leguminosae and Verbenaceae have been observed"))) {
                 $scinames["Leguminosae"] = '';
                 $scinames["Verbenaceae"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "observed it collecting pollen from Lepidium"))) {
                 $scinames["Lepidium"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Solidago canadensis. In Texas Hurd has taken it at flowers of Chamaesaracha coronopus"))) {
                 $scinames["Solidago canadensis"] = '';
                 $scinames["Chamaesaracha coronopus"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Sida hederacea and may also collect pollen from flowers of Sphaeralcea"))) {
                 $scinames["Sida hederacea"] = '';
                 $scinames["Sphaeralcea"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Camissonia c. aurantiaca Camissonia c. clavaeformis and also occasionally from Camissonia decorticans desertorum"))) {
                 $scinames["Camissonia c. aurantiaca"] = '';
                 $scinames["Camissonia c. clavaeformis"] = '';
                 $scinames["Camissonia decorticans desertorum"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Solidago canadensis Tamarix aralensis"))) {
                 $scinames["Solidago canadensis"] = '';
                 $scinames["Tamarix aralensis"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Microseris nutans and locally obtains pollen from some of the crepidine Compositae such as Crepis occidentalis"))) {
                 $scinames["Microseris nutans"] = '';
                 $scinames["Crepis occidentalis"] = '';
                 $scinames["Compositae"] = '';
                 continue;
             } elseif (preg_match("/\\(originally(.*?)\\)/ims", $name, $tempx)) {
                 // drop every parenthesised remark once an "(originally ...)" note is present
                 $name = trim(preg_replace('/\\s*\\([^)]*\\)/', '', $name));
             } else {
                 // keep what follows a single "flowers of"; with more than one occurrence the name is left alone
                 $tempx = explode("flowers of", $name);
                 if (count($tempx) == 1) {
                     $name = trim($tempx[0]);
                 } elseif (count($tempx) == 2) {
                     $name = trim($tempx[1]);
                 }
             }
         } elseif ($type == "animals") {
             if (is_numeric(stripos($name, "Ladder-backed woodpecker (Dendrocopus scalaris)"))) {
                 $name = "Dendrocopus scalaris";
             } elseif (is_numeric(stripos($name, "<. Pterophoridae"))) {
                 $name = "Pterophoridae";
             } elseif (is_numeric(strpos($name, "  near "))) {
                 $name = str_replace("  near ", "", $name);
             } elseif (is_numeric(strpos($name, " near "))) {
                 $name = str_replace(" near ", "", $name);
             } elseif (is_numeric(stripos($name, "Empoasca fabae (Harr.) Exitianus exitiosus Uhl."))) {
                 $scinames["Empoasca fabae (Harr.)"] = '';
                 $scinames["Exitianus exitiosus Uhl."] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Dolichopodidae and Empididae especially Platypalpus"))) {
                 $scinames["Dolichopodidae"] = '';
                 $scinames["Empididae"] = '';
                 $scinames["Platypalpus"] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Sarcophaga lherminieri R.- Desv. Sarcophaga opifera Coq."))) {
                 $scinames["Sarcophaga lherminieri R.- Desv."] = '';
                 $scinames["Sarcophaga opifera Coq."] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Villa chimaera (O. S.). Villa salebrosa Paint."))) {
                 $scinames["Villa chimaera (O. S.)"] = '';
                 $scinames["Villa salebrosa Paint."] = '';
                 continue;
             } elseif (is_numeric(stripos($name, "Eustala anastera (Walck.) Larinia directa (Hentz)"))) {
                 $scinames["Eustala anastera (Walck.)"] = '';
                 $scinames["Larinia directa (Hentz)"] = '';
                 continue;
             }
         }
         // remove first char if is period "."
         if (substr($name, 0, 1) == ".") {
             $name = trim(substr($name, 1, strlen($name)));
         }
         // exclude if first char is numeric
         if (is_numeric(substr($name, 0, 1))) {
             continue;
         }
         $name = trim(str_replace(" .", "", $name));
         if ($type == "plants") {
             // whole phrases that carry no name at all
             if ($name == "Apparently a polylege with some preference for leguminous flowers" || $name == "Apparently a polylege with no strong preferences" || $name == "Robertson (1929. Flowers and insects") {
                 $name = "";
             }
         }
         // second removal pass: the special cases above may have exposed more boilerplate
         $name = trim(str_ireplace($to_be_removed, "", $name));
         // if name is just 1 word, get canonical
         if (!is_numeric(stripos($name, " "))) {
             $name = Functions::canonical_form($name);
         }
         if ($type == "plants") {
             // "A and B" stands for two names; only the parts are recorded, never the joined string
             foreach (explode(" and ", $name) as $part) {
                 if ($part) {
                     $scinames[$part] = '';
                 }
             }
         } elseif ($name) {
             $scinames[$name] = '';
         }
     }
     // names were collected as array keys, which makes them unique
     $scinames = array_keys($scinames);
     $this->list_of_taxa = array_merge($this->list_of_taxa, $scinames);
     $this->list_of_taxa = array_unique($this->list_of_taxa);
     return $scinames;
 }
예제 #21
0
 /**
  * Reads the original resource XML and writes one text MediaResource
  * ("Associations") per text data object, plus the agent each one credits.
  *
  * Taxa that have at least one data object are stored in $this->taxa keyed by
  * taxon id. Agents and media are de-duplicated through
  * $this->resource_agent_ids and $this->media_ids. The downloaded temporary
  * file is removed at the end, whether or not it could be parsed.
  *
  * @return void
  */
 private function get_texts()
 {
     require_library('connectors/BoldsImagesAPIv2');
     $path = BoldsImagesAPIv2::download_and_extract_remote_file($this->original_resource);
     /* very long text objects, temporarily ignored */
     // the list is constant, so it is built once instead of once per data object
     $problematic_objects = array("http://www.bioimages.org.uk/html/Betula.htm", "http://www.bioimages.org.uk/html/Broadleaved_trees.htm", "http://www.bioimages.org.uk/html/Fagus.htm", "http://www.bioimages.org.uk/html/Pinopsida.htm", "http://www.bioimages.org.uk/html/Poaceae.htm", "http://www.bioimages.org.uk/html/Quercus.htm", "http://www.bioimages.org.uk/html/Salix.htm", "http://www.bioimages.org.uk/html/Trees.htm");
     $xml = false;
     if ($contents = Functions::lookup_with_cache($path, array('timeout' => 172800, 'download_attempts' => 2, 'delay_in_minutes' => 3))) {
         // simplexml_load_string() returns false on malformed XML; do not iterate over that
         $xml = simplexml_load_string($contents);
         if ($xml === false) {
             echo "\n Invalid XML: " . $this->original_resource;
         }
     } else {
         echo "\n Down: " . $this->original_resource;
     }
     if ($xml !== false) {
         $total = count($xml->taxon);
         $i = 0;
         foreach ($xml->taxon as $t) {
             $i++;
             echo "\n {$i} of {$total}";
             // taxa without data objects are skipped entirely
             if (sizeof($t->dataObject) <= 0) {
                 continue;
             }
             $t_dwc = $t->children("http://rs.tdwg.org/dwc/dwcore/");
             $t_dc = $t->children("http://purl.org/dc/elements/1.1/");
             $taxonID = (string) trim($t_dc->identifier);
             $source = self::clean_str("http://www.bioimages.org.uk/html/" . str_replace(" ", "_", Functions::canonical_form($t_dwc->ScientificName)) . ".htm");
             //---------------------------------
             $taxon = new \eol_schema\Taxon();
             $taxon->taxonID = $taxonID;
             $taxon->scientificName = $t_dwc->ScientificName;
             $taxon->kingdom = $t_dwc->Kingdom;
             $taxon->phylum = $t_dwc->Phylum;
             $taxon->class = $t_dwc->Class;
             $taxon->order = $t_dwc->Order;
             $taxon->family = $t_dwc->Family;
             $taxon->furtherInformationURL = $source;
             echo "\n {$taxon->taxonID} - {$taxon->scientificName} [{$source}]";
             if (isset($this->taxa[$taxonID])) {
                 echo " -- already exists";
             } else {
                 $this->taxa[$taxonID] = $taxon;
             }
             //---------------------------------
             foreach ($t->dataObject as $do) {
                 if ($do->dataType != "http://purl.org/dc/dcmitype/Text") {
                     continue;
                 }
                 $t_dc2 = $do->children("http://purl.org/dc/elements/1.1/");
                 $t_dcterms = $do->children("http://purl.org/dc/terms/");
                 //---------------------------
                 $agent_ids = array();
                 $r = new \eol_schema\Agent();
                 $r->term_name = $do->agent;
                 // The role attribute has to sit inside the braces. The previous form
                 // "{$do->agent}['role']" hashed the agent name twice plus the literal
                 // text ['role'], so agents differing only by role shared one identifier.
                 $r->identifier = md5("{$do->agent}|{$do->agent['role']}");
                 $r->agentRole = $do->agent['role'];
                 $r->term_homepage = "http://www.bioimages.org.uk/index.htm";
                 $agent_ids[] = $r->identifier;
                 if (!in_array($r->identifier, $this->resource_agent_ids)) {
                     $this->resource_agent_ids[] = $r->identifier;
                     $this->archive_builder->write_object_to_file($r);
                 }
                 //---------------------------
                 $text_identifier = self::clean_str($t_dc2->identifier);
                 if (in_array($text_identifier, $this->media_ids)) {
                     continue;
                 } else {
                     $this->media_ids[] = $text_identifier;
                 }
                 $mr = new \eol_schema\MediaResource();
                 if ($agent_ids) {
                     $mr->agentID = implode("; ", $agent_ids);
                 }
                 $mr->taxonID = $taxonID;
                 $mr->identifier = $text_identifier;
                 $mr->type = "http://purl.org/dc/dcmitype/Text";
                 //$do->dataType;
                 $mr->language = "en";
                 $mr->format = "text/html";
                 //$do->mimeType;
                 $mr->furtherInformationURL = (string) trim($source);
                 if (in_array($mr->furtherInformationURL, $problematic_objects)) {
                     continue;
                 }
                 $mr->CVterm = "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Associations";
                 $mr->Owner = "BioImages";
                 $mr->title = "Associations";
                 $mr->UsageTerms = "http://creativecommons.org/licenses/by-nc-sa/3.0/";
                 // $mr->audience       = 'Everyone';
                 // $mr->accessURI      = $source;
                 $description = (string) $t_dc2->description;
                 // NOTE(review): utf8_encode() is deprecated as of PHP 8.2 - replace when the runtime is upgraded
                 $description = trim(self::clean_str(utf8_encode($description)));
                 // objects without any text are not written
                 if (!$description) {
                     continue;
                 }
                 $mr->description = $description;
                 $this->archive_builder->write_object_to_file($mr);
             }
         }
     }
     unlink($path);
     echo "\n temporary XML file removed: [{$path}]\n";
 }
예제 #22
0
 /**
  * Creates image objects for the specimen-image rows that can be linked to a taxon.
  *
  * Rows are read from $this->text_path["specimen_images"]. Header rows, rows
  * without permission, rows without a resolvable image path and rows without a
  * species name are skipped. Rows whose species is not in $link are counted
  * and reported as "investigate", unless the species is a known invalid taxon.
  *
  * @param array $link canonical species name => taxon id
  * @param mixed $func helper object providing make_array()
  * @return void
  */
 private function process_specimen_images($link, $func)
 {
     $fields = array("lngSpecies_ID", "lngImage_ID", "lngDocuTypeSpecimen", "lngPrep_ID", "lngSpecimen_ID", "lngImgType_ID", "blnPermission");
     $texts = $func->make_array($this->text_path["specimen_images"], $fields);
     array_shift($texts);
     $ref_ids = array();
     $agent_ids = array();
     $investigate = 0;
     foreach ($texts as $rec) {
         // skip repeated header rows and images we have no permission to use
         if ($rec["lngImage_ID"] == "lngImage_ID" || $rec["blnPermission"] == "FALSE") {
             continue;
         }
         // description is "<docu type>, <prep>" built from whichever of the two is present
         $description = implode(", ", array_filter(array($rec["lngDocuTypeSpecimen"], $rec["lngPrep_ID"])));
         $rec["lngImage_ID"] = self::remove_quotes($rec["lngImage_ID"]);
         $media_url = self::get_image_path($rec["lngImage_ID"], $rec["lngImgType_ID"]);
         if (!$media_url) {
             continue;
         }
         $rec["lngImage_ID"] = str_ireplace(" ", "_", $rec["lngImage_ID"]);
         $media_id = $rec["lngImage_ID"];
         if (!$media_id) {
             continue;
         }
         $sciname = trim(Functions::canonical_form(self::remove_quotes($rec["lngSpecies_ID"])));
         if (!$sciname) {
             continue;
         }
         $taxon_id = isset($link[$sciname]) ? $link[$sciname] : null;
         if ($taxon_id) {
             self::get_images($description, $taxon_id, $media_id, $media_url, $ref_ids, $agent_ids);
         } elseif ($sciname != "lngSpecies_ID" && !in_array($sciname, $this->invalid_taxa)) {
             // This branch used to also require a truthy $taxon_id, which can never
             // hold here, so unlinked species were silently never reported.
             $investigate++;
             echo "\n investigate: specimen images: [{$taxon_id}] --- taxon = " . $sciname . "\n";
         }
     }
     echo "\n investigate: {$investigate} \n";
 }
예제 #23
0
 /**
  * Adds "sciname" and "rank" to a record based on its "Species" value.
  *
  * A value containing " sp " or " sp." (case-insensitive) is reduced to its
  * canonical form and ranked "genus"; any other value is kept unchanged and
  * ranked "species".
  *
  * @param array $rec record with a "Species" entry
  * @return array the record with "sciname" and "rank" set
  */
 private function clean_taxon_name($rec)
 {
     $is_unnamed_species = stripos($rec["Species"], " sp ") !== false
         || stripos($rec["Species"], " sp.") !== false;
     if (!$is_unnamed_species) {
         $rec["sciname"] = $rec["Species"];
         $rec["rank"] = "species";
         return $rec;
     }
     $rec["sciname"] = Functions::canonical_form($rec["Species"]);
     $rec["rank"] = "genus";
     return $rec;
 }
예제 #24
0
 /**
  * Streams a CSV file and dispatches every data row according to $type.
  *
  * The first line is taken as the header; each later line becomes an
  * associative, trimmed record keyed by those header names. Records whose
  * taxon id is in the "blank taxonomicStatus" set are skipped (the set is not
  * used for type "families").
  *
  * Handling per type:
  *   - "classification":      create_instances_from_taxon_object($rec)
  *   - "classification2":     get_list_of_taxon_ids($rec)
  *   - "extant_habitat_data": process_profile($rec)
  *   - "families":            collects the canonical SCIENTIFICNAME of family-rank rows
  *
  * @param string $csv_file path of the CSV file
  * @param string $type     one of the processing modes above, or "get_taxa_ids_with_data"
  * @return array|null unique family names for "families"; an array for
  *                    "get_taxa_ids_with_data"; null otherwise or when the file cannot be opened
  */
 private function csv_to_array($csv_file, $type)
 {
     if ($type != "families") {
         if ($val = $this->taxa_ids_with_blank_taxonomicStatus) {
             $taxa_ids_with_blank_taxonomicStatus = $val;
         } else {
             $taxa_ids_with_blank_taxonomicStatus = self::get_taxa_ids_with_blank_taxonomicStatus();
         }
     } else {
         $taxa_ids_with_blank_taxonomicStatus = array();
     }
     // Both result holders start as empty arrays so the returns at the bottom
     // never read an undefined variable (array_unique() rejects null).
     $records = array();
     // NOTE(review): nothing in this method ever adds to $taxon_ids, so
     // "get_taxa_ids_with_data" always returns an empty array - confirm whether
     // the collecting code was lost.
     $taxon_ids = array();
     $i = 0;
     if (!($file = fopen($csv_file, "r"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $csv_file);
         return;
     }
     while (!feof($file)) {
         $i++;
         // progress marker
         if ($i % 50000 == 0) {
             echo "\n [{$type}] {$i} - ";
         }
         if ($i == 1) {
             // header row gives the field names
             $fields = fgetcsv($file);
         } else {
             $rec = array();
             $temp = fgetcsv($file);
             $k = 0;
             if (!$temp) {
                 continue;
             }
             foreach ($temp as $t) {
                 $rec[$fields[$k]] = $t;
                 $k++;
             }
             $rec = array_map('trim', $rec);
             /* stats
                $this->debug["TAXONOMICSTATUS"][$rec["TAXONOMICSTATUS"]] = '';
                $this->debug["NOMENCLATURALSTATUS"][$rec["NOMENCLATURALSTATUS"]] = '';
                $this->debug["TAXONRANK"][$rec["TAXONRANK"]] = '';
                continue;
                */
             // the id column is named differently in the data files
             if (in_array($type, array("get_taxa_ids_with_data", "extant_habitat_data"))) {
                 $taxon_id = $rec["TAXON_ID"];
             } else {
                 $taxon_id = $rec["TAXONID"];
             }
             if (isset($taxa_ids_with_blank_taxonomicStatus[$taxon_id])) {
                 continue;
             }
             if ($type == "classification") {
                 $this->create_instances_from_taxon_object($rec);
             } elseif ($type == "classification2") {
                 self::get_list_of_taxon_ids($rec);
             } elseif ($type == "extant_habitat_data") {
                 self::process_profile($rec);
             } elseif ($type == "families") {
                 if ($rec["TAXONRANK"] == "family") {
                     $records[] = Functions::canonical_form($rec["SCIENTIFICNAME"]);
                 }
             }
         }
     }
     fclose($file);
     if ($type == "get_taxa_ids_with_data") {
         return $taxon_ids;
     }
     if ($type == "families") {
         return array_unique($records);
     }
 }
예제 #25
0
 /**
  * Downloads the SPG Hotlist spreadsheet to a local temp file, then calls
  * main_loop() for every name in the 'Animals' column whose canonical form
  * contains a space and whose row index lies inside the enabled batch window.
  * The matching value from column '1' is passed along as the taxon concept id.
  * The temp file is deleted afterwards. Progress is echoed to the output.
  *
  * @return void
  */
 private function process_hotlist_spreadsheet()
 {
     require_library('XLSParser');
     $parser = new XLSParser();
     // alternative location:
     // $doc = "http://localhost/eol_php_code/public/tmp/spreadsheets/SPG Hotlist Official Version.xlsx";
     $doc = "http://localhost/~eolit/eli/eol_php_code/public/tmp/spreadsheets/SPG Hotlist Official Version.xlsx";
     //for MacBook
     echo "\n processing [{$doc}]...\n";
     $path = Functions::save_remote_file_to_local($doc, array("timeout" => 3600, "file_extension" => "xlsx", 'download_attempts' => 2, 'delay_in_minutes' => 2));
     if (!$path) {
         echo "\n [{$doc}] unavailable! \n";
         return;
     }
     $arr = $parser->convert_sheet_to_array($path);
     // tolerate a sheet without the expected columns instead of raising notices
     $names = (isset($arr['Animals']) && is_array($arr['Animals'])) ? $arr['Animals'] : array();
     // Batch window: only rows with $batch_start <= index < $batch_end are processed.
     // To run a later batch use [$m, $m*2), [$m*2, $m*3), ... up to [$m*6, $m*7).
     $m = 10000;
     $batch_start = 1;
     $batch_end = $m;
     $i = -1;
     foreach ($names as $sciname) {
         $i++;
         $sciname = trim(Functions::canonical_form($sciname));
         // only names made of more than one word are processed
         if (stripos($sciname, " ") === false) {
             continue;
         }
         $taxon_concept_id = isset($arr['1'][$i]) ? $arr['1'][$i] : null;
         echo "\n{$i}. [{$sciname}][{$taxon_concept_id}]";
         if ($i < $batch_start || $i >= $batch_end) {
             continue;
         }
         self::main_loop($sciname, $taxon_concept_id);
         // break; //debug - process only 1
     }
     unlink($path);
 }
예제 #26
0
 /**
  * Reads the names listed in the task's text file (one per line), keeps those
  * that have an EOL page, gathers their DiscoverLife taxa and writes the combined
  * taxon XML to the task's temp resource file. Names without an EOL page are
  * stored via store_name_to_text_file(). A summary of the counts is printed at
  * the end. The method pauses one second per name read.
  *
  * @param string $task used to build both the input and the output file name
  * @return void
  */
 private function get_all_taxa($task)
 {
     require_library('CheckIfNameHasAnEOLPage');
     $checker = new CheckIfNameHasAnEOLPage();
     //initialize text file for DiscoverLife: save names without a page in EOL
     self::initialize_text_file(self::$TEXT_FILE_FOR_DL);
     $names_path = self::$TEMP_FILE_PATH . $task . ".txt";
     $FILE = fopen($names_path, "r");
     if (!$FILE) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $names_path);
         return;
     }
     $collected = array();
     $used_collection_ids = array();
     $total = 0;
     $missing = 0;
     while (!feof($FILE)) {
         $line = fgets($FILE);
         if (!$line) {
             continue;
         }
         sleep(1);
         $name = trim($line);
         $total++;
         //Filter names. Process only those who already have a page in EOL. Report back to DiscoverLife names not found in EOL
         $lookup = $checker->check_if_name_has_EOL_page($name);
         list($has_page, $xml_from_api) = $lookup;
         if (!$has_page) {
             print "\n - no EOL page ({$name})";
             $missing++;
             self::store_name_to_text_file($name, $task);
             continue;
         }
         $taxon = $checker->get_taxon_simple_stat($name, $xml_from_api);
         $taxon["map"] = 1;
         // the call-back type depends on whether the name equals the canonical form of the returned sciname
         $canonical = trim(Functions::canonical_form(trim($taxon['sciname'])));
         $taxon["call_back"] = (trim($name) == $canonical) ? "taxon_concept_id" : "scientific_name";
         print "\n {$total} -- " . $taxon['sciname'] . "\n";
         $result = self::get_discoverlife_taxa($taxon, $used_collection_ids);
         list($page_taxa, $used_collection_ids) = $result;
         if ($page_taxa) {
             $collected = array_merge($collected, $page_taxa);
         }
         unset($page_taxa);
     }
     fclose($FILE);
     $xml = SchemaDocument::get_taxon_xml($collected);
     $resource_path = CONTENT_RESOURCE_LOCAL_PATH . "DiscoverLife/temp_DiscoverLife_" . $task . ".xml";
     $OUT = fopen($resource_path, "w");
     if (!$OUT) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
         return;
     }
     fwrite($OUT, $xml);
     fclose($OUT);
     $with_page = $total - $missing;
     print "\n\n total = {$total} \n With EOL page = {$with_page} \n No EOL page = {$missing} \n\n ";
 }
예제 #27
0
 /**
  * Builds a lookup of taxon rank by canonical scientific name from the WoRMS
  * taxon file configured in $this->text_path["worms"]["worms_taxon"].
  *
  * @return array canonical form of scientificName => taxonRank; an empty array
  *               when the file yields no taxa
  */
 private function get_worms_taxa()
 {
     require_library('connectors/FishBaseAPI');
     $func = new FishBaseAPI();
     $fields = array();
     $excluded_fields = array("taxonID", "furtherInformationURL", "referenceID", "namePublishedIn", "taxonomicStatus", "taxonRemarks", "rightsHolder", "parentNameUsageID");
     $taxa = $func->make_array($this->text_path["worms"]["worms_taxon"], $fields, "", $excluded_fields);
     // drop the first entry - presumably the header row; confirm against make_array()
     array_shift($taxa);
     // initialised so that an empty taxa list returns an array, not an undefined variable
     $final = array();
     foreach ($taxa as $taxon) {
         $final[Functions::canonical_form($taxon["scientificName"])] = $taxon["taxonRank"];
     }
     unset($taxa);
     return $final;
 }
예제 #28
0
 /**
  * Creates one Taxon per scientific name and stores it in $this->taxa, keyed by
  * its taxonID (the lower-cased canonical form with spaces turned into "_").
  *
  * The unconditional `continue` below skips the structured-data part of the
  * loop, so everything after it is currently unreachable.
  *
  * @param array $records scientific name => array with a "source" entry and a
  *                       "rekords" list
  * @return void
  */
 private function create_instances_from_taxon_object($records)
 {
     foreach ($records as $sciname => $info) {
         $taxon = new \eol_schema\Taxon();
         $canonical = Functions::canonical_form($sciname);
         $taxon->taxonID = strtolower(str_replace(" ", "_", $canonical));
         $taxon->scientificName = $sciname;
         $taxon->furtherInformationURL = $info["source"];
         $this->taxa[$taxon->taxonID] = $taxon;
         continue;
         // debug - comment to exclude structured data
         foreach ($info["rekords"] as $row) {
             $rec = array(
                 "taxon_id" => $taxon->taxonID,
                 "source" => $this->ggbn_domain . $row["href"],
                 "object_id" => $row["rec_id"] . "_dna_no",
             );
             self::add_string_types($rec, "dna_no", $row["dna_no"], "http://rs.tdwg.org/dwc/terms/catalogNumber", $sciname, "true");
             $rec["object_id"] = $row["rec_id"] . "_specimen_no";
             self::add_string_types($rec, "specimen_no", $row["specimen_no"], "http://rs.tdwg.org/ontology/voc/Specimen#specimenID", $sciname, "false");
             // no country record when the country text contains "unknown"
             if (is_numeric(stripos($row["country"], "unknown"))) {
                 continue;
             }
             $rec["object_id"] = $row["rec_id"] . "_country";
             self::add_string_types($rec, "country", $row["country"], "http://rs.tdwg.org/dwc/terms/country", $sciname, "false");
         }
     }
 }