/**
 * Function : process
 * Purpose: Perform exact and fuzzy matching on a species name, or single genus name
 * Input: - genus, genus+species, or genus+species+authority (in this version), as "searchtxt"
 *        - "search_mode" to control search mode: currently normal (default) / rapid / no_shaping
 * Outputs: list of genera and species that match (or near match) input terms, with associated
 *          ancillary info as desired. Matches are recorded through the save*Matches() methods;
 *          diagnostic messages are appended to $this->debug['process'].
 * Remarks:
 *   (1) This demo version is configured to access base data in three tables:
 *         - genlist_test1 (genus info); primary key (PK) is genus_id
 *         - splist_test1 (species info); PK is species_id, has genus_id as foreign key (FK)
 *           (= link to relevant row in genus table)
 *         - auth_abbrev_test1 (authority abbreviations - required by subsidiary function
 *           "normalize_auth"). Refer README file for relevant minimum table definitions.
 *       If authority comparisons are not required, calls to "normalize_auth" can be disabled and
 *       relevant function commented out, removing need for third table.
 *       (In a production system, table and column names can be varied as desired so long as
 *       code is altered at relevant points, also could be re-configured to hold all genus+species
 *       info together in a single table with minor re-write).
 *   (2) Writes to and reads back from pre-defined global temporary tables
 *       "genus_id_matches" and "species_id_matches", new instances of these are automatically
 *       created for each session (i.e., do not need clearing at procedure end). Refer
 *       README file for relevant table definitions.
 *   (3) When result shaping is on in this version, a relevant message is displayed as required
 *       for developer feedback, if more distant results are being masked (in a production
 *       version, possibly would not do this).
 *   (4) Requires the following subsidiary functions (supplied elsewhere in this package):
 *         - normalize
 *         - normalize_auth
 *         - reduce_spaces
 *         - ngram
 *         - compare_auth
 *         - near_match
 *         - mdld
 *   (5) Accepts "+" as input separator in place of space (e.g. "Homo+sapiens"), e.g. for calling
 *       via a HTTP GET request as needed.
 *
 * Processing order: clean the input, strip leading junk / annotations / "sp."-style
 * placeholders, peel off and match a leading family name, split the remainder into
 * genus / species / infraspecific parts / authorities, try an exact scientific-name lookup,
 * and only if that finds nothing run the fuzzy genus -> species -> infra1 -> infra2 cascade.
 *
 * @param string $searchtxt : genus, genus+species, or genus+species+authority
 * @param string $search_mode : normal (default) / rapid / no_shaping
 * @param boolean $cache : when true (and caching is enabled through $this->cache_flag) an
 *                         existing cache entry for the parsed name skips the matching stage
 * @return boolean false when the input is empty once whitespace has been cleaned up,
 *                 true in every other case (including "nothing matched")
 */
public function process($searchtxt, $search_mode = 'normal', $cache = false)
{
    $this->input = $searchtxt;
    $this->search_mode = $search_mode;
    $this->searchtxt = $searchtxt;
    $this->debug['process'][] = "1 (searchtxt:{$searchtxt}) (search_mode:{$search_mode})";

    // Reset all per-call parse state so nothing leaks over from a previous call.
    $this->this_search_family = '';
    $this->this_search_genus = '';
    $this->this_search_species = '';
    $this->this_authority = '';
    $this->this_authorities = array();
    $this->this_search_infra1 = '';
    $this->this_search_infra2 = '';
    $this->this_search_rank1 = '';
    $this->this_search_rank2 = '';
    $this->this_start_string = '';
    $this->this_cleaned_txt = '';
    $this->this_preprocessed_txt = '';
    $this->this_family_string = '';
    $this->this_family_unmatched = '';
    $this->this_status_string = '';

    $text_str = $searchtxt;
    // accept "+" as separator if supplied, transform to space
    if (strpos($text_str, '+') !== false) {
        $text_str = str_replace('+', ' ', $text_str);
    }
    #$replace=array("%", "<", "{", "}", "&", "_", "\t");
    $replace = array("\t");
    $text_str = str_replace($replace, ' ', $text_str);
    if (strpos($text_str, ' ') !== false) {
        $text_str = preg_replace("/ {2,}/", ' ', $text_str);
    }
    $text_str = trim($text_str);
    $this->debug['process'][] = "1a (text_str:{$text_str})";
    if ($text_str === '') {
        $this->debug['process'][] = "2 Return(false)";
        return false;
    }

    // Leading non-alphabetic characters are remembered separately as the "start string".
    // NOTE(review): str_replace() removes every occurrence of that string, not only the
    // leading one - kept as is because callers may rely on it (e.g. paired quotes).
    if (preg_match('/^[^[:alpha:]]+/u', $text_str, $start_matches)) {
        $text_str = str_replace($start_matches[0], '', $text_str);
        $this->this_start_string = $start_matches[0];
    }

    // Pull out an annotation (cf. / aff. / vel. sp. aff. / question marks).
    if (preg_match("/(?:(?:\\s|^)(?:\\-?cf\\.?|vel\\.? sp\\.? aff\\.?|\\-?aff\\.?)(?:\\s|\$))|(?:\\?+)/i", $text_str, $anno_matches)) {
        $text_str = trim(str_replace($anno_matches[0], ' ', $text_str));
        $this->this_status_string = trim($anno_matches[0]);
    }

    // Close up spaces around hyphens.
    $text_str = str_replace(' -', '-', $text_str);
    $text_str = str_replace('- ', '-', $text_str);
    $this->this_preprocessed_txt = $text_str;

    // Drop "undetermined"-style placeholder tokens (indet., sp., sp. nov., unknown, ...).
    $text_str = preg_replace("/(?<=\\s|^)(?:\\S*[^[:alpha:][:space:]])?(indeterminad[ao]|undetermined|unknown|indet\\.?|sp\\.?\\s+nov\\.?|sp\\.?)(?:[^[:alpha:][:space:]]\\S*)?(?=\\s|\$)/i", ' ', $text_str);
    if (strpos($text_str, ' ') !== false) {
        $text_str = preg_replace("/ {2,}/", ' ', $text_str);
    }
    $text_str = trim($text_str);

    // A leading family name (or the literal word "fam"/"family") is split off here.
    if (preg_match('/^(((?:[[:alpha:]]+aceae)|Cruciferae|Guttiferae|Umbelliferae|Compositae|Leguminosae|Palmae|Labiatae|Gramineae|Mimosoideae|Papilionoideae|Caesalpinioideae|fam(?:ily)?)((?:[^[:alpha:][:space:]]\\S*)?))(?=\\s+|$)/i', $text_str, $fam_matches)) {
        $text_str = trim(str_replace($fam_matches[0], '', $text_str));
        $this->this_family_string = $fam_matches[1];
        #$this->this_search_family=$fam_matches[2];
        $this->this_search_family = mb_strtoupper(mb_substr($fam_matches[2], 0, 1))
            . mb_strtolower(mb_substr($fam_matches[2], 1));
        $this->this_family_unmatched = $fam_matches[3];

        // The bare word "fam"/"family" is not a searchable name: treat it as unmatched text.
        if (preg_match("/^fam(ily)?\$/i", $this->this_search_family)) {
            $this->this_family_unmatched = $this->this_search_family . $this->this_family_unmatched;
            $this->this_search_family = '';
        }

        if (!$this->parse_only && $this->this_search_family) {
            // Exact family hits are stored with edit distance 0 and the phonetic flag set.
            $searchFamilyName = $this->db->searchFamilyName($this->this_search_family);
            if (isset($searchFamilyName)) {
                foreach ($searchFamilyName as $returnedFamilyName) {
                    $this->saveFamilyMatches($returnedFamilyName->nameID, $this->this_search_family, 0, 'Y');
                }
            }

            // Fuzzy family candidates.
            $nm = new NearMatch();
            $this_near_match_family = $nm->near_match($this->this_search_family);
            $this_family_start = substr($this->this_search_family, 0, 3);
            $this_family_length = strlen($this->this_search_family);
            $family_res = $this->db->family_cur(
                $this->search_mode,
                $this_near_match_family,
                $this_family_length,
                $this_family_start
            );
            // !empty() rather than count(): a null result must not raise on PHP 8.
            if (!empty($family_res)) {
                foreach ($family_res as $drec) {
                    $family_match = $this->match_family($this->this_search_family, $drec->search_family_name);
                    if ($family_match['match']) {
                        $phonetic_flag = $family_match['phonetic_match'] ? 'Y' : null;
                        $this->saveFamilyMatches(
                            $drec->family_id,
                            $drec->family,
                            $family_match['edit_distance'],
                            $phonetic_flag
                        );
                    }
                } // end foreach
            }
        }
    }

    if (in_array($text_str, array('exit', 'end', 'q', '.'), true)) {
        return true;
    }

    // unhyphened trinomial: if "<genus> <a> <b>" is known with the epithet "<a>-<b>",
    // restore the hyphen in both the working text and the preprocessed text.
    if (preg_match('/^([[:alpha:]]+) ([[:alpha:]]+)[\\.\\s]([[:alpha:]]+)(.*)/', $text_str, $matches)) {
        $spaced_epithet = "{$matches['2']} {$matches['3']}";
        $specific_epithet_str = "{$matches['2']}-{$matches['3']}";
        $check_res = $this->db->checkSpecificEpithet(array($specific_epithet_str));
        foreach ($check_res as $ck) {
            if ($ck->count > 0 && mb_strtolower($ck->specificEpithet) == mb_strtolower($specific_epithet_str)) {
                $text_str = str_ireplace_first($spaced_epithet, $specific_epithet_str, $text_str);
                $this->this_preprocessed_txt = str_ireplace_first(
                    $spaced_epithet,
                    $specific_epithet_str,
                    $this->this_preprocessed_txt
                );
            }
        }
    }
    $this->this_cleaned_txt = $text_str;

    // Case normalisation: capitalise the first token, lower-case following ALL-CAPS tokens,
    // and stop at the first alphabetic token that is already mixed/lower case.
    $token = explode(" ", $text_str);
    $token_count = count($token);
    for ($i = 0; $i < $token_count; $i++) {
        if (!preg_match('/^[[:alpha:]]+\\.?$/u', $token[$i])) {
            continue;
        }
        if ($i == 0) {
            $token[$i] = mb_strtoupper(mb_substr($token[$i], 0, 1)) . mb_strtolower(mb_substr($token[$i], 1));
        } elseif (mb_strtoupper($token[$i]) === $token[$i]) {
            $token[$i] = mb_strtolower($token[$i]);
        } else {
            break;
        }
    }
    $text_str = implode(" ", $token);

    // Clearing the temporary tables
    //$this->db->clearTempTables();

    // includes stripping of presumed non-relevant content including subgenera, comments,
    // cf's, aff's, etc...
    // Normalizing the search text
    $n = new Normalize($this->db);
    $this->debug['process'][] = "3 (text_str:{$text_str})";

    // These locals are read further down whether or not the splitter runs, so they must
    // always be defined (previously undefined when chop_overload was set).
    $this_search_genus = '';
    $this_search_species = '';
    $this_authority = '';
    $this_species_length = 0;

    if (!$this->chop_overload) {
        // leave presumed genus + species + authority (in this instance)
        $splitter = new Splitter($n, $text_str);
        $this->this_search_genus = $this_search_genus = $splitter->get('genus');
        $this->this_search_species = $this_search_species = $splitter->get('species');
        $this->this_authorities = $splitter->get('authors');
        $this->this_authority = $this_authority = end($this->this_authorities);

        // Placeholder words are blanked on the object only; the locals keep the parsed value.
        if (preg_match("/^gen(us)?\$/i", $this->this_search_genus)) {
            $this->this_search_genus = '';
        }
        if (preg_match("/^sp(p|ecies)?\$/i", $this->this_search_species)) {
            $this->this_search_species = '';
        }

        // Each infraspecies entry is read as [rank, epithet]; rank 'n/a' means "no rank given".
        $infraspecies = $splitter->get('infraspecies');
        if (isset($infraspecies)) {
            if (isset($infraspecies[0])) {
                if ($infraspecies[0][0] != 'n/a') {
                    $this->this_search_rank1 = $infraspecies[0][0];
                }
                $this->this_search_infra1 = $infraspecies[0][1];
            }
            if (isset($infraspecies[1])) {
                if ($infraspecies[1][0] != 'n/a') {
                    $this->this_search_rank2 = $infraspecies[1][0];
                }
                $this->this_search_infra2 = $infraspecies[1][1];
            }
        }
        if (NAME_PARSER == 'gni') {
            $this->gni_parser_result = $splitter->parsed_response;
        }
    }

    if ($this->parse_only) {
        return true;
    }

    // cache_flag switch determines if caching is allowed for the source
    $cache_key = null;
    $cache_path = null;
    if ($this->cache_flag == true) {
        if ($this_search_genus != '' && $this_search_species != '' && $this_authority != '') {
            $cache_key = $this_search_genus . '-' . $this_search_species . '-' . $this_authority . '_' . $search_mode;
            $cache_path = $this->cache_path . $this->db->source . "/authority/";
        } elseif ($this_search_genus != '' && $this_search_species != '') {
            $cache_key = $this_search_genus . '-' . $this_search_species . '_' . $search_mode;
            $cache_path = $this->cache_path . $this->db->source . "/species/";
        } elseif ($this_search_genus != '') {
            $cache_key = $this_search_genus . '_' . $search_mode;
            $cache_path = $this->cache_path . $this->db->source . "/genus/";
        }

        // Without a genus there is nothing to key the cache on: skip cache set-up rather
        // than creating a directory / Cache object from undefined values.
        if ($cache_key !== null) {
            $this->mkdir_recursive($cache_path);
            $this->_cache = new Cache($cache_path);
            $this->_cache->setKey($cache_key);
        }
    }

    $cache_loop_flag = false;
    if ($cache == true && $cache_key !== null && $this->_cache->cache_exists()) {
        $cache_loop_flag = true;
    }

    if (!$cache_loop_flag) {
        // ---- exact lookup on the cleaned text and on the re-assembled parsed name ----
        $search_str = $this->this_search_genus;
        if ($this->this_search_species) {
            $search_str .= ' ' . $this_search_species;
        }
        if ($this->this_search_infra1) {
            if ($this->this_search_rank1 != '') {
                $search_str .= ' ' . $this->this_search_rank1;
            }
            $search_str .= ' ' . $this->this_search_infra1;
        }
        if ($this->this_search_infra2) {
            if ($this->this_search_rank2 != '') {
                $search_str .= ' ' . $this->this_search_rank2;
            }
            $search_str .= ' ' . $this->this_search_infra2;
        }

        $searchScientificName = $this->db->searchScientificName(array($text_str, $search_str));
        if (isset($searchScientificName)) {
            $has_match = 0;
            foreach ($searchScientificName as $returnedScientificName) {
                if ($returnedScientificName->specificEpithet != '') {
                    $has_match = 1;
                    $is_species_rank = $returnedScientificName->nameRank == 'species'
                        || $returnedScientificName->nameRank == 'nothospecies';
                    if ($is_species_rank) {
                        $this->saveSpeciesMatches(
                            $returnedScientificName->nameID,
                            $returnedScientificName->scientificName,
                            0,
                            0,
                            0,
                            'Y'
                        );
                    } elseif ($returnedScientificName->infraspecificEpithet2 || $this->this_search_infra2) {
                        $this->saveInfra2Matches(
                            $returnedScientificName->nameID,
                            $returnedScientificName->scientificName,
                            0,
                            0,
                            0,
                            0,
                            0,
                            'Y'
                        );
                    } elseif ($returnedScientificName->infraspecificEpithet || $this->this_search_infra1) {
                        $this->saveInfra1Matches(
                            $returnedScientificName->nameID,
                            $returnedScientificName->scientificName,
                            0,
                            0,
                            0,
                            0,
                            'Y'
                        );
                    }
                } elseif ($returnedScientificName->genus != '') {
                    $has_match = 1;
                    $this->saveGenusMatches(
                        $returnedScientificName->nameID,
                        $returnedScientificName->genus,
                        0,
                        'Y'
                    );
                }
            }
            // An exact hit short-circuits the fuzzy cascade below.
            if ($has_match) {
                return true;
            }
        }

        $this->debug['process'][] = "3a (this_search_genus:{$this_search_genus}) (this_search_species:{$this_search_species}) (this_authority:{$this_authority})";

        // ---- fuzzy cascade: genus ----
        $nm = new NearMatch();
        $this_near_match_genus = $nm->near_match($this_search_genus);
        $this_near_match_species = '';
        $this->debug['process'][] = "3b (this_near_match_genus:{$this_near_match_genus})";

        //TODO refactor inside of a method
        $this_genus_start = substr($this_search_genus, 0, 3);
        $this_genus_end = substr($this_search_genus, -3);
        $this_genus_length = strlen($this_search_genus);
        //TODO_END
        $this->debug['process'][] = "3c (this_search_genus,{$this_search_genus}) (this_genus_start:{$this_genus_start}) (this_genus_end:{$this_genus_end}) (this_genus_length:{$this_genus_length})";

        if ($this_search_species != '') {
            $this_near_match_species = $nm->near_match($this_search_species, 'epithet_only');
            $this_species_length = strlen($this_search_species);
            $this->debug['process'][] = "4 (this_search_species:{$this_search_species}) (this_near_match_species:{$this_near_match_species}) (this_species_length:{$this_species_length})";
        }

        // now look for exact or near matches on genus first select candidate genera for edit
        // distance (MDLD) test
        // for drec in genus_cur loop -- includes the genus pre-filter (main portion)
        $genus_res = $this->db->genus_cur3(
            $this->search_mode,
            $this_near_match_genus,
            $this_near_match_species,
            $this_genus_length,
            $this_genus_start,
            $this_genus_end
        );
        # $this->debug['process'][] = array("5 (genus_res)" => $genus_res);

        $genus_matches = array();
        // !empty() rather than count(): a null result must not raise on PHP 8.
        if (!empty($genus_res)) {
            // EJS -- attempt to reduce the amount of species_cur
            // this will be the naive approach
            foreach ($genus_res as $drec) {
                $genus_match = $this->match_genera($this_search_genus, $drec->search_genus_name);
                if (!$genus_match['match']) {
                    continue;
                }
                // don't include a genus already in the array
                if (!array_key_exists($drec->genus_id, $genus_matches)) {
                    $phonetic_flag = $genus_match['phonetic_match'] ? 'Y' : null;
                    $this->saveGenusMatches(
                        $drec->genus_id,
                        $drec->genus,
                        $genus_match['edit_distance'],
                        $phonetic_flag
                    );
                    $this->genera_tested++;
                }
                $genus_matches[$drec->genus_id] = $genus_match;
            } // end foreach
        }

        // ---- fuzzy cascade: species (only within matched genera) ----
        $species_matches = array();
        if ($this_search_species != '' && count($genus_matches)) {
            $species_res = $this->db->species_cur_in2(array_keys($genus_matches), $this_species_length);
            if (isset($species_res)) {
                foreach ($species_res as $drec) {
                    $species_epithets_match = $this->match_species_epithets(
                        $this_search_species,
                        $drec->search_species_name
                    );
                    $genus_match = $genus_matches[$drec->genus_id];
                    $binomials_match = $this->match_matches(array($genus_match, $species_epithets_match));
                    if (!$binomials_match['match']) {
                        continue;
                    }
                    if (!array_key_exists($drec->species_id, $species_matches)) {
                        $binomial_phonetic_flag = $binomials_match['phonetic_match'] ? 'Y' : null;
                        $this->saveSpeciesMatches(
                            $drec->species_id,
                            $drec->genus_species,
                            $genus_match['edit_distance'],
                            $species_epithets_match['edit_distance'],
                            $binomials_match['edit_distance'],
                            $binomial_phonetic_flag
                        );
                        $this->species_tested++;
                    }
                    // Keep the parent genus match with the species match for the next level.
                    $species_epithets_match['genus_match'] = $genus_match;
                    $species_matches[$drec->species_id] = $species_epithets_match;
                } // EJS -- end
            }
        }

        // ---- fuzzy cascade: first infraspecific epithet (only within matched species) ----
        $infra1_matches = array();
        if ($this->this_search_infra1 != '' && count($species_matches)) {
            $this_infra1 = $this->this_search_infra1;
            $this_infra1_length = strlen($this_infra1);
            $infra1_res = $this->db->infra1_cur_in(array_keys($species_matches), $this_infra1_length);
            if (isset($infra1_res)) {
                foreach ($infra1_res as $drec) {
                    $infra1_match = $this->match_species_epithets($this_infra1, $drec->search_infra1_name);
                    $species_match = $species_matches[$drec->species_id];
                    $genus_match = $species_match["genus_match"];
                    $binomials_match = $this->match_matches(array($genus_match, $species_match, $infra1_match));
                    if (!$binomials_match['match']) {
                        continue;
                    }
                    if (!array_key_exists($drec->infra1_id, $infra1_matches)) {
                        $binomial_phonetic_flag = $binomials_match['phonetic_match'] ? 'Y' : null;
                        $this->saveInfra1Matches(
                            $drec->infra1_id,
                            $drec->species_infra1,
                            $genus_match['edit_distance'],
                            $species_match['edit_distance'],
                            $infra1_match['edit_distance'],
                            $binomials_match['edit_distance'],
                            $binomial_phonetic_flag
                        );
                    }
                    $infra1_match["species_match"] = $species_match;
                    $infra1_matches[$drec->infra1_id] = $infra1_match;
                } // EJS -- end
            }
        }

        // ---- fuzzy cascade: second infraspecific epithet ----
        $infra2_matches = array();
        if ($this->this_search_infra2 != '' && count($infra1_matches)) {
            $this_infra2 = $this->this_search_infra2;
            $this_infra2_length = strlen($this_infra2);
            // NOTE(review): the genus->species and species->infra1 lookups pass the ids of the
            // parent level, but this one passes species ids rather than infra1 ids - confirm
            // against infra2_cur_in() whether array_keys($infra1_matches) was intended.
            $infra2_res = $this->db->infra2_cur_in(array_keys($species_matches), $this_infra2_length);
            if (isset($infra2_res)) {
                foreach ($infra2_res as $drec) {
                    // Rows may belong to an infra1 that never matched; skip them instead of
                    // reading an undefined array offset and scoring against null matches.
                    if (!isset($infra1_matches[$drec->infra1_id])) {
                        continue;
                    }
                    $infra2_match = $this->match_species_epithets($this_infra2, $drec->search_infra2_name);
                    $infra1_match = $infra1_matches[$drec->infra1_id];
                    $species_match = $infra1_match['species_match'];
                    $genus_match = $species_match["genus_match"];
                    $binomials_match = $this->match_matches(
                        array($genus_match, $species_match, $infra1_match, $infra2_match)
                    );
                    if (!$binomials_match['match']) {
                        continue;
                    }
                    if (!array_key_exists($drec->infra2_id, $infra2_matches)) {
                        $binomial_phonetic_flag = $binomials_match['phonetic_match'] ? 'Y' : null;
                        // NOTE(review): de-duplication above is by infra2_id, yet the id/name
                        // saved here are the infra1 columns, and the infra2 edit distance is
                        // passed before the infra1 one - verify against saveInfra2Matches().
                        $this->saveInfra2Matches(
                            $drec->infra1_id,
                            $drec->species_infra1,
                            $genus_match['edit_distance'],
                            $species_match['edit_distance'],
                            $infra2_match['edit_distance'],
                            $infra1_match['edit_distance'],
                            $binomials_match['edit_distance'],
                            $binomial_phonetic_flag
                        );
                    }
                    $infra2_match["infra1_match"] = $infra1_match;
                    $infra2_matches[$drec->infra2_id] = $infra2_match;
                } // EJS -- end
            }
        }
    } // End Cache Loop Flag

    return true;
}
public function aggregate($taxamatch = null) { $taxamatch_result = $taxamatch->getData(); $parsed = array(); $parsed["family"] = $taxamatch->this_search_family; $parsed["genus"] = $taxamatch->this_search_genus; $parsed["species"] = $taxamatch->this_search_species; $parsed["infra1"] = $taxamatch->this_search_infra1; $parsed["infra2"] = $taxamatch->this_search_infra2; $parsed["authority"] = $taxamatch->this_authority; $parsed["authorities"] = $taxamatch->this_authorities; $parsed["rank1"] = $taxamatch->this_search_rank1; $parsed["rank2"] = $taxamatch->this_search_rank2; $start_string = $taxamatch->this_start_string; $family_string = $taxamatch->this_family_string; $family_unmatched = $taxamatch->this_family_unmatched; $status_string = $taxamatch->this_status_string; $cleaned_txt = $taxamatch->this_cleaned_txt; $preprocessed_txt = $taxamatch->this_preprocessed_txt; $scientificname = $taxamatch->searchtxt; if ($taxamatch->parse_only) { $unparsed = $preprocessed_txt; foreach (array_keys($parsed) as $pk) { if ($pk == 'authority') { continue; } elseif ($pk == 'authorities') { foreach ($parsed[$pk] as $auth) { if ($auth) { $unparsed = str_ireplace_first($auth, '', $unparsed); } } } elseif ($parsed[$pk]) { $unparsed = str_ireplace_first($parsed[$pk], '', $unparsed); if ($parsed[$pk] == 'f.' && ($pk == "rank1" || $pk == "rank2")) { $unparsed = str_ireplace_first(self::$forma, '', $unparsed); } } } if (isset($start_string)) { $unparsed = $start_string . $unparsed; } $unparsed = trim($unparsed); $parsed_rank1 = array_key_exists($parsed["rank1"], self::$standard_rank) ? self::$standard_rank[$parsed["rank1"]] : $parsed["rank1"]; $parsed_rank2 = array_key_exists($parsed["rank2"], self::$standard_rank) ? 
self::$standard_rank[$parsed["rank2"]] : $parsed["rank2"]; $this->data[] = array('Name_submitted' => $scientificname, 'Family' => $parsed["family"], 'Genus' => $parsed["genus"], 'Specific_epithet' => $parsed["species"], 'Infraspecific_rank' => $parsed_rank1, 'Infraspecific_epithet' => $parsed["infra1"], 'Infraspecific_rank_2' => $parsed_rank2, 'Infraspecific_epithet_2' => $parsed["infra2"], 'Author' => $parsed["authority"], 'Annotations' => $status_string, 'Unmatched_terms' => $unparsed); return true; } $lowest_parsed_rank = ''; $lowest_matched_rank = ''; foreach (array_keys(self::$rank) as $rk) { if ($parsed[$rk]) { $lowest_parsed_rank = $rk; } } $gni_parser_result = null; if (isset($taxamatch_result["gni_parser_result"])) { $gni_parser_result = $taxamatch_result["gni_parser_result"]; } $all_id = array(); $all_name = array(); $matched = array(); $matched_by_source = array(); $synonym = array(); $name_source_data = array(); $classification_family = array(); $phonetic_id = array(); $alt_accepted_family = array(); $family_matched = array(); $genus_matched = array(); $species_matched = array(); $infra1_matched = array(); $infra2_matched = array(); $all_matched = array(); $best_ed = array(); $higher_taxa_ed = array(); foreach (array_keys(self::$rank) as $rk) { $all_matched[$rk] = array(); if (isset($taxamatch_result[$rk]) && count($taxamatch_result[$rk])) { foreach (array_keys(self::$match_type) as $type) { if (isset($taxamatch_result[$rk][$type])) { if (!isset($best_ed[$rk]) || $best_ed[$rk] > self::$match_type[$type]) { $best_ed[$rk] = self::$match_type[$type]; } foreach ($taxamatch_result[$rk][$type] as &$match) { $match["match_type"] = $type; $match["match_score"] = 0; $match["Lowest_matched_rank"] = $rk; $match["Name_matched_id"] = $match[$rk . 
"_id"]; $all_id[] = $match["Name_matched_id"]; if ($type == 'exact' || $type == 'phonetic') { $phonetic_id[$match["Name_matched_id"]] = 1; } } $all_matched[$rk] = array_merge($all_matched[$rk], $taxamatch_result[$rk][$type]); } } } } if (count($all_id)) { $name_res = $this->db->getScientificName($all_id); $accepted_name_id = array(); foreach ($name_res as $nm) { $all_name[$nm->nameID] = $nm; $name_source_data[$nm->nameID][$nm->sourceID]['url'] = $nm->name_source_url; $name_source_data[$nm->nameID][$nm->sourceID]['lsid'] = $nm->lsid; } $miss_accepted_name_id = array(); $synonym_res = $this->db->getSynonym($all_id); foreach ($synonym_res as $sn) { $nid = $sn->nameID; $sid = $sn->sourceID; if ($sn->acceptance == 'Accepted') { $synonym[$nid][$sid]["accepted_name_id"] = $nid; $synonym[$nid][$sid]["acceptance"] = $sn->acceptance; //} elseif ($sn->acceptance == 'Synonym' || $sn->acceptance == 'Invalid' || $sn->acceptance == 'Illegitimate') { } elseif (!is_null($sn->acceptance)) { if ($sn->accepted_name_id) { $accepted_name_id[] = $sn->accepted_name_id; $synonym[$nid][$sid]["accepted_name_id"] = $sn->accepted_name_id; } $synonym[$nid][$sid]["acceptance"] = $sn->acceptance; } } //print_r($synonym); if (count($accepted_name_id)) { $name_res = $this->db->getScientificName($accepted_name_id); foreach ($name_res as $nm) { $all_name[$nm->nameID] = $nm; $name_source_data[$nm->nameID][$nm->sourceID]['url'] = $nm->name_source_url; $name_source_data[$nm->nameID][$nm->sourceID]['lsid'] = $nm->lsid; } } //print_r($all_name); $classificationfamily_res = $this->db->getClassificationFamily(array_keys($all_name)); $cl_fam = array(); foreach ($classificationfamily_res as $cf) { $cl_fam[$cf->nameID][$cf->sourceID] = $cf->family; } $source_id = array(); if (isset($this->db->classification_id)) { $source_id[] = $this->db->classification_id; } else { $source_id = array_keys($this->db->source_name); } foreach (array_keys($cl_fam) as $nid) { foreach ($source_id as $sid) { if 
(isset($cl_fam[$nid][$sid])) { $classification_family[$nid] = $cl_fam[$nid][$sid]; break; } } } } //print_r($name_source_data); //print_r($classification_family); if (count($all_matched['family'])) { $fm = array(); foreach ($all_matched['family'] as $fam) { $fm[] = $fam['family']; } $fam_res = $this->db->getFamilyAcceptedFamily($fm); foreach ($fam_res as $fam) { $alt_accepted_family[$fam->family] = $fam->accepted_family; } } foreach (array_keys(self::$rank) as $rk) { foreach ($all_matched[$rk] as &$match) { if (self::$rank[$rk] > self::$rank['family']) { $match["Family_matched"] = ""; $match["family_ed"] = 0; if ($parsed["family"]) { $match["family_ed"] = strlen($parsed["family"]); $accepted_family = isset($classification_family[$match["Name_matched_id"]]) ? $classification_family[$match["Name_matched_id"]] : ''; $genus = $all_name[$match["Name_matched_id"]]->genus; $family_filtered = $this->filterFamily($all_matched["family"], $accepted_family, $genus, $alt_accepted_family); if (count($family_filtered)) { $match["family_ed"] = $family_filtered[0]["family_ed"]; $match["Family_matched"] = $family_filtered[0]["family"]; $match["Family_score"] = self::getEDScore($match["family_ed"], $match["Family_matched"], $parsed["family"]); $match["match_score"] += $match["Family_score"]; } } } } } if (count($all_matched["infra2"])) { foreach ($all_matched["infra2"] as &$match) { if (isset($higher_taxa_ed["infra2"]) && $higher_taxa_ed["infra2"] <= $match["infra2_ed"]) { continue; } $name = $all_name[$match["Name_matched_id"]]; $match["Genus_matched"] = $name->genus; $match["Specific_epithet_matched"] = $name->specificEpithet; $match["Infraspecific_rank"] = $name->rankIndicator; $match["Infraspecific_epithet_matched"] = $name->infraspecificEpithet; $match["Infraspecific_rank_2"] = $name->infraspecificRank2; $match["Infraspecific_epithet_2_matched"] = $name->infraspecificEpithet2; $match["Genus_score"] = self::getEDScore($match["genus_ed"], $match["Genus_matched"], 
$parsed["genus"]); $match["Specific_epithet_score"] = self::getEDScore($match["species_ed"], $match["Specific_epithet_matched"], $parsed["species"]); $match["Infraspecific_epithet_score"] = self::getEDScore($match["infra1_ed"], $match["Infraspecific_epithet_matched"], $parsed["infra1"]); $match["Infraspecific_epithet_2_score"] = self::getEDScore($match["infra2_ed"], $match["Infraspecific_epithet_2_matched"], $parsed["infra2"]); $match["Name_matched"] = $match["genus_species_infra1_infra2"]; if (!isset($higher_taxa_ed["infra1"]) || $higher_taxa_ed["infra1"] > $match["infra1_ed"]) { $higher_taxa_ed["infra1"] = $match["infra1_ed"]; } if (!isset($higher_taxa_ed["species"]) || $higher_taxa_ed["species"] > $match["species_ed"]) { $higher_taxa_ed["species"] = $match["species_ed"]; } if (!isset($higher_taxa_ed["genus"]) || $higher_taxa_ed["genus"] > $match["genus_ed"]) { $higher_taxa_ed["genus"] = $match["genus_ed"]; } if (isset($match["family_ed"]) && (!isset($higher_taxa_ed["family"]) || $higher_taxa_ed["family"] > $match["family_ed"])) { $higher_taxa_ed["family"] = $match["family_ed"]; } if (self::cmpRank($match["Infraspecific_rank"], $parsed["rank1"]) !== true) { $match["Infraspecific_epithet_score"] -= self::$rank_penalty; } if (self::cmpRank($match["Infraspecific_rank_2"], $parsed["rank2"]) !== true) { $match["Infraspecific_epithet_2_score"] -= self::$rank_penalty; } $match["match_score"] += $match["Specific_epithet_score"] + $match["Genus_score"] + $match["Infraspecific_epithet_score"] + $match["Infraspecific_epithet_2_score"]; $matched[] = $match; } } if (count($all_matched["infra1"]) && (!isset($higher_taxa_ed["infra1"]) || isset($best_ed["infra1"]) && $higher_taxa_ed["infra1"] > $best_ed["infra1"])) { foreach ($all_matched["infra1"] as &$match) { if (isset($higher_taxa_ed["infra1"]) && $higher_taxa_ed["infra1"] <= $match["infra1_ed"]) { continue; } $name = $all_name[$match["Name_matched_id"]]; $match["Genus_matched"] = $name->genus; 
$match["Specific_epithet_matched"] = $name->specificEpithet; $match["Infraspecific_rank"] = $name->rankIndicator; $match["Infraspecific_epithet_matched"] = $name->infraspecificEpithet; $match["Genus_score"] = self::getEDScore($match["genus_ed"], $match["Genus_matched"], $parsed["genus"]); $match["Specific_epithet_score"] = self::getEDScore($match["species_ed"], $match["Specific_epithet_matched"], $parsed["species"]); $match["Infraspecific_epithet_score"] = self::getEDScore($match["infra1_ed"], $match["Infraspecific_epithet_matched"], $parsed["infra1"]); $match["Name_matched"] = $match["genus_species_infra1"]; if (!isset($higher_taxa_ed["species"]) || $higher_taxa_ed["species"] > $match["species_ed"]) { $higher_taxa_ed["species"] = $match["species_ed"]; } if (!isset($higher_taxa_ed["genus"]) || $higher_taxa_ed["genus"] > $match["genus_ed"]) { $higher_taxa_ed["genus"] = $match["genus_ed"]; } if (isset($match["family_ed"]) && (!isset($higher_taxa_ed["family"]) || $higher_taxa_ed["family"] > $match["family_ed"])) { $higher_taxa_ed["family"] = $match["family_ed"]; } if (self::cmpRank($match["Infraspecific_rank"], $parsed["rank1"]) !== true) { $match["Infraspecific_epithet_score"] -= self::$rank_penalty; } $match["match_score"] += $match["Specific_epithet_score"] + $match["Genus_score"] + $match["Infraspecific_epithet_score"]; $matched[] = $match; } } if (count($all_matched["species"]) && (!isset($higher_taxa_ed["species"]) || isset($best_ed["species"]) && $higher_taxa_ed["species"] > $best_ed["species"])) { foreach ($all_matched["species"] as &$match) { if (isset($higher_taxa_ed["species"]) && $higher_taxa_ed["species"] <= $match["species_ed"]) { continue; } $name = $all_name[$match["Name_matched_id"]]; $match["Genus_matched"] = $name->genus; $match["Specific_epithet_matched"] = $name->specificEpithet; $match["Genus_score"] = self::getEDScore($match["genus_ed"], $match["Genus_matched"], $parsed["genus"]); $match["Specific_epithet_score"] = 
self::getEDScore($match["species_ed"], $match["Specific_epithet_matched"], $parsed["species"]); $match["Name_matched"] = $match["genus_species"]; if (!isset($higher_taxa_ed["genus"]) || $higher_taxa_ed["genus"] > $match["genus_ed"]) { $higher_taxa_ed["genus"] = $match["genus_ed"]; } if (isset($match["family_ed"]) && (!isset($higher_taxa_ed["family"]) || $higher_taxa_ed["family"] > $match["family_ed"])) { $higher_taxa_ed["family"] = $match["family_ed"]; } $match["match_score"] += $match["Genus_score"] + $match["Specific_epithet_score"]; $matched[] = $match; } } if (count($all_matched["genus"]) && (!isset($higher_taxa_ed["genus"]) || isset($best_ed["genus"]) && $higher_taxa_ed["genus"] > $best_ed["genus"])) { foreach ($all_matched["genus"] as &$match) { if (isset($higher_taxa_ed["genus"]) && $higher_taxa_ed["genus"] <= $match["genus_ed"]) { continue; } $name = $all_name[$match["Name_matched_id"]]; $match["Genus_matched"] = $name->genus; $match["Specific_epithet_matched"] = ''; $match["Genus_score"] = self::getEDScore($match["genus_ed"], $match["Genus_matched"], $parsed["genus"]); $match["Specific_epithet_score"] = ''; $match["Name_matched"] = $match["Genus_matched"]; if (isset($match["family_ed"]) && (!isset($higher_taxa_ed["family"]) || $higher_taxa_ed["family"] > $match["family_ed"])) { $higher_taxa_ed["family"] = $match["family_ed"]; } $match["match_score"] += $match["Genus_score"]; $matched[] = $match; } } if (count($all_matched["family"]) && (!isset($higher_taxa_ed["family"]) || isset($best_ed["family"]) && $higher_taxa_ed["family"] > $best_ed["family"])) { foreach ($all_matched["family"] as &$match) { if (isset($higher_taxa_ed["family"]) && $higher_taxa_ed["family"] <= $match["family_ed"]) { continue; } $match["Family_matched"] = $match["family"]; $match["Specific_epithet_matched"] = ''; $match["Genus_matched"] = ''; $match["Family_score"] = self::getEDScore($match["family_ed"], $match["Family_matched"], $parsed["family"]); $match["Genus_score"] = ''; 
$match["Specific_epithet_score"] = ''; $match["Name_matched"] = $match["Family_matched"]; $match["match_score"] += $match["Family_score"]; $matched[] = $match; } } if (!count($matched)) { $matched[] = array("Lowest_matched_rank" => "", "Name_score" => "", "Family_matched" => "", "Author_score" => "", "Name_matched_accepted_family" => "", "Warnings" => 0); } $status = $status_string; if ($status) { if (preg_match("/vel\\.? sp\\.? aff\\.?/i", $status)) { $status = "vel. sp. aff."; } elseif (preg_match("/\\-?aff?\\.?/i", $status)) { $status = "aff."; } elseif (preg_match("/\\-?cf\\.?/i", $status) || $status == '?') { $status = "cf."; } } $unmatched = array(); foreach ($matched as &$match) { $match["Name_submitted"] = $scientificname; $match["Family_submitted"] = $parsed["family"]; $match["Genus_submitted"] = $parsed["genus"]; $match["Specific_epithet_submitted"] = $parsed["species"]; $match["Author_submitted"] = $parsed["authority"]; $match["Annotations"] = $status; $rk = $match["Lowest_matched_rank"]; $parsed_part = 0; $matched_part = 0; if ($rk) { $matched_part = self::$rank[$rk]; if ($match["Family_matched"]) { $matched_part++; } } if ($lowest_parsed_rank) { $parsed_part = self::$rank[$lowest_parsed_rank]; } if ($parsed["family"]) { $parsed_part++; } $unmatched = ''; $unmatched_part = 0; $extra_part = 0; if (!$rk || self::$rank[$rk] < self::$rank['genus'] || $gni_parser_result && $gni_parser_result->scientificName->parsed) { $unmatched = $preprocessed_txt; } elseif ($preprocessed_txt != $cleaned_txt) { $unmatched = str_ireplace_first($cleaned_txt, '', $preprocessed_txt); } if ($match["Family_matched"]) { $unmatched = str_ireplace_first($parsed['family'], '', $unmatched); } if ($rk && $gni_parser_result && $gni_parser_result->scientificName->parsed) { foreach (array_keys(self::$rank) as $rki) { if (self::$rank[$rki] >= self::$rank['genus'] && self::$rank[$rki] <= self::$rank[$rk]) { $unmatched = str_ireplace_first($parsed[$rki], '', $unmatched); } } foreach 
($parsed["authorities"] as $auth) { if ($auth) { $unmatched = str_ireplace_first($auth, '', $unmatched); } } if (($rk == 'infra1' || $rk == 'infra2') && $parsed["rank1"]) { $unmatched = str_ireplace_first($parsed["rank1"], '', $unmatched); $matched_part++; if ($parsed["rank1"] == 'f.') { $unmatched = str_ireplace_first(self::$forma, '', $unmatched); } } if ($rk == 'infra2' && $parsed["rank2"]) { $unmatched = str_ireplace_first($parsed["rank2"], '', $unmatched); $matched_part++; if ($parsed["rank2"] == 'f.') { $unmatched = str_ireplace_first(self::$forma, '', $unmatched); } } if ($all_name[$match["Name_matched_id"]]->isHybrid) { $unmatched = str_ireplace_first(' x', ' ', $unmatched); $unmatched = str_ireplace_first(' ×', ' ', $unmatched); } } //print_r($matched); if (strlen($unmatched)) { $unmatched = trim(preg_replace("/ {2,}/", ' ', $unmatched)); } if (strlen($unmatched)) { $unmatched_part = count(explode(" ", $unmatched)); $extra_part = $unmatched_part + $matched_part - $parsed_part; if ($parsed["rank1"]) { $extra_part--; } if ($parsed["rank2"]) { $extra_part--; } if ($extra_part > 1) { $unmatched_part -= $extra_part - 1; } } if (isset($start_string)) { $unmatched = $start_string . $unmatched; } $match["Unmatched_terms"] = trim($unmatched); if ($rk) { $matched_id[] = $match["Name_matched_id"]; $match["Canonical_author"] = $match["temp_authority"]; if (array_key_exists($match[$rk . 
"_id"], $phonetic_id)) { $match["Phonetic"] = 'Y'; } else { $match["Phonetic"] = ""; } if (!isset($match["Warnings"])) { $match["Warnings"] = 0; } if (isset($gni_parser_result) && $lowest_parsed_rank != $match["Lowest_matched_rank"]) { $match["Warnings"] |= self::$flag_def['Partial']; } if (!$lowest_matched_rank || self::$rank[$match["Lowest_matched_rank"]] > self::$rank[$lowest_matched_rank]) { $lowest_matched_rank = $match["Lowest_matched_rank"]; } if ($parsed["authority"] && !($match["Warnings"] & self::$flag_def['Partial'])) { $match["Author_matched"] = $match["temp_authority"]; $match["Author_score"] = $match["auth_similarity"]; } else { $match["Author_matched"] = ''; $match["Author_score"] = ''; } #$match["Name_score"]=$match["match_score"]/$parsed_part; $match["Name_score"] = self::num_to_score($match["match_score"], $parsed_part, 2, 1); #$match["Overall_score"]=self::getOverallScore($match["match_score"], $match["Author_score"], $matched_part+$unmatched_part); $match["Overall_score"] = self::getOverallScore($match["Name_score"], $match["Author_score"], $extra_part); } } $source_order = 0; foreach ($this->db->source as $so) { $source_order++; $matched_by_source[$so] = array(); $sid = $this->db->source_id[$so]; foreach ($matched as &$match) { if (isset($match["Name_matched_id"])) { $nid = $match["Name_matched_id"]; if (array_key_exists($sid, $name_source_data[$nid])) { $match["Source"] = $so; $match["Source_order"] = $source_order; $match["Name_matched_rank"] = $all_name[$nid]->nameRank; $match["Name_matched_accepted_family"] = isset($classification_family[$nid]) ? $classification_family[$nid] : ''; $match["Name_matched_url"] = $name_source_data[$nid][$sid]['url']; $match["Name_matched_lsid"] = $name_source_data[$nid][$sid]['lsid']; $match["Taxonomic_status"] = isset($synonym[$nid][$sid]["acceptance"]) ? 
$synonym[$nid][$sid]["acceptance"] : 'No opinion'; $match["Accepted_name"] = ''; if (isset($synonym[$nid][$sid]["accepted_name_id"])) { $match["Accepted_name_id"] = $synonym[$nid][$sid]["accepted_name_id"]; $accepted_name = $all_name[$match["Accepted_name_id"]]; $match["Accepted_name"] = $accepted_name->scientific_name; $match["Accepted_name_author"] = $accepted_name->author; $match["Accepted_name_url"] = $name_source_data[$match["Accepted_name_id"]][$sid]['url']; $match["Accepted_name_lsid"] = $name_source_data[$match["Accepted_name_id"]][$sid]['lsid']; $match["Accepted_name_rank"] = $accepted_name->nameRank; $match["Accepted_family"] = isset($classification_family[$match["Accepted_name_id"]]) ? $classification_family[$match["Accepted_name_id"]] : ""; } $match["Accepted_species"] = ""; if ($match["Accepted_name"] && self::$rank[$match["Lowest_matched_rank"]] >= self::$rank["species"]) { if ($all_name[$match["Accepted_name_id"]]->isHybrid) { $match["Accepted_species"] = $all_name[$match["Accepted_name_id"]]->scientific_name; } else { $match["Accepted_species"] = $all_name[$match["Accepted_name_id"]]->genus . ' ' . 
$all_name[$match["Accepted_name_id"]]->specificEpithet; } } $matched_by_source[$so][] = $match; } } else { $matched_by_source[""] = $match; } } } $matched = array(); if (isset($matched_by_source[""])) { $matched[] = $matched_by_source[""]; } else { foreach ($this->db->source as $so) { foreach ($matched_by_source[$so] as &$match) { $matched[] = $match; } } } self::$sort_scheme = 'overall'; usort($matched, array($this, "cmpMatched")); $order = 1; foreach ($matched as &$match) { $match["Overall_score_order"] = $order++; } self::$sort_scheme = 'highertaxa'; usort($matched, array($this, "cmpMatched")); $order = 1; foreach ($matched as &$match) { $match["Highertaxa_score_order"] = $order++; } foreach ($matched as &$match) { if (isset($match['Name_matched_id']) && isset(self::$ambiguous[$match['Name_matched_id']])) { $match['Warnings'] |= self::$flag_def['Ambiguous']; } if ($match["Highertaxa_score_order"] > $match["Overall_score_order"]) { $match['Warnings'] |= self::$flag_def['HigherTaxa']; } elseif ($match["Highertaxa_score_order"] < $match["Overall_score_order"]) { $match['Warnings'] |= self::$flag_def['Overall']; } } foreach ($matched as &$match) { $result = array(); foreach (self::$field as $fd) { if (array_key_exists($fd, $match) && !is_null($match[$fd])) { $result[$fd] = $match[$fd]; } else { $result[$fd] = ''; } } $this->data[] = $result; } }