Ejemplo n.º 1
0
 /**
  * Function : process
  * Purpose: Parse a submitted scientific name and record exact and fuzzy matches for it.
  *
  * Processing steps, in order:
  *   (1) Reset the per-search state held on this object.
  *   (2) Clean the input: "+" and tabs become spaces, runs of spaces collapse, a leading
  *       non-alphabetic prefix is moved to this_start_string, one annotation
  *       (cf. / aff. / "vel sp. aff." / "?") is moved to this_status_string, and
  *       placeholders such as "indet.", "sp.", "unknown" are dropped.
  *   (3) Detect a leading family name ("...aceae", a fixed list of traditional family
  *       names, or the literal "fam"/"family") and, unless parse_only is set, record
  *       exact and near family matches.
  *   (4) Re-join a specific epithet written without its hyphen when checkSpecificEpithet
  *       reports the hyphenated form.
  *   (5) Normalise the letter case of the leading tokens and, unless chop_overload is set,
  *       split the text into genus, species, infraspecific ranks/epithets and authors.
  *   (6) Unless parse_only is set: try an exact scientific-name lookup and return as soon
  *       as it yields a match; otherwise cascade fuzzy matching
  *       genus -> species -> infra1 -> infra2, each level searched only below the
  *       matches of the level above.
  *
  * Remarks:
  *   - Matches are recorded through saveFamilyMatches / saveGenusMatches /
  *     saveSpeciesMatches / saveInfra1Matches / saveInfra2Matches; the return value only
  *     tells whether there was anything to process.
  *   - A trace of intermediate values is appended to $this->debug['process'].
  *   - Accepts "+" as input separator in place of space (e.g. "Homo+sapiens"), e.g. for
  *     calling via a HTTP GET request as needed.
  *
  * @param string $searchtxt : genus, genus+species, or genus+species+authority
  * @param string $search_mode : normal (default) / rapid / no_shaping; forwarded to the
  *                              candidate queries and made part of the cache key
  * @param boolean $cache : when true (and cache_flag is enabled) the matching stage is
  *                         skipped if a cache entry already exists for this name
  * @return boolean false when the input is empty after whitespace clean-up, true otherwise
  */
 public function process($searchtxt, $search_mode = 'normal', $cache = false)
 {
     $this->input = $searchtxt;
     $this->search_mode = $search_mode;
     $this->searchtxt = $searchtxt;
     $this->debug['process'][] = "1 (searchtxt:{$searchtxt}) (search_mode:{$search_mode})";

     // Reset everything a previous search may have left behind.
     $this->this_search_family = '';
     $this->this_search_genus = '';
     $this->this_search_species = '';
     $this->this_authority = '';
     $this->this_authorities = array();
     $this->this_search_infra1 = '';
     $this->this_search_infra2 = '';
     $this->this_search_rank1 = '';
     $this->this_search_rank2 = '';
     $this->this_start_string = '';
     $this->this_cleaned_txt = '';
     $this->this_preprocessed_txt = '';
     $this->this_family_string = '';
     $this->this_family_unmatched = '';
     $this->this_status_string = '';

     // Local copies of the parsed parts. They are filled by the splitter below and must
     // exist even when chop_overload skips that step, because the cache key, the debug
     // trace and the fuzzy matching all read them.
     $this_search_genus = '';
     $this_search_species = '';
     $this_authority = '';

     $text_str = $searchtxt;
     // accept "+" as separator if supplied, transform to space
     if (strpos($text_str, '+') !== false) {
         $text_str = str_replace('+', ' ', $text_str);
     }
     // Only tabs are treated as extra separators; a wider list
     // (%, <, {, }, &, _) is disabled in the original code.
     $replace = array("\t");
     $text_str = str_replace($replace, ' ', $text_str);
     if (strpos($text_str, '  ') !== false) {
         $text_str = preg_replace("/ {2,}/", ' ', $text_str);
     }
     $text_str = trim($text_str);
     $this->debug['process'][] = "1a (text_str:{$text_str})";
     if ($text_str === '') {
         $this->debug['process'][] = "2 Return(false)";
         return false;
     }

     // Split off anything in front of the first letter (numbers, brackets, marks ...).
     if (preg_match('/^[^[:alpha:]]+/u', $text_str, $start_matches)) {
         // Cut the prefix by length: str_replace() would also delete every later
         // occurrence of the same characters inside the name (e.g. a hyphen).
         $text_str = (string) substr($text_str, strlen($start_matches[0]));
         $this->this_start_string = $start_matches[0];
     }

     // Move one open-nomenclature annotation (cf., aff., "vel sp. aff.", "?") aside.
     if (preg_match("/(?:(?:\\s|^)(?:\\-?cf\\.?|vel\\.? sp\\.? aff\\.?|\\-?aff\\.?)(?:\\s|\$))|(?:\\?+)/i", $text_str, $anno_matches)) {
         $text_str = trim(str_replace($anno_matches[0], ' ', $text_str));
         $this->this_status_string = trim($anno_matches[0]);
     }

     // Close up spaces around hyphens so hyphenated epithets stay one token.
     $text_str = str_replace(' -', '-', $text_str);
     $text_str = str_replace('- ', '-', $text_str);
     $this->this_preprocessed_txt = $text_str;

     // Drop "undetermined"-style placeholders (indet., sp., sp. nov., unknown ...).
     $text_str = preg_replace("/(?<=\\s|^)(?:\\S*[^[:alpha:][:space:]])?(indeterminad[ao]|undetermined|unknown|indet\\.?|sp\\.?\\s+nov\\.?|sp\\.?)(?:[^[:alpha:][:space:]]\\S*)?(?=\\s|\$)/i", ' ', $text_str);
     if (strpos($text_str, '  ') !== false) {
         $text_str = preg_replace("/ {2,}/", ' ', $text_str);
     }
     $text_str = trim($text_str);

     // Leading family name: group 1 = whole token, group 2 = the family word,
     // group 3 = any non-alphabetic tail glued to it.
     if (preg_match('/^(((?:[[:alpha:]]+aceae)|Cruciferae|Guttiferae|Umbelliferae|Compositae|Leguminosae|Palmae|Labiatae|Gramineae|Mimosoideae|Papilionoideae|Caesalpinioideae|fam(?:ily)?)((?:[^[:alpha:][:space:]]\\S*)?))(?=\\s+|$)/i', $text_str, $fam_matches)) {
         $text_str = trim(str_replace($fam_matches[0], '', $text_str));
         $this->this_family_string = $fam_matches[1];
         // Store the family capitalised: first letter upper case, rest lower case.
         $this->this_search_family = mb_strtoupper(mb_substr($fam_matches[2], 0, 1)) . mb_strtolower(mb_substr($fam_matches[2], 1));
         $this->this_family_unmatched = $fam_matches[3];
         // A bare "fam"/"family" is a placeholder, not a name: report it as unmatched text.
         if (preg_match("/^fam(ily)?\$/i", $this->this_search_family)) {
             $this->this_family_unmatched = $this->this_search_family . $this->this_family_unmatched;
             $this->this_search_family = '';
         }
         if (!$this->parse_only && $this->this_search_family) {
             // Exact family hits (edit distance 0, phonetic flag 'Y').
             $searchFamilyName = $this->db->searchFamilyName($this->this_search_family);
             if (isset($searchFamilyName)) {
                 foreach ($searchFamilyName as $returnedFamilyName) {
                     $this->saveFamilyMatches($returnedFamilyName->nameID, $this->this_search_family, 0, 'Y');
                 }
             }
             // Near family hits: candidates from the database, then the family matcher.
             $nm = new NearMatch();
             $this_near_match_family = $nm->near_match($this->this_search_family);
             $this_family_start = substr($this->this_search_family, 0, 3);
             $this_family_length = strlen($this->this_search_family);
             $family_res = $this->db->family_cur($this->search_mode, $this_near_match_family, $this_family_length, $this_family_start);
             // !empty() instead of count(): a null result must not raise a TypeError.
             if (!empty($family_res)) {
                 foreach ($family_res as $drec) {
                     $family_match = $this->match_family($this->this_search_family, $drec->search_family_name);
                     if ($family_match['match']) {
                         $phonetic_flag = $family_match['phonetic_match'] ? 'Y' : null;
                         $this->saveFamilyMatches($drec->family_id, $drec->family, $family_match['edit_distance'], $phonetic_flag);
                     }
                 }
             }
         }
     }

     // Interactive "quit" words are accepted without any further processing.
     if (in_array($text_str, array('exit', 'end', 'q', '.'), true)) {
         return true;
     }

     // Unhyphenated trinomial: "Genus aaa bbb" (or "aaa.bbb") where "aaa-bbb" is really
     // one hyphenated specific epithet.
     if (preg_match('/^([[:alpha:]]+) ([[:alpha:]]+)[\\.\\s]([[:alpha:]]+)(.*)/', $text_str, $matches)) {
         $split_epithet = "{$matches[2]} {$matches[3]}";
         $specific_epithet_str = "{$matches[2]}-{$matches[3]}";
         $check_res = $this->db->checkSpecificEpithet(array($specific_epithet_str));
         foreach ($check_res as $ck) {
             if ($ck->count > 0 && mb_strtolower($ck->specificEpithet) == mb_strtolower($specific_epithet_str)) {
                 $text_str = str_ireplace_first($split_epithet, $specific_epithet_str, $text_str);
                 $this->this_preprocessed_txt = str_ireplace_first($split_epithet, $specific_epithet_str, $this->this_preprocessed_txt);
             }
         }
     }
     $this->this_cleaned_txt = $text_str;

     // Case normalisation: capitalise the first token, lower-case following ALL-CAPS
     // tokens, and stop at the first purely alphabetic token that is not all upper case.
     // Tokens containing other characters are skipped without stopping the scan.
     $token = explode(" ", $text_str);
     for ($i = 0; $i < count($token); $i++) {
         if (preg_match('/^[[:alpha:]]+\\.?$/u', $token[$i])) {
             if ($i == 0) {
                 $token[$i] = mb_strtoupper(mb_substr($token[$i], 0, 1)) . mb_strtolower(mb_substr($token[$i], 1));
             } elseif (mb_strtoupper($token[$i]) == $token[$i]) {
                 $token[$i] = mb_strtolower($token[$i]);
             } else {
                 break;
             }
         }
     }
     $text_str = implode(" ", $token);

     // Normalizing the search text
     $n = new Normalize($this->db);
     $this->debug['process'][] = "3 (text_str:{$text_str})";
     if (!$this->chop_overload) {
         // Split into presumed genus + species + infraspecies + authors.
         $splitter = new Splitter($n, $text_str);
         $this->this_search_genus = $this_search_genus = $splitter->get('genus');
         $this->this_search_species = $this_search_species = $splitter->get('species');
         $this->this_authorities = $splitter->get('authors');
         // The last author entry is used as "the" authority.
         $this->this_authority = $this_authority = end($this->this_authorities);
         // Placeholder words are not names. Only the object fields are cleared here;
         // the local copies used for fuzzy matching keep the splitter output.
         if (preg_match("/^gen(us)?\$/i", $this->this_search_genus)) {
             $this->this_search_genus = '';
         }
         if (preg_match("/^sp(p|ecies)?\$/i", $this->this_search_species)) {
             $this->this_search_species = '';
         }
         // Each infraspecies entry is a pair [rank, epithet]; rank 'n/a' means "not given".
         $infraspecies = $splitter->get('infraspecies');
         if (isset($infraspecies)) {
             if (isset($infraspecies[0])) {
                 if ($infraspecies[0][0] != 'n/a') {
                     $this->this_search_rank1 = $infraspecies[0][0];
                 }
                 $this->this_search_infra1 = $infraspecies[0][1];
             }
             if (isset($infraspecies[1])) {
                 if ($infraspecies[1][0] != 'n/a') {
                     $this->this_search_rank2 = $infraspecies[1][0];
                 }
                 $this->this_search_infra2 = $infraspecies[1][1];
             }
         }
         if (NAME_PARSER == 'gni') {
             $this->gni_parser_result = $splitter->parsed_response;
         }
     }
     if ($this->parse_only) {
         return true;
     }

     // cache_flag switch determines if caching is allowed for the source
     if ($this->cache_flag == true) {
         // Defined up front so they exist even when no genus was parsed.
         // NOTE(review): in that case the calls below still receive null, exactly as
         // before; confirm whether cache set-up should be skipped instead.
         $cache_key = null;
         $cache_path = null;
         if ($this_search_genus != '' && $this_search_species != '' && $this_authority != '') {
             $cache_key = $this_search_genus . '-' . $this_search_species . '-' . $this_authority . '_' . $search_mode;
             $cache_path = $this->cache_path . $this->db->source . "/authority/";
         } elseif ($this_search_genus != '' && $this_search_species != '') {
             $cache_key = $this_search_genus . '-' . $this_search_species . '_' . $search_mode;
             $cache_path = $this->cache_path . $this->db->source . "/species/";
         } elseif ($this_search_genus != '') {
             $cache_key = $this_search_genus . '_' . $search_mode;
             $cache_path = $this->cache_path . $this->db->source . "/genus/";
         }
         $this->mkdir_recursive($cache_path);
         $this->_cache = new Cache($cache_path);
         $this->_cache->setKey($cache_key);
     }
     $cache_loop_flag = false;
     if ($cache == true && $this->cache_flag == true) {
         if ($this->_cache->cache_exists()) {
             $cache_loop_flag = true;
         }
     }

     if (!$cache_loop_flag) {
         // Rebuild the name from its parsed parts for the exact lookup.
         $search_str = $this->this_search_genus;
         if ($this->this_search_species) {
             $search_str .= ' ' . $this_search_species;
         }
         if ($this->this_search_infra1) {
             if ($this->this_search_rank1 != '') {
                 $search_str .= ' ' . $this->this_search_rank1;
             }
             $search_str .= ' ' . $this->this_search_infra1;
         }
         if ($this->this_search_infra2) {
             if ($this->this_search_rank2 != '') {
                 $search_str .= ' ' . $this->this_search_rank2;
             }
             $search_str .= ' ' . $this->this_search_infra2;
         }

         // Exact lookup on both the cleaned text and the rebuilt name. Any hit is stored
         // with edit distance 0 and ends processing.
         $searchScientificName = $this->db->searchScientificName(array($text_str, $search_str));
         if (isset($searchScientificName)) {
             $has_match = 0;
             foreach ($searchScientificName as $returnedScientificName) {
                 if ($returnedScientificName->specificEpithet != '') {
                     $has_match = 1;
                     if ($returnedScientificName->nameRank != 'species' && $returnedScientificName->nameRank != 'nothospecies') {
                         if ($returnedScientificName->infraspecificEpithet2 || $this->this_search_infra2) {
                             $this->saveInfra2Matches($returnedScientificName->nameID, $returnedScientificName->scientificName, 0, 0, 0, 0, 0, 'Y');
                         } elseif ($returnedScientificName->infraspecificEpithet || $this->this_search_infra1) {
                             $this->saveInfra1Matches($returnedScientificName->nameID, $returnedScientificName->scientificName, 0, 0, 0, 0, 'Y');
                         }
                     } else {
                         $this->saveSpeciesMatches($returnedScientificName->nameID, $returnedScientificName->scientificName, 0, 0, 0, 'Y');
                     }
                 } elseif ($returnedScientificName->genus != '') {
                     $has_match = 1;
                     $this->saveGenusMatches($returnedScientificName->nameID, $returnedScientificName->genus, 0, 'Y');
                 }
             }
             if ($has_match) {
                 return true;
             }
         }

         $this->debug['process'][] = "3a (this_search_genus:{$this_search_genus}) (this_search_species:{$this_search_species}) (this_authority:{$this_authority})";
         $nm = new NearMatch();
         $this_near_match_genus = $nm->near_match($this_search_genus);
         $this_near_match_species = '';
         $this->debug['process'][] = "3b (this_near_match_genus:{$this_near_match_genus})";
         //TODO refactor inside of a method
         $this_genus_start = substr($this_search_genus, 0, 3);
         $this_genus_end = substr($this_search_genus, -3);
         $this_genus_length = strlen($this_search_genus);
         //TODO_END
         $this->debug['process'][] = "3c (this_search_genus,{$this_search_genus}) (this_genus_start:{$this_genus_start}) (this_genus_end:{$this_genus_end}) (this_genus_length:{$this_genus_length})";
         if ($this_search_species != '') {
             $this_near_match_species = $nm->near_match($this_search_species, 'epithet_only');
             $this_species_length = strlen($this_search_species);
             $this->debug['process'][] = "4 (this_search_species:{$this_search_species}) (this_near_match_species:{$this_near_match_species}) (this_species_length:{$this_species_length})";
         }

         // Genus level: fetch pre-filtered candidates, keep those the genus matcher
         // accepts. $genus_matches is keyed by genus_id so each genus is saved once.
         $genus_res = $this->db->genus_cur3($this->search_mode, $this_near_match_genus, $this_near_match_species, $this_genus_length, $this_genus_start, $this_genus_end);
         $genus_matches = array();
         // !empty() instead of count(): a null result must not raise a TypeError.
         if (!empty($genus_res)) {
             foreach ($genus_res as $drec) {
                 $genus_match = $this->match_genera($this_search_genus, $drec->search_genus_name);
                 if ($genus_match['match']) {
                     // don't include a genus already in the array
                     if (!array_key_exists($drec->genus_id, $genus_matches)) {
                         $phonetic_flag = $genus_match['phonetic_match'] ? 'Y' : null;
                         $this->saveGenusMatches($drec->genus_id, $drec->genus, $genus_match['edit_distance'], $phonetic_flag);
                         $this->genera_tested++;
                     }
                     $genus_matches[$drec->genus_id] = $genus_match;
                 }
             }
         }

         // Species level: only species below a matched genus are fetched.
         $species_matches = array();
         if ($this_search_species != '' && count($genus_matches)) {
             $species_res = $this->db->species_cur_in2(array_keys($genus_matches), $this_species_length);
             if (isset($species_res)) {
                 foreach ($species_res as $drec) {
                     $species_epithets_match = $this->match_species_epithets($this_search_species, $drec->search_species_name);
                     $genus_match = $genus_matches[$drec->genus_id];
                     $binomials_match = $this->match_matches(array($genus_match, $species_epithets_match));
                     if ($binomials_match['match']) {
                         if (!array_key_exists($drec->species_id, $species_matches)) {
                             $binomial_phonetic_flag = $binomials_match['phonetic_match'] ? 'Y' : null;
                             $this->saveSpeciesMatches($drec->species_id, $drec->genus_species, $genus_match['edit_distance'], $species_epithets_match['edit_distance'], $binomials_match['edit_distance'], $binomial_phonetic_flag);
                             $this->species_tested++;
                         }
                         // Keep the parent match so lower ranks can reach it.
                         $species_epithets_match['genus_match'] = $genus_match;
                         $species_matches[$drec->species_id] = $species_epithets_match;
                     }
                 }
             }
         }

         // First infraspecific level: only below matched species.
         $infra1_matches = array();
         if ($this->this_search_infra1 != '' && count($species_matches)) {
             $this_infra1 = $this->this_search_infra1;
             $this_infra1_length = strlen($this_infra1);
             $infra1_res = $this->db->infra1_cur_in(array_keys($species_matches), $this_infra1_length);
             if (isset($infra1_res)) {
                 foreach ($infra1_res as $drec) {
                     $infra1_match = $this->match_species_epithets($this_infra1, $drec->search_infra1_name);
                     $species_match = $species_matches[$drec->species_id];
                     $genus_match = $species_match["genus_match"];
                     $binomials_match = $this->match_matches(array($genus_match, $species_match, $infra1_match));
                     if ($binomials_match['match']) {
                         if (!array_key_exists($drec->infra1_id, $infra1_matches)) {
                             $binomial_phonetic_flag = $binomials_match['phonetic_match'] ? 'Y' : null;
                             $this->saveInfra1Matches($drec->infra1_id, $drec->species_infra1, $genus_match['edit_distance'], $species_match['edit_distance'], $infra1_match['edit_distance'], $binomials_match['edit_distance'], $binomial_phonetic_flag);
                         }
                         $infra1_match["species_match"] = $species_match;
                         $infra1_matches[$drec->infra1_id] = $infra1_match;
                     }
                 }
             }
         }

         // Second infraspecific level: only when the first level produced matches.
         $infra2_matches = array();
         if ($this->this_search_infra2 != '' && count($infra1_matches)) {
             $this_infra2 = $this->this_search_infra2;
             $this_infra2_length = strlen($this_infra2);
             // NOTE(review): the two levels above pass the ids of their direct parent
             // level; here species ids are passed rather than infra1 ids - confirm
             // against infra2_cur_in().
             $infra2_res = $this->db->infra2_cur_in(array_keys($species_matches), $this_infra2_length);
             if (isset($infra2_res)) {
                 foreach ($infra2_res as $drec) {
                     // Candidates are fetched by species id, so a row may sit below an
                     // infra1 name that did not match; without its parent match there
                     // is nothing to score against.
                     if (!isset($infra1_matches[$drec->infra1_id])) {
                         continue;
                     }
                     $infra2_match = $this->match_species_epithets($this_infra2, $drec->search_infra2_name);
                     $infra1_match = $infra1_matches[$drec->infra1_id];
                     $species_match = $infra1_match['species_match'];
                     $genus_match = $species_match["genus_match"];
                     $binomials_match = $this->match_matches(array($genus_match, $species_match, $infra1_match, $infra2_match));
                     if ($binomials_match['match']) {
                         if (!array_key_exists($drec->infra2_id, $infra2_matches)) {
                             $binomial_phonetic_flag = $binomials_match['phonetic_match'] ? 'Y' : null;
                             // NOTE(review): this stores infra1_id / species_infra1 and
                             // passes the infra2 distance before the infra1 distance,
                             // while de-duplication is keyed on infra2_id - looks like
                             // a copy-paste slip; verify against saveInfra2Matches().
                             $this->saveInfra2Matches($drec->infra1_id, $drec->species_infra1, $genus_match['edit_distance'], $species_match['edit_distance'], $infra2_match['edit_distance'], $infra1_match['edit_distance'], $binomials_match['edit_distance'], $binomial_phonetic_flag);
                         }
                         $infra2_match["infra1_match"] = $infra1_match;
                         $infra2_matches[$drec->infra2_id] = $infra2_match;
                     }
                 }
             }
         }
     }
     // End Cache Loop Flag
     return true;
 }
Ejemplo n.º 2
0
 public function aggregate($taxamatch = null)
 {
     $taxamatch_result = $taxamatch->getData();
     $parsed = array();
     $parsed["family"] = $taxamatch->this_search_family;
     $parsed["genus"] = $taxamatch->this_search_genus;
     $parsed["species"] = $taxamatch->this_search_species;
     $parsed["infra1"] = $taxamatch->this_search_infra1;
     $parsed["infra2"] = $taxamatch->this_search_infra2;
     $parsed["authority"] = $taxamatch->this_authority;
     $parsed["authorities"] = $taxamatch->this_authorities;
     $parsed["rank1"] = $taxamatch->this_search_rank1;
     $parsed["rank2"] = $taxamatch->this_search_rank2;
     $start_string = $taxamatch->this_start_string;
     $family_string = $taxamatch->this_family_string;
     $family_unmatched = $taxamatch->this_family_unmatched;
     $status_string = $taxamatch->this_status_string;
     $cleaned_txt = $taxamatch->this_cleaned_txt;
     $preprocessed_txt = $taxamatch->this_preprocessed_txt;
     $scientificname = $taxamatch->searchtxt;
     if ($taxamatch->parse_only) {
         $unparsed = $preprocessed_txt;
         foreach (array_keys($parsed) as $pk) {
             if ($pk == 'authority') {
                 continue;
             } elseif ($pk == 'authorities') {
                 foreach ($parsed[$pk] as $auth) {
                     if ($auth) {
                         $unparsed = str_ireplace_first($auth, '', $unparsed);
                     }
                 }
             } elseif ($parsed[$pk]) {
                 $unparsed = str_ireplace_first($parsed[$pk], '', $unparsed);
                 if ($parsed[$pk] == 'f.' && ($pk == "rank1" || $pk == "rank2")) {
                     $unparsed = str_ireplace_first(self::$forma, '', $unparsed);
                 }
             }
         }
         if (isset($start_string)) {
             $unparsed = $start_string . $unparsed;
         }
         $unparsed = trim($unparsed);
         $parsed_rank1 = array_key_exists($parsed["rank1"], self::$standard_rank) ? self::$standard_rank[$parsed["rank1"]] : $parsed["rank1"];
         $parsed_rank2 = array_key_exists($parsed["rank2"], self::$standard_rank) ? self::$standard_rank[$parsed["rank2"]] : $parsed["rank2"];
         $this->data[] = array('Name_submitted' => $scientificname, 'Family' => $parsed["family"], 'Genus' => $parsed["genus"], 'Specific_epithet' => $parsed["species"], 'Infraspecific_rank' => $parsed_rank1, 'Infraspecific_epithet' => $parsed["infra1"], 'Infraspecific_rank_2' => $parsed_rank2, 'Infraspecific_epithet_2' => $parsed["infra2"], 'Author' => $parsed["authority"], 'Annotations' => $status_string, 'Unmatched_terms' => $unparsed);
         return true;
     }
     $lowest_parsed_rank = '';
     $lowest_matched_rank = '';
     foreach (array_keys(self::$rank) as $rk) {
         if ($parsed[$rk]) {
             $lowest_parsed_rank = $rk;
         }
     }
     $gni_parser_result = null;
     if (isset($taxamatch_result["gni_parser_result"])) {
         $gni_parser_result = $taxamatch_result["gni_parser_result"];
     }
     $all_id = array();
     $all_name = array();
     $matched = array();
     $matched_by_source = array();
     $synonym = array();
     $name_source_data = array();
     $classification_family = array();
     $phonetic_id = array();
     $alt_accepted_family = array();
     $family_matched = array();
     $genus_matched = array();
     $species_matched = array();
     $infra1_matched = array();
     $infra2_matched = array();
     $all_matched = array();
     $best_ed = array();
     $higher_taxa_ed = array();
     foreach (array_keys(self::$rank) as $rk) {
         $all_matched[$rk] = array();
         if (isset($taxamatch_result[$rk]) && count($taxamatch_result[$rk])) {
             foreach (array_keys(self::$match_type) as $type) {
                 if (isset($taxamatch_result[$rk][$type])) {
                     if (!isset($best_ed[$rk]) || $best_ed[$rk] > self::$match_type[$type]) {
                         $best_ed[$rk] = self::$match_type[$type];
                     }
                     foreach ($taxamatch_result[$rk][$type] as &$match) {
                         $match["match_type"] = $type;
                         $match["match_score"] = 0;
                         $match["Lowest_matched_rank"] = $rk;
                         $match["Name_matched_id"] = $match[$rk . "_id"];
                         $all_id[] = $match["Name_matched_id"];
                         if ($type == 'exact' || $type == 'phonetic') {
                             $phonetic_id[$match["Name_matched_id"]] = 1;
                         }
                     }
                     $all_matched[$rk] = array_merge($all_matched[$rk], $taxamatch_result[$rk][$type]);
                 }
             }
         }
     }
     if (count($all_id)) {
         $name_res = $this->db->getScientificName($all_id);
         $accepted_name_id = array();
         foreach ($name_res as $nm) {
             $all_name[$nm->nameID] = $nm;
             $name_source_data[$nm->nameID][$nm->sourceID]['url'] = $nm->name_source_url;
             $name_source_data[$nm->nameID][$nm->sourceID]['lsid'] = $nm->lsid;
         }
         $miss_accepted_name_id = array();
         $synonym_res = $this->db->getSynonym($all_id);
         foreach ($synonym_res as $sn) {
             $nid = $sn->nameID;
             $sid = $sn->sourceID;
             if ($sn->acceptance == 'Accepted') {
                 $synonym[$nid][$sid]["accepted_name_id"] = $nid;
                 $synonym[$nid][$sid]["acceptance"] = $sn->acceptance;
                 //} elseif ($sn->acceptance == 'Synonym' || $sn->acceptance == 'Invalid' || $sn->acceptance == 'Illegitimate') {
             } elseif (!is_null($sn->acceptance)) {
                 if ($sn->accepted_name_id) {
                     $accepted_name_id[] = $sn->accepted_name_id;
                     $synonym[$nid][$sid]["accepted_name_id"] = $sn->accepted_name_id;
                 }
                 $synonym[$nid][$sid]["acceptance"] = $sn->acceptance;
             }
         }
         //print_r($synonym);
         if (count($accepted_name_id)) {
             $name_res = $this->db->getScientificName($accepted_name_id);
             foreach ($name_res as $nm) {
                 $all_name[$nm->nameID] = $nm;
                 $name_source_data[$nm->nameID][$nm->sourceID]['url'] = $nm->name_source_url;
                 $name_source_data[$nm->nameID][$nm->sourceID]['lsid'] = $nm->lsid;
             }
         }
         //print_r($all_name);
         $classificationfamily_res = $this->db->getClassificationFamily(array_keys($all_name));
         $cl_fam = array();
         foreach ($classificationfamily_res as $cf) {
             $cl_fam[$cf->nameID][$cf->sourceID] = $cf->family;
         }
         $source_id = array();
         if (isset($this->db->classification_id)) {
             $source_id[] = $this->db->classification_id;
         } else {
             $source_id = array_keys($this->db->source_name);
         }
         foreach (array_keys($cl_fam) as $nid) {
             foreach ($source_id as $sid) {
                 if (isset($cl_fam[$nid][$sid])) {
                     $classification_family[$nid] = $cl_fam[$nid][$sid];
                     break;
                 }
             }
         }
     }
     //print_r($name_source_data);
     //print_r($classification_family);
     if (count($all_matched['family'])) {
         $fm = array();
         foreach ($all_matched['family'] as $fam) {
             $fm[] = $fam['family'];
         }
         $fam_res = $this->db->getFamilyAcceptedFamily($fm);
         foreach ($fam_res as $fam) {
             $alt_accepted_family[$fam->family] = $fam->accepted_family;
         }
     }
     foreach (array_keys(self::$rank) as $rk) {
         foreach ($all_matched[$rk] as &$match) {
             if (self::$rank[$rk] > self::$rank['family']) {
                 $match["Family_matched"] = "";
                 $match["family_ed"] = 0;
                 if ($parsed["family"]) {
                     $match["family_ed"] = strlen($parsed["family"]);
                     $accepted_family = isset($classification_family[$match["Name_matched_id"]]) ? $classification_family[$match["Name_matched_id"]] : '';
                     $genus = $all_name[$match["Name_matched_id"]]->genus;
                     $family_filtered = $this->filterFamily($all_matched["family"], $accepted_family, $genus, $alt_accepted_family);
                     if (count($family_filtered)) {
                         $match["family_ed"] = $family_filtered[0]["family_ed"];
                         $match["Family_matched"] = $family_filtered[0]["family"];
                         $match["Family_score"] = self::getEDScore($match["family_ed"], $match["Family_matched"], $parsed["family"]);
                         $match["match_score"] += $match["Family_score"];
                     }
                 }
             }
         }
     }
     if (count($all_matched["infra2"])) {
         foreach ($all_matched["infra2"] as &$match) {
             if (isset($higher_taxa_ed["infra2"]) && $higher_taxa_ed["infra2"] <= $match["infra2_ed"]) {
                 continue;
             }
             $name = $all_name[$match["Name_matched_id"]];
             $match["Genus_matched"] = $name->genus;
             $match["Specific_epithet_matched"] = $name->specificEpithet;
             $match["Infraspecific_rank"] = $name->rankIndicator;
             $match["Infraspecific_epithet_matched"] = $name->infraspecificEpithet;
             $match["Infraspecific_rank_2"] = $name->infraspecificRank2;
             $match["Infraspecific_epithet_2_matched"] = $name->infraspecificEpithet2;
             $match["Genus_score"] = self::getEDScore($match["genus_ed"], $match["Genus_matched"], $parsed["genus"]);
             $match["Specific_epithet_score"] = self::getEDScore($match["species_ed"], $match["Specific_epithet_matched"], $parsed["species"]);
             $match["Infraspecific_epithet_score"] = self::getEDScore($match["infra1_ed"], $match["Infraspecific_epithet_matched"], $parsed["infra1"]);
             $match["Infraspecific_epithet_2_score"] = self::getEDScore($match["infra2_ed"], $match["Infraspecific_epithet_2_matched"], $parsed["infra2"]);
             $match["Name_matched"] = $match["genus_species_infra1_infra2"];
             if (!isset($higher_taxa_ed["infra1"]) || $higher_taxa_ed["infra1"] > $match["infra1_ed"]) {
                 $higher_taxa_ed["infra1"] = $match["infra1_ed"];
             }
             if (!isset($higher_taxa_ed["species"]) || $higher_taxa_ed["species"] > $match["species_ed"]) {
                 $higher_taxa_ed["species"] = $match["species_ed"];
             }
             if (!isset($higher_taxa_ed["genus"]) || $higher_taxa_ed["genus"] > $match["genus_ed"]) {
                 $higher_taxa_ed["genus"] = $match["genus_ed"];
             }
             if (isset($match["family_ed"]) && (!isset($higher_taxa_ed["family"]) || $higher_taxa_ed["family"] > $match["family_ed"])) {
                 $higher_taxa_ed["family"] = $match["family_ed"];
             }
             if (self::cmpRank($match["Infraspecific_rank"], $parsed["rank1"]) !== true) {
                 $match["Infraspecific_epithet_score"] -= self::$rank_penalty;
             }
             if (self::cmpRank($match["Infraspecific_rank_2"], $parsed["rank2"]) !== true) {
                 $match["Infraspecific_epithet_2_score"] -= self::$rank_penalty;
             }
             $match["match_score"] += $match["Specific_epithet_score"] + $match["Genus_score"] + $match["Infraspecific_epithet_score"] + $match["Infraspecific_epithet_2_score"];
             $matched[] = $match;
         }
     }
     if (count($all_matched["infra1"]) && (!isset($higher_taxa_ed["infra1"]) || isset($best_ed["infra1"]) && $higher_taxa_ed["infra1"] > $best_ed["infra1"])) {
         foreach ($all_matched["infra1"] as &$match) {
             if (isset($higher_taxa_ed["infra1"]) && $higher_taxa_ed["infra1"] <= $match["infra1_ed"]) {
                 continue;
             }
             $name = $all_name[$match["Name_matched_id"]];
             $match["Genus_matched"] = $name->genus;
             $match["Specific_epithet_matched"] = $name->specificEpithet;
             $match["Infraspecific_rank"] = $name->rankIndicator;
             $match["Infraspecific_epithet_matched"] = $name->infraspecificEpithet;
             $match["Genus_score"] = self::getEDScore($match["genus_ed"], $match["Genus_matched"], $parsed["genus"]);
             $match["Specific_epithet_score"] = self::getEDScore($match["species_ed"], $match["Specific_epithet_matched"], $parsed["species"]);
             $match["Infraspecific_epithet_score"] = self::getEDScore($match["infra1_ed"], $match["Infraspecific_epithet_matched"], $parsed["infra1"]);
             $match["Name_matched"] = $match["genus_species_infra1"];
             if (!isset($higher_taxa_ed["species"]) || $higher_taxa_ed["species"] > $match["species_ed"]) {
                 $higher_taxa_ed["species"] = $match["species_ed"];
             }
             if (!isset($higher_taxa_ed["genus"]) || $higher_taxa_ed["genus"] > $match["genus_ed"]) {
                 $higher_taxa_ed["genus"] = $match["genus_ed"];
             }
             if (isset($match["family_ed"]) && (!isset($higher_taxa_ed["family"]) || $higher_taxa_ed["family"] > $match["family_ed"])) {
                 $higher_taxa_ed["family"] = $match["family_ed"];
             }
             if (self::cmpRank($match["Infraspecific_rank"], $parsed["rank1"]) !== true) {
                 $match["Infraspecific_epithet_score"] -= self::$rank_penalty;
             }
             $match["match_score"] += $match["Specific_epithet_score"] + $match["Genus_score"] + $match["Infraspecific_epithet_score"];
             $matched[] = $match;
         }
     }
     if (count($all_matched["species"]) && (!isset($higher_taxa_ed["species"]) || isset($best_ed["species"]) && $higher_taxa_ed["species"] > $best_ed["species"])) {
         foreach ($all_matched["species"] as &$match) {
             if (isset($higher_taxa_ed["species"]) && $higher_taxa_ed["species"] <= $match["species_ed"]) {
                 continue;
             }
             $name = $all_name[$match["Name_matched_id"]];
             $match["Genus_matched"] = $name->genus;
             $match["Specific_epithet_matched"] = $name->specificEpithet;
             $match["Genus_score"] = self::getEDScore($match["genus_ed"], $match["Genus_matched"], $parsed["genus"]);
             $match["Specific_epithet_score"] = self::getEDScore($match["species_ed"], $match["Specific_epithet_matched"], $parsed["species"]);
             $match["Name_matched"] = $match["genus_species"];
             if (!isset($higher_taxa_ed["genus"]) || $higher_taxa_ed["genus"] > $match["genus_ed"]) {
                 $higher_taxa_ed["genus"] = $match["genus_ed"];
             }
             if (isset($match["family_ed"]) && (!isset($higher_taxa_ed["family"]) || $higher_taxa_ed["family"] > $match["family_ed"])) {
                 $higher_taxa_ed["family"] = $match["family_ed"];
             }
             $match["match_score"] += $match["Genus_score"] + $match["Specific_epithet_score"];
             $matched[] = $match;
         }
     }
     if (count($all_matched["genus"]) && (!isset($higher_taxa_ed["genus"]) || isset($best_ed["genus"]) && $higher_taxa_ed["genus"] > $best_ed["genus"])) {
         foreach ($all_matched["genus"] as &$match) {
             if (isset($higher_taxa_ed["genus"]) && $higher_taxa_ed["genus"] <= $match["genus_ed"]) {
                 continue;
             }
             $name = $all_name[$match["Name_matched_id"]];
             $match["Genus_matched"] = $name->genus;
             $match["Specific_epithet_matched"] = '';
             $match["Genus_score"] = self::getEDScore($match["genus_ed"], $match["Genus_matched"], $parsed["genus"]);
             $match["Specific_epithet_score"] = '';
             $match["Name_matched"] = $match["Genus_matched"];
             if (isset($match["family_ed"]) && (!isset($higher_taxa_ed["family"]) || $higher_taxa_ed["family"] > $match["family_ed"])) {
                 $higher_taxa_ed["family"] = $match["family_ed"];
             }
             $match["match_score"] += $match["Genus_score"];
             $matched[] = $match;
         }
     }
     if (count($all_matched["family"]) && (!isset($higher_taxa_ed["family"]) || isset($best_ed["family"]) && $higher_taxa_ed["family"] > $best_ed["family"])) {
         foreach ($all_matched["family"] as &$match) {
             if (isset($higher_taxa_ed["family"]) && $higher_taxa_ed["family"] <= $match["family_ed"]) {
                 continue;
             }
             $match["Family_matched"] = $match["family"];
             $match["Specific_epithet_matched"] = '';
             $match["Genus_matched"] = '';
             $match["Family_score"] = self::getEDScore($match["family_ed"], $match["Family_matched"], $parsed["family"]);
             $match["Genus_score"] = '';
             $match["Specific_epithet_score"] = '';
             $match["Name_matched"] = $match["Family_matched"];
             $match["match_score"] += $match["Family_score"];
             $matched[] = $match;
         }
     }
     if (!count($matched)) {
         $matched[] = array("Lowest_matched_rank" => "", "Name_score" => "", "Family_matched" => "", "Author_score" => "", "Name_matched_accepted_family" => "", "Warnings" => 0);
     }
     $status = $status_string;
     if ($status) {
         if (preg_match("/vel\\.? sp\\.? aff\\.?/i", $status)) {
             $status = "vel. sp. aff.";
         } elseif (preg_match("/\\-?aff?\\.?/i", $status)) {
             $status = "aff.";
         } elseif (preg_match("/\\-?cf\\.?/i", $status) || $status == '?') {
             $status = "cf.";
         }
     }
     $unmatched = array();
     foreach ($matched as &$match) {
         $match["Name_submitted"] = $scientificname;
         $match["Family_submitted"] = $parsed["family"];
         $match["Genus_submitted"] = $parsed["genus"];
         $match["Specific_epithet_submitted"] = $parsed["species"];
         $match["Author_submitted"] = $parsed["authority"];
         $match["Annotations"] = $status;
         $rk = $match["Lowest_matched_rank"];
         $parsed_part = 0;
         $matched_part = 0;
         if ($rk) {
             $matched_part = self::$rank[$rk];
             if ($match["Family_matched"]) {
                 $matched_part++;
             }
         }
         if ($lowest_parsed_rank) {
             $parsed_part = self::$rank[$lowest_parsed_rank];
         }
         if ($parsed["family"]) {
             $parsed_part++;
         }
         $unmatched = '';
         $unmatched_part = 0;
         $extra_part = 0;
         if (!$rk || self::$rank[$rk] < self::$rank['genus'] || $gni_parser_result && $gni_parser_result->scientificName->parsed) {
             $unmatched = $preprocessed_txt;
         } elseif ($preprocessed_txt != $cleaned_txt) {
             $unmatched = str_ireplace_first($cleaned_txt, '', $preprocessed_txt);
         }
         if ($match["Family_matched"]) {
             $unmatched = str_ireplace_first($parsed['family'], '', $unmatched);
         }
         if ($rk && $gni_parser_result && $gni_parser_result->scientificName->parsed) {
             foreach (array_keys(self::$rank) as $rki) {
                 if (self::$rank[$rki] >= self::$rank['genus'] && self::$rank[$rki] <= self::$rank[$rk]) {
                     $unmatched = str_ireplace_first($parsed[$rki], '', $unmatched);
                 }
             }
             foreach ($parsed["authorities"] as $auth) {
                 if ($auth) {
                     $unmatched = str_ireplace_first($auth, '', $unmatched);
                 }
             }
             if (($rk == 'infra1' || $rk == 'infra2') && $parsed["rank1"]) {
                 $unmatched = str_ireplace_first($parsed["rank1"], '', $unmatched);
                 $matched_part++;
                 if ($parsed["rank1"] == 'f.') {
                     $unmatched = str_ireplace_first(self::$forma, '', $unmatched);
                 }
             }
             if ($rk == 'infra2' && $parsed["rank2"]) {
                 $unmatched = str_ireplace_first($parsed["rank2"], '', $unmatched);
                 $matched_part++;
                 if ($parsed["rank2"] == 'f.') {
                     $unmatched = str_ireplace_first(self::$forma, '', $unmatched);
                 }
             }
             if ($all_name[$match["Name_matched_id"]]->isHybrid) {
                 $unmatched = str_ireplace_first(' x', ' ', $unmatched);
                 $unmatched = str_ireplace_first(' ×', ' ', $unmatched);
             }
         }
         //print_r($matched);
         if (strlen($unmatched)) {
             $unmatched = trim(preg_replace("/ {2,}/", ' ', $unmatched));
         }
         if (strlen($unmatched)) {
             $unmatched_part = count(explode(" ", $unmatched));
             $extra_part = $unmatched_part + $matched_part - $parsed_part;
             if ($parsed["rank1"]) {
                 $extra_part--;
             }
             if ($parsed["rank2"]) {
                 $extra_part--;
             }
             if ($extra_part > 1) {
                 $unmatched_part -= $extra_part - 1;
             }
         }
         if (isset($start_string)) {
             $unmatched = $start_string . $unmatched;
         }
         $match["Unmatched_terms"] = trim($unmatched);
         if ($rk) {
             $matched_id[] = $match["Name_matched_id"];
             $match["Canonical_author"] = $match["temp_authority"];
             if (array_key_exists($match[$rk . "_id"], $phonetic_id)) {
                 $match["Phonetic"] = 'Y';
             } else {
                 $match["Phonetic"] = "";
             }
             if (!isset($match["Warnings"])) {
                 $match["Warnings"] = 0;
             }
             if (isset($gni_parser_result) && $lowest_parsed_rank != $match["Lowest_matched_rank"]) {
                 $match["Warnings"] |= self::$flag_def['Partial'];
             }
             if (!$lowest_matched_rank || self::$rank[$match["Lowest_matched_rank"]] > self::$rank[$lowest_matched_rank]) {
                 $lowest_matched_rank = $match["Lowest_matched_rank"];
             }
             if ($parsed["authority"] && !($match["Warnings"] & self::$flag_def['Partial'])) {
                 $match["Author_matched"] = $match["temp_authority"];
                 $match["Author_score"] = $match["auth_similarity"];
             } else {
                 $match["Author_matched"] = '';
                 $match["Author_score"] = '';
             }
             #$match["Name_score"]=$match["match_score"]/$parsed_part;
             $match["Name_score"] = self::num_to_score($match["match_score"], $parsed_part, 2, 1);
             #$match["Overall_score"]=self::getOverallScore($match["match_score"], $match["Author_score"], $matched_part+$unmatched_part);
             $match["Overall_score"] = self::getOverallScore($match["Name_score"], $match["Author_score"], $extra_part);
         }
     }
     $source_order = 0;
     foreach ($this->db->source as $so) {
         $source_order++;
         $matched_by_source[$so] = array();
         $sid = $this->db->source_id[$so];
         foreach ($matched as &$match) {
             if (isset($match["Name_matched_id"])) {
                 $nid = $match["Name_matched_id"];
                 if (array_key_exists($sid, $name_source_data[$nid])) {
                     $match["Source"] = $so;
                     $match["Source_order"] = $source_order;
                     $match["Name_matched_rank"] = $all_name[$nid]->nameRank;
                     $match["Name_matched_accepted_family"] = isset($classification_family[$nid]) ? $classification_family[$nid] : '';
                     $match["Name_matched_url"] = $name_source_data[$nid][$sid]['url'];
                     $match["Name_matched_lsid"] = $name_source_data[$nid][$sid]['lsid'];
                     $match["Taxonomic_status"] = isset($synonym[$nid][$sid]["acceptance"]) ? $synonym[$nid][$sid]["acceptance"] : 'No opinion';
                     $match["Accepted_name"] = '';
                     if (isset($synonym[$nid][$sid]["accepted_name_id"])) {
                         $match["Accepted_name_id"] = $synonym[$nid][$sid]["accepted_name_id"];
                         $accepted_name = $all_name[$match["Accepted_name_id"]];
                         $match["Accepted_name"] = $accepted_name->scientific_name;
                         $match["Accepted_name_author"] = $accepted_name->author;
                         $match["Accepted_name_url"] = $name_source_data[$match["Accepted_name_id"]][$sid]['url'];
                         $match["Accepted_name_lsid"] = $name_source_data[$match["Accepted_name_id"]][$sid]['lsid'];
                         $match["Accepted_name_rank"] = $accepted_name->nameRank;
                         $match["Accepted_family"] = isset($classification_family[$match["Accepted_name_id"]]) ? $classification_family[$match["Accepted_name_id"]] : "";
                     }
                     $match["Accepted_species"] = "";
                     if ($match["Accepted_name"] && self::$rank[$match["Lowest_matched_rank"]] >= self::$rank["species"]) {
                         if ($all_name[$match["Accepted_name_id"]]->isHybrid) {
                             $match["Accepted_species"] = $all_name[$match["Accepted_name_id"]]->scientific_name;
                         } else {
                             $match["Accepted_species"] = $all_name[$match["Accepted_name_id"]]->genus . ' ' . $all_name[$match["Accepted_name_id"]]->specificEpithet;
                         }
                     }
                     $matched_by_source[$so][] = $match;
                 }
             } else {
                 $matched_by_source[""] = $match;
             }
         }
     }
     $matched = array();
     if (isset($matched_by_source[""])) {
         $matched[] = $matched_by_source[""];
     } else {
         foreach ($this->db->source as $so) {
             foreach ($matched_by_source[$so] as &$match) {
                 $matched[] = $match;
             }
         }
     }
     self::$sort_scheme = 'overall';
     usort($matched, array($this, "cmpMatched"));
     $order = 1;
     foreach ($matched as &$match) {
         $match["Overall_score_order"] = $order++;
     }
     self::$sort_scheme = 'highertaxa';
     usort($matched, array($this, "cmpMatched"));
     $order = 1;
     foreach ($matched as &$match) {
         $match["Highertaxa_score_order"] = $order++;
     }
     foreach ($matched as &$match) {
         if (isset($match['Name_matched_id']) && isset(self::$ambiguous[$match['Name_matched_id']])) {
             $match['Warnings'] |= self::$flag_def['Ambiguous'];
         }
         if ($match["Highertaxa_score_order"] > $match["Overall_score_order"]) {
             $match['Warnings'] |= self::$flag_def['HigherTaxa'];
         } elseif ($match["Highertaxa_score_order"] < $match["Overall_score_order"]) {
             $match['Warnings'] |= self::$flag_def['Overall'];
         }
     }
     foreach ($matched as &$match) {
         $result = array();
         foreach (self::$field as $fd) {
             if (array_key_exists($fd, $match) && !is_null($match[$fd])) {
                 $result[$fd] = $match[$fd];
             } else {
                 $result[$fd] = '';
             }
         }
         $this->data[] = $result;
     }
 }