/**
 * Echoes every name whose string contains a "digit/digit" sequence, together
 * with the canonical form returned by the name parser.
 *
 * Rows are read from the `names` table. When $taxon_concept_ids is non-empty
 * its values are used as the `names.id` IN-list; otherwise the inclusive id
 * range [$start, $start + $limit] is scanned. Surrogate names (as judged by
 * Name::is_surrogate) are skipped. Each hit is printed as
 * "id<TAB>string<TAB>canonical form".
 *
 * @param int   $start             First id of the range (used only without an id list).
 * @param int   $limit             Added to $start to obtain the last id of the range.
 * @param array $taxon_concept_ids Optional id list; taken by reference but never modified here.
 */
function lookup_names($start, $limit, &$taxon_concept_ids = array())
{
    debug("querying names");

    if ($taxon_concept_ids) {
        $id_filter = "IN (" . implode(",", $taxon_concept_ids) . ")";
    } else {
        $id_filter = "BETWEEN {$start} AND " . ($start + $limit);
    }
    $sql = "SELECT n.id, n.string FROM names n WHERE n.id " . $id_filter;

    // Static on purpose: the progress counter keeps running across calls.
    static $rows_seen = 0;
    foreach ($this->mysqli_slave->iterate_file($sql) as $record) {
        if ($rows_seen % 100000 == 0) {
            echo " ===> {$rows_seen} :: " . time_elapsed() . " :: " . memory_get_usage() . "\n";
        }
        $rows_seen++;

        $name_id = $record[0];
        $name_string = $record[1];

        // Only strings with a digit, a slash and another digit are of interest.
        if (!preg_match("/[0-9]\\/[0-9]/", $name_string)) {
            continue;
        }
        if (Name::is_surrogate($name_string)) {
            continue;
        }

        $canonical_form = trim($this->name_parser->lookup_string($name_string));
        echo "{$name_id}\t{$name_string}\t{$canonical_form}\n";
    }
}
/**
 * Writes one tab-separated line per preferred hierarchy entry of the taxon
 * concepts whose id lies in the inclusive range [$start, $start + $limit].
 *
 * Only entries with published=1 and visibility_id=1 are read. The name is the
 * ranked canonical form unless that is "NULL" or a surrogate, in which case
 * the raw name string is used; rows that end up without a name are skipped.
 * Output columns written to $this->NAMES_OUT: taxon concept id, name, parent
 * taxon concept id (0 when absent), rank label ('' when absent).
 *
 * @param int $start First taxon concept id of the range.
 * @param int $limit Added to $start to obtain the last id of the range.
 */
private function lookup_block($start, $limit)
{
    $last_id = $start + $limit;
    $sql = "SELECT tc.id, n.string, cf.string, he_parent.taxon_concept_id, tr.label"
        . " FROM taxon_concepts tc"
        . " JOIN taxon_concept_preferred_entries tcpe ON (tc.id=tcpe.taxon_concept_id)"
        . " JOIN hierarchy_entries he ON (tcpe.hierarchy_entry_id=he.id)"
        . " JOIN names n ON (he.name_id=n.id)"
        . " LEFT JOIN canonical_forms cf ON (n.ranked_canonical_form_id=cf.id)"
        . " LEFT JOIN hierarchy_entries he_parent ON (he.parent_id=he_parent.id)"
        . " LEFT JOIN (ranks r JOIN translated_ranks tr ON (r.id = tr.rank_id AND tr.language_id=152)) ON (he.rank_id=r.id)"
        . " WHERE tc.id BETWEEN {$start} AND {$last_id}"
        . " AND he.published=1 AND he.visibility_id=1";

    // Static on purpose: the progress counter keeps running across calls.
    static $processed = 0;
    foreach ($this->mysqli->iterate_file($sql) as $fields) {
        if ($processed % 50000 == 0) {
            echo "{$start} : {$processed} : " . time_elapsed() . " : " . memory_get_usage() . "\n";
        }
        $processed++;

        $concept_id = $fields[0];
        $raw_name = trim($fields[1]);
        $canonical_name = trim($fields[2]);

        // Missing values arrive either empty or as the literal text "NULL".
        $parent_id = $fields[3];
        $parent_id = ($parent_id && $parent_id != "NULL") ? $parent_id : 0;
        $rank = $fields[4];
        $rank = ($rank && $rank != "NULL") ? $rank : '';

        // Prefer the canonical form; fall back to the raw name string.
        if ($canonical_name != 'NULL' && !Name::is_surrogate($canonical_name)) {
            $chosen_name = $canonical_name;
        } elseif ($raw_name != 'NULL') {
            $chosen_name = $raw_name;
        } else {
            continue;
        }
        if (!$chosen_name) {
            continue;
        }

        fwrite($this->NAMES_OUT, "{$concept_id}\t{$chosen_name}\t{$parent_id}\t{$rank}\n");
    }
}
/**
 * Populates $this->taxon_concept_names for a batch of taxon concepts.
 *
 * For each concept, 'scientificName' is set from the name of its preferred
 * hierarchy entry. Then, for every ancestor of that entry whose rank is one of
 * the kingdom, phylum or family rank ids (as returned by the Rank helpers),
 * the matching 'kingdom' / 'phylum' / 'family' key is set, skipping ancestors
 * whose name Name::is_surrogate() reports as a surrogate.
 *
 * Returns early, without querying, whenever an id list that would be rendered
 * into an SQL IN (...) clause is empty, since "IN ()" is not valid SQL.
 *
 * @param array $taxon_concept_ids Taxon concept ids to look up.
 * @return void
 */
private function lookup_taxon_name_batch($taxon_concept_ids)
{
    if (!$taxon_concept_ids) {
        return;
    }

    // Maps hierarchy entry id => taxon concept id for the preferred entries.
    $entry_taxon_concept_ids = array();
    $preferred_entries_query = "\n SELECT pref.taxon_concept_id, he.id, n.string\n FROM taxon_concept_preferred_entries pref\n JOIN hierarchy_entries he ON (pref.hierarchy_entry_id=he.id)\n LEFT JOIN names n ON (he.name_id=n.id)\n WHERE pref.taxon_concept_id IN (" . implode(",", $taxon_concept_ids) . ")";
    foreach ($GLOBALS['db_connection']->iterate($preferred_entries_query) as $row) {
        $entry_taxon_concept_ids[$row['id']] = $row['taxon_concept_id'];
        $this->taxon_concept_names[$row['taxon_concept_id']]['scientificName'] = $row['string'];
    }

    // Only kingdom, phylum and family ancestors are collected (class/order lookups are disabled).
    $kingdom_ids = Rank::kingdom_rank_ids();
    $phylum_ids = Rank::phylum_rank_ids();
    $family_ids = Rank::family_rank_ids();
    $all_rank_ids = array_merge($kingdom_ids, $phylum_ids, $family_ids);

    // Nothing to search for: either no preferred entries were found or no rank ids are known.
    if (!$entry_taxon_concept_ids || !$all_rank_ids) {
        return;
    }

    // Checked in this order, so a rank id listed under several ranks resolves to the first match.
    $rank_ids_by_key = array(
        'kingdom' => $kingdom_ids,
        'phylum' => $phylum_ids,
        'family' => $family_ids,
    );

    $ancestors_query = "\n SELECT hef.hierarchy_entry_id, he.id, he.rank_id, n.string\n FROM hierarchy_entries_flattened hef\n JOIN hierarchy_entries he ON (hef.ancestor_id=he.id)\n LEFT JOIN names n ON (he.name_id=n.id)\n WHERE hef.hierarchy_entry_id IN (" . implode(",", array_keys($entry_taxon_concept_ids)) . ")\n AND he.rank_id IN (" . implode(",", $all_rank_ids) . ")";
    foreach ($GLOBALS['db_connection']->iterate($ancestors_query) as $row) {
        $taxon_concept_id = $entry_taxon_concept_ids[$row['hierarchy_entry_id']];
        $name_string = $row['string'];
        if (Name::is_surrogate($name_string)) {
            continue;
        }

        // Loose in_array on purpose: rank ids from the database may be numeric strings.
        foreach ($rank_ids_by_key as $key => $rank_ids) {
            if (in_array($row['rank_id'], $rank_ids)) {
                $this->taxon_concept_names[$taxon_concept_id][$key] = $name_string;
                break;
            }
        }
    }
}
/**
 * Writes one tab-separated "link" line per (taxon concept, hierarchy) pair for
 * the taxon concepts whose id lies in the inclusive range
 * [$start, $start + $limit], restricted to a fixed list of hierarchies.
 *
 * Only published entries with the visible visibility id are read. For each
 * taxon concept the first row seen for a given hierarchy wins; later rows for
 * the same pair are skipped. Rows without a usable name are skipped as well.
 *
 * Output columns written to $this->LINKS_OUT: taxon concept id, hierarchy
 * entry id, name, parent taxon concept id, parent hierarchy entry id, richness
 * score (multiplied by 100, rounded to 2 decimals), rank label, identifier,
 * title, and the 'url' of $this->prepare_link($row) when that returns a link
 * (empty otherwise).
 *
 * @param int $start First taxon concept id of the range.
 * @param int $limit Added to $start to obtain the last id of the range.
 * @return void
 */
private function lookup_links($start, $limit)
{
    // COL 2011, NCBI, WORMS, Wikipedia, ITIS, IF, Wikimedia Commons, fishbase, avibase
    $hierarchy_ids = array(771, 759, 431, 123, 903, 596, 410, 143, 860);

    // Abbreviated rank labels that are expanded before being written out.
    $rank_abbreviations = array(
        'gen.' => 'genus',
        'sp.' => 'species',
        'subsp.' => 'subspecies',
        'var.' => 'variety',
    );

    $query = "\n SELECT tc.id, he.id hierarchy_entry_id, he.identifier, he.source_url, h.label, h.outlink_uri, res.title, h.id, n.string,"
        . "\n cf.string, he_parent.id parent_hierarchy_entry_id, he_parent.taxon_concept_id, tr.label, tcm.richness_score"
        . "\n FROM taxon_concepts tc"
        . "\n JOIN hierarchy_entries he ON (tc.id=he.taxon_concept_id)"
        . "\n JOIN names n ON (he.name_id=n.id)"
        . "\n JOIN hierarchies h ON (he.hierarchy_id=h.id)"
        . "\n LEFT JOIN taxon_concept_metrics tcm ON (tc.id=tcm.taxon_concept_id)"
        . "\n LEFT JOIN resources res ON (h.id=res.hierarchy_id)"
        . "\n LEFT JOIN canonical_forms cf ON (n.ranked_canonical_form_id=cf.id)"
        . "\n LEFT JOIN hierarchy_entries he_parent ON (he.parent_id=he_parent.id)"
        . "\n LEFT JOIN (ranks r JOIN translated_ranks tr ON (r.id = tr.rank_id AND tr.language_id=152)) ON (he.rank_id=r.id)"
        . "\n WHERE tc.id BETWEEN {$start} AND " . ($start + $limit)
        . "\n AND he.published=1 AND he.visibility_id=" . Visibility::visible()->id
        . "\n AND he.hierarchy_id IN (" . implode(",", $hierarchy_ids) . ")";

    // taxon concept id => hierarchy id => 1 for every pair already written.
    $links_from_hierarchy = array();

    // Static on purpose: the progress counter keeps running across calls.
    static $j = 0;
    foreach ($this->mysqli->iterate_file($query) as $row) {
        if ($j % 10000 == 0) {
            echo "{$start} : {$j} : " . time_elapsed() . " : " . memory_get_usage() . "\n";
        }
        $j++;

        $taxon_concept_id = $row[0];
        $hierarchy_id = $row[7];

        // One link per hierarchy and taxon concept; checked first to avoid needless work.
        if (isset($links_from_hierarchy[$taxon_concept_id][$hierarchy_id])) {
            continue;
        }

        $hierarchy_entry_id = $row[1];
        $identifier = $row[2];
        $hierarchy_label = $row[4];
        $resource_title = $row[6];
        $string = trim($row[8]);
        $ranked_canonical_form = trim($row[9]);
        $parent_hierarchy_entry_id = $row[10];
        $parent_page_id = $row[11];
        $rank_label = $row[12];
        $richness_score = $row[13];

        // Missing values arrive either empty or as the literal text "NULL".
        if (!$parent_hierarchy_entry_id || $parent_hierarchy_entry_id == "NULL") {
            $parent_hierarchy_entry_id = 0;
        }
        if (!$parent_page_id || $parent_page_id == "NULL") {
            $parent_page_id = 0;
        }
        if (!$richness_score || $richness_score == "NULL") {
            $richness_score = 0;
        }
        if (!$rank_label || $rank_label == "NULL") {
            $rank_label = '';
        }
        if (isset($rank_abbreviations[$rank_label])) {
            $rank_label = $rank_abbreviations[$rank_label];
        }
        $richness_score = round($richness_score * 100, 2);

        // Start from an empty title on every row so that a row without any
        // title never inherits the one computed for an earlier row.
        $title = '';
        foreach (array($resource_title, $hierarchy_label) as $candidate) {
            if ($candidate != 'NULL' && ($trimmed = trim($candidate))) {
                $title = $trimmed;
                break;
            }
        }

        // Prefer the canonical form; fall back to the raw name string.
        $name = null;
        if ($ranked_canonical_form != 'NULL' && !Name::is_surrogate($ranked_canonical_form)) {
            $name = $ranked_canonical_form;
        } elseif ($string != 'NULL') {
            $name = $string;
        }
        if (!$name) {
            continue;
        }

        $url = '';
        if ($link = $this->prepare_link($row)) {
            $url = $link['url'];
        }

        fwrite(
            $this->LINKS_OUT,
            "{$taxon_concept_id}\t{$hierarchy_entry_id}\t{$name}\t{$parent_page_id}\t{$parent_hierarchy_entry_id}\t{$richness_score}\t{$rank_label}\t{$identifier}\t{$title}\t{$url}\n"
        );
        $links_from_hierarchy[$taxon_concept_id][$hierarchy_id] = 1;
    }
}