Beispiel #1
0
 /**
  * Prints names whose string contains a "digit/digit" pattern, together with
  * the canonical form returned by the name parser.
  *
  * Names are selected either by an explicit ID list or, when that list is
  * empty, by the inclusive ID window [$start, $start + $limit]. Names that
  * Name::is_surrogate() reports as surrogates are skipped. Each reported
  * name is echoed as "id<TAB>string<TAB>canonical form".
  *
  * @param int   $start              First name ID of the window.
  * @param int   $limit              Width of the window; the upper bound is inclusive.
  * @param array $taxon_concept_ids  Optional IDs placed verbatim into an IN (...) clause
  *                                  against names.id. NOTE(review): despite the parameter
  *                                  name these are matched against name IDs — confirm with callers.
  */
 function lookup_names($start, $limit, &$taxon_concept_ids = array())
 {
     debug("querying names");
     $id_filter = $taxon_concept_ids
         ? "IN (" . implode(",", $taxon_concept_ids) . ")"
         : "BETWEEN {$start} AND " . ($start + $limit);
     $sql = "SELECT n.id, n.string FROM names n WHERE n.id " . $id_filter;

     // Static on purpose: the progress counter keeps counting across calls.
     static $rows_seen = 0;
     foreach ($this->mysqli_slave->iterate_file($sql) as $record) {
         if ($rows_seen % 100000 == 0) {
             echo "   ===> {$rows_seen} :: " . time_elapsed() . " :: " . memory_get_usage() . "\n";
         }
         $rows_seen++;

         $id = $record[0];
         $name_text = $record[1];

         // Only names containing a digit, a slash and another digit are of interest.
         if (!preg_match("/[0-9]\\/[0-9]/", $name_text)) {
             continue;
         }
         if (Name::is_surrogate($name_text)) {
             continue;
         }

         $parsed = trim($this->name_parser->lookup_string($name_text));
         echo $id . "\t" . $name_text . "\t" . $parsed . "\n";
     }
 }
 /**
  * Writes one line per preferred, published, visible hierarchy entry of the
  * taxon concepts in the inclusive ID window [$start, $start + $limit] to
  * $this->NAMES_OUT.
  *
  * Line format: "taxon_concept_id<TAB>name<TAB>parent_page_id<TAB>rank_label".
  * The name is the ranked canonical form unless that is the "NULL" placeholder
  * or a surrogate, in which case the raw name string is used. Rows that end up
  * without a usable name are skipped.
  *
  * NOTE(review): the comparisons against the literal "NULL" suggest that
  * iterate_file() renders SQL NULL as that string — confirm against the DB wrapper.
  *
  * @param int $start First taxon concept ID of the window.
  * @param int $limit Width of the window; the upper bound is inclusive.
  */
 private function lookup_block($start, $limit)
 {
     $last_id = $start + $limit;
     $sql = "SELECT tc.id, n.string, cf.string, he_parent.taxon_concept_id, tr.label FROM taxon_concepts tc JOIN taxon_concept_preferred_entries tcpe ON (tc.id=tcpe.taxon_concept_id) JOIN hierarchy_entries he ON (tcpe.hierarchy_entry_id=he.id) JOIN names n ON (he.name_id=n.id) LEFT JOIN canonical_forms cf ON (n.ranked_canonical_form_id=cf.id) LEFT JOIN hierarchy_entries he_parent ON (he.parent_id=he_parent.id) LEFT JOIN (ranks r JOIN translated_ranks tr ON (r.id = tr.rank_id AND tr.language_id=152)) ON (he.rank_id=r.id) WHERE tc.id BETWEEN {$start} AND " . $last_id . " AND he.published=1 AND he.visibility_id=1";

     // Static on purpose: the progress counter keeps counting across calls.
     static $processed = 0;
     foreach ($this->mysqli->iterate_file($sql) as $fields) {
         if ($processed % 50000 == 0) {
             echo "{$start} : {$processed} : " . time_elapsed() . " : " . memory_get_usage() . "\n";
         }
         $processed++;

         $page_id = $fields[0];
         $raw_name = trim($fields[1]);
         $canonical = trim($fields[2]);
         $parent_id = $fields[3];
         $rank = $fields[4];

         // Normalise missing values: parent defaults to 0, rank to the empty string.
         $parent_id = (!$parent_id || $parent_id == "NULL") ? 0 : $parent_id;
         $rank = (!$rank || $rank == "NULL") ? '' : $rank;

         // Prefer the canonical form; fall back to the raw name string.
         if ($canonical != 'NULL' && !Name::is_surrogate($canonical)) {
             $chosen = $canonical;
         } elseif ($raw_name != 'NULL') {
             $chosen = $raw_name;
         } else {
             $chosen = null;
         }
         if (!$chosen) {
             continue;
         }

         $line = implode("\t", array($page_id, $chosen, $parent_id, $rank)) . "\n";
         fwrite($this->NAMES_OUT, $line);
     }
 }
 /**
  * Fills $this->taxon_concept_names for a batch of taxon concepts.
  *
  * For every given taxon concept the preferred hierarchy entry's name string
  * is stored under 'scientificName'. The ancestors of those entries (via
  * hierarchy_entries_flattened) that carry a kingdom, phylum or family rank
  * are then stored under 'kingdom', 'phylum' and 'family'. Ancestor names
  * that Name::is_surrogate() reports as surrogates are ignored.
  *
  * Returns early, without querying, whenever an ID list that would be placed
  * into an SQL IN (...) clause is empty, because "IN ()" is not valid SQL.
  *
  * @param array $taxon_concept_ids Taxon concept IDs, placed verbatim into the query.
  */
 private function lookup_taxon_name_batch($taxon_concept_ids)
 {
     // Nothing to look up; also avoids building "IN ()".
     if (!$taxon_concept_ids) {
         return;
     }

     // Maps hierarchy entry ID => taxon concept ID for the preferred entries.
     $entry_taxon_concept_ids = array();
     foreach ($GLOBALS['db_connection']->iterate("\n            SELECT pref.taxon_concept_id, he.id, n.string\n            FROM taxon_concept_preferred_entries pref\n            JOIN hierarchy_entries he ON (pref.hierarchy_entry_id=he.id)\n            LEFT JOIN names n ON (he.name_id=n.id)\n            WHERE pref.taxon_concept_id IN (" . implode(",", $taxon_concept_ids) . ")") as $row) {
         $entry_taxon_concept_ids[$row['id']] = $row['taxon_concept_id'];
         $this->taxon_concept_names[$row['taxon_concept_id']]['scientificName'] = $row['string'];
     }

     // None of the concepts has a preferred entry, so there are no ancestors to fetch.
     if (!$entry_taxon_concept_ids) {
         return;
     }

     $kingdom_ids = Rank::kingdom_rank_ids();
     $phylum_ids = Rank::phylum_rank_ids();
     // $class_ids = Rank::class_rank_ids();
     // $order_ids = Rank::order_rank_ids();
     $family_ids = Rank::family_rank_ids();
     // $all_rank_ids = array_merge($kingdom_ids, $phylum_ids, $class_ids, $order_ids, $family_ids);
     $all_rank_ids = array_merge($kingdom_ids, $phylum_ids, $family_ids);

     // No rank IDs known; the rank filter would otherwise become "IN ()".
     if (!$all_rank_ids) {
         return;
     }

     foreach ($GLOBALS['db_connection']->iterate("\n            SELECT hef.hierarchy_entry_id, he.id, he.rank_id, n.string\n            FROM hierarchy_entries_flattened hef\n            JOIN hierarchy_entries he ON (hef.ancestor_id=he.id)\n            LEFT JOIN names n ON (he.name_id=n.id)\n            WHERE hef.hierarchy_entry_id IN (" . implode(",", array_keys($entry_taxon_concept_ids)) . ")\n            AND he.rank_id IN (" . implode(",", $all_rank_ids) . ")") as $row) {
         $taxon_concept_id = $entry_taxon_concept_ids[$row['hierarchy_entry_id']];
         $name_string = $row['string'];
         if (Name::is_surrogate($name_string)) {
             continue;
         }
         // Loose in_array() is deliberate: row values may arrive as strings
         // while the rank ID lists may hold integers.
         if (in_array($row['rank_id'], $kingdom_ids)) {
             $this->taxon_concept_names[$taxon_concept_id]['kingdom'] = $name_string;
         } elseif (in_array($row['rank_id'], $phylum_ids)) {
             $this->taxon_concept_names[$taxon_concept_id]['phylum'] = $name_string;
         } elseif (in_array($row['rank_id'], $family_ids)) {
             $this->taxon_concept_names[$taxon_concept_id]['family'] = $name_string;
         }
     }
 }
 /**
  * Writes outlink rows for the taxon concepts in the inclusive ID window
  * [$start, $start + $limit] to $this->LINKS_OUT.
  *
  * Only published, visible hierarchy entries from a fixed set of hierarchies
  * are considered, and at most one row is written per (taxon concept,
  * hierarchy) pair within a single call. Each row is tab-separated:
  *
  *   taxon_concept_id, hierarchy_entry_id, name, parent_page_id,
  *   parent_hierarchy_entry_id, richness_score, rank_label, identifier,
  *   title, url
  *
  * The url column is left empty when $this->prepare_link() returns a falsy
  * value. The title is the resource title if present, otherwise the hierarchy
  * label, otherwise the empty string.
  *
  * NOTE(review): the comparisons against the literal "NULL" suggest that
  * iterate_file() renders SQL NULL as that string — confirm against the DB wrapper.
  *
  * @param int $start First taxon concept ID of the window.
  * @param int $limit Width of the window; the upper bound is inclusive.
  */
 private function lookup_links($start, $limit)
 {
     $hierarchy_ids = array(771, 759, 431, 123, 903, 596, 410, 143, 860);
     // COL 2011, NCBI, WORMS, Wikipedia, ITIS, IF, Wikimedia Commons, fishbase, avibase
     $query = "\n            SELECT tc.id, he.id hierarchy_entry_id, he.identifier, he.source_url, h.label, h.outlink_uri, res.title, h.id, n.string,\n              cf.string, he_parent.id parent_hierarchy_entry_id, he_parent.taxon_concept_id, tr.label, tcm.richness_score\n            FROM taxon_concepts tc\n            JOIN hierarchy_entries he ON (tc.id=he.taxon_concept_id)\n            JOIN names n ON (he.name_id=n.id)\n            JOIN hierarchies h ON (he.hierarchy_id=h.id)\n            LEFT JOIN taxon_concept_metrics tcm ON (tc.id=tcm.taxon_concept_id)\n            LEFT JOIN resources res ON (h.id=res.hierarchy_id)\n            LEFT JOIN canonical_forms cf ON (n.ranked_canonical_form_id=cf.id)\n            LEFT JOIN hierarchy_entries he_parent ON (he.parent_id=he_parent.id)\n            LEFT JOIN (ranks r JOIN translated_ranks tr ON (r.id = tr.rank_id AND tr.language_id=152)) ON (he.rank_id=r.id)\n            WHERE tc.id BETWEEN {$start} AND " . ($start + $limit) . "\n            AND he.published=1 AND he.visibility_id=" . Visibility::visible()->id . "\n            AND he.hierarchy_id IN (" . implode(",", $hierarchy_ids) . ")";

     // Abbreviated rank labels and the full names they are written out as.
     $rank_label_expansions = array(
         'gen.' => 'genus',
         'sp.' => 'species',
         'subsp.' => 'subspecies',
         'var.' => 'variety',
     );

     // [taxon_concept_id][hierarchy_id] => 1 once a row has been written for that pair.
     $links_from_hierarchy = array();
     // Static on purpose: the progress counter keeps counting across calls.
     static $j = 0;
     foreach ($this->mysqli->iterate_file($query) as $row_num => $row) {
         if ($j % 10000 == 0) {
             echo "{$start} : {$j} : " . time_elapsed() . " : " . memory_get_usage() . "\n";
         }
         $j++;

         $taxon_concept_id = $row[0];
         $hierarchy_id = $row[7];
         // Skip early: this concept already has a link from this hierarchy.
         if (!empty($links_from_hierarchy[$taxon_concept_id][$hierarchy_id])) {
             continue;
         }

         $hierarchy_entry_id = $row[1];
         $identifier = $row[2];
         $source_url = $row[3];
         $hierarchy_label = $row[4];
         $outlink_uri = $row[5];
         $resource_title = $row[6];
         $string = trim($row[8]);
         $ranked_canonical_form = trim($row[9]);
         $parent_hierarchy_entry_id = $row[10];
         $parent_page_id = $row[11];
         $rank_label = $row[12];
         $richness_score = $row[13];

         // Normalise missing values to neutral defaults.
         if (!$parent_hierarchy_entry_id || $parent_hierarchy_entry_id == "NULL") {
             $parent_hierarchy_entry_id = 0;
         }
         if (!$parent_page_id || $parent_page_id == "NULL") {
             $parent_page_id = 0;
         }
         if (!$richness_score || $richness_score == "NULL") {
             $richness_score = 0;
         }
         if (!$rank_label || $rank_label == "NULL") {
             $rank_label = '';
         }
         if (isset($rank_label_expansions[$rank_label])) {
             $rank_label = $rank_label_expansions[$rank_label];
         }
         // Stored score is scaled by 100 and rounded to two decimals for output.
         $richness_score = round($richness_score * 100, 2);

         // Reset on every row so a row without any title cannot inherit the
         // title of the previously processed row.
         $title = '';
         if ($resource_title != 'NULL' && ($t = trim($resource_title))) {
             $title = $t;
         } elseif ($hierarchy_label != 'NULL' && ($t = trim($hierarchy_label))) {
             $title = $t;
         }

         // Prefer the canonical form; fall back to the raw name string.
         $name = null;
         if ($ranked_canonical_form != 'NULL' && !Name::is_surrogate($ranked_canonical_form)) {
             $name = $ranked_canonical_form;
         } elseif ($string != 'NULL') {
             $name = $string;
         }
         if (!$name) {
             continue;
         }

         fwrite($this->LINKS_OUT, "{$taxon_concept_id}\t{$hierarchy_entry_id}\t{$name}\t{$parent_page_id}\t{$parent_hierarchy_entry_id}\t{$richness_score}\t{$rank_label}\t{$identifier}\t{$title}\t");
         if ($link = $this->prepare_link($row)) {
             fwrite($this->LINKS_OUT, $link['url']);
         }
         fwrite($this->LINKS_OUT, "\n");
         // Marked only after a row was actually written, so a nameless row
         // does not block a later usable row from the same hierarchy.
         $links_from_hierarchy[$taxon_concept_id][$hierarchy_id] = 1;
     }
 }