Пример #1
0
 /**
  * Counts, for every taxon concept, the number of distinct synonym names and
  * the number of distinct hierarchies supplying those synonyms, then passes
  * the per-batch result to save_category_stats() under "get_synonyms_count".
  *
  * Synonyms whose relation is "common name" or "genbank common name" are
  * excluded; only browsable hierarchies and published, visible hierarchy
  * entries are considered.
  *
  * The taxon_concept_id range [min_taxon_concept_id, max_taxon_concept_id] is
  * walked in windows of $batch_size ids. When $this->test_taxon_concept_ids is
  * truthy, a single query restricted to those ids is run instead and the loop
  * stops after the first pass.
  *
  * Each saved value is the string "<synonym count>\t<provider count>".
  *
  * @param int $batch_size Number of taxon_concept_ids covered by one query.
  * @return void
  */
 function get_synonyms_count($batch_size = 500000)
 {
     // The query prefix does not depend on the batch, so the relation and
     // visibility lookups are done once instead of once per iteration.
     $excluded_relation_ids = SynonymRelation::find_by_translated('label', "common name")->id . "," . SynonymRelation::find_by_translated('label', "genbank common name")->id;
     $base_sql = "SELECT he.taxon_concept_id, s.name_id,  s.hierarchy_id\n"
         . "              FROM hierarchy_entries he FORCE INDEX (concept_published_visible)\n"
         . "              JOIN synonyms s ON he.id = s.hierarchy_entry_id JOIN hierarchies h ON s.hierarchy_id = h.id\n"
         . "              WHERE s.synonym_relation_id NOT IN (" . $excluded_relation_ids . ")\n"
         . "              AND h.browsable=1 AND he.published=1 AND he.visibility_id=" . Visibility::visible()->id;

     for ($i = $this->min_taxon_concept_id; $i <= $this->max_taxon_concept_id; $i += $batch_size) {
         $this->print_status($i, $batch_size);

         $sql = $base_sql;
         if ($this->test_taxon_concept_ids) {
             // NOTE(review): the value is concatenated as-is, so it is presumably
             // already a comma-separated id list; confirm against the caller.
             $sql .= " AND he.taxon_concept_id IN (" . $this->test_taxon_concept_ids . ")";
         } else {
             // BETWEEN is inclusive on both ends; the upper bound is the last id
             // of this window so the next window (starting at $i + $batch_size)
             // does not process the boundary id a second time.
             $sql .= " AND he.taxon_concept_id BETWEEN {$i} AND " . ($i + $batch_size - 1);
         }

         // Array keys act as sets: one key per distinct name / hierarchy.
         $raw_stats = array();
         foreach ($this->mysqli_slave->iterate_file($sql) as $row_number => $row) {
             $taxon_concept_id = trim($row[0]);
             $name_id = trim($row[1]);
             $hierarchy_id = trim($row[2]);
             $raw_stats[$taxon_concept_id]['synonyms'][$name_id] = 1;
             $raw_stats[$taxon_concept_id]['synonym_providers'][$hierarchy_id] = 1;
         }

         // Collapse each pair of sets into the tab-separated string that is saved.
         // Both keys are always written together above, so no isset() guard is needed.
         foreach ($raw_stats as $taxon_concept_id => $stats) {
             $raw_stats[$taxon_concept_id] = count($stats['synonyms']) . "\t" . count($stats['synonym_providers']);
         }

         $this->save_category_stats($raw_stats, "get_synonyms_count");

         // The test query ignores the window, so one pass already covers everything.
         if ($this->test_taxon_concept_ids) {
             break;
         }
     }
 }
Пример #2
0
 /**
  * Counts, for every taxon concept, the number of distinct synonym names and
  * the number of distinct hierarchies supplying those synonyms, and passes the
  * combined result to save_totals_to_cumulative_txt() under "tpm_synonyms".
  *
  * Synonyms whose relation is "common name" or "genbank common name" are
  * excluded; only browsable hierarchies and published, visible hierarchy
  * entries are considered.
  *
  * The taxon_concept_id range [min_taxon_concept_id, max_taxon_concept_id] is
  * walked in windows of $batch_size ids; each window's rows are exported with
  * select_into_outfile(), read back as tab-separated lines, and the temporary
  * file is deleted. When $GLOBALS['test_taxon_concept_ids'] is set, a single
  * query restricted to those ids is run instead.
  *
  * Each saved value is the string "\t<synonym count>\t<provider count>".
  * If an outfile cannot be opened the method reports the error and returns
  * without saving anything.
  *
  * @param int $batch_size Number of taxon_concept_ids covered by one query.
  * @return void
  */
 function get_synonyms_count($batch_size = 500000)
 {
     $time_start = time_elapsed();
     $arr_taxa = array();

     // Manual switch: set to 0 to skip the computation and save an empty result.
     $enable = 1;
     if (!$enable) {
         self::save_totals_to_cumulative_txt($arr_taxa, "tpm_synonyms");
         unset($arr_taxa);
         return;
     }

     // Array keys act as sets: one key per distinct name / hierarchy per taxon concept.
     $tc_name_id = array();
     $tc_hierarchy_id = array();

     $test_mode = isset($GLOBALS['test_taxon_concept_ids']);

     // The query prefix does not depend on the batch, so the relation and
     // visibility lookups are done once instead of once per iteration.
     $base_sql = "SELECT he.taxon_concept_id tc_id, s.name_id, s.hierarchy_id h_id FROM hierarchy_entries he JOIN synonyms s ON he.id = s.hierarchy_entry_id JOIN hierarchies h ON s.hierarchy_id = h.id WHERE s.synonym_relation_id NOT IN (" . SynonymRelation::find_by_translated('label', "common name")->id . "," . SynonymRelation::find_by_translated('label', "genbank common name")->id . ") AND h.browsable=1 AND he.published=1 AND he.visibility_id=" . Visibility::visible()->id;

     for ($i = $this->min_taxon_concept_id; $i <= $this->max_taxon_concept_id; $i += $batch_size) {
         print "\n synonyms and its providers [11 of 14] {$i} \n";

         $sql = $base_sql;
         if ($test_mode) {
             $sql .= " and he.taxon_concept_id IN (" . implode(",", $GLOBALS['test_taxon_concept_ids']) . ")";
         } else {
             // BETWEEN is inclusive on both ends; the upper bound is the last id
             // of this window so consecutive windows do not overlap.
             $sql .= " AND he.taxon_concept_id BETWEEN {$i} AND " . ($i + $batch_size - 1);
         }

         $outfile = $this->mysqli_slave->select_into_outfile($sql);
         $FILE = fopen($outfile, "r");
         if (!$FILE) {
             print "!! ERROR: Could not read {$outfile}";
             debug("!! ERROR: Could not read {$outfile}");
             return;
         }

         $num_rows = 0;
         // fgets() returns false at EOF; comparing against false (rather than
         // relying on truthiness) keeps a final unterminated "0" line from being dropped.
         while (($line = fgets($FILE)) !== false) {
             // Strip only the line terminator: trimming tabs here would shift
             // the columns whenever the first or last field is empty.
             $fields = explode("\t", rtrim($line, "\r\n"));
             if (count($fields) < 3) {
                 // Blank or truncated line - nothing to count.
                 continue;
             }
             $num_rows++;
             $tc_id = trim($fields[0]);
             $name_id = trim($fields[1]);
             $h_id = trim($fields[2]);
             $tc_name_id[$tc_id][$name_id] = '';
             $tc_hierarchy_id[$tc_id][$h_id] = '';
         }
         fclose($FILE);
         unlink($outfile);
         print "\n num_rows: {$num_rows}";

         // The test query ignores the window, so one pass already covers everything.
         if ($test_mode) {
             break;
         }
     }

     // Both sets are filled together for every row, so they share the same keys;
     // collapse them straight into the tab-prefixed string that is saved.
     foreach ($tc_name_id as $id => $names) {
         $arr_taxa[$id] = "\t" . count($names) . "\t" . count($tc_hierarchy_id[$id]);
     }
     unset($tc_name_id);
     unset($tc_hierarchy_id);

     print "\n get_synonyms_count():" . (time_elapsed() - $time_start) / 60 . " minutes";
     self::save_totals_to_cumulative_txt($arr_taxa, "tpm_synonyms");
     unset($arr_taxa);
 }