/**
 * Counts, per taxon concept, the distinct synonym names and the distinct
 * hierarchies supplying them, then hands each batch of results to
 * save_category_stats() under the "get_synonyms_count" category.
 *
 * Synonyms whose relation is "common name" or "genbank common name" are
 * excluded; only browsable hierarchies and published, visible hierarchy
 * entries are considered.
 *
 * Taxon concept ids are walked from $this->min_taxon_concept_id to
 * $this->max_taxon_concept_id in windows of $batch_size ids. When
 * $this->test_taxon_concept_ids is set, a single query restricted to those
 * ids is run instead and the loop stops after the first pass.
 *
 * Each saved value is a string "<synonym count>\t<provider count>".
 *
 * @param int $batch_size Number of taxon concept ids covered by one query.
 * @return void
 */
function get_synonyms_count($batch_size = 500000)
{
    // Everything except the id filter is identical for every batch, so the
    // relation/visibility lookups and the base statement are built once.
    $excluded_relation_ids = SynonymRelation::find_by_translated('label', "common name")->id
        . "," . SynonymRelation::find_by_translated('label', "genbank common name")->id;
    $base_sql = "SELECT he.taxon_concept_id, s.name_id, s.hierarchy_id\n"
        . " FROM hierarchy_entries he FORCE INDEX (concept_published_visible)\n"
        . " JOIN synonyms s ON he.id = s.hierarchy_entry_id JOIN hierarchies h ON s.hierarchy_id = h.id\n"
        . " WHERE s.synonym_relation_id NOT IN (" . $excluded_relation_ids . ")\n"
        . " AND h.browsable=1 AND he.published=1 AND he.visibility_id=" . Visibility::visible()->id;

    for ($i = $this->min_taxon_concept_id; $i <= $this->max_taxon_concept_id; $i += $batch_size) {
        $this->print_status($i, $batch_size);

        if ($this->test_taxon_concept_ids) {
            $sql = $base_sql . " AND he.taxon_concept_id IN (" . $this->test_taxon_concept_ids . ")";
        } else {
            // BETWEEN is inclusive on both ends; stop one short of the next
            // batch's first id so a boundary id is not processed (and saved)
            // by two consecutive batches.
            $batch_end = $i + $batch_size - 1;
            $sql = $base_sql . " AND he.taxon_concept_id BETWEEN {$i} AND {$batch_end}";
        }

        // Use array keys as sets so repeated names/hierarchies count once.
        $raw_stats = array();
        foreach ($this->mysqli_slave->iterate_file($sql) as $row_number => $row) {
            $taxon_concept_id = trim($row[0]);
            $name_id = trim($row[1]);
            $hierarchy_id = trim($row[2]);
            $raw_stats[$taxon_concept_id]['synonyms'][$name_id] = 1;
            $raw_stats[$taxon_concept_id]['synonym_providers'][$hierarchy_id] = 1;
        }

        // Collapse each taxon concept's sets into "count<TAB>count".
        $category_stats = array();
        foreach ($raw_stats as $taxon_concept_id => $stats) {
            $synonym_count = isset($stats['synonyms']) ? count($stats['synonyms']) : '';
            $provider_count = isset($stats['synonym_providers']) ? count($stats['synonym_providers']) : '';
            $category_stats[$taxon_concept_id] = $synonym_count . "\t" . $provider_count;
        }

        $this->save_category_stats($category_stats, "get_synonyms_count");

        // The test query ignores the batch window, so one pass is enough.
        if ($this->test_taxon_concept_ids) {
            break;
        }
    }
}
/**
 * Counts, per taxon concept, the distinct synonym names and the distinct
 * hierarchies supplying them, and passes the totals to
 * save_totals_to_cumulative_txt() under the "tpm_synonyms" label.
 *
 * Synonyms whose relation is "common name" or "genbank common name" are
 * excluded; only browsable hierarchies and published, visible hierarchy
 * entries are considered.
 *
 * Taxon concept ids are walked from $this->min_taxon_concept_id to
 * $this->max_taxon_concept_id in windows of $batch_size ids. Each window is
 * dumped to a file via select_into_outfile(), read back line by line
 * (tab-separated: taxon concept id, name id, hierarchy id) and the file is
 * removed. When $GLOBALS['test_taxon_concept_ids'] is set, the query is
 * restricted to those ids instead.
 *
 * Each saved value is a string "\t<synonym count>\t<provider count>".
 * If an outfile cannot be opened the method reports the error and returns
 * without saving anything.
 *
 * @param int $batch_size Number of taxon concept ids covered by one query.
 * @return void
 */
function get_synonyms_count($batch_size = 500000)
{
    $time_start = time_elapsed();
    $arr_taxa = array();

    // Manual switch: set to 0 to skip the computation and save an empty result.
    $enable = 1;
    if (!$enable) {
        self::save_totals_to_cumulative_txt($arr_taxa, "tpm_synonyms");
        unset($arr_taxa);
        return;
    }

    // Everything except the id filter is identical for every batch, so the
    // relation/visibility lookups and the base statement are built once.
    $excluded_relation_ids = SynonymRelation::find_by_translated('label', "common name")->id
        . "," . SynonymRelation::find_by_translated('label', "genbank common name")->id;
    $base_sql = "SELECT he.taxon_concept_id tc_id, s.name_id, s.hierarchy_id h_id"
        . " FROM hierarchy_entries he"
        . " JOIN synonyms s ON he.id = s.hierarchy_entry_id"
        . " JOIN hierarchies h ON s.hierarchy_id = h.id"
        . " WHERE s.synonym_relation_id NOT IN (" . $excluded_relation_ids . ")"
        . " AND h.browsable=1 AND he.published=1 AND he.visibility_id=" . Visibility::visible()->id;

    $test_mode = isset($GLOBALS['test_taxon_concept_ids']);

    // Array keys act as sets so repeated names/hierarchies count once.
    $tc_name_id = array();
    $tc_hierarchy_id = array();

    for ($i = $this->min_taxon_concept_id; $i <= $this->max_taxon_concept_id; $i += $batch_size) {
        print "\n synonyms and its providers [11 of 14] {$i} \n";

        if ($test_mode) {
            $sql = $base_sql . " and he.taxon_concept_id IN (" . implode(",", $GLOBALS['test_taxon_concept_ids']) . ")";
        } else {
            // BETWEEN is inclusive on both ends; stop one short of the next
            // batch's first id so boundary ids are not fetched twice.
            $batch_end = $i + $batch_size - 1;
            $sql = $base_sql . " AND he.taxon_concept_id BETWEEN {$i} AND {$batch_end}";
        }

        $outfile = $this->mysqli_slave->select_into_outfile($sql);
        $FILE = fopen($outfile, "r");
        if (!$FILE) {
            print "!! ERROR: Could not read {$outfile}";
            debug("!! \nERROR: Could not read {$outfile}");
            return;
        }

        $num_rows = 0;
        while (($line = fgets($FILE)) !== false) {
            $num_rows++;
            $fields = explode("\t", trim($line));
            // Skip blank or truncated lines instead of recording empty ids.
            if (count($fields) < 3) {
                continue;
            }
            $tc_id = trim($fields[0]);
            $name_id = trim($fields[1]);
            $h_id = trim($fields[2]);
            $tc_name_id[$tc_id][$name_id] = '';
            $tc_hierarchy_id[$tc_id][$h_id] = '';
        }
        fclose($FILE);
        unlink($outfile);
        print "\n num_rows: {$num_rows}";

        // The test query ignores the batch window, so one pass is enough.
        if ($test_mode) {
            break;
        }
    }

    // Collapse each taxon concept's sets into "<TAB>count<TAB>providers".
    // Both sets are filled from the same rows, so they share the same keys.
    foreach ($tc_name_id as $tc_id => $names) {
        $provider_count = isset($tc_hierarchy_id[$tc_id]) ? count($tc_hierarchy_id[$tc_id]) : '';
        $arr_taxa[$tc_id] = "\t" . count($names) . "\t" . $provider_count;
    }
    unset($tc_name_id);
    unset($tc_hierarchy_id);

    print "\n get_synonyms_count():" . (time_elapsed() - $time_start) / 60 . " minutes";
    self::save_totals_to_cumulative_txt($arr_taxa, "tpm_synonyms");
    unset($arr_taxa);
}