/**
 * Find the lowest common ancestor (LCA) of two NCBI taxa.
 *
 * Each taxon's lineage is obtained from ncbi_ancestors(), reversed, and the
 * taxon itself is appended, so that a taxon which is an ancestor of the other
 * can be returned as the LCA. The two lineages are then compared position by
 * position; the last position at which they agree is the LCA.
 *
 * NOTE(review): this presumes ncbi_ancestors() returns the lineage ordered
 * from the nearest ancestor up to the root (so the reversed list starts at the
 * root) and does not include the taxon itself - confirm against
 * ncbi_ancestors().
 *
 * @param mixed $tax_id_1 First tax_id.
 * @param mixed $tax_id_2 Second tax_id.
 *
 * @return mixed The tax_id of the LCA, or null when the two lineages do not
 *               agree even on their first element.
 */
function ncbi_lca($tax_id_1, $tax_id_2)
{
    // Identical taxa: the taxon is trivially its own LCA, no lookup needed.
    if ($tax_id_1 == $tax_id_2) {
        return $tax_id_1;
    }

    $path_1 = array_reverse(ncbi_ancestors($tax_id_1));
    $path_2 = array_reverse(ncbi_ancestors($tax_id_2));

    // Ensure the tax_ids are part of their own path (handles the case where
    // one node is the LCA of itself and the other node).
    $path_1[] = $tax_id_1;
    $path_2[] = $tax_id_2;

    $limit = min(count($path_1), count($path_2));

    // The bounds check must come first: when one path is a prefix of the
    // other, $i reaches $limit and indexing the shorter path would read past
    // its end.
    $i = 0;
    while ($i < $limit && $path_1[$i] == $path_2[$i]) {
        $i++;
    }

    // No shared prefix at all: there is no common ancestor to report.
    if ($i === 0) {
        return null;
    }

    return $path_1[$i - 1];
}
/**
 * Compute the taxonomic span of a tree and what the tree is "about".
 *
 * Reads $tree->translations->tax_id and writes to $tree->classification:
 *  - span:          the unique tax_ids (as integers) found in the tree;
 *  - majority_path: the reversed ncbi_ancestors() lineage of the majority
 *                   taxon, i.e. the deepest ancestor shared by more than half
 *                   of the taxa (duplicates counted);
 *  - lca:           the deepest ancestor shared by all counted taxa.
 *
 * If $tree->translations->tax_id is not set the tree is left untouched. If no
 * taxon yields any ancestors, only span is written, because there is nothing
 * from which to derive a majority taxon or an LCA.
 *
 * NOTE(review): this presumes ncbi_ancestors() returns a lineage ordered from
 * the nearest ancestor up to the root - confirm against ncbi_ancestors().
 *
 * @param object $tree Tree object, modified in place.
 *
 * @return void
 */
function get_span(&$tree)
{
    if (!isset($tree->translations->tax_id)) {
        return;
    }

    // Writing a property on a missing container is an Error on PHP 8, so make
    // sure the container exists before it is populated.
    if (!isset($tree->classification)) {
        $tree->classification = new stdClass();
    }

    // Duplicates are kept for now: the relative frequency of each tax_id
    // matters when computing the majority taxon.
    $span = array();
    foreach ($tree->translations->tax_id as $v) {
        $span[] = (int) $v;
    }
    $tree->classification->span = $span;

    // What is the tree about? Count how many span entries fall under each
    // ancestor.
    $c = array();               // ancestor tax_id => number of span entries below it
    $stack = array();           // ancestors in first-seen order
    $ancestor_cache = array();  // tax_id => reversed lineage, avoids repeated lookups

    foreach ($span as $tax_id) {
        if (!isset($ancestor_cache[$tax_id])) {
            $ancestors = ncbi_ancestors($tax_id);
            $ancestor_cache[$tax_id] = is_array($ancestors) ? array_reverse($ancestors) : array();
        }

        foreach ($ancestor_cache[$tax_id] as $anc) {
            // Store nodes in the stack the first time they are seen. This is
            // really a partial order (i.e., a tree), but because each lineage
            // is visited in reversed order, an ancestor is always pushed
            // before its descendants, which is the order that matters below.
            // A node is new exactly when it has no counter yet.
            if (!isset($c[$anc])) {
                $c[$anc] = 0;
                $stack[] = $anc;
            }
            $c[$anc]++;
        }
    }

    // Nothing to summarise: without this guard the loops below would pop an
    // empty stack forever.
    if (count($stack) == 0) {
        $tree->classification->span = array_values(array_unique($span));
        return;
    }

    // The first node pushed is the start of the first lineage; its count is
    // used as the number of taxa. A strict majority is floor(n / 2) + 1.
    $num_taxa = $c[$stack[0]];
    $threshold = (int) floor($num_taxa / 2) + 1;

    // Go down the stack until we hit a node that reaches the majority-rule
    // threshold; this is what the study is "about". $stack[0] always
    // qualifies, so the loop stops before the stack is exhausted.
    $majority = array_pop($stack);
    while ($c[$majority] < $threshold) {
        $majority = array_pop($stack);
    }

    // Reversed lineage of the majority taxon.
    $tree->classification->majority_path = array_reverse(ncbi_ancestors($majority));

    // LCA of all taxa in the tree: keep popping until a node covers every
    // counted taxon.
    $lca = $majority;
    while ($c[$lca] < $num_taxa) {
        $lca = array_pop($stack);
    }

    // Now make span comprise just unique taxa. This is not done earlier
    // because the frequencies were needed for the majority taxon.
    $tree->classification->span = array_values(array_unique($span));
    $tree->classification->lca = $lca;
}
$paths = array(); $tree_objs = array(); $result = $db->Execute($sql); if ($result == false) { die("failed [" . __LINE__ . "]: " . $sql); } while (!$result->EOF) { $tree = new stdclass(); $tree->id = $result->fields['id']; $tree->left = $result->fields['left']; $tree->right = $result->fields['right']; $tree->majority_taxon_tax_id = $result->fields['majority_taxon_tax_id']; $tree->publication = json_decode($result->fields['publication']); $tree_objs[$result->fields['id']] = $tree; // Store path for this tree $paths[$result->fields['id']] = array_reverse(ncbi_ancestors($result->fields['majority_taxon_tax_id'], $tax_id)); $paths[$result->fields['id']][] = $result->fields['majority_taxon_tax_id']; $result->MoveNext(); } // compute layout... // Construct a tree of the majority taxa and use it to arrange trees in "layers" $t = new Tree(); $node_list = array(); // list of nodes in path tree $tree_list = array(); // list of trees assigned to each node $first = true; foreach ($paths as $k => $path) { //print_r($path); $n = count($path); if ($first) {