/**
 * Merges two taxon concepts, or prints a dry-run report for both of them.
 *
 * Keys read from $args:
 *   - 'id1', 'id2':  taxon concept IDs; both must be non-empty and numeric.
 *   - 'confirmed':   when equal to 'confirmed' the merge is performed;
 *                    otherwise only descendant counts are echoed.
 *
 * @param array $args
 * @throws \Exception carrying a usage string when either ID is missing or not numeric
 */
public static function merge_concepts($args)
{
    $first_id = @$args['id1'];
    $second_id = @$args['id2'];
    $mode = @$args['confirmed'];

    $first_is_valid = $first_id && is_numeric($first_id);
    $second_is_valid = $second_id && is_numeric($second_id);
    if (!($first_is_valid && $second_is_valid)) {
        throw new \Exception("supercede_concepts.php [id1] [id2] [confirmed]");
    }

    if ($mode != 'confirmed') {
        // Dry run: report the descendant counts of each concept, first then second.
        foreach (array(1 => $first_id, 2 => $second_id) as $position => $concept_id) {
            $object_total = TaxonConcept::count_descendants_objects($concept_id);
            $concept_total = TaxonConcept::count_descendants($concept_id);
            echo "\n\nTaxonConcept{$position}: " . $concept_id . "\n";
            echo "Descendant Objects: {$object_total}\n";
            echo "Descendant Concepts: {$concept_total}\n";
        }
        return;
    }

    \CodeBridge::print_message("Merging TC# {$first_id} to {$second_id}");
    TaxonConcept::supercede_by_ids($first_id, $second_id, true);

    // Only applicable if run via resque, but safe *enough* otherwise:
    foreach (array($first_id, $second_id) as $concept_id) {
        TaxonConcept::unlock_classifications_by_id($concept_id);
    }

    $surviving_id = min($first_id, $second_id);
    \CodeBridge::print_message("Done. Pages {$first_id} and {$second_id} have been merged to: " . $surviving_id);
}
/**
 * Splits a hierarchy entry out of its taxon concept into a new concept, and
 * records that it is NOT equivalent to a given "bad match" entry.
 *
 * Keys read from $args:
 *   - 'hierarchy_entry_id':            entry to split out (required, numeric)
 *   - 'bad_match_hierarchy_entry_id':  entry it was wrongly matched with
 *                                      (required, numeric)
 *   - 'confirmed':                     when equal to 'confirmed' the split is
 *                                      performed; otherwise a preview is echoed.
 *
 * @param array $args
 * @throws \Exception with a usage string when an ID is missing or not numeric,
 *                    "Invalid ID" when either entry cannot be loaded, or a
 *                    message when the two entries are not in the same concept
 */
public static function split_entry($args)
{
    $hierarchy_entry_id = @$args['hierarchy_entry_id'];
    $bad_match_hierarchy_entry_id = @$args['bad_match_hierarchy_entry_id'];
    $confirmation = @$args['confirmed'];

    if (!$hierarchy_entry_id || !is_numeric($hierarchy_entry_id) ||
        !$bad_match_hierarchy_entry_id || !is_numeric($bad_match_hierarchy_entry_id)) {
        throw new \Exception("split_entry.php [hierarchy_entry_id] [bad_match_hierarchy_entry_id] [confirmed]");
    }

    \CodeBridge::print_message("Splitting HE# {$hierarchy_entry_id} from {$bad_match_hierarchy_entry_id}");

    $he = HierarchyEntry::find($hierarchy_entry_id);
    // Load the bad-match entry by its OWN id. Previously this looked up
    // $hierarchy_entry_id a second time, so the bad-match id was never
    // validated and the same-concept check below compared an entry with itself.
    $bad_he = HierarchyEntry::find($bad_match_hierarchy_entry_id);
    if (!$he->id || !$bad_he->id) {
        throw new \Exception("Invalid ID");
    }
    if ($he->taxon_concept_id != $bad_he->taxon_concept_id) {
        throw new \Exception("The bad match ID isn't from the same concept");
    }

    if ($confirmation == 'confirmed') {
        $user_id = 13; # 13 is Patrick's user ID - TODO - this should be an argument. :|
        $new_taxon_concept_id = HierarchyEntry::split_from_concept_static($hierarchy_entry_id);

        // Record the curated "not equivalent" (equivalent = 0) relationship
        // between the split entry and the bad match. Both ids were checked
        // with is_numeric() above before being interpolated.
        $GLOBALS['db_connection']->query(
            "INSERT IGNORE INTO curated_hierarchy_entry_relationships (hierarchy_entry_id_1, hierarchy_entry_id_2, user_id, equivalent)\n" .
            " VALUES ({$hierarchy_entry_id}, {$bad_match_hierarchy_entry_id}, {$user_id}, 0)"
        );
        \CodeBridge::print_message("Done. HE# {$hierarchy_entry_id} was split into a new concept # {$new_taxon_concept_id}");
    } else {
        // Preview only: show what would be removed and from which concept.
        echo "\n\nRemoving:\n";
        print_r($he);
        echo "Name: " . $he->name->string . "\n\nFrom:\n";
        print_r($he->taxon_concept);
        $descendant_objects = TaxonConcept::count_descendants_objects($he->taxon_concept_id);
        echo "\n\nDescendant Objects: {$descendant_objects}\n\n";
    }
}
/**
 * Rebuilds derived data for a single taxon concept: concept names, flattened
 * hierarchies, the descendant-object index and the search index, then unlocks
 * the concept's classifications.
 *
 * Keys read from $args:
 *   - 'taxon_concept_id': the concept to reindex (required, numeric)
 *
 * @param array $args
 * @throws \Exception when the ID is missing or not numeric
 */
public static function reindex_concept($args)
{
    // Guard the lookup so a missing key reaches the exception below instead
    // of raising an undefined-index notice first.
    $taxon_concept_id = isset($args['taxon_concept_id']) ? $args['taxon_concept_id'] : null;
    if (!$taxon_concept_id || !is_numeric($taxon_concept_id)) {
        throw new \Exception("The TaxonConceptID was missing or was not a number");
    }

    Tasks::update_taxon_concept_names(array($taxon_concept_id));

    $flattener = new FlattenHierarchies();
    $flattener->flatten_hierarchies_from_concept_id($taxon_concept_id);

    TaxonConcept::reindex_descendants_objects($taxon_concept_id);
    TaxonConcept::reindex_for_search($taxon_concept_id);
    TaxonConcept::unlock_classifications_by_id($taxon_concept_id);
}
/**
 * Refreshes names, flattened hierarchies and both indexes for one taxon concept.
 *
 * @param mixed $taxon_concept_id  numeric taxon concept ID
 * @return false|null  false (after echoing a usage hint) when the ID is empty
 *                     or not numeric; otherwise nothing is returned
 */
public static function update_concept($taxon_concept_id)
{
    $id_is_usable = $taxon_concept_id && is_numeric($taxon_concept_id);
    if (!$id_is_usable) {
        echo "\n\n\t#update_concept([taxon_concept_id])\n\n";
        return false;
    }

    // The looked-up concept is not used below; the call is retained in case
    // the lookup has side effects (e.g. caching) — TODO confirm and remove.
    TaxonConcept::find($taxon_concept_id);

    // NOTE(review): reindex_concept() passes array($id) to this same helper,
    // while a bare ID is passed here — confirm which form the helper expects.
    Tasks::update_taxon_concept_names($taxon_concept_id);

    // make sure hierarchy info is up-to-date
    $flattener = new FlattenHierarchies();
    $flattener->flatten_hierarchies_from_concept_id($taxon_concept_id);

    // make sure objects are indexed for display
    TaxonConcept::reindex_descendants_objects($taxon_concept_id);

    // make sure objects are indexed for search
    TaxonConcept::reindex_for_search($taxon_concept_id);
}
/**
 * Walks the entry-to-entry relationships stored in the Solr
 * 'hierarchy_entry_relationship' core between two hierarchies and merges
 * (supercedes) the taxon concepts of related entries, unless one of several
 * guards vetoes the merge.
 *
 * Only relationships whose entries are visible or preview on both sides and
 * which are flagged same_concept:false are fetched. Results are paged using
 * self::$solr_iteration_size and processed in the sort order given in the
 * query.
 *
 * @param object $hierarchy1  read here: ->id, ->label, ->complete
 * @param object $hierarchy2  read here: ->id, ->label, ->complete
 * @param array  $confirmed_exclusions  passed through unchanged to
 *                                      self::curators_denied_relationship()
 * @param bool   $use_synonyms_for_merging  when false, every relationship of
 *                                      type 'syn' is skipped; when true, 'syn'
 *                                      relationships with confidence below
 *                                      0.25 are still skipped
 * @return void
 */
public static function assign_concepts_across_hierarchies($hierarchy1, $hierarchy2, $confirmed_exclusions = array(), $use_synonyms_for_merging = false)
{
    $mysqli =& $GLOBALS['mysqli_connection'];
    debug("Assigning concepts from {$hierarchy2->label} ({$hierarchy2->id}) to {$hierarchy1->label} ({$hierarchy1->id})");

    // The hierarchy is being compared with itself and is 'complete', meaning
    // it has been curated and all of its nodes should be different taxa, so
    // there is no need to compare it to itself. Other hierarchies are not
    // 'complete' (such as Flickr), and can have several entries for the same
    // taxon.
    if ($hierarchy1->id == $hierarchy2->id && $hierarchy1->complete) {
        debug("Skipping:: Hierarchies are equivilant and Complete");
        return;
    }

    // Store all changes made this session.
    // $superceded maps a superceded concept ID to the ID that replaced it.
    $superceded = array();
    // $entries_matched is keyed by the hierarchy entry IDs already seen.
    $entries_matched = array();
    // NOTE(review): $concepts_seen is never read or written again in this function.
    $concepts_seen = array();

    $visible_id = Visibility::visible()->id;
    $preview_id = Visibility::preview()->id;

    $solr = new SolrAPI(SOLR_SERVER, 'hierarchy_entry_relationship');
    $main_query = "hierarchy_id_1:{$hierarchy1->id} AND (visibility_id_1:{$visible_id} OR visibility_id_1:{$preview_id}) AND hierarchy_id_2:{$hierarchy2->id} AND (visibility_id_2:{$visible_id} OR visibility_id_2:{$preview_id}) AND same_concept:false&sort=relationship asc, visibility_id_1 asc, visibility_id_2 asc, confidence desc, hierarchy_entry_id_1 asc, hierarchy_entry_id_2 asc";

    // Ask for a single row just to learn the total number of matching relationships.
    $response = $solr->query($main_query . "&rows=1");
    $total_results = $response->numFound;
    unset($response);
    debug("querying solr(hierarchy_entry_relationship), got {$total_results} relations..");

    $mysqli->begin_transaction();
    for ($i = 0; $i < $total_results; $i += self::$solr_iteration_size) {
        // The global variable which will hold all matches for this iteration.
        // NOTE(review): it is only reset here and not read in this function —
        // presumably consumed by a helper; confirm.
        $GLOBALS['hierarchy_entry_matches'] = array();
        $this_query = $main_query . "&rows=" . self::$solr_iteration_size . "&start={$i}";
        $entries = $solr->get_results($this_query);
        foreach ($entries as $entry) {
            // Synonym relationships are used only when explicitly enabled,
            // and then only at confidence 0.25 or higher.
            if ($entry->relationship == 'syn') {
                if (!$use_synonyms_for_merging) {
                    continue;
                }
                if ($entry->confidence < 0.25) {
                    continue;
                }
            }

            $id1 = $entry->hierarchy_entry_id_1;
            $visibility_id1 = $entry->visibility_id_1;
            $tc_id1 = $entry->taxon_concept_id_1;
            $id2 = $entry->hierarchy_entry_id_2;
            $visibility_id2 = $entry->visibility_id_2;
            $tc_id2 = $entry->taxon_concept_id_2;
            // NOTE(review): $score is assigned but not used below.
            $score = $entry->confidence;

            // When a hierarchy is complete, an entry on the OTHER side that
            // has already been seen is not considered again (a complete
            // hierarchy should not hold two nodes for the same taxon).
            if ($hierarchy1->complete && isset($entries_matched[$id2])) {
                continue;
            }
            if ($hierarchy2->complete && isset($entries_matched[$id1])) {
                continue;
            }
            // Both entries are marked as seen before the remaining guards
            // run, so a vetoed pair still counts as matched.
            $entries_matched[$id1] = 1;
            $entries_matched[$id2] = 1;

            // This comparison happens here instead of in the query to ensure
            // the sorting is always the same. If it happened in the query,
            // and the entry was related to more than one taxon, and this
            // function were run more than once, then we would start to get
            // huge groups of concepts - all transitively related to one another.
            if ($tc_id1 == $tc_id2) {
                continue;
            }

            // Follow all the supercedures made during this run without looking in the DB.
            while (isset($superceded[$tc_id1])) {
                $tc_id1 = $superceded[$tc_id1];
            }
            while (isset($superceded[$tc_id2])) {
                $tc_id2 = $superceded[$tc_id2];
            }
            if ($tc_id1 == $tc_id2) {
                continue;
            }

            // Then resolve through TaxonConcept::get_superceded_by() —
            // presumably the persisted supercedure chain; confirm.
            $tc_id1 = TaxonConcept::get_superceded_by($tc_id1);
            $tc_id2 = TaxonConcept::get_superceded_by($tc_id2);
            if ($tc_id1 == $tc_id2) {
                continue;
            }

            // If even after all recent changes we still have different
            // concepts, merge them. (This condition is always true here
            // because equal IDs were skipped just above.)
            if ($tc_id1 != $tc_id2) {
                debug("Comparing hierarchy_entry({$id1}) :: hierarchy_entry({$id2})");

                // Compare visible entries to other published entries.
                if ($hierarchy1->complete && $visibility_id1 == $visible_id && self::concept_published_in_hierarchy($tc_id2, $hierarchy1->id)) {
                    debug("NO: concept 2 published in hierarchy 1");
                    continue;
                }
                if ($hierarchy2->complete && $visibility_id2 == $visible_id && self::concept_published_in_hierarchy($tc_id1, $hierarchy2->id)) {
                    debug("NO: concept 1 published in hierarchy 2");
                    continue;
                }

                // Compare preview entries to entries in the latest harvest events.
                if ($hierarchy1->complete && $visibility_id1 == $preview_id && self::concept_preview_in_hierarchy($tc_id2, $hierarchy1->id)) {
                    debug("NO: concept 2 preview in hierarchy 1");
                    continue;
                }
                if ($hierarchy2->complete && $visibility_id2 == $preview_id && self::concept_preview_in_hierarchy($tc_id1, $hierarchy2->id)) {
                    debug("NO: concept 1 preview in hierarchy 2");
                    continue;
                }

                // A curator has explicitly rejected this relationship.
                if (self::curators_denied_relationship($id1, $tc_id1, $id2, $tc_id2, $superceded, $confirmed_exclusions)) {
                    debug("The merger of {$id1} and {$id2} (concepts {$tc_id1} and {$tc_id2}) has been rejected by a curator");
                    continue;
                }

                // A truthy return value is treated as the ID of a hierarchy
                // that forbids the merge (it is only used in the log message).
                if ($hierarchy_id = self::concept_merger_effects_other_hierarchies($tc_id1, $tc_id2)) {
                    debug("The merger of {$id1} and {$id2} (concepts {$tc_id1} and {$tc_id2}) is not allowed by a curated hierarchy ({$hierarchy_id})");
                    continue;
                }

                debug("TaxonMatch::({$tc_id1}) = ({$tc_id2})");
                debug("TaxonConcept::supercede_by_ids({$tc_id1}, {$tc_id2})");
                TaxonConcept::supercede_by_ids($tc_id1, $tc_id2);
                // Remember the merge locally as larger ID -> smaller ID —
                // presumably mirroring which concept supercede_by_ids keeps; confirm.
                $superceded[max($tc_id1, $tc_id2)] = min($tc_id1, $tc_id2);

                // $count is function-static, so it keeps counting across
                // calls to this method; a commit is issued on every 50th merge.
                static $count = 0;
                $count++;
                if ($count % 50 == 0) {
                    $mysqli->commit();
                }
            }
        }
    }
    $mysqli->end_transaction();
}
/**
 * Collects summary statistics for a family-level taxon concept.
 *
 * Counts the distinct descendant taxon concepts (via the flattened hierarchy)
 * whose rank is 'sp.' or 'species' and whose richness score is at least 0.4,
 * and combines that with the concept's media counts.
 *
 * @param mixed $taxon_concept_id  ID interpolated directly into the SQL below;
 *                                 callers are expected to pass a numeric ID
 * @return array  keys: NumberRichSpeciesPagesInEOL, NumberImagesInEOL,
 *                NumberArticlesInEOL, NumberMediaInEOL, RichPageOnEOL
 */
private function get_stats_for_family($taxon_concept_id)
{
    $visible_id = Visibility::visible()->id;
    $sp_rank_id = Rank::find_by_translated('label', 'sp.')->id;
    $species_rank_id = Rank::find_by_translated('label', 'species')->id;

    // The richness filter in WHERE drops rows with no taxon_concept_metrics
    // match, so the LEFT JOIN behaves like an inner join here.
    $query = "SELECT COUNT(DISTINCT he_children.taxon_concept_id) as count\n" .
        " FROM hierarchy_entries he\n" .
        " JOIN hierarchy_entries_flattened hef on (he.id=hef.ancestor_id)\n" .
        " JOIN hierarchy_entries he_children on (hef.hierarchy_entry_id=he_children.id)\n" .
        " JOIN taxon_concepts tc on (he_children.taxon_concept_id=tc.id)\n" .
        " JOIN hierarchies h on (he_children.hierarchy_id=h.id)\n" .
        " LEFT JOIN taxon_concept_metrics tcm ON (he_children.taxon_concept_id=tcm.taxon_concept_id)\n" .
        " WHERE he.taxon_concept_id={$taxon_concept_id}\n" .
        " AND he.published=1\n" .
        " AND tc.published=1\n" .
        " AND tc.supercedure_id=0\n" .
        " AND he.visibility_id=" . $visible_id . "\n" .
        " AND he_children.rank_id IN (" . $sp_rank_id . ", " . $species_rank_id . ")\n" .
        " AND tcm.richness_score >= .4";
    $count_of_rich_species = $this->mysqli->select_value($query);

    $media_counts = TaxonConcept::media_counts($taxon_concept_id);
    // Media types missing from $media_counts contribute nothing to the total.
    $all_media_count = @$media_counts['image'] + @$media_counts['video'] + @$media_counts['sound'];

    // A COUNT is never negative, so the previous ">= 0" test always answered
    // "yes". The family is now reported as rich only when at least one rich
    // species page was counted.
    $is_rich = $count_of_rich_species > 0;

    return array(
        'NumberRichSpeciesPagesInEOL' => $count_of_rich_species,
        'NumberImagesInEOL' => @$media_counts['image'],
        'NumberArticlesInEOL' => @$media_counts['text'],
        'NumberMediaInEOL' => $all_media_count,
        'RichPageOnEOL' => $is_rich ? 'http://eol.org/schema/terms/yes' : 'http://eol.org/schema/terms/no'
    );
}
/**
 * Moves a hierarchy entry from one taxon concept to another and records that
 * it is NOT equivalent to a given "bad match" entry.
 *
 * Keys read from $args (all IDs required and numeric):
 *   - 'taxon_concept_id_from':         concept the entry currently belongs to
 *   - 'hierarchy_entry_id':            entry to move
 *   - 'taxon_concept_id_to':           destination concept
 *   - 'bad_match_hierarchy_entry_id':  entry it was wrongly matched with
 *   - 'confirmed':                     'confirmed' performs the move, 'force'
 *                                      performs it with the force flag set;
 *                                      any other value only echoes a preview.
 *
 * @param array $args
 * @throws \Exception with a usage string when an ID is missing or not numeric,
 *                    "Invalid ID" when a record cannot be loaded, a message
 *                    when the entry is not in the source concept or the bad
 *                    match is in a different concept, or when the move is
 *                    refused by HierarchyEntry::move_to_concept_static()
 */
public static function move_entry($args)
{
    $taxon_concept_id_from = @$args['taxon_concept_id_from'];
    $hierarchy_entry_id = @$args['hierarchy_entry_id'];
    $taxon_concept_id_to = @$args['taxon_concept_id_to'];
    $bad_match_hierarchy_entry_id = @$args['bad_match_hierarchy_entry_id'];
    $confirmation = @$args['confirmed'];

    if (!$taxon_concept_id_from || !is_numeric($taxon_concept_id_from) ||
        !$hierarchy_entry_id || !is_numeric($hierarchy_entry_id) ||
        !$taxon_concept_id_to || !is_numeric($taxon_concept_id_to) ||
        !$bad_match_hierarchy_entry_id || !is_numeric($bad_match_hierarchy_entry_id)) {
        throw new \Exception("split_concept.php [taxon_concept_id_from] [hierarchy_entry_id] [taxon_concept_id_to] [bad_match_hierarchy_entry_id] [confirmed] [reindex?]");
    }

    \CodeBridge::print_message("Moving HE# {$hierarchy_entry_id} from TC# {$taxon_concept_id_from} to TC# " .
        "{$taxon_concept_id_to} avoiding HE# {$bad_match_hierarchy_entry_id}");

    $tc_from = TaxonConcept::find($taxon_concept_id_from);
    $tc_to = TaxonConcept::find($taxon_concept_id_to);
    $he = HierarchyEntry::find($hierarchy_entry_id);
    $bad_he = HierarchyEntry::find($bad_match_hierarchy_entry_id);
    if (!$he->id || !$tc_from->id || !$tc_to->id || !$bad_he->id) {
        throw new \Exception("Invalid ID");
    }
    if ($he->taxon_concept_id != $tc_from->id) {
        throw new \Exception("This entry is not in the source concept");
    }
    if ($he->taxon_concept_id != $bad_he->taxon_concept_id) {
        throw new \Exception("The bad match ID isn't from the same concept");
    }

    if ($confirmation == 'confirmed' || $confirmation == 'force') {
        $force_move_if_disallowed = ($confirmation == 'force');
        $user_id = 13; # 13 is Patrick's user ID

        // TODO Need to look through all the HEs in the TC we're moving *to* and cycle through them to make sure none of
        // them are blocking the move:
        // For now, drop any curated "not equivalent" rows that pair the moved
        // entry (as entry 1) with an entry of the destination concept.
        foreach ($tc_to->hierarchy_entries as $tc_he) {
            $GLOBALS['db_connection']->query(
                "DELETE FROM curated_hierarchy_entry_relationships\n" .
                " WHERE hierarchy_entry_id_1={$hierarchy_entry_id} AND hierarchy_entry_id_2=" . $tc_he->id . " AND equivalent=0"
            );
        }

        $moved = HierarchyEntry::move_to_concept_static($hierarchy_entry_id, $taxon_concept_id_to, $force_move_if_disallowed, true);
        if (!$moved) {
            \CodeBridge::print_message("NOT ALLOWED: throwing exception");
            throw new \Exception("This move is not allowed; it would affect other hierarchies");
        }

        // Columns are named explicitly (as split_entry() does for the same
        // table) so the statement does not depend on the table's column
        // order or on it having exactly four columns.
        $GLOBALS['db_connection']->query(
            "INSERT IGNORE INTO curated_hierarchy_entry_relationships (hierarchy_entry_id_1, hierarchy_entry_id_2, user_id, equivalent)" .
            " VALUES ({$hierarchy_entry_id}, {$bad_match_hierarchy_entry_id}, {$user_id}, 0)"
        );
        \CodeBridge::print_message("Done. Moved {$hierarchy_entry_id} to {$taxon_concept_id_to}");
    } else {
        // Preview only: show the entry, both concepts and their descendant counts.
        echo "\n\nRemoving:\n";
        print_r($he);
        echo "Name: " . $he->name->string . "\n\nFrom:\n";
        print_r($tc_from);
        echo "To:\n";
        print_r($tc_to);

        $descendant_objects = TaxonConcept::count_descendants_objects($tc_from->id);
        $descendants = TaxonConcept::count_descendants($tc_from->id);
        echo "\n\nTaxonConcept1: {$tc_from->id}\n";
        echo "Descendant Objects: {$descendant_objects}\n";
        echo "Descendant Concepts: {$descendants}\n";

        $descendant_objects = TaxonConcept::count_descendants_objects($tc_to->id);
        $descendants = TaxonConcept::count_descendants($tc_to->id);
        // The destination concept was previously mislabelled "TaxonConcept1".
        echo "\n\nTaxonConcept2: {$tc_to->id}\n";
        echo "Descendant Objects: {$descendant_objects}\n";
        echo "Descendant Concepts: {$descendants}\n";

        echo "\n\nDon't forget to solr_update_concept.php\n\n";
    }
}