/**
 * Merge two taxon concepts, or report on them without changing anything.
 *
 * When $args['confirmed'] is exactly the string 'confirmed', the two concepts
 * are handed to TaxonConcept::supercede_by_ids() and the classifications of
 * both IDs are unlocked afterwards. For any other value nothing is modified;
 * the descendant object and descendant concept counts of each ID are echoed
 * so the operator can review them before confirming.
 *
 * @param array $args Expected keys: 'id1' and 'id2' (non-zero, numeric taxon
 *                    concept IDs) and optionally 'confirmed'.
 * @return void
 * @throws \Exception carrying the usage string when either ID is missing,
 *                    falsy (e.g. 0) or not numeric.
 */
public static function merge_concepts($args)
{
    // Explicit isset() checks instead of the @ operator: a missing key simply
    // yields null without hiding unrelated errors.
    $taxon_concept_id_1 = isset($args['id1']) ? $args['id1'] : null;
    $taxon_concept_id_2 = isset($args['id2']) ? $args['id2'] : null;
    $confirmation = isset($args['confirmed']) ? $args['confirmed'] : null;

    $id_1_is_valid = $taxon_concept_id_1 && is_numeric($taxon_concept_id_1);
    $id_2_is_valid = $taxon_concept_id_2 && is_numeric($taxon_concept_id_2);
    if (!$id_1_is_valid || !$id_2_is_valid) {
        throw new \Exception("supercede_concepts.php [id1] [id2] [confirmed]");
    }

    // Strict comparison on purpose: with a loose ==, boolean true (and the
    // integer 0 before PHP 8) compares equal to 'confirmed' and would start a
    // merge that nobody explicitly confirmed.
    if ($confirmation === 'confirmed') {
        \CodeBridge::print_message("Merging TC# {$taxon_concept_id_1} to {$taxon_concept_id_2}");
        // NOTE(review): the meaning of the third argument (true) is defined by
        // TaxonConcept::supercede_by_ids(), which is not visible here.
        TaxonConcept::supercede_by_ids($taxon_concept_id_1, $taxon_concept_id_2, true);

        // Only applicable if run via resque, but safe *enough* otherwise.
        TaxonConcept::unlock_classifications_by_id($taxon_concept_id_1);
        TaxonConcept::unlock_classifications_by_id($taxon_concept_id_2);

        \CodeBridge::print_message("Done. Pages {$taxon_concept_id_1} and {$taxon_concept_id_2} have been merged to: " . min($taxon_concept_id_1, $taxon_concept_id_2));
        return;
    }

    // Not confirmed: print the same three-line report for each concept.
    $concepts_to_report = array(1 => $taxon_concept_id_1, 2 => $taxon_concept_id_2);
    foreach ($concepts_to_report as $position => $taxon_concept_id) {
        $descendant_objects = TaxonConcept::count_descendants_objects($taxon_concept_id);
        $descendants = TaxonConcept::count_descendants($taxon_concept_id);
        echo "\n\nTaxonConcept{$position}: " . $taxon_concept_id . "\n";
        echo "Descendant Objects: {$descendant_objects}\n";
        echo "Descendant Concepts: {$descendants}\n";
    }
}
/**
 * Merge taxon concepts whose hierarchy entries Solr reports as related
 * between two hierarchies.
 *
 * The 'hierarchy_entry_relationship' Solr core is queried for relationships
 * between visible/preview entries of $hierarchy1 and $hierarchy2 that are not
 * yet flagged as the same concept. Results are paged through in chunks of
 * self::$solr_iteration_size. For each relationship the two concepts are
 * resolved to their current (non-superceded) IDs and, unless one of the
 * blocking checks below rejects the pair, merged with
 * TaxonConcept::supercede_by_ids().
 *
 * Everything runs between $mysqli->begin_transaction() and
 * $mysqli->end_transaction(), with a commit() after every 50th merge.
 *
 * NOTE(review): there is no exception handling around the loop, so if any
 * call inside it throws, end_transaction() is never reached.
 *
 * @param object $hierarchy1 Hierarchy record; ->id, ->label and ->complete are read.
 * @param object $hierarchy2 Hierarchy record; ->id, ->label and ->complete are read.
 * @param array  $confirmed_exclusions Passed through unchanged to
 *                                     self::curators_denied_relationship().
 * @param bool   $use_synonyms_for_merging When false, relationships of type
 *                                     'syn' are ignored; when true they are
 *                                     used if their confidence is >= 0.25.
 * @return void
 */
public static function assign_concepts_across_hierarchies($hierarchy1, $hierarchy2, $confirmed_exclusions = array(), $use_synonyms_for_merging = false)
{
    $mysqli =& $GLOBALS['mysqli_connection'];
    debug("Assigning concepts from {$hierarchy2->label} ({$hierarchy2->id}) to {$hierarchy1->label} ({$hierarchy1->id})");

    // The hierarchy is the same and it is 'complete', meaning it has been
    // curated and all of its nodes should be different taxa, so there is no
    // need to compare it to itself. Other hierarchies are not 'complete', such
    // as Flickr, which can have several entries for the same taxon.
    if ($hierarchy1->id == $hierarchy2->id && $hierarchy1->complete) {
        debug("Skipping:: Hierarchies are equivilant and Complete");
        return;
    }

    // Changes made during this call:
    //   $superceded      maps a superceded concept ID => the ID it was merged into
    //   $entries_matched is a set (keys only) of hierarchy entry IDs already paired
    $superceded = array();
    $entries_matched = array();

    $visible_id = Visibility::visible()->id;
    $preview_id = Visibility::preview()->id;

    $solr = new SolrAPI(SOLR_SERVER, 'hierarchy_entry_relationship');
    $main_query = "hierarchy_id_1:{$hierarchy1->id} AND (visibility_id_1:{$visible_id} OR visibility_id_1:{$preview_id}) AND hierarchy_id_2:{$hierarchy2->id} AND (visibility_id_2:{$visible_id} OR visibility_id_2:{$preview_id}) AND same_concept:false&sort=relationship asc, visibility_id_1 asc, visibility_id_2 asc, confidence desc, hierarchy_entry_id_1 asc, hierarchy_entry_id_2 asc";

    // A one-row query is enough to learn how many relationships exist in total.
    $response = $solr->query($main_query . "&rows=1");
    $total_results = $response->numFound;
    unset($response);
    debug("querying solr(hierarchy_entry_relationship), got {$total_results} relations..");

    $mysqli->begin_transaction();
    for ($i = 0; $i < $total_results; $i += self::$solr_iteration_size) {
        // The global variable which will hold all matches for this iteration.
        $GLOBALS['hierarchy_entry_matches'] = array();

        $this_query = $main_query . "&rows=" . self::$solr_iteration_size . "&start={$i}";
        $entries = $solr->get_results($this_query);
        foreach ($entries as $entry) {
            // Synonym relationships are only used on request, and then only
            // when their confidence is at least 0.25.
            if ($entry->relationship == 'syn' && (!$use_synonyms_for_merging || $entry->confidence < 0.25)) {
                continue;
            }

            $id1 = $entry->hierarchy_entry_id_1;
            $visibility_id1 = $entry->visibility_id_1;
            $tc_id1 = $entry->taxon_concept_id_1;
            $id2 = $entry->hierarchy_entry_id_2;
            $visibility_id2 = $entry->visibility_id_2;
            $tc_id2 = $entry->taxon_concept_id_2;

            // When hierarchy 1 is complete, an entry of hierarchy 2 that has
            // already been paired is skipped, and vice versa (presumably
            // because the nodes of a complete hierarchy are all distinct taxa,
            // so an entry may match at most one of them).
            if ($hierarchy1->complete && isset($entries_matched[$id2])) {
                continue;
            }
            if ($hierarchy2->complete && isset($entries_matched[$id1])) {
                continue;
            }
            $entries_matched[$id1] = 1;
            $entries_matched[$id2] = 1;

            // This comparison happens here instead of in the query to ensure
            // the sorting is always the same. If it happened in the query, and
            // the entry was related to more than one taxon, and this function
            // were run more than once, we would start to get huge groups of
            // concepts, all transitively related to one another.
            if ($tc_id1 == $tc_id2) {
                continue;
            }

            // Follow the supercedures recorded during this call without
            // looking in the DB.
            while (isset($superceded[$tc_id1])) {
                $tc_id1 = $superceded[$tc_id1];
            }
            while (isset($superceded[$tc_id2])) {
                $tc_id2 = $superceded[$tc_id2];
            }
            if ($tc_id1 == $tc_id2) {
                continue;
            }

            // Then resolve through TaxonConcept::get_superceded_by().
            $tc_id1 = TaxonConcept::get_superceded_by($tc_id1);
            $tc_id2 = TaxonConcept::get_superceded_by($tc_id2);
            if ($tc_id1 == $tc_id2) {
                continue;
            }

            // Even after all recent changes the concepts still differ, so they
            // are merged unless one of the checks below blocks the pair.
            debug("Comparing hierarchy_entry({$id1}) :: hierarchy_entry({$id2})");

            // Compare visible entries to other published entries.
            if ($hierarchy1->complete && $visibility_id1 == $visible_id && self::concept_published_in_hierarchy($tc_id2, $hierarchy1->id)) {
                debug("NO: concept 2 published in hierarchy 1");
                continue;
            }
            if ($hierarchy2->complete && $visibility_id2 == $visible_id && self::concept_published_in_hierarchy($tc_id1, $hierarchy2->id)) {
                debug("NO: concept 1 published in hierarchy 2");
                continue;
            }

            // Compare preview entries to entries in the latest harvest events.
            if ($hierarchy1->complete && $visibility_id1 == $preview_id && self::concept_preview_in_hierarchy($tc_id2, $hierarchy1->id)) {
                debug("NO: concept 2 preview in hierarchy 1");
                continue;
            }
            if ($hierarchy2->complete && $visibility_id2 == $preview_id && self::concept_preview_in_hierarchy($tc_id1, $hierarchy2->id)) {
                debug("NO: concept 1 preview in hierarchy 2");
                continue;
            }

            if (self::curators_denied_relationship($id1, $tc_id1, $id2, $tc_id2, $superceded, $confirmed_exclusions)) {
                debug("The merger of {$id1} and {$id2} (concepts {$tc_id1} and {$tc_id2}) has been rejected by a curator");
                continue;
            }

            // A truthy return value is used as the ID of the hierarchy that
            // forbids the merge.
            $hierarchy_id = self::concept_merger_effects_other_hierarchies($tc_id1, $tc_id2);
            if ($hierarchy_id) {
                debug("The merger of {$id1} and {$id2} (concepts {$tc_id1} and {$tc_id2}) is not allowed by a curated hierarchy ({$hierarchy_id})");
                continue;
            }

            debug("TaxonMatch::({$tc_id1}) = ({$tc_id2})");
            debug("TaxonConcept::supercede_by_ids({$tc_id1}, {$tc_id2})");
            TaxonConcept::supercede_by_ids($tc_id1, $tc_id2);
            // Record the merge as larger ID => smaller ID so later rows in
            // this run resolve to the surviving concept.
            $superceded[max($tc_id1, $tc_id2)] = min($tc_id1, $tc_id2);

            // Static counter: it keeps counting across calls within the same
            // process, so the "every 50th merge" commit is not reset per call.
            static $count = 0;
            $count++;
            if ($count % 50 == 0) {
                $mysqli->commit();
            }
        }
    }
    $mysqli->end_transaction();
}