Пример #1
0
 /**
  * Merge taxon concepts whose hierarchy entries Solr reports as related
  * across two hierarchies.
  *
  * Pages through the `hierarchy_entry_relationship` Solr core for
  * relationships between visible/preview entries of $hierarchy1 and
  * visible/preview entries of $hierarchy2 (filtered on same_concept:false).
  * For every relationship that survives the checks below, the two taxon
  * concepts are merged with TaxonConcept::supercede_by_ids().
  *
  * Side effects visible in this method:
  *  - resets $GLOBALS['hierarchy_entry_matches'] to an empty array at the
  *    start of every Solr page;
  *  - runs inside $mysqli->begin_transaction() / end_transaction(), with a
  *    commit after every 50th merge.
  *
  * @param object  $hierarchy1               Read for ->id, ->label and ->complete.
  * @param object  $hierarchy2               Read for ->id, ->label and ->complete.
  * @param array   $confirmed_exclusions     Passed through unchanged to
  *                                          self::curators_denied_relationship().
  * @param boolean $use_synonyms_for_merging When false, relationships of type
  *                                          'syn' are ignored; when true they are
  *                                          used only if confidence >= 0.25.
  * @return void
  */
 public static function assign_concepts_across_hierarchies($hierarchy1, $hierarchy2, $confirmed_exclusions = array(), $use_synonyms_for_merging = false)
 {
     // Merge counter that drives the periodic commit. It is static, so it
     // keeps counting across calls to this method within one PHP process.
     static $merge_count = 0;

     $mysqli =& $GLOBALS['mysqli_connection'];
     debug("Assigning concepts from {$hierarchy2->label} ({$hierarchy2->id}) to {$hierarchy1->label} ({$hierarchy1->id})");

     // The hierarchy is the same and it is 'complete', meaning it has been
     // curated and all nodes should be different taxa, so there is no need to
     // compare it to itself. Other hierarchies are not 'complete' (such as
     // Flickr) and can have several entries for the same taxon.
     if ($hierarchy1->id == $hierarchy2->id && $hierarchy1->complete) {
         debug("Skipping:: Hierarchies are equivilant and Complete");
         return;
     }

     // Changes made during this call: superseded concept id => surviving id.
     $superceded = array();
     // Hierarchy entry ids (from either side) already used by a relationship
     // processed during this call.
     $entries_matched = array();

     $visible_id = Visibility::visible()->id;
     $preview_id = Visibility::preview()->id;

     $solr = new SolrAPI(SOLR_SERVER, 'hierarchy_entry_relationship');
     $main_query = "hierarchy_id_1:{$hierarchy1->id} AND (visibility_id_1:{$visible_id} OR visibility_id_1:{$preview_id}) AND hierarchy_id_2:{$hierarchy2->id} AND (visibility_id_2:{$visible_id} OR visibility_id_2:{$preview_id}) AND same_concept:false&sort=relationship asc, visibility_id_1 asc, visibility_id_2 asc, confidence desc, hierarchy_entry_id_1 asc, hierarchy_entry_id_2 asc";

     // Ask for a single row first; only the total hit count is needed here.
     $response = $solr->query($main_query . "&rows=1");
     $total_results = $response->numFound;
     unset($response);
     debug("querying solr(hierarchy_entry_relationship), got {$total_results} relations..");

     $mysqli->begin_transaction();
     for ($i = 0; $i < $total_results; $i += self::$solr_iteration_size) {
         // the global variable which will hold all matches for this iteration
         $GLOBALS['hierarchy_entry_matches'] = array();

         $this_query = $main_query . "&rows=" . self::$solr_iteration_size . "&start={$i}";
         $entries = $solr->get_results($this_query);
         foreach ($entries as $entry) {
             // Synonym relationships are only used on request, and then only
             // when their confidence is at least 0.25.
             if ($entry->relationship == 'syn') {
                 if (!$use_synonyms_for_merging) {
                     continue;
                 }
                 if ($entry->confidence < 0.25) {
                     continue;
                 }
             }

             $id1 = $entry->hierarchy_entry_id_1;
             $visibility_id1 = $entry->visibility_id_1;
             $tc_id1 = $entry->taxon_concept_id_1;
             $id2 = $entry->hierarchy_entry_id_2;
             $visibility_id2 = $entry->visibility_id_2;
             $tc_id2 = $entry->taxon_concept_id_2;

             // When hierarchy 1 is complete, skip if the hierarchy 2 entry has
             // already been used by an earlier relationship in this call ...
             if ($hierarchy1->complete && isset($entries_matched[$id2])) {
                 continue;
             }
             // ... and likewise for the hierarchy 1 entry when hierarchy 2 is
             // complete.
             if ($hierarchy2->complete && isset($entries_matched[$id1])) {
                 continue;
             }
             $entries_matched[$id1] = 1;
             $entries_matched[$id2] = 1;

             // This comparison happens here instead of in the query to ensure
             // the sorting is always the same. If it happened in the query,
             // and the entry was related to more than one taxon, and this
             // function ran more than once, we would start to get huge groups
             // of concepts - all transitively related to one another.
             if ($tc_id1 == $tc_id2) {
                 continue;
             }

             // Follow the supersedures recorded during this call without
             // looking in the DB.
             while (isset($superceded[$tc_id1])) {
                 $tc_id1 = $superceded[$tc_id1];
             }
             while (isset($superceded[$tc_id2])) {
                 $tc_id2 = $superceded[$tc_id2];
             }
             if ($tc_id1 == $tc_id2) {
                 continue;
             }

             // Then resolve through TaxonConcept::get_superceded_by().
             $tc_id1 = TaxonConcept::get_superceded_by($tc_id1);
             $tc_id2 = TaxonConcept::get_superceded_by($tc_id2);
             if ($tc_id1 == $tc_id2) {
                 continue;
             }

             // Even after all recent changes the concepts are still different:
             // merge them unless one of the checks below vetoes it.
             debug("Comparing hierarchy_entry({$id1}) :: hierarchy_entry({$id2})");

             // compare visible entries to other published entries
             if ($hierarchy1->complete && $visibility_id1 == $visible_id && self::concept_published_in_hierarchy($tc_id2, $hierarchy1->id)) {
                 debug("NO: concept 2 published in hierarchy 1");
                 continue;
             }
             if ($hierarchy2->complete && $visibility_id2 == $visible_id && self::concept_published_in_hierarchy($tc_id1, $hierarchy2->id)) {
                 debug("NO: concept 1 published in hierarchy 2");
                 continue;
             }

             // compare preview entries to entries in the latest harvest events
             if ($hierarchy1->complete && $visibility_id1 == $preview_id && self::concept_preview_in_hierarchy($tc_id2, $hierarchy1->id)) {
                 debug("NO: concept 2 preview in hierarchy 1");
                 continue;
             }
             if ($hierarchy2->complete && $visibility_id2 == $preview_id && self::concept_preview_in_hierarchy($tc_id1, $hierarchy2->id)) {
                 debug("NO: concept 1 preview in hierarchy 2");
                 continue;
             }

             if (self::curators_denied_relationship($id1, $tc_id1, $id2, $tc_id2, $superceded, $confirmed_exclusions)) {
                 debug("The merger of {$id1} and {$id2} (concepts {$tc_id1} and {$tc_id2}) has been rejected by a curator");
                 continue;
             }

             // A truthy return value is treated as the id of a hierarchy that
             // forbids this merge.
             $hierarchy_id = self::concept_merger_effects_other_hierarchies($tc_id1, $tc_id2);
             if ($hierarchy_id) {
                 debug("The merger of {$id1} and {$id2} (concepts {$tc_id1} and {$tc_id2}) is not allowed by a curated hierarchy ({$hierarchy_id})");
                 continue;
             }

             debug("TaxonMatch::({$tc_id1}) = ({$tc_id2})");
             debug("TaxonConcept::supercede_by_ids({$tc_id1}, {$tc_id2})");
             TaxonConcept::supercede_by_ids($tc_id1, $tc_id2);
             // Record the merge locally as larger id => smaller id, so later
             // relationships in this call resolve without a DB lookup.
             $superceded[max($tc_id1, $tc_id2)] = min($tc_id1, $tc_id2);

             // Commit after every 50th merge.
             $merge_count++;
             if ($merge_count % 50 == 0) {
                 $mysqli->commit();
             }
         }
     }
     $mysqli->end_transaction();
 }