public static function assign_concepts_across_hierarchies($hierarchy1, $hierarchy2, $confirmed_exclusions = array(), $use_synonyms_for_merging = false)
 {
     $mysqli =& $GLOBALS['mysqli_connection'];
     debug("Assigning concepts from {$hierarchy2->label} ({$hierarchy2->id}) to {$hierarchy1->label} ({$hierarchy1->id})");
     // hierarchy is the same and its 'complete' meaning its been curated and
     // all nodes should be different taxa so there no need to compare it to
     // itself. Other hierarchies are not 'complete' such as Flickr which can
     // have several entries for the same taxon
     if ($hierarchy1->id == $hierarchy2->id && $hierarchy1->complete) {
         debug("Skipping:: Hierarchies are equivilant and Complete");
         return;
     }
     // store all changes made this session
     $superceded = array();
     $entries_matched = array();
     $concepts_seen = array();
     $visible_id = Visibility::visible()->id;
     $preview_id = Visibility::preview()->id;
     $solr = new SolrAPI(SOLR_SERVER, 'hierarchy_entry_relationship');
     $main_query = "hierarchy_id_1:{$hierarchy1->id} AND (visibility_id_1:{$visible_id} OR visibility_id_1:{$preview_id}) AND hierarchy_id_2:{$hierarchy2->id} AND (visibility_id_2:{$visible_id} OR visibility_id_2:{$preview_id}) AND same_concept:false&sort=relationship asc, visibility_id_1 asc, visibility_id_2 asc, confidence desc, hierarchy_entry_id_1 asc, hierarchy_entry_id_2 asc";
     $response = $solr->query($main_query . "&rows=1");
     $total_results = $response->numFound;
     unset($response);
     debug("querying solr(hierarchy_entry_relationship), got {$total_results} relations..");
     $mysqli->begin_transaction();
     for ($i = 0; $i < $total_results; $i += self::$solr_iteration_size) {
         // the global variable which will hold all mathces for this iteration
         $GLOBALS['hierarchy_entry_matches'] = array();
         $this_query = $main_query . "&rows=" . self::$solr_iteration_size . "&start={$i}";
         $entries = $solr->get_results($this_query);
         foreach ($entries as $entry) {
             if ($entry->relationship == 'syn') {
                 if (!$use_synonyms_for_merging) {
                     continue;
                 }
                 if ($entry->confidence < 0.25) {
                     continue;
                 }
             }
             $id1 = $entry->hierarchy_entry_id_1;
             $visibility_id1 = $entry->visibility_id_1;
             $tc_id1 = $entry->taxon_concept_id_1;
             $id2 = $entry->hierarchy_entry_id_2;
             $visibility_id2 = $entry->visibility_id_2;
             $tc_id2 = $entry->taxon_concept_id_2;
             $score = $entry->confidence;
             // this node in hierarchy 1 has already been matched
             if ($hierarchy1->complete && isset($entries_matched[$id2])) {
                 continue;
             }
             if ($hierarchy2->complete && isset($entries_matched[$id1])) {
                 continue;
             }
             $entries_matched[$id1] = 1;
             $entries_matched[$id2] = 1;
             // this comparison happens here instead of the query to ensure
             // the sorting is always the same if this happened in the query
             // and the entry was related to more than one taxa, and this
             // function is run more than once then we'll start to get huge
             // groups of concepts - all transitively related to one another
             if ($tc_id1 == $tc_id2) {
                 continue;
             }
             // get all the recent supercedures withouth looking in the DB
             while (isset($superceded[$tc_id1])) {
                 $tc_id1 = $superceded[$tc_id1];
             }
             while (isset($superceded[$tc_id2])) {
                 $tc_id2 = $superceded[$tc_id2];
             }
             if ($tc_id1 == $tc_id2) {
                 continue;
             }
             $tc_id1 = TaxonConcept::get_superceded_by($tc_id1);
             $tc_id2 = TaxonConcept::get_superceded_by($tc_id2);
             if ($tc_id1 == $tc_id2) {
                 continue;
             }
             // if even after all recent changes we still have different
             // concepts, merge them
             if ($tc_id1 != $tc_id2) {
                 debug("Comparing hierarchy_entry({$id1}) :: hierarchy_entry({$id2})");
                 // compare visible entries to other published entries
                 if ($hierarchy1->complete && $visibility_id1 == $visible_id && self::concept_published_in_hierarchy($tc_id2, $hierarchy1->id)) {
                     debug("NO: concept 2 published in hierarchy 1");
                     continue;
                 }
                 if ($hierarchy2->complete && $visibility_id2 == $visible_id && self::concept_published_in_hierarchy($tc_id1, $hierarchy2->id)) {
                     debug("NO: concept 1 published in hierarchy 2");
                     continue;
                 }
                 // compare preview entries to entries in the latest harvest events
                 if ($hierarchy1->complete && $visibility_id1 == $preview_id && self::concept_preview_in_hierarchy($tc_id2, $hierarchy1->id)) {
                     debug("NO: concept 2 preview in hierarchy 1");
                     continue;
                 }
                 if ($hierarchy2->complete && $visibility_id2 == $preview_id && self::concept_preview_in_hierarchy($tc_id1, $hierarchy2->id)) {
                     debug("NO: concept 1 preview in hierarchy 2");
                     continue;
                 }
                 if (self::curators_denied_relationship($id1, $tc_id1, $id2, $tc_id2, $superceded, $confirmed_exclusions)) {
                     debug("The merger of {$id1} and {$id2} (concepts {$tc_id1} and {$tc_id2}) has been rejected by a curator");
                     continue;
                 }
                 if ($hierarchy_id = self::concept_merger_effects_other_hierarchies($tc_id1, $tc_id2)) {
                     debug("The merger of {$id1} and {$id2} (concepts {$tc_id1} and {$tc_id2}) is not allowed by a curated hierarchy ({$hierarchy_id})");
                     continue;
                 }
                 debug("TaxonMatch::({$tc_id1}) = ({$tc_id2})");
                 debug("TaxonConcept::supercede_by_ids({$tc_id1}, {$tc_id2})");
                 TaxonConcept::supercede_by_ids($tc_id1, $tc_id2);
                 $superceded[max($tc_id1, $tc_id2)] = min($tc_id1, $tc_id2);
                 static $count = 0;
                 $count++;
                 if ($count % 50 == 0) {
                     $mysqli->commit();
                 }
             }
         }
     }
     $mysqli->end_transaction();
 }
 /**
  * Builds a DataObject from one media row of the archive, stores it through
  * DataObject::find_and_compare() and links it to the taxa, agents, info
  * item and audience referenced by the row.
  *
  * @param array $row media row keyed by term URI
  * @param array $parameters reads 'archive_table_definition' (an object with
  *        a location property) and 'archive_line_number'
  * @return false|null false when the row is skipped (validator error, no
  *         known taxon, duplicate identifier, missing/invalid license,
  *         missing data type or required field, or no id after saving);
  *         nothing is returned once the object has been processed
  */
 public function insert_data_object($row, $parameters)
 {
     self::debug_iterations("Inserting DataObject");
     $this->commit_iterations("DataObject", 20);
     // rows the archive validator flagged are logged and skipped
     if ($this->archive_validator->has_error_by_line('http://eol.org/schema/media/document', $parameters['archive_table_definition']->location, $parameters['archive_line_number'])) {
         write_to_resource_harvesting_log("ERROR: insert_data_object: has_error_by_line" . ",file_location:" . $parameters['archive_table_definition']->location . ",line_number:" . $parameters['archive_line_number']);
         return false;
     }
     // collect the info of every referenced taxon that was inserted earlier
     $object_taxon_ids = self::get_foreign_keys_from_row($row, 'http://rs.tdwg.org/dwc/terms/taxonID');
     $object_taxon_info = array();
     if ($object_taxon_ids) {
         foreach ($object_taxon_ids as $taxon_id) {
             if ($taxon_info = @$this->taxon_ids_inserted[$taxon_id]) {
                 self::uncompress_array($taxon_info);
                 $object_taxon_info[] = $taxon_info;
             }
         }
     }
     // an object without any known taxon is not inserted
     if (!$object_taxon_info) {
         return false;
     }
     if ($this->harvest_event->resource->is_eol_flickr_group() && self::is_this_flickr_image_in_inaturalist($row)) {
         return false;
     }
     $data_object = new DataObject();
     $data_object->identifier = @self::field_decode($row['http://purl.org/dc/terms/identifier']);
     // each media identifier is only inserted once
     if (isset($this->media_ids_inserted[$data_object->identifier])) {
         return false;
     }
     $data_object->data_type = DataType::find_or_create_by_schema_value(@self::field_decode($row['http://purl.org/dc/terms/type']));
     if ($dt = DataType::find_or_create_by_schema_value(@self::field_decode($row['http://rs.tdwg.org/audubon_core/subtype']))) {
         $data_object->data_subtype_id = $dt->id;
     }
     $data_object->mime_type = MimeType::find_or_create_by_translated_label(@self::field_decode($row['http://purl.org/dc/terms/format']));
     $data_object->object_created_at = @self::field_decode($row['http://ns.adobe.com/xap/1.0/CreateDate']);
     $data_object->object_modified_at = @self::field_decode($row['http://purl.org/dc/terms/modified']);
     $data_object->available_at = @self::field_decode($row['http://purl.org/dc/terms/available']);
     $data_object->object_title = @self::field_decode($row['http://purl.org/dc/terms/title']);
     $data_object->language = Language::find_or_create_for_parser(@self::field_decode($row['http://purl.org/dc/terms/language']));
     // check multiple fields for a value of license
     if (isset($row['http://purl.org/dc/terms/license'])) {
         $license_string = @self::field_decode($row['http://purl.org/dc/terms/license']);
     } else {
         $license_string = @self::field_decode($row['http://ns.adobe.com/xap/1.0/rights/UsageTerms']);
     }
     // convert British licences to American licenses
     $license_string = str_replace("creativecommons.org/licences/", "creativecommons.org/licenses/", $license_string);
     // fall back to the license of the resource
     if (!$license_string && $this->harvest_event->resource->license && $this->harvest_event->resource->license->source_url) {
         $license_string = $this->harvest_event->resource->license->source_url;
     }
     if (!$license_string || !\eol_schema\MediaResource::valid_license($license_string)) {
         return false;
     }
     $data_object->license = License::find_or_create_for_parser($license_string);
     $data_object->rights_statement = @self::field_decode($row['http://purl.org/dc/terms/rights']);
     $data_object->rights_holder = @self::field_decode($row['http://ns.adobe.com/xap/1.0/rights/Owner']);
     $data_object->bibliographic_citation = @self::field_decode($row['http://purl.org/dc/terms/bibliographicCitation']);
     $data_object->source_url = @self::field_decode($row['http://rs.tdwg.org/ac/terms/furtherInformationURL']);
     $data_object->derived_from = @self::field_decode($row['http://rs.tdwg.org/ac/terms/derivedFrom']);
     $data_object->description = @self::field_decode($row['http://purl.org/dc/terms/description']);
     // Turn newlines into paragraphs
     $data_object->description = str_replace("\n", "</p><p>", $data_object->description);
     $data_object->object_url = @self::field_decode($row['http://rs.tdwg.org/ac/terms/accessURI']);
     $data_object->thumbnail_url = @self::field_decode($row['http://eol.org/schema/media/thumbnailURL']);
     $data_object->location = @self::field_decode($row['http://iptc.org/std/Iptc4xmpExt/1.0/xmlns/LocationCreated']);
     $data_object->spatial_location = @self::field_decode($row['http://purl.org/dc/terms/spatial']);
     $data_object->latitude = @self::field_decode($row['http://www.w3.org/2003/01/geo/wgs84_pos#lat']);
     $data_object->longitude = @self::field_decode($row['http://www.w3.org/2003/01/geo/wgs84_pos#long']);
     $data_object->altitude = @self::field_decode($row['http://www.w3.org/2003/01/geo/wgs84_pos#alt']);
     $rating = @self::field_decode($row['http://ns.adobe.com/xap/1.0/Rating']);
     // ratings may be 0 to 5
     // TODO: technically 0 means untrusted, and then anywhere from 1-5 is OK.
     // 0.5 for example isn't really valid according to the schema
     if (is_numeric($rating) && $rating > 0 && $rating <= 5) {
         $data_object->data_rating = $rating;
     }
     //TODO - update this
     if ($data_object->mime_type && $data_object->mime_type->equals(MimeType::flash()) && $data_object->is_video()) {
         $data_object->data_type = DataType::youtube();
         $data_object->data_type_id = DataType::youtube()->id;
     }
     // take the first available source_url of one of this object's taxa
     // NOTE(review): $taxon_parameters is never defined in this method, so
     // this condition is always false and the fallback below never runs.
     // Left as is because enabling it would change harvested data - confirm
     // the intended behaviour before removing the second operand.
     if (!@$data_object->source_url && @$taxon_parameters["source_url"]) {
         foreach ($object_taxon_info as $taxon_info) {
             if ($source_url = $taxon_info['source_url']) {
                 $data_object->source_url = $source_url;
                 break;
             }
         }
     }
     /* Checking requirements */
     // none of the checks below can run without a data type; skip the row
     // instead of failing on a method call on a non-object
     if (!$data_object->data_type) {
         return false;
     }
     // if text: must have description
     if ($data_object->data_type->equals(DataType::text()) && !$data_object->description) {
         return false;
     }
     // if image, movie or sound: must have object_url
     if (($data_object->data_type->equals(DataType::video()) || $data_object->data_type->equals(DataType::sound()) || $data_object->data_type->equals(DataType::image())) && !$data_object->object_url) {
         return false;
     }
     /* ADDING THE DATA OBJECT */
     list($data_object, $status) = DataObject::find_and_compare($this->harvest_event->resource, $data_object, $this->content_manager);
     if (@(!$data_object->id)) {
         return false;
     }
     $this->media_ids_inserted[$data_object->identifier] = $data_object->id;
     $this->harvest_event->add_data_object($data_object, $status);
     // re-link the object to the entries and concepts of its taxa
     $data_object->delete_hierarchy_entries();
     $vetted_id = Vetted::unknown()->id;
     $visibility_id = Visibility::preview()->id;
     foreach ($object_taxon_info as $taxon_info) {
         $he_id = $taxon_info['hierarchy_entry_id'];
         $tc_id = $taxon_info['taxon_concept_id'];
         $this->mysqli->insert("INSERT IGNORE INTO data_objects_hierarchy_entries (hierarchy_entry_id, data_object_id, vetted_id, visibility_id) VALUES ({$he_id}, {$data_object->id}, {$vetted_id}, {$visibility_id})");
         $this->mysqli->insert("INSERT IGNORE INTO data_objects_taxon_concepts (taxon_concept_id, data_object_id) VALUES ({$tc_id}, {$data_object->id})");
     }
     // a few things to add after the DataObject is inserted
     // keep track of reference foreign keys
     self::append_foreign_keys_from_row($row, 'http://eol.org/schema/reference/referenceID', $this->media_reference_ids, $data_object->id, $data_object->guid);
     // keep track of agent foreign keys
     self::append_foreign_keys_from_row($row, 'http://eol.org/schema/agent/agentID', $this->media_agent_ids, $data_object->id);
     $data_object->delete_info_items();
     $data_object->delete_table_of_contents();
     if ($s = @self::field_decode($row['http://iptc.org/std/Iptc4xmpExt/1.0/xmlns/CVterm'])) {
         $ii = InfoItem::find_or_create_by_schema_value($s);
         $data_object->add_info_item($ii->id);
         unset($ii);
     }
     if ($audience_label = @self::field_decode($row['http://purl.org/dc/terms/audience'])) {
         $audience = Audience::find_or_create_by_translated_label(trim((string) $audience_label));
         $data_object->add_audience($audience->id);
         unset($audience);
     }
     // replace the object's agents with the creators, publishers and
     // contributors named in the row, in that order
     $data_object_parameters = array("agents" => array());
     self::append_agents($row, $data_object_parameters, 'http://purl.org/dc/terms/creator', 'Creator');
     self::append_agents($row, $data_object_parameters, 'http://purl.org/dc/terms/publisher', 'Publisher');
     self::append_agents($row, $data_object_parameters, 'http://purl.org/dc/terms/contributor', 'Contributor');
     $data_object->delete_agents();
     $i = 0;
     foreach ($data_object_parameters['agents'] as $agent_parameters) {
         $agent = Agent::find_or_create($agent_parameters);
         // cache the agent's logo the first time it is seen
         if ($agent->logo_url && !$agent->logo_cache_url) {
             if ($logo_cache_url = $this->content_manager->grab_file($agent->logo_url, "partner")) {
                 $agent->logo_cache_url = $logo_cache_url;
                 $agent->save();
             }
         }
         $data_object->add_agent($agent->id, @$agent_parameters['agent_role']->id ?: 0, $i);
         $i++;
     }
     // references are deleted only once per object during this harvest
     if (!isset($this->object_references_deleted[$data_object->id])) {
         $data_object->delete_refs();
         $this->object_references_deleted[$data_object->id] = true;
     }
     // add data object info to resource contribution
     if ($status != "Unchanged") {
         $result = $this->mysqli->query("SELECT id, source_url, taxon_concept_id, hierarchy_id, identifier FROM hierarchy_entries inner join  data_objects_hierarchy_entries on hierarchy_entries.id = data_objects_hierarchy_entries.hierarchy_entry_id where data_object_id =" . $data_object->id);
         // The contribution is only recorded when a linked hierarchy entry
         // was found; without one the statement would be built from
         // undefined values and be malformed.
         if ($result && ($entry_row = $result->fetch_assoc())) {
             $hierarchy_entry_id = $entry_row["id"];
             $source = "'" . $this->get_hierarchy_entry_outlink($entry_row["hierarchy_id"], $entry_row["identifier"], preg_replace('/\'/', "\\'", $entry_row["source_url"])) . "'";
             // identifiers come from harvested data: escape quotes and
             // backslashes before placing them in the SQL literal
             $identifier = "'" . addslashes((string) $entry_row["identifier"]) . "'";
             $taxon_concept_id = $entry_row["taxon_concept_id"];
             $resource_id = $this->harvest_event->resource_id;
             $this->mysqli->insert("INSERT IGNORE INTO resource_contributions (resource_id, data_object_id, data_point_uri_id, hierarchy_entry_id, taxon_concept_id, source, object_type, identifier, data_object_type) VALUES ({$resource_id}, {$data_object->id}, NULL, {$hierarchy_entry_id}, {$taxon_concept_id}, {$source}, 'data_object', {$identifier}, {$data_object->data_type_id})");
         }
     }
 }
 /**
  * Finds or creates, within $hierarchy_id, a preview entry carrying the name,
  * guid, rank, ancestry and taxon concept of $hierarchy_entry.
  *
  * The parent chain is handled first through recursion, so the entry is
  * attached to the matching parent in the target hierarchy; an entry
  * without a parent gets parent_id 0.
  *
  * @param object $hierarchy_entry entry to mirror; its parent() is followed
  * @param int $hierarchy_id hierarchy to look in / create in
  * @return mixed the result of HierarchyEntry::find_or_create_by_array(), or
  *         false as soon as one ancestor could not be resolved
  */
 static function lookup_existing_entry_and_ancestors($hierarchy_entry, $hierarchy_id)
 {
     $attributes = array(
         "name_id" => $hierarchy_entry->name_id,
         "guid" => $hierarchy_entry->guid,
         "hierarchy_id" => $hierarchy_id,
         "rank_id" => $hierarchy_entry->rank_id,
         "ancestry" => $hierarchy_entry->ancestry,
         "taxon_concept_id" => $hierarchy_entry->taxon_concept_id,
         "parent_id" => 0,
         "visibility_id" => Visibility::preview()->id,
     );
     $parent = $hierarchy_entry->parent();
     if ($parent) {
         // resolve the ancestors before the entry itself
         $parent_entry = self::lookup_existing_entry_and_ancestors($parent, $hierarchy_id);
         if (!$parent_entry) {
             return false;
         }
         $attributes["parent_id"] = $parent_entry->id;
     }
     return HierarchyEntry::find_or_create_by_array($attributes);
 }
Exemple #4
0
 /**
  * Rebuilds the taxon_concept_names rows of the given taxon concepts.
  *
  * The ids are processed in batches of 500, with a half-second pause before
  * each batch. Per batch the scientific names (entry names, non-common
  * synonyms and the names matching their canonical forms) and the common
  * names are collected, written to two temporary files, the existing rows
  * of the batch are deleted, both files are loaded and the connection is
  * committed. A transaction is opened, and closed again, only when none is
  * active on entry.
  *
  * @param int|array $taxon_concept_ids one id or a list of ids
  * @return false|null false when no ids were given; otherwise nothing is
  *         returned, including when a temporary file could not be opened
  */
 public static function update_taxon_concept_names($taxon_concept_ids)
 {
     if (!$taxon_concept_ids) {
         return false;
     }
     if (is_numeric($taxon_concept_ids)) {
         $taxon_concept_ids = array($taxon_concept_ids);
     }
     $mysqli =& $GLOBALS['db_connection'];
     $started_new_transaction = false;
     if (!$mysqli->in_transaction()) {
         $mysqli->begin_transaction();
         $started_new_transaction = true;
     }
     // values that do not change between batches or rows are looked up once
     $visible_id = Visibility::visible()->id;
     $preview_id = Visibility::preview()->id;
     $trusted_id = Vetted::trusted()->id;
     $unknown_id = Vetted::unknown()->id;
     $wikipedia_hierarchy_id = @Hierarchy::wikipedia()->id;
     $contributors_hierarchy_id = @Hierarchy::contributors()->id;
     $ubio_hierarchy_id = @Hierarchy::ubio()->id;
     // published+visible or unpublished+preview entries
     $active_entries = "((he.published=1 AND he.visibility_id=" . $visible_id . ") OR (he.published=0 AND he.visibility_id=" . $preview_id . "))";
     // synonym relations that do not count as scientific names
     $excluded_relations = "";
     foreach (array('genbank common name', 'common name', 'blast name', 'genbank acronym', 'acronym') as $relation_label) {
         $excluded_relations .= "\n            AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label($relation_label)->id;
     }
     foreach (array_chunk($taxon_concept_ids, 500) as $batch_ids) {
         usleep(500000);
         $id_list = implode(",", $batch_ids);
         // name id => concept ids using it
         $name_ids = array();
         // concept id => name id => hierarchy entry id => 'preferred'|'synonym'|1
         $matching_ids = array();
         $query = "\n            (SELECT he.taxon_concept_id, he.id, he.name_id, 'preferred' as type FROM hierarchy_entries he WHERE taxon_concept_id IN (" . $id_list . ") AND " . $active_entries . ")"
             . "\n            UNION"
             . "\n            (SELECT he.taxon_concept_id, s.hierarchy_entry_id, s.name_id, 'synonym' as type"
             . "\n            FROM hierarchy_entries he"
             . "\n            JOIN synonyms s ON (he.id=s.hierarchy_entry_id)"
             . "\n            WHERE he.taxon_concept_id IN (" . $id_list . ")"
             . "\n            AND s.language_id=0"
             . $excluded_relations
             . "\n            AND " . $active_entries . ")";
         foreach ($mysqli->iterate_file($query) as $row) {
             $taxon_concept_id = $row[0];
             $hierarchy_entry_id = $row[1];
             $name_id = $row[2];
             $name_type = $row[3];
             $name_ids[$name_id][$taxon_concept_id] = 1;
             $matching_ids[$taxon_concept_id][$name_id][$hierarchy_entry_id] = $name_type;
         }
         if ($name_ids) {
             // This makes sure we have a scientific name: every name whose
             // string equals the canonical form of a collected name is added
             // to the same concepts, under hierarchy entry 0
             $query = "SELECT n.id, n_match.id FROM names n JOIN canonical_forms cf ON (n.canonical_form_id=cf.id) JOIN names n_match ON (cf.id=n_match.canonical_form_id) WHERE n.id IN (" . implode(",", array_keys($name_ids)) . ") AND n_match.string=cf.string";
             foreach ($mysqli->iterate_file($query) as $row) {
                 $original_name_id = $row[0];
                 $canonical_name_id = $row[1];
                 if ($original_name_id != $canonical_name_id) {
                     foreach ($name_ids[$original_name_id] as $taxon_concept_id => $junk) {
                         $matching_ids[$taxon_concept_id][$canonical_name_id][0] = 1;
                     }
                 }
             }
         }
         $common_names = array();
         // concept id => language id => 1 once a preferred name is chosen
         $preferred_in_language = array();
         $query = "SELECT he.taxon_concept_id, he.published, he.visibility_id, s.id, s.hierarchy_id, s.hierarchy_entry_id, s.name_id, s.language_id, s.preferred, s.vetted_id FROM hierarchy_entries he JOIN synonyms s ON (he.id=s.hierarchy_entry_id) JOIN vetted v ON (s.vetted_id=v.id) WHERE he.taxon_concept_id IN (" . $id_list . ") AND s.language_id!=0 AND (s.synonym_relation_id=" . SynonymRelation::genbank_common_name()->id . " OR s.synonym_relation_id=" . SynonymRelation::common_name()->id . ") ORDER BY s.language_id, (s.hierarchy_id=" . Hierarchy::contributors()->id . ") DESC, v.view_order ASC, s.preferred DESC, s.id DESC";
         foreach ($mysqli->iterate_file($query) as $row) {
             $taxon_concept_id = $row[0];
             $published = $row[1];
             $visibility_id = $row[2];
             $synonym_id = $row[3];
             $hierarchy_id = $row[4];
             $hierarchy_entry_id = $row[5];
             $name_id = $row[6];
             $language_id = $row[7];
             $preferred = $row[8];
             $vetted_id = $row[9];
             // skipping Wikipedia common names entirely
             if ($hierarchy_id == $wikipedia_hierarchy_id) {
                 continue;
             }
             $curator_name = $hierarchy_id == $contributors_hierarchy_id;
             $ubio_name = $hierarchy_id == $ubio_hierarchy_id;
             // curator and uBio names always count; other names only when
             // their entry is published and visible
             if ($curator_name || $ubio_name || ($published == 1 && $visibility_id == $visible_id)) {
                 // only one preferred name per concept and language
                 if (isset($preferred_in_language[$taxon_concept_id][$language_id])) {
                     $preferred = 0;
                 }
                 // the preferred flag is kept only for trusted/unknown
                 // curator names
                 if ($preferred && $curator_name && ($vetted_id == $trusted_id || $vetted_id == $unknown_id)) {
                     $preferred_in_language[$taxon_concept_id][$language_id] = 1;
                 } else {
                     $preferred = 0;
                 }
                 if (!isset($common_names[$taxon_concept_id])) {
                     $common_names[$taxon_concept_id] = array();
                 }
                 $common_names[$taxon_concept_id][] = array('synonym_id' => $synonym_id, 'language_id' => $language_id, 'name_id' => $name_id, 'hierarchy_entry_id' => $hierarchy_entry_id, 'preferred' => $preferred, 'vetted_id' => $vetted_id, 'is_curator_name' => $curator_name);
             }
         }
         // if there was no preferred name, the first trusted/unknown name of
         // each language becomes the preferred one
         foreach ($common_names as $taxon_concept_id => $names_of_concept) {
             foreach ($names_of_concept as $key => $common_name) {
                 $language_id = $common_name['language_id'];
                 if (empty($preferred_in_language[$taxon_concept_id][$language_id]) && ($common_name['vetted_id'] == $trusted_id || $common_name['vetted_id'] == $unknown_id)) {
                     $common_names[$taxon_concept_id][$key]['preferred'] = 1;
                     $preferred_in_language[$taxon_concept_id][$language_id] = 1;
                 }
             }
         }
         // Both load files are written and closed before the table is
         // touched, so a file-system failure cannot leave the batch deleted
         // but not re-inserted.
         // NOTE(review): assumes temp_filepath() hands out a fresh path on
         // every call - confirm.
         /* Write the scientific names */
         $scientific_names_path = temp_filepath();
         if (!($scientific_names_file = fopen($scientific_names_path, "w+"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $scientific_names_path);
             // nothing of this batch was changed; close what this call opened
             if ($started_new_transaction) {
                 $mysqli->end_transaction();
             }
             return;
         }
         foreach ($matching_ids as $taxon_concept_id => $names_of_concept) {
             foreach ($names_of_concept as $name_id => $entries_of_name) {
                 foreach ($entries_of_name as $hierarchy_entry_id => $type) {
                     $preferred = 0;
                     if ($hierarchy_entry_id && $type == "preferred") {
                         $preferred = 1;
                     }
                     fwrite($scientific_names_file, "{$taxon_concept_id}\t{$name_id}\t{$hierarchy_entry_id}\t0\t0\t{$preferred}\n");
                 }
             }
         }
         fclose($scientific_names_file);
         /* Write the common names */
         $common_names_path = temp_filepath();
         if (!($common_names_file = fopen($common_names_path, "w+"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $common_names_path);
             unlink($scientific_names_path);
             if ($started_new_transaction) {
                 $mysqli->end_transaction();
             }
             return;
         }
         foreach ($common_names as $taxon_concept_id => $names_of_concept) {
             foreach ($names_of_concept as $common_name) {
                 $synonym_id = $common_name['synonym_id'];
                 $language_id = $common_name['language_id'];
                 $name_id = $common_name['name_id'];
                 $hierarchy_entry_id = $common_name['hierarchy_entry_id'];
                 $preferred = $common_name['preferred'];
                 $vetted_id = $common_name['vetted_id'];
                 fwrite($common_names_file, "{$taxon_concept_id}\t{$name_id}\t{$hierarchy_entry_id}\t{$language_id}\t1\t{$preferred}\t{$synonym_id}\t{$vetted_id}\n");
             }
         }
         fclose($common_names_file);
         /* Replace the rows of this batch */
         $mysqli->delete("DELETE FROM taxon_concept_names WHERE taxon_concept_id IN (" . $id_list . ")");
         $mysqli->load_data_infile($scientific_names_path, 'taxon_concept_names');
         unlink($scientific_names_path);
         $mysqli->load_data_infile($common_names_path, 'taxon_concept_names');
         unlink($common_names_path);
         // free the per-batch lookups before the next batch
         unset($matching_ids, $common_names, $name_ids, $preferred_in_language);
         $mysqli->commit();
     }
     if ($started_new_transaction) {
         $mysqli->end_transaction();
     }
 }
Exemple #5
0
 /**
  * Fills $this->baseline_hierarchy_entry_ids with, per taxon concept id, the
  * ids of its hierarchy entries that are published and visible or
  * unpublished and in preview.
  *
  * The concept ids are the keys of $this->baseline_concept_images; they are
  * sorted and queried in chunks of 10000. Memory use and elapsed time are
  * echoed at the start and before every chunk.
  *
  * @return void
  */
 private function lookup_hierarchy_entry_ids()
 {
     echo "lookup_hierarchy_entry_ids " . memory_get_usage() . " " . time_elapsed() . "\n";
     $concept_ids = array_keys($this->baseline_concept_images);
     sort($concept_ids);
     $this->baseline_hierarchy_entry_ids = array();
     foreach (array_chunk($concept_ids, 10000) as $chunk_number => $concept_id_chunk) {
         echo "{$chunk_number} " . memory_get_usage() . " " . time_elapsed() . "\n";
         $id_list = implode(",", $concept_id_chunk);
         $visible_id = Visibility::visible()->id;
         $preview_id = Visibility::preview()->id;
         $query = "SELECT taxon_concept_id, id FROM hierarchy_entries FORCE INDEX (concept_published_visible) WHERE taxon_concept_id IN ({$id_list})  AND ((published=1 AND visibility_id={$visible_id}) OR (published=0 AND visibility_id={$preview_id}))";
         foreach ($this->mysqli_slave->iterate_file($query) as $result_row) {
             // appending creates the per-concept list on first use
             $concept_id = $result_row[0];
             $this->baseline_hierarchy_entry_ids[$concept_id][] = $result_row[1];
         }
     }
 }