예제 #1
0
 /**
  * Creates the hierarchy entry for one parsed taxon and rebuilds everything
  * attached to it: common names, synonyms, agents, references and data objects.
  *
  * For each association type the entry's existing rows are deleted first and
  * then re-added from $t, so the entry always mirrors the incoming record.
  *
  * @param array $t Taxon parameters. Keys read here: 'common_names', 'synonyms',
  *                 'agents', 'refs' and 'data_objects' (each optional).
  * @return mixed The hierarchy entry, or false when no entry with an id was produced.
  */
 function add_taxon($t)
 {
     $hierarchy_entry = HierarchyEntry::create_entries_for_taxon($t, $this->resource->hierarchy_id);
     if (!$hierarchy_entry || !@$hierarchy_entry->id) {
         return false;
     }
     $this->resource->harvest_event->add_hierarchy_entry($hierarchy_entry, 'inserted');

     // Common names are stored as synonyms with the 'common name' relation.
     $hierarchy_entry->delete_common_names();
     if (!empty($t['common_names'])) {
         // Loop-invariant: resolve the relation once instead of once per name.
         $common_name_relation_id = SynonymRelation::find_or_create_by_translated_label('common name')->id;
         foreach ($t['common_names'] as $c) {
             $name = Name::find_or_create_by_string($c['name']);
             // A missing language falls back to language id 0.
             $hierarchy_entry->add_synonym($name->id, $common_name_relation_id, @$c['language']->id ?: 0, 0);
         }
     }

     $hierarchy_entry->delete_synonyms();
     if (!empty($t['synonyms'])) {
         foreach ($t['synonyms'] as $s) {
             $hierarchy_entry->add_synonym($s['name']->id, @$s['synonym_relation']->id ?: 0, 0, 0);
         }
     }

     $hierarchy_entry->delete_agents();
     if (!empty($t['agents'])) {
         // $position is passed as the third argument of add_agent and counts agents in input order.
         $position = 0;
         foreach ($t['agents'] as $a) {
             $agent = Agent::find_or_create($a);
             // Cache the agent's logo only when it has a logo URL but no cached copy yet.
             if ($agent->logo_url && !$agent->logo_cache_url) {
                 if ($logo_cache_url = $this->content_manager->grab_file($agent->logo_url, "partner")) {
                     $agent->logo_cache_url = $logo_cache_url;
                     $agent->save();
                 }
             }
             $hierarchy_entry->add_agent($agent->id, @$a['agent_role']->id ?: 0, $position);
             $position++;
         }
     }

     $hierarchy_entry->delete_refs();
     if (!empty($t['refs'])) {
         foreach ($t['refs'] as $r) {
             // Only references that carry an id are linked and published.
             if (@$r->id) {
                 $hierarchy_entry->add_reference($r->id);
                 $r->publish();
             }
         }
     }

     // 'data_objects' is optional like the other keys; iterating a missing key
     // would raise a warning, so it is guarded the same way.
     if (!empty($t['data_objects'])) {
         foreach ($t['data_objects'] as $d) {
             $this->add_data_object($hierarchy_entry, $d);
         }
     }
     return $hierarchy_entry;
 }
예제 #2
0
 /**
  * Stores one vernacular-name row of an archive as a 'common name' synonym.
  *
  * The row is skipped when the archive validator reports an error for its line,
  * when none of its taxonID foreign keys was inserted earlier in this harvest,
  * or when no Name could be found or created for the vernacular string.
  * Otherwise the matching synonym row is refreshed if it already exists, or
  * created through Synonym::find_or_create() if it does not.
  *
  * Only one synonym is written per row: it is attached to the first taxon id
  * that resolves in $this->taxon_ids_inserted, and always carries the first
  * taxon id of the row as its identifier.
  *
  * @param array $row        Archive row keyed by term URI.
  * @param array $parameters Reads 'archive_table_definition' (its ->location)
  *                          and 'archive_line_number'.
  * @return false|null false when the row is skipped; null otherwise.
  */
 public function insert_vernacular_names($row, $parameters)
 {
     self::debug_iterations("Inserting VernacularName");
     $this->commit_iterations("VernacularName", 500);
     if ($this->archive_validator->has_error_by_line('http://rs.gbif.org/terms/1.0/vernacularname', $parameters['archive_table_definition']->location, $parameters['archive_line_number'])) {
         write_to_resource_harvesting_log("ERROR: insert_vernacular_names: has_error_by_line" . ",file_location:" . $parameters['archive_table_definition']->location . ",line_number:" . $parameters['archive_line_number']);
         return false;
     }

     // Resolve the row's taxon ids to a taxon inserted earlier. A separate
     // variable is used for each candidate so that an unknown id later in the
     // list cannot wipe out a match that was already found.
     $taxon_ids = self::get_foreign_keys_from_row($row, 'http://rs.tdwg.org/dwc/terms/taxonID');
     $taxon_info = null;
     if ($taxon_ids) {
         foreach ($taxon_ids as $taxon_id) {
             if ($candidate = @$this->taxon_ids_inserted[$taxon_id]) {
                 self::uncompress_array($candidate);
                 $taxon_info = $candidate;
                 break;
             }
         }
     }
     if (!$taxon_info) {
         return false;
     }

     $vernacularName = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/vernacularName']);
     $languageString = @self::field_decode($row['http://purl.org/dc/terms/language']);
     $isPreferredName = @self::field_decode($row['http://rs.gbif.org/terms/1.0/isPreferredName']);
     $taxonRemarks = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/taxonRemarks']);
     $name = Name::find_or_create_by_string($vernacularName);
     $language = Language::find_or_create_for_parser($languageString);
     if (!$name) {
         return false;
     }

     $he_id = $taxon_info['hierarchy_entry_id'];
     $hierarchy_id = $this->harvest_event->resource->hierarchy_id;
     $language_id = @$language->id ?: 0;
     $common_name_relation = SynonymRelation::find_or_create_by_translated_label('common name');

     // The identifier and the remarks come straight from the archive, so they
     // are quoted and escaped before being placed in SQL.
     // NOTE(review): addslashes() is a stopgap that relies on MySQL backslash
     // escapes; prefer the connection wrapper's own escaping or bound
     // parameters if it offers them.
     $identifier_sql = "'" . addslashes((string) $taxon_ids[0]) . "'";
     $remarks_sql = "'" . addslashes((string) $taxonRemarks) . "'";

     // Shared by the SELECT and the UPDATE so both always address the same row.
     $where = " WHERE name_id = " . $name->id
         . " AND synonym_relation_id = " . $common_name_relation->id
         . " AND hierarchy_entry_id = " . $he_id
         . " AND hierarchy_id = " . $hierarchy_id
         . " AND identifier = " . $identifier_sql;

     $result = $this->mysqli->query("SELECT SQL_NO_CACHE id FROM synonyms" . $where);
     if ($result && $result->fetch_assoc()) {
         // Existing synonym: refresh language and remarks and mark it unpublished.
         $GLOBALS['db_connection']->update("UPDATE synonyms SET"
             . " language_id = " . $language_id
             . ", published = 0"
             . ", taxon_remarks = " . $remarks_sql
             . $where);
     } else {
         Synonym::find_or_create(array(
             'name_id' => $name->id,
             'synonym_relation_id' => $common_name_relation->id,
             'language_id' => $language_id,
             'hierarchy_entry_id' => $he_id,
             // Any non-empty isPreferredName value counts as preferred.
             'preferred' => $isPreferredName != '',
             'hierarchy_id' => $hierarchy_id,
             'vetted_id' => 0,
             'published' => 0,
             'taxonRemarks' => $taxonRemarks,
             'identifier' => $taxon_ids[0]));
     }
 }
예제 #3
0
 /**
  * Rebuilds the taxon_concept_names rows for the given taxon concepts.
  *
  * Ids are processed in batches of 500 with a half-second pause before each
  * batch. For every batch the method:
  *   1. collects the names of the concepts' hierarchy entries and of their
  *      language-less synonyms (common-name and acronym style relations excluded),
  *   2. adds names that share a canonical form and whose string equals that form,
  *   3. collects common names, marking at most one per concept and language as preferred,
  *   4. deletes the batch's existing taxon_concept_names rows and bulk-loads
  *      the replacement rows from two temp files,
  *   5. commits.
  *
  * @param mixed $taxon_concept_ids A single numeric id or an array of ids.
  * @return false|null false when no ids were given; null otherwise (including
  *                    when a temp file cannot be opened).
  */
 public static function update_taxon_concept_names($taxon_concept_ids)
 {
     if (!$taxon_concept_ids) {
         return false;
     }
     if (is_numeric($taxon_concept_ids)) {
         $taxon_concept_ids = array($taxon_concept_ids);
     }
     $mysqli =& $GLOBALS['db_connection'];
     // Only open (and later end) a transaction when the caller has not already opened one.
     $started_new_transaction = false;
     if (!$mysqli->in_transaction()) {
         $mysqli->begin_transaction();
         $started_new_transaction = true;
     }
     $batches = array_chunk($taxon_concept_ids, 500);
     foreach ($batches as $batch_ids) {
         usleep(500000);
         // $name_ids:     name_id => taxon_concept_id => 1
         // $matching_ids: taxon_concept_id => name_id => hierarchy_entry_id => type
         $name_ids = array();
         $matching_ids = array();
         $query = "\n            (SELECT he.taxon_concept_id, he.id, he.name_id, 'preferred' as type FROM hierarchy_entries he WHERE taxon_concept_id IN (" . implode(",", $batch_ids) . ") AND ((he.published=1 AND he.visibility_id=" . Visibility::visible()->id . ") OR (he.published=0 AND he.visibility_id=" . Visibility::preview()->id . ")))\n            UNION\n            (SELECT he.taxon_concept_id, s.hierarchy_entry_id, s.name_id, 'synonym' as type\n            FROM hierarchy_entries he\n            JOIN synonyms s ON (he.id=s.hierarchy_entry_id)\n            WHERE he.taxon_concept_id IN (" . implode(",", $batch_ids) . ")\n            AND s.language_id=0\n            AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label('genbank common name')->id . "\n            AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label('common name')->id . "\n            AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label('blast name')->id . "\n            AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label('genbank acronym')->id . "\n            AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label('acronym')->id . "\n            AND ((he.published=1 AND he.visibility_id=" . Visibility::visible()->id . ") OR (he.published=0 AND he.visibility_id=" . Visibility::preview()->id . ")))";
         foreach ($mysqli->iterate_file($query) as $row_num => $row) {
             $taxon_concept_id = $row[0];
             $hierarchy_entry_id = $row[1];
             $name_id = $row[2];
             $name_type = $row[3];
             $name_ids[$name_id][$taxon_concept_id] = 1;
             $matching_ids[$taxon_concept_id][$name_id][$hierarchy_entry_id] = $name_type;
         }
         if ($name_ids) {
             //This makes sure we have a scientific name, gets the canonicalFormID
             $query = "SELECT n.id, n_match.id FROM names n JOIN canonical_forms cf ON (n.canonical_form_id=cf.id) JOIN names n_match ON (cf.id=n_match.canonical_form_id) WHERE n.id IN (" . implode(",", array_keys($name_ids)) . ") AND n_match.string=cf.string";
             foreach ($mysqli->iterate_file($query) as $row_num => $row) {
                 $original_name_id = $row[0];
                 $canonical_name_id = $row[1];
                 if ($original_name_id != $canonical_name_id) {
                     // Register the canonical name under hierarchy entry 0 for every
                     // concept that uses the original name.
                     foreach ($name_ids[$original_name_id] as $taxon_concept_id => $junk) {
                         $matching_ids[$taxon_concept_id][$canonical_name_id][0] = 1;
                     }
                 }
             }
         }
         $common_names = array();
         $preferred_in_language = array();
         $query = "SELECT he.taxon_concept_id, he.published, he.visibility_id, s.id, s.hierarchy_id, s.hierarchy_entry_id, s.name_id, s.language_id, s.preferred, s.vetted_id FROM hierarchy_entries he JOIN synonyms s ON (he.id=s.hierarchy_entry_id) JOIN vetted v ON (s.vetted_id=v.id) WHERE he.taxon_concept_id IN (" . implode(",", $batch_ids) . ") AND s.language_id!=0 AND (s.synonym_relation_id=" . SynonymRelation::genbank_common_name()->id . " OR s.synonym_relation_id=" . SynonymRelation::common_name()->id . ") ORDER BY s.language_id, (s.hierarchy_id=" . Hierarchy::contributors()->id . ") DESC, v.view_order ASC, s.preferred DESC, s.id DESC";
         foreach ($mysqli->iterate_file($query) as $row_num => $row) {
             $taxon_concept_id = $row[0];
             $published = $row[1];
             $visibility_id = $row[2];
             $synonym_id = $row[3];
             $hierarchy_id = $row[4];
             $hierarchy_entry_id = $row[5];
             $name_id = $row[6];
             $language_id = $row[7];
             $preferred = $row[8];
             $vetted_id = $row[9];
             // skipping Wikipedia common names entirely
             if ($hierarchy_id == @Hierarchy::wikipedia()->id) {
                 continue;
             }
             $curator_name = $hierarchy_id == @Hierarchy::contributors()->id;
             $ubio_name = $hierarchy_id == @Hierarchy::ubio()->id;
             // Curator and uBio names are always kept; anything else must be
             // published and visible.
             if ($curator_name || $ubio_name || ($published == 1 && $visibility_id == Visibility::visible()->id)) {
                 // Only the first qualifying name per concept and language stays preferred.
                 if (isset($preferred_in_language[$taxon_concept_id][$language_id])) {
                     $preferred = 0;
                 }
                 if ($preferred && $curator_name && ($vetted_id == Vetted::trusted()->id || $vetted_id == Vetted::unknown()->id)) {
                     $preferred_in_language[$taxon_concept_id][$language_id] = 1;
                 } else {
                     $preferred = 0;
                 }
                 if (!isset($common_names[$taxon_concept_id])) {
                     $common_names[$taxon_concept_id] = array();
                 }
                 $common_names[$taxon_concept_id][] = array('synonym_id' => $synonym_id, 'language_id' => $language_id, 'name_id' => $name_id, 'hierarchy_entry_id' => $hierarchy_entry_id, 'preferred' => $preferred, 'vetted_id' => $vetted_id, 'is_curator_name' => $curator_name);
             }
         }
         // if there was no preferred name
         // (promote the first trusted/unknown-vetted name in row order for that language)
         foreach ($common_names as $taxon_concept_id => $arr) {
             foreach ($arr as $key => $arr2) {
                 if (@(!$preferred_in_language[$taxon_concept_id][$arr2['language_id']]) && ($arr2['vetted_id'] == Vetted::trusted()->id || $arr2['vetted_id'] == Vetted::unknown()->id)) {
                     $common_names[$taxon_concept_id][$key]['preferred'] = 1;
                     $preferred_in_language[$taxon_concept_id][$arr2['language_id']] = 1;
                 }
             }
         }
         $mysqli->delete("DELETE FROM taxon_concept_names WHERE taxon_concept_id IN (" . implode(",", $batch_ids) . ")");
         $tmp_file_path = temp_filepath();
         if (!($LOAD_DATA_TEMP = fopen($tmp_file_path, "w+"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $tmp_file_path);
             // NOTE(review): this returns after the DELETE above without committing
             // or ending a transaction started here — confirm that is intended.
             return;
         }
         /* Insert the scientific names */
         foreach ($matching_ids as $taxon_concept_id => $arr) {
             foreach ($arr as $name_id => $arr2) {
                 foreach ($arr2 as $hierarchy_entry_id => $type) {
                     // Only names that come directly from a hierarchy entry are preferred.
                     $preferred = 0;
                     if ($hierarchy_entry_id && $type == "preferred") {
                         $preferred = 1;
                     }
                     fwrite($LOAD_DATA_TEMP, "{$taxon_concept_id}\t{$name_id}\t{$hierarchy_entry_id}\t0\t0\t{$preferred}\n");
                 }
             }
         }
         // Close the handle before the file is loaded and removed so it is not leaked.
         fclose($LOAD_DATA_TEMP);
         $mysqli->load_data_infile($tmp_file_path, 'taxon_concept_names');
         unlink($tmp_file_path);
         $tmp_file_path = temp_filepath();
         if (!($LOAD_DATA_TEMP = fopen($tmp_file_path, "w+"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $tmp_file_path);
             // NOTE(review): same early exit as above; the scientific names are
             // already loaded at this point but not committed.
             return;
         }
         /* Insert the common names */
         foreach ($common_names as $taxon_concept_id => $arr) {
             foreach ($arr as $key => $arr2) {
                 $synonym_id = $arr2['synonym_id'];
                 $language_id = $arr2['language_id'];
                 $name_id = $arr2['name_id'];
                 $hierarchy_entry_id = $arr2['hierarchy_entry_id'];
                 $preferred = $arr2['preferred'];
                 $vetted_id = $arr2['vetted_id'];
                 fwrite($LOAD_DATA_TEMP, "{$taxon_concept_id}\t{$name_id}\t{$hierarchy_entry_id}\t{$language_id}\t1\t{$preferred}\t{$synonym_id}\t{$vetted_id}\n");
             }
         }
         fclose($LOAD_DATA_TEMP);
         $mysqli->load_data_infile($tmp_file_path, 'taxon_concept_names');
         unlink($tmp_file_path);
         // Release the per-batch working sets before the next batch is built.
         unset($matching_ids);
         unset($common_names);
         unset($name_ids);
         unset($preferred_in_language);
         $mysqli->commit();
     }
     if ($started_new_transaction) {
         $mysqli->end_transaction();
     }
 }
 /**
  * Converts one <taxon> XML element into the taxon-parameters array.
  *
  * Reads the Dublin Core / Darwin Core fields, then the element's common
  * names, synonyms, agents and data objects. When the element has no
  * scientific name, the lowest filled rank (genus, family, order, class,
  * phylum, kingdom - in that order) is promoted to the scientific name and
  * its own slot is blanked.
  *
  * The source XML is only read: a synonym without a "relationship" attribute
  * is treated as 'synonym' without writing that default back into $t.
  *
  * @param SimpleXMLElement $t The taxon element.
  * @return array|null Keys: identifier, source_url, kingdom ... genus,
  *                    scientific_name, taxon_created_at, taxon_modified_at,
  *                    name, common_names, synonyms, agents, data_objects.
  *                    Null when neither a scientific name nor any rank is present.
  */
 public static function read_taxon_xml($t)
 {
     $t_dc = $t->children("http://purl.org/dc/elements/1.1/");
     $t_dcterms = $t->children("http://purl.org/dc/terms/");
     $t_dwc = $t->children("http://rs.tdwg.org/dwc/dwcore/");
     $taxon_parameters = array();
     $taxon_parameters["identifier"] = Functions::import_decode($t_dc->identifier);
     $taxon_parameters["source_url"] = Functions::import_decode($t_dc->source);
     $taxon_parameters["kingdom"] = Functions::import_decode($t_dwc->Kingdom);
     $taxon_parameters["phylum"] = Functions::import_decode($t_dwc->Phylum);
     $taxon_parameters["class"] = Functions::import_decode($t_dwc->Class);
     $taxon_parameters["order"] = Functions::import_decode($t_dwc->Order);
     $taxon_parameters["family"] = Functions::import_decode($t_dwc->Family);
     $taxon_parameters["genus"] = Functions::import_decode($t_dwc->Genus);
     $taxon_parameters["scientific_name"] = Functions::import_decode($t_dwc->ScientificName);
     $taxon_parameters["taxon_created_at"] = trim($t_dcterms->created);
     $taxon_parameters["taxon_modified_at"] = trim($t_dcterms->modified);

     if ($taxon_parameters["scientific_name"]) {
         $taxon_parameters["name"] = Name::find_or_create_by_string($taxon_parameters["scientific_name"]);
     } else {
         // No scientific name: promote the lowest filled rank and blank its slot
         // so the same value is not reported twice.
         $promoted = false;
         foreach (array("genus", "family", "order", "class", "phylum", "kingdom") as $rank) {
             if ($name = $taxon_parameters[$rank]) {
                 $taxon_parameters["scientific_name"] = $name;
                 $taxon_parameters["name"] = Name::find_or_create_by_string($name);
                 $taxon_parameters[$rank] = "";
                 $promoted = true;
                 break;
             }
         }
         if (!$promoted) {
             return;
         }
     }

     $taxon_parameters["common_names"] = array();
     foreach ($t->commonName as $c) {
         $common_name = Functions::import_decode((string) $c);
         if (!$common_name) {
             continue;
         }
         // The language comes from the xml:lang attribute.
         $xml_attr = $c->attributes("http://www.w3.org/XML/1998/namespace");
         $taxon_parameters["common_names"][] = array(
             "name" => $common_name,
             "language" => Language::find_or_create_for_parser(@Functions::import_decode($xml_attr["lang"])));
     }

     $taxon_parameters["synonyms"] = array();
     foreach ($t->synonym as $s) {
         $synonym = Functions::import_decode((string) $s);
         if (!$synonym) {
             continue;
         }
         $attr = $s->attributes();
         // Default to 'synonym' locally; assigning into $attr would add the
         // attribute to the caller's XML document.
         $relationship = isset($attr["relationship"]) ? trim((string) $attr["relationship"]) : 'synonym';
         $taxon_parameters["synonyms"][] = array(
             "name" => Name::find_or_create_by_string($synonym),
             "synonym_relation" => SynonymRelation::find_or_create_by_translated_label($relationship));
     }

     $taxon_parameters["agents"] = array();
     foreach ($t->agent as $a) {
         $agent_name = Functions::import_decode((string) $a);
         if (!$agent_name) {
             continue;
         }
         $attr = $a->attributes();
         $taxon_parameters["agents"][] = array(
             "full_name" => Functions::import_decode((string) $a, 0, 0),
             "homepage" => @Functions::import_decode($attr["homepage"]),
             "logo_url" => @Functions::import_decode($attr["logoURL"]),
             "agent_role" => AgentRole::find_or_create_by_translated_label(@trim($attr["role"])));
     }

     $taxon_parameters["data_objects"] = array();
     foreach ($t->dataObject as $d) {
         $d_dc = $d->children("http://purl.org/dc/elements/1.1/");
         $d_dcterms = $d->children("http://purl.org/dc/terms/");
         $data_object = new DataObject();
         $data_object->identifier = Functions::import_decode($d_dc->identifier);
         $data_object->object_created_at = Functions::import_decode($d_dcterms->created);
         $data_object->object_modified_at = Functions::import_decode($d_dcterms->modified);
         $data_object->object_title = Functions::import_decode($d_dc->title, 0, 0);
         $data_object->language = Language::find_or_create_for_parser(Functions::import_decode($d_dc->language));
         $data_object->rights_statement = Functions::import_decode($d_dc->rights, 0, 0);
         $data_object->rights_holder = Functions::import_decode($d_dcterms->rightsHolder, 0, 0);
         $data_object->description = Functions::import_decode($d_dc->description, 0, 0);
         $data_object->location = Functions::import_decode($d->location, 0, 0);
         $data_object_parameters = array();
         // No dc:language element: fall back to xml:lang on the description.
         if (!$data_object->language) {
             $xml_attr = $d_dc->description->attributes("http://www.w3.org/XML/1998/namespace");
             $data_object->language = Language::find_or_create_for_parser(@Functions::import_decode($xml_attr["lang"]));
         }
         //take the taxon's source_url if none present
         if (!@$data_object->source_url && @$taxon_parameters["source_url"]) {
             $data_object->source_url = $taxon_parameters["source_url"];
         }

         $data_object_parameters["agents"] = array();
         foreach ($d->agent as $a) {
             $agent_name = Functions::import_decode((string) $a);
             if (!$agent_name) {
                 continue;
             }
             $attr = $a->attributes();
             $data_object_parameters["agents"][] = array(
                 "full_name" => Functions::import_decode((string) $a, 0, 0),
                 "homepage" => @Functions::import_decode($attr["homepage"]),
                 "logo_url" => @Functions::import_decode($attr["logoURL"]),
                 "agent_role" => AgentRole::find_or_create_by_translated_label(@trim($attr["role"])));
         }

         // Translation metadata contributes the original object's id plus up to
         // three extra agents (translator and the two reviewers).
         if ($translation_information = @$d->additionalInformation->translation) {
             $data_object->EOLDataObjectID = (string) $translation_information->EOLDataObjectID;
             if ($translator = (string) $translation_information->translator) {
                 $data_object_parameters["agents"][] = self::translation_agent($translator, 'Translator');
             }
             if ($scientificReviewer = (string) $translation_information->scientificReviewer) {
                 $data_object_parameters["agents"][] = self::translation_agent($scientificReviewer, 'Scientific Reviewer');
             }
             if ($linguisticReviewer = (string) $translation_information->linguisticReviewer) {
                 $data_object_parameters["agents"][] = self::translation_agent($linguisticReviewer, 'Linguistic Reviewer');
             }
         }
         // Each entry is a pair: the DataObject and its parameters array.
         $taxon_parameters["data_objects"][] = array($data_object, $data_object_parameters);
     }
     return $taxon_parameters;
 }
 /**
  * Looks up the best matching taxon concept for a family name.
  *
  * Searches published, visible hierarchy entries whose canonical form equals
  * $name or one of $synonyms, either directly ('valid') or through a synonym
  * relation ('synonym'), and hands the rows to get_best_concept_from_result().
  *
  * Terminates the script when $ancestors carries none of order, class or phylum.
  *
  * @param string $name      Family name to look up.
  * @param array  $synonyms  Alternative names searched together with $name.
  * @param array  $ancestors Ancestor names; indexes 15, 11 and 6 are read as
  *                          order, class and phylum (the meaning of the indexes
  *                          is not visible here - TODO confirm against the caller).
  * @return mixed Whatever get_best_concept_from_result() returns, or null when
  *               the query yields no rows.
  */
 private function lookup_family($name, $synonyms, $ancestors)
 {
     $order = @$ancestors[15];
     $class = @$ancestors[11];
     $phylum = @$ancestors[6];
     if (!$order && !$class && !$phylum) {
         // The message used to interpolate $line_number and $line, which are
         // not defined in this scope; only the name is reported now.
         echo "This is a line that doesnt have a order, class or phylum:\n{$name}\n\n\n";
         exit;
     }
     $synonyms[] = $name;

     // Both halves of the UNION use the same name list and visibility id,
     // so they are built once.
     // NOTE(review): the names are placed in the SQL unescaped; a name that
     // contains a single quote will break the query. Escape them here unless
     // the caller already does - confirm.
     $names_in = "'" . implode("','", $synonyms) . "'";
     $visible_id = Visibility::visible()->id;

     $result = $this->mysqli->query("\n            (SELECT n.id name_id, h.id hierarchy_id, h.browsable, he.taxon_concept_id, 'valid' match_type\n                FROM canonical_forms cf\n                JOIN names n ON (cf.id=n.canonical_form_id)\n                JOIN hierarchy_entries he ON (n.id=he.name_id)\n                JOIN hierarchies h ON (he.hierarchy_id=h.id)\n                WHERE cf.string IN (" . $names_in . ")\n                AND he.published=1 AND he.visibility_id=" . $visible_id . ")\n            UNION\n            (SELECT n.id name_id, h.id hierarchy_id, h.browsable, he.taxon_concept_id, 'synonym' match_type\n                FROM canonical_forms cf\n                JOIN names n ON (cf.id=n.canonical_form_id)\n                JOIN synonyms s ON (n.id=s.name_id AND s.synonym_relation_id=" . SynonymRelation::synonym()->id . ")\n                JOIN hierarchy_entries he ON (s.hierarchy_entry_id=he.id)\n                JOIN hierarchies h ON (he.hierarchy_id=h.id)\n                WHERE cf.string IN (" . $names_in . ")\n                AND he.published=1 AND he.visibility_id=" . $visible_id . ")");
     if ($result && $result->num_rows) {
         return $this->get_best_concept_from_result($result, $name);
     }
 }
예제 #6
0
 public static function read_taxon_xml($t, $resource)
 {
     $t_dc = $t->children("http://purl.org/dc/elements/1.1/");
     $t_dcterms = $t->children("http://purl.org/dc/terms/");
     $t_dwc = $t->children("http://rs.tdwg.org/dwc/dwcore/");
     $taxon_parameters = array();
     $taxon_parameters["identifier"] = Functions::import_decode($t_dc->identifier);
     $taxon_parameters["source_url"] = Functions::import_decode($t_dc->source);
     $taxon_parameters["kingdom"] = Functions::import_decode($t_dwc->Kingdom);
     $taxon_parameters["phylum"] = Functions::import_decode($t_dwc->Phylum);
     $taxon_parameters["class"] = Functions::import_decode($t_dwc->Class);
     $taxon_parameters["order"] = Functions::import_decode($t_dwc->Order);
     $taxon_parameters["family"] = Functions::import_decode($t_dwc->Family);
     $taxon_parameters["genus"] = Functions::import_decode($t_dwc->Genus);
     $taxon_parameters["scientific_name"] = Functions::import_decode($t_dwc->ScientificName);
     $taxon_parameters["rank"] = Rank::find_or_create_by_translated_label(Functions::import_decode($t->rank));
     $taxon_parameters["taxon_created_at"] = trim($t_dcterms->created);
     $taxon_parameters["taxon_modified_at"] = trim($t_dcterms->modified);
     if ($taxon_parameters["scientific_name"]) {
         $taxon_parameters["name"] = Name::find_or_create_by_string($taxon_parameters["scientific_name"]);
     } else {
         if ($name = $taxon_parameters["genus"]) {
             $taxon_parameters["scientific_name"] = $name;
             $taxon_parameters["name"] = Name::find_or_create_by_string($name);
             $taxon_parameters["genus"] = "";
         } elseif ($name = $taxon_parameters["family"]) {
             $taxon_parameters["scientific_name"] = $name;
             $taxon_parameters["name"] = Name::find_or_create_by_string($name);
             $taxon_parameters["family"] = "";
         } elseif ($name = $taxon_parameters["order"]) {
             $taxon_parameters["scientific_name"] = $name;
             $taxon_parameters["name"] = Name::find_or_create_by_string($name);
             $taxon_parameters["order"] = "";
         } elseif ($name = $taxon_parameters["class"]) {
             $taxon_parameters["scientific_name"] = $name;
             $taxon_parameters["name"] = Name::find_or_create_by_string($name);
             $taxon_parameters["class"] = "";
         } elseif ($name = $taxon_parameters["phylum"]) {
             $taxon_parameters["scientific_name"] = $name;
             $taxon_parameters["name"] = Name::find_or_create_by_string($name);
             $taxon_parameters["phylum"] = "";
         } elseif ($name = $taxon_parameters["kingdom"]) {
             $taxon_parameters["scientific_name"] = $name;
             $taxon_parameters["name"] = Name::find_or_create_by_string($name);
             $taxon_parameters["kingdom"] = "";
         } else {
             return;
         }
     }
     $taxon_parameters["common_names"] = array();
     foreach ($t->commonName as $c) {
         $common_name = Functions::import_decode((string) $c);
         if (!$common_name) {
             continue;
         }
         $xml_attr = $c->attributes("http://www.w3.org/XML/1998/namespace");
         $params = array("name" => $common_name, "language" => Language::find_or_create_for_parser(@Functions::import_decode($xml_attr["lang"])));
         $taxon_parameters["common_names"][] = $params;
     }
     $taxon_parameters["synonyms"] = array();
     foreach ($t->synonym as $s) {
         $synonym = Functions::import_decode((string) $s);
         if (!$synonym) {
             continue;
         }
         $attr = $s->attributes();
         if (!@$attr["relationship"]) {
             $attr["relationship"] = 'synonym';
         }
         $params = array("name" => Name::find_or_create_by_string($synonym), "synonym_relation" => SynonymRelation::find_or_create_by_translated_label(trim($attr["relationship"])));
         $taxon_parameters["synonyms"][] = $params;
     }
     $taxon_parameters["agents"] = array();
     foreach ($t->agent as $a) {
         $agent_name = Functions::import_decode((string) $a);
         if (!$agent_name) {
             continue;
         }
         $attr = $a->attributes();
         $params = array("full_name" => Functions::import_decode((string) $a, 0, 0), "homepage" => @Functions::import_decode($attr["homepage"]), "logo_url" => @Functions::import_decode($attr["logoURL"]), "agent_role" => AgentRole::find_or_create_by_translated_label(@trim($attr["role"])));
         $taxon_parameters["agents"][] = $params;
         unset($params);
     }
     $taxon_parameters["refs"] = array();
     foreach ($t->reference as $r) {
         $reference = Functions::import_decode((string) $r, 0, 0);
         if (!$reference) {
             continue;
         }
         $ref = Reference::find_or_create_by_full_reference($reference);
         $taxon_parameters["refs"][] = $ref;
         $id_labels = array("bici", "coden", "doi", "eissn", "handle", "issn", "isbn", "lsid", "oclc", "sici", "url", "urn");
         $attr = $r->attributes();
         foreach ($id_labels as $label) {
             if ($id = @Functions::import_decode($attr[$label], 0, 0)) {
                 $type = RefIdentifierType::find_or_create_by_label($label);
                 $ref->add_ref_identifier(@$type->id ?: 0, $id);
             }
         }
     }
     $taxon_parameters["data_objects"] = array();
     foreach ($t->dataObject as $d) {
         $d_dc = $d->children("http://purl.org/dc/elements/1.1/");
         $d_dcterms = $d->children("http://purl.org/dc/terms/");
         $d_geo = $d->children("http://www.w3.org/2003/01/geo/wgs84_pos#");
         $data_object = new DataObject();
         $data_object->identifier = Functions::import_decode($d_dc->identifier);
         $data_object->data_type = DataType::find_or_create_by_schema_value(Functions::import_decode($d->dataType));
         $data_object->mime_type = MimeType::find_or_create_by_translated_label(Functions::import_decode($d->mimeType));
         $data_object->object_created_at = Functions::import_decode($d_dcterms->created);
         $data_object->object_modified_at = Functions::import_decode($d_dcterms->modified);
         $data_object->object_title = Functions::import_decode($d_dc->title, 0, 0);
         $data_object->language = Language::find_or_create_for_parser(Functions::import_decode($d_dc->language));
         $data_object->license = License::find_or_create_for_parser(Functions::import_decode($d->license));
         $data_object->rights_statement = Functions::import_decode($d_dc->rights, 0, 0);
         $data_object->rights_holder = Functions::import_decode($d_dcterms->rightsHolder, 0, 0);
         $data_object->bibliographic_citation = Functions::import_decode($d_dcterms->bibliographicCitation, 0, 0);
         $data_object->source_url = Functions::import_decode($d_dc->source);
         $data_object->description = Functions::import_decode($d_dc->description, 0, 0);
         $data_object->object_url = Functions::import_decode($d->mediaURL);
         $data_object->thumbnail_url = Functions::import_decode($d->thumbnailURL);
         $data_object->location = Functions::import_decode($d->location, 0, 0);
         if (@$d->additionalInformation) {
             $data_object->additional_information = (array) $d->additionalInformation;
         }
         if ($r = (string) @$d->additionalInformation->rating) {
             if (is_numeric($r) && $r > 0 && $r <= 5) {
                 $data_object->data_rating = $r;
             }
         }
         if ($subtype = @$d->additionalInformation->subtype) {
             if ($dt = DataType::find_or_create_by_schema_value(Functions::import_decode($subtype))) {
                 $data_object->data_subtype_id = $dt->id;
             }
         }
         $data_object_parameters = array();
         if (!$data_object->language) {
             $xml_attr = $d_dc->description->attributes("http://www.w3.org/XML/1998/namespace");
             $data_object->language = Language::find_or_create_for_parser(@Functions::import_decode($xml_attr["lang"]));
         }
         if (!$data_object->language && $resource->language) {
             $data_object->language = $resource->language;
         }
         //TODO - update this
         if ($data_object->mime_type && $data_object->mime_type->equals(MimeType::flash()) && $data_object->is_video()) {
             $data_object->data_type = DataType::youtube();
             $data_object->data_type_id = DataType::youtube()->id;
         }
         //take the taxon's source_url if none present
         if (!@$data_object->source_url && @$taxon_parameters["source_url"]) {
             $data_object->source_url = $taxon_parameters["source_url"];
         }
         // Turn newlines into paragraphs
         $data_object->description = str_replace("\n", "</p><p>", $data_object->description);
         /* Checking requirements*/
         //if text: must have description
         if ($data_object->data_type->equals(DataType::text()) && !$data_object->description) {
             continue;
         }
         //if image, movie or sound: must have object_url
         if (($data_object->data_type->equals(DataType::video()) || $data_object->data_type->equals(DataType::sound()) || $data_object->data_type->equals(DataType::image())) && !$data_object->object_url) {
             continue;
         }
         $data_object->latitude = 0;
         $data_object->longitude = 0;
         $data_object->altitude = 0;
         foreach ($d_geo->Point as $p) {
             $p_geo = $p->children("http://www.w3.org/2003/01/geo/wgs84_pos#");
             $data_object->latitude = Functions::import_decode($p_geo->lat);
             $data_object->longitude = Functions::import_decode($p_geo->long);
             $data_object->altitude = Functions::import_decode($p_geo->alt);
         }
         $data_object_parameters["agents"] = array();
         foreach ($d->agent as $a) {
             $agent_name = Functions::import_decode((string) $a);
             if (!$agent_name) {
                 continue;
             }
             $attr = $a->attributes();
             $params = array("full_name" => Functions::import_decode((string) $a, 0, 0), "homepage" => @Functions::import_decode($attr["homepage"]), "logo_url" => @Functions::import_decode($attr["logoURL"]), "agent_role" => AgentRole::find_or_create_by_translated_label(@trim($attr["role"])));
             $data_object_parameters["agents"][] = $params;
             unset($params);
         }
         $data_object_parameters["audiences"] = array();
         foreach ($d->audience as $a) {
             $data_object_parameters["audiences"][] = Audience::find_or_create_by_translated_label(trim((string) $a));
         }
         $data_object_parameters["info_items"] = array();
         foreach ($d->subject as $s) {
             $data_object_parameters["info_items"][] = InfoItem::find_or_create_by_schema_value(trim((string) $s));
         }
         if ($subject = @$d->additionalInformation->subject) {
             if ($ii = InfoItem::find_or_create_by_schema_value(trim((string) $subject))) {
                 $data_object_parameters["info_items"] = array($ii);
             }
         }
         // EXCEPTIONS
         if ($data_object->is_text()) {
             if ($resource->title == "BOLD Systems Resource") {
                 // EXCEPTION - overriding the subject for BOLD
                 $data_object_parameters["info_items"] = array(InfoItem::find_or_create_by_schema_value('http://www.eol.org/voc/table_of_contents#Barcode'));
             } elseif ($resource->title == "Wikipedia") {
                 // EXCEPTION - overriding the subject for Wikipedia
                 $data_object_parameters["info_items"] = array(InfoItem::find_or_create_by_schema_value('http://www.eol.org/voc/table_of_contents#Wikipedia'));
             } elseif ($resource->title == "IUCN Red List") {
                 if ($data_object->object_title == "IUCNConservationStatus") {
                     // EXCEPTION - overriding the data type for IUCN text
                     $data_object->data_type_id = DataType::iucn()->id;
                     $data_object->data_type = DataType::iucn();
                 }
             }
         }
         $data_object_parameters["refs"] = array();
         foreach ($d->reference as $r) {
             $reference = Functions::import_decode((string) $r, 0, 0);
             if (!$reference) {
                 continue;
             }
             $ref = Reference::find_or_create_by_full_reference($reference);
             $data_object_parameters["refs"][] = $ref;
             $id_labels = array("bici", "coden", "doi", "eissn", "handle", "issn", "isbn", "lsid", "oclc", "sici", "url", "urn");
             $attr = $r->attributes();
             foreach ($id_labels as $label) {
                 if ($id = @Functions::import_decode($attr[$label], 0, 0)) {
                     $type = RefIdentifierType::find_or_create_by_label($label);
                     $ref->add_ref_identifier(@$type->id ?: 0, $id);
                 }
             }
         }
         $taxon_parameters["data_objects"][] = array($data_object, $data_object_parameters);
         unset($data_object);
     }
     return $taxon_parameters;
 }
예제 #7
0
 /**
  * Counts, per taxon concept, the distinct synonym names and the distinct
  * hierarchies providing them, and hands the result to save_category_stats().
  *
  * Taxon concept ids are processed in batches of $batch_size between
  * $this->min_taxon_concept_id and $this->max_taxon_concept_id. Synonyms whose
  * relation is "common name" or "genbank common name" are excluded, and only
  * published, visible entries from browsable hierarchies are considered.
  * When $this->test_taxon_concept_ids is set, a single query restricted to
  * those ids is run instead of the batched range.
  *
  * @param int $batch_size Number of taxon concept ids covered by one query.
  * @return void
  */
 function get_synonyms_count($batch_size = 500000)
 {
     // The base query does not depend on the batch, so build it once.
     $excluded_relation_ids = SynonymRelation::find_by_translated('label', "common name")->id . "," . SynonymRelation::find_by_translated('label', "genbank common name")->id;
     $base_sql = "SELECT he.taxon_concept_id, s.name_id,  s.hierarchy_id\n              FROM hierarchy_entries he FORCE INDEX (concept_published_visible)\n              JOIN synonyms s ON he.id = s.hierarchy_entry_id JOIN hierarchies h ON s.hierarchy_id = h.id\n              WHERE s.synonym_relation_id NOT IN (" . $excluded_relation_ids . ")\n              AND h.browsable=1 AND he.published=1 AND he.visibility_id=" . Visibility::visible()->id;
     for ($i = $this->min_taxon_concept_id; $i <= $this->max_taxon_concept_id; $i += $batch_size) {
         $this->print_status($i, $batch_size);
         $sql = $base_sql;
         if ($this->test_taxon_concept_ids) {
             $sql .= " AND he.taxon_concept_id IN (" . $this->test_taxon_concept_ids . ")";
         } else {
             // BETWEEN is inclusive on both ends; stop one short of the next
             // batch's first id so no taxon concept is processed and saved twice.
             $sql .= " AND he.taxon_concept_id BETWEEN {$i} AND " . ($i + $batch_size - 1);
         }
         // Array keys act as sets: each distinct name / hierarchy is stored once.
         $raw_stats = array();
         foreach ($this->mysqli_slave->iterate_file($sql) as $row_number => $row) {
             $taxon_concept_id = trim($row[0]);
             $name_id = trim($row[1]);
             $hierarchy_id = trim($row[2]);
             $raw_stats[$taxon_concept_id]['synonyms'][$name_id] = 1;
             $raw_stats[$taxon_concept_id]['synonym_providers'][$hierarchy_id] = 1;
         }
         // Flatten each taxon concept to "<synonym count>\t<provider count>".
         // Both keys are always populated together above, so no isset() is needed.
         $category_stats = array();
         foreach ($raw_stats as $taxon_concept_id => $stats) {
             $category_stats[$taxon_concept_id] = count($stats['synonyms']) . "\t" . count($stats['synonym_providers']);
         }
         $this->save_category_stats($category_stats, "get_synonyms_count");
         // The test-id query ignores the batch range, so one pass is enough.
         if ($this->test_taxon_concept_ids) {
             break;
         }
     }
 }
예제 #8
0
 /**
  * Counts, per taxon concept, the distinct synonym names and the distinct
  * hierarchies providing them, then passes the totals to
  * save_totals_to_cumulative_txt() under the "tpm_synonyms" label.
  *
  * Taxon concept ids are queried in batches of $batch_size between
  * $this->min_taxon_concept_id and $this->max_taxon_concept_id; each batch is
  * dumped to an outfile, read back line by line, and the outfile is deleted.
  * Synonyms whose relation is "common name" or "genbank common name" are
  * excluded. When $GLOBALS['test_taxon_concept_ids'] is set, the query is
  * restricted to those ids and run once.
  *
  * If an outfile cannot be opened, an error is printed/logged and the method
  * returns without saving anything.
  *
  * @param int $batch_size Number of taxon concept ids covered by one query.
  * @return void
  */
 function get_synonyms_count($batch_size = 500000)
 {
     $time_start = time_elapsed();
     $arr_taxa = array();
     // Hard-coded switch: set to 0 to skip the computation and save an empty result.
     $enable = 1;
     if (!$enable) {
         self::save_totals_to_cumulative_txt($arr_taxa, "tpm_synonyms");
         unset($arr_taxa);
         return;
     }
     // Array keys act as sets: each distinct name / hierarchy is stored once per taxon concept.
     $tc_name_id = array();
     $tc_hierarchy_id = array();
     $use_test_ids = isset($GLOBALS['test_taxon_concept_ids']);
     for ($i = $this->min_taxon_concept_id; $i <= $this->max_taxon_concept_id; $i += $batch_size) {
         print "\n synonyms and its providers [11 of 14] {$i} \n";
         $sql = "SELECT he.taxon_concept_id tc_id, s.name_id, s.hierarchy_id h_id FROM hierarchy_entries he JOIN synonyms s ON he.id = s.hierarchy_entry_id JOIN hierarchies h ON s.hierarchy_id = h.id WHERE s.synonym_relation_id NOT IN (" . SynonymRelation::find_by_translated('label', "common name")->id . "," . SynonymRelation::find_by_translated('label', "genbank common name")->id . ") AND h.browsable=1 AND he.published=1 AND he.visibility_id=" . Visibility::visible()->id;
         if ($use_test_ids) {
             $sql .= " and he.taxon_concept_id IN (" . implode(",", $GLOBALS['test_taxon_concept_ids']) . ")";
         } else {
             // BETWEEN is inclusive on both ends; stop one short of the next
             // batch's first id so boundary ids are not fetched twice.
             $sql .= " AND he.taxon_concept_id BETWEEN {$i} AND " . ($i + $batch_size - 1);
         }
         $outfile = $this->mysqli_slave->select_into_outfile($sql);
         $FILE = fopen($outfile, "r");
         if (!$FILE) {
             print "!! ERROR: Could not read {$outfile}";
             debug("!! ERROR: Could not read {$outfile}");
             return;
         }
         $num_rows = 0;
         while (($line = fgets($FILE)) !== false) {
             $fields = explode("\t", rtrim($line, "\r\n"));
             // Skip blank or truncated lines instead of recording an empty-id taxon.
             if (count($fields) < 3) {
                 continue;
             }
             $num_rows++;
             $tc_id = trim($fields[0]);
             $name_id = trim($fields[1]);
             $h_id = trim($fields[2]);
             $tc_name_id[$tc_id][$name_id] = '';
             $tc_hierarchy_id[$tc_id][$h_id] = '';
         }
         fclose($FILE);
         unlink($outfile);
         print "\n num_rows: {$num_rows}";
         // The test-id query ignores the batch range, so repeating it for every
         // batch would only re-read identical rows.
         if ($use_test_ids) {
             break;
         }
     }
     // Flatten each taxon concept to "\t<synonym count>\t<provider count>".
     // The leading tab is kept from the original output format.
     // Both maps are always filled together, so they share the same keys.
     foreach ($tc_name_id as $tc_id => $name_ids) {
         $arr_taxa[$tc_id] = "\t" . count($name_ids) . "\t" . count($tc_hierarchy_id[$tc_id]);
     }
     unset($tc_name_id);
     unset($tc_hierarchy_id);
     print "\n get_synonyms_count():" . (time_elapsed() - $time_start) / 60 . " minutes";
     self::save_totals_to_cumulative_txt($arr_taxa, "tpm_synonyms");
     unset($arr_taxa);
 }