/**
 * Inserts the hierarchy entry for one taxon row, attaches its references,
 * synonyms and dataset agent, and then recurses into the rows registered
 * under $this->children for this taxon ID.
 *
 * @param array  $row                       Taxon row keyed by term URI; also carries
 *                                          'archive_file_location' and 'archive_line_number'.
 * @param mixed  $parent_hierarchy_entry_id ID of the parent entry, or a falsy value for a root.
 * @param string $ancestry                  Pipe-delimited name IDs of the ancestors ('' / null for a root).
 * @param mixed  $branch_kingdom            Not read here; only handed on to the recursive calls.
 *
 * @return false|null false when the row is rejected before a hierarchy entry
 *                    is attempted (validation error, no name, no taxon ID,
 *                    duplicate taxon ID, name could not be created); null when
 *                    the hierarchy entry could not be created and also on
 *                    normal completion.
 */
function add_hierarchy_entry(&$row, $parent_hierarchy_entry_id, $ancestry, $branch_kingdom)
{
    self::debug_iterations("Inserting taxon");
    self::commit_iterations("Taxa", 500);

    if ($this->archive_validator->has_error_by_line('http://rs.tdwg.org/dwc/terms/taxon', $row['archive_file_location'], $row['archive_line_number'])) {
        write_to_resource_harvesting_log("ERROR: add_hierarchy_entry: has_error_by_line" . ",file_location:" . $row['archive_file_location'] . ",line_number:" . $row['archive_line_number']);
        return false;
    }

    // make sure this taxon has a name, otherwise skip this branch
    $scientific_name = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/scientificName']);
    $authorship = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/scientificNameAuthorship']);
    $kingdom = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/kingdom']);
    $genus = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/genus']);
    $rank_label = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/taxonRank']);
    $is_virus = (strtolower($kingdom) == 'viruses');

    // COL exception: strip a trailing colon and a trailing " ICTV" from virus names
    if ($is_virus) {
        if (substr($scientific_name, -1) == ":") {
            $scientific_name = substr($scientific_name, 0, -1);
        }
        if (preg_match("/^(.*) ICTV\$/i", $scientific_name, $arr)) {
            $scientific_name = $arr[1];
        }
    }

    // COL exception: drop a leading genus from non-genus virus names
    if ($is_virus && $genus && strtolower($rank_label) != 'genus') {
        // Strict comparison is required: stripos() returns false when the
        // genus is absent, and false == 0 would wrongly truncate the name.
        if (stripos($scientific_name, $genus) === 0) {
            $scientific_name = ucfirst(trim(substr($scientific_name, strlen($genus))));
        }
    } else {
        if ($authorship && stripos($scientific_name, $authorship) === false) {
            $scientific_name = trim($scientific_name . " " . $authorship);
        }
    }
    if (!$scientific_name) {
        return false;
    }

    $taxon_id = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/taxonID']);
    if (!$taxon_id) {
        $taxon_id = @self::field_decode($row['http://purl.org/dc/terms/identifier']);
    }
    if (!$taxon_id) {
        debug("ERROR - no taxon ID for {$scientific_name}, skipping");
        return false;
    }
    if (isset($this->taxon_ids_inserted[$taxon_id])) {
        // this taxon_id has already been inserted meaning this tree has a loop in it - so stop
        debug("ERROR - taxon ID ({$taxon_id}) for {$scientific_name} already inserted; LOOP?");
        return false;
    }

    $scientific_name = ucfirst($scientific_name);
    $name = Name::find_or_create_by_string($scientific_name);
    if (@(!$name->id)) {
        debug("ERROR - Failed to insert name: {$scientific_name}");
        return false;
    }

    $phylum = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/phylum']);
    $class = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/class']);
    $order = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/order']);
    $family = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/family']);
    $rank = Rank::find_or_create_by_translated_label($rank_label);
    $dataset_id = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/datasetID']);
    $taxonomic_status = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/taxonomicStatus']);

    // first non-empty of these terms becomes the source URL
    $source_url = @self::field_decode($row['http://rs.tdwg.org/ac/terms/furtherInformationURL']);
    if (!$source_url) {
        $source_url = @self::field_decode($row['http://purl.org/dc/terms/source']);
    }
    if (!$source_url) {
        $source_url = @self::field_decode($row['http://purl.org/dc/terms/references']);
    }
    if (!$source_url) {
        $source_url = @self::field_decode($row['http://purl.org/dc/terms/isReferencedBy']);
    }

    if (isset($row['http://rs.tdwg.org/dwc/terms/taxonRemarks'])) {
        $taxon_remarks = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/taxonRemarks']);
    } else {
        $taxon_remarks = NULL;
    }
    if (!$taxon_remarks && strtolower($taxonomic_status) == 'provisionally accepted name') {
        $taxon_remarks = "provisionally accepted name";
    }

    // TODO: This block is somewhat confusing. Clearly, it's clearing the
    // rank that's currently being read, but shouldn't it also clear all of
    // the ranks below that?
    if (strtolower($rank_label) == 'kingdom') {
        $kingdom = null;
    }
    if (strtolower($rank_label) == 'phylum') {
        $phylum = null;
    }
    if (strtolower($rank_label) == 'class') {
        $class = null;
    }
    if (strtolower($rank_label) == 'order') {
        $order = null;
    }
    if (strtolower($rank_label) == 'family') {
        $family = null;
    }
    if (strtolower($rank_label) == 'genus') {
        $genus = null;
    }

    $hierarchy_id = $this->harvest_event->resource->hierarchy_id;

    if (!$parent_hierarchy_entry_id && ($kingdom || $phylum || $class || $order || $family || $genus)) {
        // Root row that names its higher ranks in columns: build the entry
        // (and whatever create_entries_for_taxon derives) from those columns.
        $params = array(
            "identifier" => $taxon_id,
            "source_url" => $source_url,
            "kingdom" => ucfirst($kingdom),
            "phylum" => ucfirst($phylum),
            "class" => ucfirst($class),
            "order" => ucfirst($order),
            "family" => ucfirst($family),
            "genus" => ucfirst($genus),
            "scientificName" => ucfirst($scientific_name),
            "name" => $name,
            "rank" => $rank,
            "taxon_remarks" => $taxon_remarks);
        $hierarchy_entry = HierarchyEntry::create_entries_for_taxon($params, $hierarchy_id);
    } else {
        // these are the taxa using the adjacency list format
        $params = array(
            "identifier" => $taxon_id,
            "source_url" => $source_url,
            "name_id" => $name->id,
            "parent_id" => $parent_hierarchy_entry_id,
            "hierarchy_id" => $hierarchy_id,
            "rank" => $rank,
            "ancestry" => $ancestry,
            "taxon_remarks" => $taxon_remarks);
        $hierarchy_entry = HierarchyEntry::find_or_create_by_array($params);
    }
    if (@(!$hierarchy_entry->id)) {
        debug("ERROR - unable to insert hierarchy entry for {$scientific_name}");
        return;
    }

    // NOTE: This is NOT adding a hierarchy entry, but a
    // harvest_event_hierarchy_entry:
    // TODO: I am not sure this adds entries for ancestors!
    $this->harvest_event->add_hierarchy_entry($hierarchy_entry, 'inserted');
    $this->taxon_ids_inserted[$taxon_id] = array(
        'hierarchy_entry_id' => $hierarchy_entry->id,
        'taxon_concept_id' => $hierarchy_entry->taxon_concept_id,
        'source_url' => $source_url);
    self::compress_array($this->taxon_ids_inserted[$taxon_id]);

    // Clear previously stored references, vernacular names and synonyms,
    // at most once per hierarchy entry.
    if (!isset($this->entry_references_deleted[$hierarchy_entry->id])) {
        $hierarchy_entry->delete_refs();
        $this->entry_references_deleted[$hierarchy_entry->id] = true;
    }
    if (!isset($this->entry_vernacular_names_deleted[$hierarchy_entry->id])) {
        // NOTE(review): the hierarchy_entry_id predicate appears twice in this
        // statement; it is redundant but harmless and is kept unchanged here.
        $this->mysqli->delete("DELETE FROM synonyms WHERE hierarchy_entry_id={$hierarchy_entry->id} AND hierarchy_entry_id={$hierarchy_entry->id} AND hierarchy_id=" . $hierarchy_id . " AND language_id!=0 AND language_id!=" . Language::find_or_create_for_parser('scientific name')->id);
        $this->entry_vernacular_names_deleted[$hierarchy_entry->id] = true;
    }
    if (!isset($this->entry_synonyms_deleted[$hierarchy_entry->id])) {
        $hierarchy_entry->delete_synonyms();
        $this->entry_synonyms_deleted[$hierarchy_entry->id] = true;
    }

    // namePublishedIn may hold several references separated by "||"
    if ($name_published_in = @$row['http://rs.tdwg.org/dwc/terms/namePublishedIn']) {
        foreach (explode("||", $name_published_in) as $reference_string) {
            $reference = Reference::find_or_create_by_full_reference(trim($reference_string));
            if (@$reference->id) {
                $hierarchy_entry->add_reference($reference->id);
                $this->mysqli->query("UPDATE refs SET published=1, visibility_id=" . Visibility::visible()->id . " WHERE id={$reference->id}");
            }
        }
    }

    // keep track of reference foreign keys
    self::append_foreign_keys_from_row($row, 'http://eol.org/schema/reference/referenceID', $this->taxon_reference_ids, $hierarchy_entry->id);

    if (isset($this->synonyms[$taxon_id])) {
        foreach ($this->synonyms[$taxon_id] as $synonym_row) {
            self::uncompress_array($synonym_row);
            $synonym_scientific_name = @self::field_decode($synonym_row['http://rs.tdwg.org/dwc/terms/scientificName']);
            $synonym_authorship = @self::field_decode($synonym_row['http://rs.tdwg.org/dwc/terms/scientificNameAuthorship']);
            if ($synonym_authorship && stripos($synonym_scientific_name, $synonym_authorship) === false) {
                $synonym_scientific_name = trim($synonym_scientific_name . " " . $synonym_authorship);
            }
            if (!$synonym_scientific_name) {
                continue;
            }

            $synonym_taxon_id = @self::field_decode($synonym_row['http://rs.tdwg.org/dwc/terms/taxonID']);
            if (!$synonym_taxon_id) {
                // The fallback must fill the synonym's own ID. Writing it to
                // $taxon_id would corrupt the key used below for
                // $this->synonyms and $this->children.
                $synonym_taxon_id = @self::field_decode($synonym_row['http://purl.org/dc/terms/identifier']);
            }
            if (!$synonym_taxon_id) {
                continue;
            }

            $synonym_name = Name::find_or_create_by_string(ucfirst($synonym_scientific_name));
            if (@(!$synonym_name->id)) {
                continue;
            }

            // Loop-local names so the accepted taxon's own status/remarks are not overwritten.
            $synonym_status = @self::field_decode($synonym_row['http://rs.tdwg.org/dwc/terms/taxonomicStatus']) ?: 'synonym';
            if (isset($synonym_row['http://rs.tdwg.org/dwc/terms/taxonRemarks'])) {
                $synonym_remarks = @self::field_decode($synonym_row['http://rs.tdwg.org/dwc/terms/taxonRemarks']);
            } else {
                $synonym_remarks = NULL;
            }
            $synonym_relation = SynonymRelation::find_or_create_by_translated_label($synonym_status);
            $hierarchy_entry->add_synonym($synonym_name->id, @$synonym_relation->id ?: 0, 0, 0, 0, 0, $synonym_remarks);
        }
        unset($this->synonyms[$taxon_id]);
    }

    // COL exception: replace the entry's agents and add a citation from the dataset metadata
    if ($dataset_id && isset($this->dataset_metadata[$dataset_id]) && ($metadata = $this->dataset_metadata[$dataset_id])) {
        $hierarchy_entry->delete_agents();
        $agent_name = $metadata['title'];
        if ($editors = $metadata['editors']) {
            $agent_name .= " by {$editors}";
        }
        $params = array(
            "full_name" => $agent_name,
            "agent_role" => AgentRole::find_or_create_by_translated_label('Source'));
        $agent = Agent::find_or_create($params);
        // The role lives in $params; the previous code read an undefined
        // variable here and therefore always stored role 0.
        $hierarchy_entry->add_agent($agent->id, @$params['agent_role']->id ?: 0, 0);

        $reference = Reference::find_or_create(array("full_reference" => $metadata['citation']));
        // Only touch the database when a reference row exists; an empty ID
        // would otherwise be interpolated into the SQL below.
        if (@$reference->id) {
            $this->mysqli->insert("INSERT IGNORE INTO hierarchy_entries_refs (hierarchy_entry_id, ref_id) VALUES ({$hierarchy_entry->id}, {$reference->id})");
            $this->mysqli->query("UPDATE refs SET published=1, visibility_id=" . Visibility::visible()->id . " WHERE id={$reference->id}");
        }
    }

    $parameters = array('archive_table_definition' => (object) array('row_type' => 'http://rs.tdwg.org/dwc/terms/Taxon'));
    $this->insert_data($row, $parameters);

    if (isset($this->children[$taxon_id])) {
        // set the ancestry for its children
        if ($ancestry) {
            $this_ancestry = $ancestry . "|" . $name->id;
        } else {
            $this_ancestry = $name->id;
        }
        // A dedicated reference variable keeps the $row parameter intact;
        // it is unset right after the loop so no dangling reference remains.
        foreach ($this->children[$taxon_id] as &$child_row) {
            self::uncompress_array($child_row);
            $this->add_hierarchy_entry($child_row, $hierarchy_entry->id, $this_ancestry, $branch_kingdom);
        }
        unset($child_row);
        unset($this->children[$taxon_id]);
    }
    unset($hierarchy_entry);
}
/**
 * Converts one <taxon> XML element into the parameter array used to create
 * the taxon: classification fields, name, common names, synonyms, agents,
 * references and data objects.
 *
 * @param SimpleXMLElement $t        The taxon element.
 * @param object           $resource Resource being read; its `language` and
 *                                   `title` properties are consulted.
 *
 * @return array|null The parameter array, or null when the element has no
 *                    scientific name and none of genus, family, order,
 *                    class, phylum or kingdom is filled in. Each item of
 *                    "data_objects" is array($data_object, $data_object_parameters).
 */
public static function read_taxon_xml($t, $resource)
{
    $t_dc = $t->children("http://purl.org/dc/elements/1.1/");
    $t_dcterms = $t->children("http://purl.org/dc/terms/");
    $t_dwc = $t->children("http://rs.tdwg.org/dwc/dwcore/");

    $taxon_parameters = array();
    $taxon_parameters["identifier"] = Functions::import_decode($t_dc->identifier);
    $taxon_parameters["source_url"] = Functions::import_decode($t_dc->source);
    $taxon_parameters["kingdom"] = Functions::import_decode($t_dwc->Kingdom);
    $taxon_parameters["phylum"] = Functions::import_decode($t_dwc->Phylum);
    $taxon_parameters["class"] = Functions::import_decode($t_dwc->Class);
    $taxon_parameters["order"] = Functions::import_decode($t_dwc->Order);
    $taxon_parameters["family"] = Functions::import_decode($t_dwc->Family);
    $taxon_parameters["genus"] = Functions::import_decode($t_dwc->Genus);
    $taxon_parameters["scientific_name"] = Functions::import_decode($t_dwc->ScientificName);
    $taxon_parameters["rank"] = Rank::find_or_create_by_translated_label(Functions::import_decode($t->rank));
    $taxon_parameters["taxon_created_at"] = trim($t_dcterms->created);
    $taxon_parameters["taxon_modified_at"] = trim($t_dcterms->modified);

    if ($taxon_parameters["scientific_name"]) {
        $taxon_parameters["name"] = Name::find_or_create_by_string($taxon_parameters["scientific_name"]);
    } else {
        // No scientific name: promote the lowest filled-in rank to be the
        // name and blank that rank so it is not also kept as an ancestor.
        $promoted_rank = null;
        foreach (array("genus", "family", "order", "class", "phylum", "kingdom") as $rank_key) {
            if ($taxon_parameters[$rank_key]) {
                $promoted_rank = $rank_key;
                break;
            }
        }
        if ($promoted_rank === null) {
            return;
        }
        $name = $taxon_parameters[$promoted_rank];
        $taxon_parameters["scientific_name"] = $name;
        $taxon_parameters["name"] = Name::find_or_create_by_string($name);
        $taxon_parameters[$promoted_rank] = "";
    }

    $taxon_parameters["common_names"] = array();
    foreach ($t->commonName as $c) {
        $common_name = Functions::import_decode((string) $c);
        if (!$common_name) {
            continue;
        }
        $xml_attr = $c->attributes("http://www.w3.org/XML/1998/namespace");
        $taxon_parameters["common_names"][] = array(
            "name" => $common_name,
            "language" => Language::find_or_create_for_parser(@Functions::import_decode($xml_attr["lang"])));
    }

    $taxon_parameters["synonyms"] = array();
    foreach ($t->synonym as $s) {
        $synonym = Functions::import_decode((string) $s);
        if (!$synonym) {
            continue;
        }
        // Default a missing/blank relationship to 'synonym' in a local
        // variable; assigning into the attribute list would modify the
        // caller's XML document.
        $attr = $s->attributes();
        $relationship = trim((string) @$attr["relationship"]);
        if ($relationship === '') {
            $relationship = 'synonym';
        }
        $taxon_parameters["synonyms"][] = array(
            "name" => Name::find_or_create_by_string($synonym),
            "synonym_relation" => SynonymRelation::find_or_create_by_translated_label($relationship));
    }

    $taxon_parameters["agents"] = self::read_agents_from_xml($t->agent);
    $taxon_parameters["refs"] = self::read_references_from_xml($t->reference);

    $taxon_parameters["data_objects"] = array();
    foreach ($t->dataObject as $d) {
        $d_dc = $d->children("http://purl.org/dc/elements/1.1/");
        $d_dcterms = $d->children("http://purl.org/dc/terms/");
        $d_geo = $d->children("http://www.w3.org/2003/01/geo/wgs84_pos#");

        $data_object = new DataObject();
        $data_object->identifier = Functions::import_decode($d_dc->identifier);
        $data_object->data_type = DataType::find_or_create_by_schema_value(Functions::import_decode($d->dataType));
        $data_object->mime_type = MimeType::find_or_create_by_translated_label(Functions::import_decode($d->mimeType));
        $data_object->object_created_at = Functions::import_decode($d_dcterms->created);
        $data_object->object_modified_at = Functions::import_decode($d_dcterms->modified);
        $data_object->object_title = Functions::import_decode($d_dc->title, 0, 0);
        $data_object->language = Language::find_or_create_for_parser(Functions::import_decode($d_dc->language));
        $data_object->license = License::find_or_create_for_parser(Functions::import_decode($d->license));
        $data_object->rights_statement = Functions::import_decode($d_dc->rights, 0, 0);
        $data_object->rights_holder = Functions::import_decode($d_dcterms->rightsHolder, 0, 0);
        $data_object->bibliographic_citation = Functions::import_decode($d_dcterms->bibliographicCitation, 0, 0);
        $data_object->source_url = Functions::import_decode($d_dc->source);
        $data_object->description = Functions::import_decode($d_dc->description, 0, 0);
        $data_object->object_url = Functions::import_decode($d->mediaURL);
        $data_object->thumbnail_url = Functions::import_decode($d->thumbnailURL);
        $data_object->location = Functions::import_decode($d->location, 0, 0);

        if (@$d->additionalInformation) {
            $data_object->additional_information = (array) $d->additionalInformation;
        }
        // only ratings in the range (0, 5] are accepted
        if ($rating = (string) @$d->additionalInformation->rating) {
            if (is_numeric($rating) && $rating > 0 && $rating <= 5) {
                $data_object->data_rating = $rating;
            }
        }
        if ($subtype = @$d->additionalInformation->subtype) {
            if ($dt = DataType::find_or_create_by_schema_value(Functions::import_decode($subtype))) {
                $data_object->data_subtype_id = $dt->id;
            }
        }

        $data_object_parameters = array();

        // language fallbacks: xml:lang on the description, then the resource's language
        if (!$data_object->language) {
            $xml_attr = $d_dc->description->attributes("http://www.w3.org/XML/1998/namespace");
            $data_object->language = Language::find_or_create_for_parser(@Functions::import_decode($xml_attr["lang"]));
        }
        if (!$data_object->language && $resource->language) {
            $data_object->language = $resource->language;
        }

        //TODO - update this
        if ($data_object->mime_type && $data_object->mime_type->equals(MimeType::flash()) && $data_object->is_video()) {
            $data_object->data_type = DataType::youtube();
            $data_object->data_type_id = DataType::youtube()->id;
        }

        //take the taxon's source_url if none present
        if (!@$data_object->source_url && @$taxon_parameters["source_url"]) {
            $data_object->source_url = $taxon_parameters["source_url"];
        }

        // Turn newlines into paragraphs
        $data_object->description = str_replace("\n", "</p><p>", $data_object->description);

        /* Checking requirements*/
        // NOTE(review): the subtype lookup above treats a falsy result from
        // DataType::find_or_create_by_schema_value as possible, so the same
        // is assumed for the main type. Skip such objects rather than fail
        // on ->equals() below - confirm skipping is the desired handling.
        if (!$data_object->data_type) {
            continue;
        }
        //if text: must have description
        if ($data_object->data_type->equals(DataType::text()) && !$data_object->description) {
            continue;
        }
        //if image, movie or sound: must have object_url
        if (($data_object->data_type->equals(DataType::video()) || $data_object->data_type->equals(DataType::sound()) || $data_object->data_type->equals(DataType::image())) && !$data_object->object_url) {
            continue;
        }

        // coordinates default to 0; when several Point elements exist the last one wins
        $data_object->latitude = 0;
        $data_object->longitude = 0;
        $data_object->altitude = 0;
        foreach ($d_geo->Point as $p) {
            $p_geo = $p->children("http://www.w3.org/2003/01/geo/wgs84_pos#");
            $data_object->latitude = Functions::import_decode($p_geo->lat);
            $data_object->longitude = Functions::import_decode($p_geo->long);
            $data_object->altitude = Functions::import_decode($p_geo->alt);
        }

        $data_object_parameters["agents"] = self::read_agents_from_xml($d->agent);

        $data_object_parameters["audiences"] = array();
        foreach ($d->audience as $a) {
            $data_object_parameters["audiences"][] = Audience::find_or_create_by_translated_label(trim((string) $a));
        }

        $data_object_parameters["info_items"] = array();
        foreach ($d->subject as $s) {
            $data_object_parameters["info_items"][] = InfoItem::find_or_create_by_schema_value(trim((string) $s));
        }
        // a subject given in additionalInformation replaces the regular subjects
        if ($subject = @$d->additionalInformation->subject) {
            if ($ii = InfoItem::find_or_create_by_schema_value(trim((string) $subject))) {
                $data_object_parameters["info_items"] = array($ii);
            }
        }

        // EXCEPTIONS
        if ($data_object->is_text()) {
            if ($resource->title == "BOLD Systems Resource") {
                // EXCEPTION - overriding the subject for BOLD
                $data_object_parameters["info_items"] = array(InfoItem::find_or_create_by_schema_value('http://www.eol.org/voc/table_of_contents#Barcode'));
            } elseif ($resource->title == "Wikipedia") {
                // EXCEPTION - overriding the subject for Wikipedia
                $data_object_parameters["info_items"] = array(InfoItem::find_or_create_by_schema_value('http://www.eol.org/voc/table_of_contents#Wikipedia'));
            } elseif ($resource->title == "IUCN Red List") {
                if ($data_object->object_title == "IUCNConservationStatus") {
                    // EXCEPTION - overriding the data type for IUCN text
                    $data_object->data_type_id = DataType::iucn()->id;
                    $data_object->data_type = DataType::iucn();
                }
            }
        }

        $data_object_parameters["refs"] = self::read_references_from_xml($d->reference);

        $taxon_parameters["data_objects"][] = array($data_object, $data_object_parameters);
        unset($data_object);
    }

    return $taxon_parameters;
}

/**
 * Builds the agent parameter arrays for a list of <agent> elements,
 * skipping elements whose decoded text is empty.
 *
 * @param iterable $agent_elements <agent> elements (e.g. $node->agent).
 *
 * @return array List of arrays with keys full_name, homepage, logo_url, agent_role.
 */
private static function read_agents_from_xml($agent_elements)
{
    $agents = array();
    foreach ($agent_elements as $a) {
        if (!Functions::import_decode((string) $a)) {
            continue;
        }
        $attr = $a->attributes();
        $agents[] = array(
            "full_name" => Functions::import_decode((string) $a, 0, 0),
            "homepage" => @Functions::import_decode($attr["homepage"]),
            "logo_url" => @Functions::import_decode($attr["logoURL"]),
            "agent_role" => AgentRole::find_or_create_by_translated_label(@trim($attr["role"])));
    }
    return $agents;
}

/**
 * Finds or creates a Reference for each non-empty <reference> element and
 * registers every identifier attribute (doi, isbn, url, ...) present on it.
 *
 * @param iterable $reference_elements <reference> elements (e.g. $node->reference).
 *
 * @return array List of the values returned by Reference::find_or_create_by_full_reference.
 */
private static function read_references_from_xml($reference_elements)
{
    $id_labels = array("bici", "coden", "doi", "eissn", "handle", "issn", "isbn", "lsid", "oclc", "sici", "url", "urn");
    $refs = array();
    foreach ($reference_elements as $r) {
        $reference = Functions::import_decode((string) $r, 0, 0);
        if (!$reference) {
            continue;
        }
        $ref = Reference::find_or_create_by_full_reference($reference);
        $refs[] = $ref;
        $attr = $r->attributes();
        foreach ($id_labels as $label) {
            if ($id = @Functions::import_decode($attr[$label], 0, 0)) {
                $type = RefIdentifierType::find_or_create_by_label($label);
                $ref->add_ref_identifier(@$type->id ?: 0, $id);
            }
        }
    }
    return $refs;
}