$dataObjects[] = get_data_object($amphibID . "_trends_threats", "Life History, Abundance, Activity, and Special Behaviors", $trends_and_threats, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Threats", $refs, $agents, $pageURL); } if ($relation_to_humans) { $dataObjects[] = get_data_object($amphibID . "_relation_to_humans", "Relation to Humans", $relation_to_humans, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#RiskStatement", $refs, $agents, $pageURL); } if ($description != "") { if ($comments != "") { $description .= $comments; } else { if ($comments != "") { $description = $comments; } } } if ($description) { $dataObjects[] = get_data_object($amphibID . "_description", "Description", $description, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#GeneralDescription", $refs, $agents, $pageURL); } /* we didn't get <comments> if($comments) $dataObjects[] = get_data_object("Comments", $comments, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#GeneralDescription", $refs, $agents, $pageURL); */ foreach ($dataObjects as $k => $v) { $taxonParameters["dataObjects"][] = new \SchemaDataObject($v); unset($v); } $taxa[] = new \SchemaTaxon($taxonParameters); //if($i >= 5) break; //debug } $new_resource_xml = \SchemaDocument::get_taxon_xml($taxa); $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml"; if (!($OUT = Functions::file_open($old_resource_path, "w+"))) { return;
$subject = "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Associations"; $data_object_parameters = get_data_object("text", $taxon, "bio_association", $dc_source, $agent_name, $agent_role, $html_biological_associations, $copyright, $image_url, $title, $subject); $taxon_parameters["dataObjects"][] = new \SchemaDataObject($data_object_parameters); } //end biological_associations //start nematocysts if ($html_nematocysts != "") { $do_count++; $agent_name = ""; $agent_role = ""; $image_url = ""; $copyright = ""; $title = "Biology: Nematocysts"; $dc_source = $url_for_nematocysts; $subject = "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Biology"; $data_object_parameters = get_data_object("text", $taxon, "nematocyst", $dc_source, $agent_name, $agent_role, $html_nematocysts, $copyright, $image_url, $title, $subject); $taxon_parameters["dataObjects"][] = new \SchemaDataObject($data_object_parameters); } //end $used_taxa[$taxon] = $taxon_parameters; } //with photos //end main loop } foreach ($used_taxa as $taxon_parameters) { $schema_taxa[] = new \SchemaTaxon($taxon_parameters); } ////////////////////// --- $new_resource_xml = SchemaDocument::get_taxon_xml($schema_taxa); $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource->id . ".xml"; if (!($OUT = fopen($old_resource_path, "w+"))) {
if ($object->specimenPart) { $desc .= "<br>Specimen part: " . $object->specimenPart; } if ($object->developmentalStage) { $desc .= "<br>Developmental stage: " . $object->developmentalStage; } if ($object->sex) { $desc .= "<br>Sex: " . $object->sex; } if ($object->form) { $desc .= "<br>Form: " . $object->form; } if ($desc) { $desc = substr($desc, 4, strlen($desc)); } $data_object_parameters = get_data_object($dc_identifier, $dcterms_created, $dcterms_modified, $copyright_text, $license, $agent, $desc, "image"); $taxon_parameters["dataObjects"][] = new \SchemaDataObject($data_object_parameters); } $used_taxa[$taxon_identifier] = $taxon_parameters; } } foreach ($used_taxa as $taxon_parameters) { $schema_taxa[] = new \SchemaTaxon($taxon_parameters); } $new_resource_xml = \SchemaDocument::get_taxon_xml($schema_taxa); $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml"; if (!($OUT = fopen($old_resource_path, "w+"))) { debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $old_resource_path); return; } fwrite($OUT, $new_resource_xml);
/**
 * Assembles the "Description" text object for a taxon.
 *
 * The body starts from $legend; an adaptation paragraph and a links paragraph are
 * appended when those inputs are non-empty. Headings are emitted only for the
 * languages "en" and "es"; for any other language the adaptation text is left out
 * and the links are appended without a heading.
 *
 * @param mixed $legend           base description text
 * @param mixed $adaptation       optional adaptation text ("" to omit)
 * @param mixed $links            optional links markup; the first and last character
 *                                are dropped (expected to be enclosing double quotes)
 * @param mixed $reference        passed through to get_data_object()
 * @param mixed $language         "en" or "es" select the heading language
 * @param mixed $taxon_identifier prefix of the generated object identifier
 * @param mixed $dc_source        passed through to get_data_object()
 * @return mixed whatever get_data_object() returns
 */
function get_GeneralDescription($legend, $adaptation, $links, $reference, $language, $taxon_identifier, $dc_source)
{
    $is_spanish = ($language == "es");

    // Spanish objects get their own identifier suffix and a translated title.
    $object_id = $taxon_identifier . "_GenDesc";
    if ($is_spanish) {
        $object_id .= "_es";
    }
    $object_title = $is_spanish ? "Descripción" : "Description";

    $body = $legend;

    if ($adaptation != "") {
        // "en" is tested before "es", exactly as in an if/elseif chain.
        if ($language == "en") {
            $body .= "<br><br>Adaptation: {$adaptation}";
        } elseif ($is_spanish) {
            $body .= "<br><br>Adaptación: {$adaptation}";
        }
    }

    if ($links != "") {
        // Collapse doubled quotes, then drop the first and last character
        // (the double quotes around 'links').
        $link_markup = str_replace('""', '"', trim($links));
        $link_markup = substr($link_markup, 1, strlen($link_markup) - 2);

        if ($language == "en") {
            $link_markup = "<br><br>Links:<br>" . str_ireplace("<br><br>", "<br>", $link_markup);
        } elseif ($is_spanish) {
            $link_markup = "<br><br>Enlaces:<br>" . str_ireplace("<br><br>", "<br>", $link_markup);
        }

        $body .= $link_markup;
    }

    // Mapped to TaxonBiology rather than GeneralDescription so the text shows in
    // Brief Summary and not in Comprehensive Description.
    $subject = "http://rs.tdwg.org/ontology/voc/SPMInfoItems#TaxonBiology";

    return get_data_object($object_id, $dc_source, $body, $reference, $subject, $object_title, $language);
}
/**
 * Builds the resource XML for $resource_id from the AmphibiaWeb species dump.
 *
 * Steps: fetch the dump (through Functions::lookup_with_cache), clean up stray
 * punctuation, stage it in DOC_ROOT/temp/<resource_id>.xml, parse it, turn every
 * <species> that has a submitter into a \SchemaTaxon with its text data objects,
 * and write the result of \SchemaDocument::get_taxon_xml() to
 * CONTENT_RESOURCE_LOCAL_PATH/<resource_id>.xml. The staging file is removed after
 * a successful write.
 *
 * The function returns nothing; it returns early (after echoing a message where
 * noted) when the dump cannot be fetched, a file cannot be opened, or the dump is
 * not parseable XML.
 *
 * @param mixed $resource_id used to name both the staging file and the output file
 * @return void
 */
function start($resource_id)
{
    $new_resource_path = DOC_ROOT . "temp/" . $resource_id . ".xml";
    // $file = 'http://localhost/cp/Amphibiaweb/amphib_dump.xml';
    $file = 'http://amphibiaweb.org/amphib_dump.xml';

    $new_resource_xml = Functions::lookup_with_cache($file, array('timeout' => 1200, 'download_attempts' => 5, 'expire_seconds' => 86400));
    if (!$new_resource_xml) {
        echo "\n\n Content partner's server is down, connector will now terminate.\n";
        return;
    }

    // The dump contains several different "wrong" punctuation characters that are
    // replaced by a plain double quote (two of them) and a hyphen (one of them).
    // The search strings appear to have been typed as raw, non-printing characters;
    // with an empty search string str_replace() is a no-op, so they are spelled out
    // as byte escapes here.
    // NOTE(review): the bytes are assumed to be Windows-1252 0x93/0x94 (curly double
    // quotes) and 0x96 (en dash), judging from their replacements - confirm against
    // the partner dump.
    // In valid UTF-8 those code points are encoded with a 0xC2 lead byte; matching the
    // bare byte there would damage other multi-byte characters, so the lead byte is
    // only omitted when the dump is not valid UTF-8.
    $lead_byte = (preg_match('//u', $new_resource_xml) === 1) ? "\xC2" : "";
    $new_resource_xml = str_replace(array($lead_byte . "\x93", $lead_byte . "\x94"), "\"", $new_resource_xml);
    $new_resource_xml = str_replace($lead_byte . "\x96", "-", $new_resource_xml);

    // Stage the cleaned dump on disk so it can be parsed from a file.
    if (!($OUT = Functions::file_open($new_resource_path, "w+"))) {
        return;
    }
    fwrite($OUT, $new_resource_xml);
    fclose($OUT);
    unset($new_resource_xml);

    $xml = simplexml_load_file($new_resource_path);
    if ($xml === false) {
        // The staging file is left in place so the broken dump can be inspected.
        echo "\n\n Could not parse {$new_resource_path} as XML, connector will now terminate.\n";
        return;
    }

    $taxa = array();
    $total = count($xml->species);
    $i = 0;
    foreach ($xml->species as $species) {
        $i++;
        if ($i % 1000 == 0) {
            echo "\n {$i} of {$total} ";
        }

        $amphibID = (int) trim((string) $species->amphib_id);
        $genus = format_utf8(trim((string) $species->genus));
        $speciesName = format_utf8(trim((string) $species->species));
        $order = format_utf8(trim((string) $species->ordr));
        $family = format_utf8(trim((string) $species->family));

        // Comma-separated list; surrounding whitespace is stripped and empty entries
        // (including the single "" produced by an empty field) are dropped.
        $commonNames = array();
        foreach (explode(",", format_utf8(trim((string) $species->common_name))) as $common_name) {
            $common_name = trim($common_name);
            if ($common_name !== "") {
                $commonNames[] = $common_name;
            }
        }

        $submittedBy = format_utf8(trim((string) $species->submittedby));
        $editedBy = format_utf8(trim((string) $species->editedby)); // read but not used below
        $description = format_utf8(trim((string) $species->description));
        $distribution = format_utf8(trim((string) $species->distribution));
        $life_history = format_utf8(trim((string) $species->life_history));
        $trends_and_threats = format_utf8(trim((string) $species->trends_and_threats));
        $relation_to_humans = format_utf8(trim((string) $species->relation_to_humans));
        $comments = format_utf8(trim((string) $species->comments));

        // References are separated by "<p>"; empty pieces are not emitted.
        $refs = array();
        foreach (explode("<p>", format_utf8(trim((string) $species->refs))) as $r) {
            $r = trim($r);
            if ($r !== "") {
                $refs[] = array("fullReference" => $r);
            }
        }

        $description = fix_article($description);
        $distribution = fix_article($distribution);
        $life_history = fix_article($life_history);
        $trends_and_threats = fix_article($trends_and_threats);
        $relation_to_humans = fix_article($relation_to_humans);
        $comments = fix_article($comments);

        $pageURL = "http://amphibiaweb.org/cgi/amphib_query?where-genus=" . $genus . "&where-species=" . $speciesName . "&account=amphibiaweb";

        // Species without a submitter are skipped entirely.
        if (!$submittedBy) {
            continue;
        }

        // One author agent per name; names are separated by "," or " and ".
        // (each() was removed in PHP 8.0, hence the plain foreach.)
        $agents = array();
        foreach (preg_split("/(,| and )/", $submittedBy) as $val) {
            $val = trim($val);
            if (!$val) {
                continue;
            }
            $agents[] = new \SchemaAgent(array("role" => "author", "fullName" => $val));
        }

        $taxonParameters = array();
        $taxonParameters["identifier"] = $amphibID;
        $taxonParameters["source"] = $pageURL;
        $taxonParameters["kingdom"] = "Animalia";
        $taxonParameters["phylum"] = "Chordata";
        $taxonParameters["class"] = "Amphibia";
        $taxonParameters["order"] = $order;
        $taxonParameters["family"] = $family;
        $taxonParameters["scientificName"] = trim($genus . " " . $speciesName);
        foreach ($commonNames as $common_name) {
            $taxonParameters['commonNames'][] = new \SchemaCommonName(array("name" => $common_name, "language" => "en"));
        }
        $taxonParameters["dataObjects"] = array();

        // Comments are published as part of the description. Appending covers both
        // cases: description + comments, and comments alone when there is no
        // description (the branch for the latter used to be unreachable, so such
        // comments were silently dropped).
        if ($comments != "") {
            $description .= $comments;
        }

        // suffix of the object identifier, title, text, subject - in output order
        // NOTE(review): "_trends_threats" carries the same title as "_life_history";
        // this looks like a copy/paste leftover - confirm the intended title.
        $sections = array(
            array("_distribution", "Distribution and Habitat", $distribution, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Distribution"),
            array("_life_history", "Life History, Abundance, Activity, and Special Behaviors", $life_history, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Trends"),
            array("_trends_threats", "Life History, Abundance, Activity, and Special Behaviors", $trends_and_threats, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Threats"),
            array("_relation_to_humans", "Relation to Humans", $relation_to_humans, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#RiskStatement"),
            array("_description", "Description", $description, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#GeneralDescription"),
        );
        foreach ($sections as $section) {
            list($suffix, $title, $text, $subject) = $section;
            if (!$text) {
                continue;
            }
            $taxonParameters["dataObjects"][] = new \SchemaDataObject(
                get_data_object($amphibID . $suffix, $title, $text, $subject, $refs, $agents, $pageURL)
            );
        }

        $taxa[] = new \SchemaTaxon($taxonParameters);
        //if($i >= 5) break; //debug
    }

    $new_resource_xml = \SchemaDocument::get_taxon_xml($taxa);
    $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    if (!($OUT = Functions::file_open($old_resource_path, "w+"))) {
        return;
    }
    fwrite($OUT, $new_resource_xml);
    fclose($OUT);
    // Functions::set_resource_status_to_force_harvest($resource_id);

    // Remove the staging file without going through a shell.
    if (file_exists($new_resource_path)) {
        unlink($new_resource_path);
    }
    // Functions::gzip_resource_xml($resource_id);
}
$taxon_parameters["scientificName"] = $taxa; $taxon_parameters["source"] = $home_url; $used_taxa[$taxon] = $taxon_parameters; } if (1 == 1) { //if($do_count == 0)//echo "$wrap$wrap phylum = " . $taxa . "$wrap"; $dc_source = $home_url; $do_count++; $agent_name = $photo_credit; $agent_role = "photographer"; /* for debugging $image_url = "http://127.0.0.1/test.tif"; $image_url = "http://www.findingspecies.org/indu/images/YIH_13569_MED_EOL.TIFF"; */ // /* just debug; no images for now $data_object_parameters = get_data_object("image", $taxon, $do_count, $dc_source, $agent_name, $agent_role, $desc_pic, $copyright, $image_url, ""); $taxon_parameters["dataObjects"][] = new \SchemaDataObject($data_object_parameters); // */ /* no text descriptions per Katja if($desc_taxa != "") { $temp = trim(strip_tags($desc_taxa)); if(substr($temp,0,9) != "Outlinks:") { if(substr($temp,0,11) == "Categories:") $title="Categories"; //$desc_taxa="<b>Discussion on disease(s) caused by this organism:</b>" . $desc_taxa; $do_count++; $agent_name = $providers; $agent_role = "source"; $data_object_parameters = get_data_object("text",$taxon,$do_count,$dc_source,$agent_name,$agent_role,$desc_taxa,$copyright,$image_url,$title); $taxon_parameters["dataObjects"][] = new \SchemaDataObject($data_object_parameters);
/**
 * Adds a "Physical Description" text data object for $sciname to the global
 * $used_taxa registry, creating the taxon entry first when it is not there yet.
 *
 * When the global $species_list has a non-empty "symbol" for $sciname, the taxon
 * identifier and the source URL are derived from that symbol; otherwise the
 * identifier is derived from the name alone and the global $keys_url is used as
 * the source. Classification fields are read from $species_list. Common names and
 * synonyms are not emitted by this function.
 *
 * @param mixed $sciname    scientific name; also the lookup key into $species_list
 * @param mixed $desc       text of the data object
 * @param mixed $arr_agents passed through to get_data_object()
 * @param mixed $dc_rights  passed through to get_data_object()
 * @param mixed $dc_source  URL prefix to which the symbol is appended
 * @param mixed $do_count   not used
 * @return string always ""
 */
function assign_variables($sciname, $desc, $arr_agents, $dc_rights, $dc_source, $do_count)
{
    global $species_list, $used_taxa, $keys_url, $wrap, $not_found;

    // Missing entries simply yield empty values; the lookups are deliberately silent.
    $record = @$species_list["{$sciname}"];
    $symbol = @$record["symbol"];
    $name_slug = str_ireplace(" ", "_", $sciname);

    if ($symbol != "") {
        $taxon_identifier = $symbol . "_" . $name_slug;
        $source_url = $dc_source . $symbol;
        $do_identifier = $taxon_identifier . "_USDA_keys_object";
    } else {
        // name is not in the species list
        $taxon_identifier = $name_slug . "_USDA_keys";
        $source_url = $keys_url;
        $do_identifier = $name_slug . "_USDA_keys_object";
    }

    // Reuse the taxon if it was registered before, otherwise register a new one.
    $taxon_parameters = @$used_taxa[$taxon_identifier];
    if (!$taxon_parameters) {
        $taxon_parameters = array(
            "identifier" => $taxon_identifier,
            "kingdom" => trim(@$record["Kingdom"]),
            "class" => trim(@$record["Class"]),
            "order" => trim(@$record["Order"]),
            "family" => trim(@$record["Family"]),
            "genus" => trim(@$record["Genus"]),
            "scientificName" => $sciname,
            "source" => $source_url,
            "dataObjects" => array(),
        );
        $used_taxa[$taxon_identifier] = $taxon_parameters;
    }

    // text data object
    $title = "Physical Description";
    $subject = "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Description";
    $type = "text";
    $reference = "";
    $taxon_parameters["dataObjects"][] = new \SchemaDataObject(
        get_data_object($do_identifier, $desc, $dc_rights, $title, $source_url, $subject, $type, $reference, $arr_agents)
    );

    // Store the updated copy back into the registry.
    $used_taxa[$taxon_identifier] = $taxon_parameters;

    return "";
}
$taxonParameters["genus"] = $genus; $taxonParameters["scientificName"] = $sciname; $taxonParameters["created"] = $created; $taxonParameters["modified"] = $modified; $taxonParameters["synonyms"] = array(); foreach ($t->synonym as $syn) { $taxonParameters["synonyms"][] = new \SchemaSynonym(array("synonym" => $syn, "relationship" => $url = $syn["relationship"])); } //start process dataObjects ===================================================================== $taxonParameters["dataObjects"] = array(); $dataObjects = array(); $arr = $t->dataObject; foreach ($arr as $do) { $t_dc2 = $do->children("http://purl.org/dc/elements/1.1/"); $t_dcterms = $do->children("http://purl.org/dc/terms/"); $dataObjects[] = get_data_object($do, $t_dc2, $t_dcterms); } foreach ($dataObjects as $k => $v) { $taxonParameters["dataObjects"][] = new \SchemaDataObject($v); unset($v); } //end process dataObjects ===================================================================== /////////////////////////////////////////////////////////////////////////////////// $taxa = array(); $taxa[] = new \SchemaTaxon($taxonParameters); //$new_resource_xml = SchemaDocument::get_taxon_xml($taxa); $str = ''; foreach ($taxa as $tax) { $str .= $tax->__toXML(); } fwrite($OUT, $str);
$taxon_parameters["source"] = $source_url; $taxon_parameters["dataObjects"] = array(); $used_taxa[$taxon_identifier] = $taxon_parameters; if (isset($rec->original_description)) { $dcterms_created = ""; $dcterms_modified = ""; $copyright_text = ""; $dc_identifier = $taxon_identifier . "_GenDesc"; $dc_source = $source_url; $agent_name = $agent; $license = "http://creativecommons.org/licenses/by-nc/3.0/"; $desc = $rec->original_description; $taxon_id = $rec["ID"]; $taxon_ref = @$arr_taxon_ref["{$taxon_id}"]; print " taxon_id = {$taxon_id} "; $data_object_parameters = get_data_object($dc_identifier, $dcterms_created, $dcterms_modified, $copyright_text, $license, $agent_name, $desc, "text", $source_url, $taxon_ref, $arr_ref); $taxon_parameters["dataObjects"][] = new \SchemaDataObject($data_object_parameters); } } /* start 2nd dataobject - image */ /* end second dataobject - image */ $used_taxa[$taxon_identifier] = $taxon_parameters; $k++; //if($k == 3)break; //debug - to limit no. of records } foreach ($used_taxa as $taxon_parameters) { $schema_taxa[] = new \SchemaTaxon($taxon_parameters); } ////////////////////// --- $new_resource_xml = SchemaDocument::get_taxon_xml($schema_taxa); $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
//start get rights and publisher from page $arr = parse_image_page($source); $rightsHolder = trim($arr[0]); $publisher = trim($arr[1]); //end get rights and publisher from page $id = parse_url($mediaURL, PHP_URL_QUERY); $id = trim(substr($id, stripos($id, "=") + 1, strlen($id))); $mediaURL = $image_url . $id; $desc = Functions::import_decode($t->{$cap_str}); $mimeType = Functions::import_decode($t->{$mim_str}); $do_count++; //$do_identifier = $identifier . "_" . $do_count; $do_identifier = $mediaURL; $do_agents = array(); $do_agents[] = array("name" => $publisher, "role" => "publisher"); $dataObjects[] = get_data_object($desc, $do_identifier, $subject, $dataType, $mimeType, $title, $source, $do_agents, $rightsHolder, $mediaURL); } } //print"<hr>"; //print"<pre>";print_r($img);print"</pre>"; //end images ================================================================================================= foreach ($dataObjects as $k => $v) { $taxonParameters["dataObjects"][] = new \SchemaDataObject($v); unset($v); } //end process dataObjects ===================================================================== /////////////////////////////////////////////////////////////////////////////////// $taxa = array(); $taxa[] = new \SchemaTaxon($taxonParameters); //$new_resource_xml = SchemaDocument::get_taxon_xml($taxa); $str = '';
/**
 * Builds one data object from $item and appends it to the global $taxon_parameters,
 * then stores $taxon_parameters in the global $used_taxa under the key $taxon.
 *
 * $type == 1 produces a text object (dcmitype/Text, text/html) from the de_* fields
 * of $item; any other value produces a still image (dcmitype/StillImage, image/jpeg)
 * from its url, im_* and photo_by fields. A "source" agent is added when
 * $main->contributed_by is set, and a "project" agent for Radiolaria.org is always
 * added last.
 *
 * @param mixed $item      record providing the de_* or the url/im_* fields
 * @param mixed $type      1 for a text description, anything else for an image
 * @param mixed $object_id not used
 * @return void
 */
function process_dataobjects($item, $type, $object_id)
{
    global $taxon_identifier, $taxon_parameters, $used_taxa, $taxon, $species_url, $main;

    $agents = array();
    $ref = "";
    $dcterms_created = "";
    $dc_source = $species_url . $taxon_identifier;

    if ($type == 1) {
        // text description
        $dc_identifier = "";
        $mediaURL = "";
        $title = "Description";
        $subject = "http://rs.tdwg.org/ontology/voc/SPMInfoItems#GeneralDescription";
        $dataType = "http://purl.org/dc/dcmitype/Text";
        $mimeType = "text/html";

        // make sure the two section labels are followed by a space
        $description = str_ireplace(
            array("Dimensions.", "Habitat."),
            array("Dimensions. ", "Habitat. "),
            trim($item->de_description)
        );

        $agents[] = new \SchemaAgent(array(
            "role" => "author",
            "fullName" => $item->de_author . " " . $item->de_year,
        ));

        $license = "http://creativecommons.org/licenses/" . $item->de_license;
        $dcterms_modified = $item->de_date;
    } else {
        // still image
        $dc_identifier = $item->url;
        $mediaURL = $item->url;
        $title = "";
        $subject = "";
        $dataType = "http://purl.org/dc/dcmitype/StillImage";
        $mimeType = "image/jpeg";
        $description = trim($item->im_description);

        if ($item->photo_by != "") {
            $agents[] = new \SchemaAgent(array(
                "role" => "author",
                "fullName" => $item->photo_by,
            ));
        }

        $license = "http://creativecommons.org/licenses/" . $item->im_license;
        $dcterms_modified = $item->im_date;
    }

    if (isset($main->contributed_by)) {
        $agents[] = new \SchemaAgent(array(
            "role" => "source",
            "fullName" => $main->contributed_by,
        ));
    }

    $agents[] = new \SchemaAgent(array(
        "role" => "project",
        "fullName" => "Radiolaria.org",
        "homepage" => "http://www.radiolaria.org/index.htm",
    ));

    $taxon_parameters["dataObjects"][] = new \SchemaDataObject(
        get_data_object($dc_identifier, $dcterms_created, $dcterms_modified, $license, $description, $subject, $title, $dc_source, $mediaURL, $dataType, $mimeType, $ref, $agents)
    );
    $used_taxa[$taxon] = $taxon_parameters;
}