Пример #1
0
 /**
  * Collects vernacular names from a list of name records.
  *
  * Each record is read through its 'commonName' and 'xml_lang' keys. The name
  * is passed through utf8_encode() and kept only when Functions::is_utf8()
  * accepts the encoded value.
  *
  * @param array|null $names Name records; an empty value yields an empty list.
  * @return array List of array("name" => ..., "language" => ...) entries.
  */
 function get_common_names($names)
 {
     $arr_names = array();
     if (!$names) {
         return $arr_names;
     }
     foreach ($names as $name) {
         // might need or not need this...
         $common = utf8_encode($name['commonName']);
         if (Functions::is_utf8($common)) {
             $arr_names[] = array("name" => Functions::import_decode($common), "language" => $name['xml_lang']);
         }
     }
     return $arr_names;
 }
 /**
  * Registers the entries of $this->name_id as Taxon objects in $this->taxa.
  *
  * An entry is skipped when its scientific name is one of the bare rank labels,
  * is not valid UTF-8, contains "unassigned", when its id is already present in
  * $this->taxon_ids, or when it has no parent and its rank is not "kingdom".
  * Entries without a rank are only counted in $this->no_rank.
  */
 private function add_higher_level_taxa_to_archive()
 {
     $rank_labels = array("kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies");
     foreach ($this->name_id as $taxon_id => $rec) {
         // a name that is just a rank label, or is not UTF-8, is never archived
         if (in_array($rec["sciname"], $rank_labels) || !Functions::is_utf8($rec["sciname"])) {
             continue;
         }

         $rank = @$rec["rank"];

         $taxon = new \eol_schema\Taxon();
         $taxon->taxonID = (string) $taxon_id;
         $taxon->taxonRank = (string) $rank;
         $taxon->scientificName = (string) Functions::import_decode($rec["sciname"]);
         $taxon->parentNameUsageID = $rec["parent"];

         // already archived / parentless non-kingdom / "unassigned" placeholder
         if (isset($this->taxon_ids[$taxon_id])
             || (!$taxon->parentNameUsageID && $rank != "kingdom")
             || stripos($rec["sciname"], "unassigned") !== false) {
             continue;
         }

         if ($rank) {
             $this->taxa[$taxon->taxonID] = $taxon;
             $this->taxon_ids[$taxon->taxonID] = 1;
         } else {
             // record without a rank: counted, not archived
             $this->no_rank++;
         }
     }
 }
 /**
  * Converts raw records into eol_schema objects and writes them to the archive.
  *
  * For every record a schema object matching $class is created, each record key
  * is reduced to its field name, and non-empty values are copied onto the
  * object. A record is written through $this->archive_builder unless one of its
  * processed values fails Functions::is_utf8().
  *
  * @param array  $records        Records keyed by field URI.
  * @param string $class          One of "VernacularName", "Agent", "Reference", "Taxon",
  *                               "MeasurementOrFact", "Occurrence", "Distribution", "Image".
  * @param array  $allowed_fields Field names accepted for $class; anything else is
  *                               noted in $this->debug["undefined"] and skipped.
  */
 private function process_fields($records, $class, $allowed_fields)
 {
     // record key => text placed in front of its value when a citation is assembled
     $citation_parts = array(
         "http://purl.org/ontology/bibo/pageStart" => ". Page(s) ",
         "http://purl.org/ontology/bibo/pageEnd" => " - ",
         "http://purl.org/ontology/bibo/volume" => ". Vol. ",
         "http://purl.org/dc/terms/publisher" => ". Publisher: ",
         "http://purl.org/ontology/bibo/authorList" => ". Author: ",
         "http://purl.org/ontology/bibo/editorList" => ". Editor: ",
         "http://purl.org/dc/terms/created" => ". ",
     );
     // the first of these keys holding a value supplies the citation title
     $title_keys = array(
         "http://eol.org/schema/reference/primaryTitle",
         "http://purl.org/dc/terms/title",
     );

     foreach ($records as $rec) {
         switch ($class) {
             case "VernacularName":
                 $c = new \eol_schema\VernacularName();
                 break;
             case "Agent":
                 $c = new \eol_schema\Agent();
                 break;
             case "Reference":
                 $c = new \eol_schema\Reference();
                 break;
             case "Taxon":
                 $c = new \eol_schema\Taxon();
                 break;
             case "MeasurementOrFact":
                 $c = new \eol_schema\MeasurementOrFact();
                 break;
             case "Occurrence":
                 $c = new \eol_schema\Occurrence();
                 break;
             case "Distribution":
             case "Image":
                 $c = new \eol_schema\MediaResource();
                 break;
         }

         $save = true;
         foreach ($rec as $key => $raw_value) {
             $field = pathinfo($key, PATHINFO_BASENAME);

             // manual adjustment bec. of a typo in meta.xml, without "s"
             if ($field == "measurementRemark") {
                 $field = "measurementRemarks";
             }

             if (!in_array($field, $allowed_fields)) {
                 $this->debug["undefined"][$class][$field] = '';
                 continue;
             }

             // some fields have '#', e.g. "http://schemas.talis.com/2005/address/schema#localityName";
             // the part after '#' wins, otherwise the part before it
             $pieces = explode("#", $field);
             if (!empty($pieces[1])) {
                 $field = $pieces[1];
             } elseif ($pieces[0]) {
                 $field = $pieces[0];
             }

             $value = trim(Functions::import_decode(trim((string) $raw_value)));
             if (!Functions::is_utf8($value)) {
                 $save = false;
             }

             // special arrangement for references
             if ($class == "Reference") {
                 if ($field == "identifier") {
                     $this->reference_ids[$value] = '';
                 }
                 if ($field == "full_reference" && !$value) {
                     // no full reference supplied: assemble one from the other citation fields
                     $full_ref = "";
                     foreach ($title_keys as $title_key) {
                         $title = (string) @$rec[$title_key];
                         if ($title) {
                             $full_ref .= $title;
                             break;
                         }
                     }
                     foreach ($citation_parts as $part_key => $prefix) {
                         $part = (string) @$rec[$part_key];
                         if ($part) {
                             $full_ref .= $prefix . $part;
                         }
                     }
                     if ($full_ref) {
                         $value = $full_ref;
                     } else {
                         echo " -- still blank";
                     }
                 }
             }

             if ($value) {
                 $c->{$field} = $value;
             }
         }

         if ($save) {
             $this->archive_builder->write_object_to_file($c);
         }
     }
 }
 /**
  * Validates an EOL schema XML document and collects per-taxon warnings.
  *
  * The document is first checked with SchemaValidator::validate(); only when
  * that returns true is it streamed with XMLReader, one <taxon> element at a
  * time. This variant only produces warnings (missing dc:identifier or
  * dc:source on a taxon, missing dc:identifier on a data object); $errors is
  * returned empty.
  *
  * @param string $uri Location of the XML document.
  * @return array|false false when $uri is empty, array() when
  *         SchemaValidator::validate() does not return true, otherwise
  *         array($errors, $warnings), each keyed by scientific name and then by message.
  */
 public static function eol_schema_validate($uri)
 {
     if (!$uri) {
         return false;
     }
     $valid = SchemaValidator::validate($uri);
     if ($valid !== true) {
         return array();
     }
     $errors = array();
     $warnings = array();
     $reader = new \XMLReader();
     // reading from a reader that failed to open raises an Error on PHP 8
     if (!$reader->open($uri)) {
         return array($errors, $warnings);
     }
     while (@$reader->read()) {
         if ($reader->nodeType == \XMLReader::ELEMENT && $reader->name == "taxon") {
             $taxon_xml = $reader->readOuterXML();
             $t = simplexml_load_string($taxon_xml, null, LIBXML_NOCDATA);
             if ($t === false) {
                 // fragment could not be parsed on its own; nothing to inspect
                 continue;
             }
             $t_dc = $t->children("http://purl.org/dc/elements/1.1/");
             $t_dwc = $t->children("http://rs.tdwg.org/dwc/dwcore/");
             $identifier = Functions::import_decode($t_dc->identifier);
             $source_url = Functions::import_decode($t_dc->source);
             $scientific_name = Functions::import_decode($t_dwc->ScientificName);
             if (!$identifier) {
                 $warnings[$scientific_name]["taxon without dc:identifier"] = true;
             }
             if (!$source_url) {
                 $warnings[$scientific_name]["taxon without dc:source"] = true;
             }
             foreach ($t->dataObject as $d) {
                 $d_dc = $d->children("http://purl.org/dc/elements/1.1/");
                 $identifier = Functions::import_decode($d_dc->identifier);
                 /* Checking requirements*/
                 if (!$identifier) {
                     $warnings[$scientific_name]["data object without dc:identifier"] = true;
                 }
             }
             // The former "$xml->taxon[$i] = null" wrote to a variable that was never
             // defined in this method (an Error on PHP 8); each taxon is already a
             // separate fragment, so there is nothing to release.
         }
     }
     $reader->close();
     return array($errors, $warnings);
 }
Пример #5
0
 /**
  * Builds the parameter array for a data object from a raw record.
  *
  * Scalar fields are copied from $rec (missing keys become empty values), the
  * language is fixed to "en", and every entry of $rec["agent"] becomes a
  * \SchemaAgent.
  *
  * @param array $rec Raw record; "agent", when present, is a list of arrays
  *                   with "role", "homepage" and "fullName".
  * @return array Data object parameters, including an "agents" list.
  */
 function get_data_object($rec)
 {
     $data_object_parameters = array();
     // (string) casts keep trim() from receiving null when a key is absent
     $data_object_parameters["identifier"] = trim((string) @$rec["identifier"]);
     $data_object_parameters["source"] = @$rec["source"];
     $data_object_parameters["dataType"] = trim((string) @$rec["dataType"]);
     $data_object_parameters["mimeType"] = trim((string) @$rec["mimeType"]);
     $data_object_parameters["mediaURL"] = trim((string) @$rec["mediaURL"]);
     $data_object_parameters["thumbnailURL"] = trim((string) @$rec["thumbnailURL"]);
     $data_object_parameters["created"] = trim((string) @$rec["created"]);
     $data_object_parameters["description"] = Functions::import_decode(@$rec["description"]);
     $data_object_parameters["license"] = @$rec["license"];
     $data_object_parameters["rightsHolder"] = trim((string) @$rec["rightsHolder"]);
     $data_object_parameters["title"] = trim((string) @$rec["title"]);
     $data_object_parameters["language"] = "en";
     //==========================================================================================
     $agents = array();
     // "@" on the subscript does not silence foreach itself, so a missing or
     // non-iterable "agent" entry has to be filtered out explicitly
     $agent_records = @$rec["agent"];
     if (is_array($agent_records) || $agent_records instanceof \Traversable) {
         foreach ($agent_records as $agent) {
             $agentParameters = array();
             $agentParameters["role"] = $agent["role"];
             $agentParameters["homepage"] = $agent["homepage"];
             $agentParameters["logoURL"] = "";
             $agentParameters["fullName"] = $agent["fullName"];
             $agents[] = new \SchemaAgent($agentParameters);
         }
     }
     $data_object_parameters["agents"] = $agents;
     //==========================================================================================
     return $data_object_parameters;
 }
Пример #6
0
/**
 * Assembles the parameter array describing one data object.
 *
 * Language is always "en", the license is always CC BY-NC-SA 3.0, and the two
 * audiences "Expert users" and "General public" are always attached. A subject
 * is added only when $subject is non-empty, and the media URL only when the
 * MIME type is not "text/html".
 *
 * @param string      $desc          Description text.
 * @param string      $do_identifier Data object identifier.
 * @param string      $subject       Subject label; "" for none.
 * @param string      $dataType      Data type.
 * @param string      $mimeType      MIME type.
 * @param string      $title         Title.
 * @param string      $source        Source URL.
 * @param array       $do_agents     List of arrays with "role" and "name".
 * @param string      $rightsHolder  Rights holder, passed through Functions::import_decode().
 * @param string|null $mediaURL      Media URL.
 * @return array Data object parameters.
 */
function get_data_object($desc, $do_identifier, $subject, $dataType, $mimeType, $title, $source, $do_agents, $rightsHolder, $mediaURL = NULL)
{
    $params = array(
        "identifier" => $do_identifier,
        "dataType" => $dataType,
        "mimeType" => $mimeType,
        "title" => $title,
        "language" => "en",
        "description" => $desc,
    );

    if ($subject != "") {
        $params["subjects"] = array(new \SchemaSubject(array("label" => $subject)));
    }

    // every agent points at the same homepage; only role and name vary
    $params["agents"] = array();
    foreach ($do_agents as $agent) {
        $params["agents"][] = new \SchemaAgent(array(
            "role" => $agent["role"],
            "homepage" => "http://emuweb.fieldmuseum.org/botany/botanytaxon.php",
            "fullName" => utf8_encode($agent["name"]),
        ));
    }

    $params["license"] = "http://creativecommons.org/licenses/by-nc-sa/3.0/";
    $params["source"] = $source;
    $params["rightsHolder"] = Functions::import_decode($rightsHolder);

    if ($mimeType != "text/html") {
        $params["mediaURL"] = $mediaURL;
    }

    $params["audiences"] = array();
    foreach (array("Expert users", "General public") as $audience_label) {
        $params["audiences"][] = new \SchemaAudience(array("label" => $audience_label));
    }

    return $params;
}
Пример #7
0
 /**
  * Decodes a string and makes sure the result is UTF-8.
  *
  * @param string $string Raw text.
  * @return string The Functions::import_decode() result, passed through
  *                utf8_encode() when Functions::is_utf8() rejects it.
  */
 private function format_utf8($string)
 {
     $decoded = Functions::import_decode($string);
     return Functions::is_utf8($decoded) ? $decoded : utf8_encode($decoded);
 }
Пример #8
0
 /**
  * Builds the parameter array for a data object from a raw record.
  *
  * Scalar fields are copied from $rec (missing keys become empty values), the
  * language is fixed to "en", a subject is added when $rec["subject"] is
  * non-blank, each entry of $rec["agent"] becomes a \SchemaAgent and each entry
  * of $rec["object_refs"] with a non-empty "ref" becomes a \SchemaReference.
  *
  * @param array $rec Raw record. "agent" entries expose "role", "homepage" and
  *                   the name at offset 0; "object_refs" entries expose "ref" and "url".
  * @return array Data object parameters, including "agents" and "references" lists.
  */
 function get_data_object($rec)
 {
     $data_object_parameters = array();
     // (string) casts keep trim() from receiving null when a key is absent
     $data_object_parameters["identifier"] = trim((string) @$rec["identifier"]);
     $data_object_parameters["source"] = @$rec["source"];
     $data_object_parameters["dataType"] = trim((string) @$rec["dataType"]);
     $data_object_parameters["mimeType"] = trim((string) @$rec["mimeType"]);
     $data_object_parameters["mediaURL"] = trim((string) @$rec["mediaURL"]);
     $data_object_parameters["created"] = trim((string) @$rec["created"]);
     $data_object_parameters["description"] = Functions::import_decode(@$rec["description"]);
     $data_object_parameters["location"] = Functions::import_decode(@$rec["location"]);
     $data_object_parameters["license"] = @$rec["license"];
     $data_object_parameters["rightsHolder"] = trim((string) @$rec["rightsHolder"]);
     $data_object_parameters["title"] = trim((string) @$rec["title"]);
     $data_object_parameters["language"] = "en";
     //==========================================================================================
     $subject = trim((string) @$rec["subject"]);
     if ($subject) {
         $data_object_parameters["subjects"] = array();
         $subjectParameters = array();
         $subjectParameters["label"] = $subject;
         $data_object_parameters["subjects"][] = new \SchemaSubject($subjectParameters);
     }
     //==========================================================================================
     $agents = array();
     // "@" on the subscript does not silence foreach itself, so a missing or
     // non-iterable "agent" entry has to be filtered out explicitly
     $agent_records = @$rec["agent"];
     if (is_array($agent_records) || $agent_records instanceof \Traversable) {
         foreach ($agent_records as $agent) {
             $agentParameters = array();
             $agentParameters["role"] = $agent["role"];
             $agentParameters["homepage"] = $agent["homepage"];
             $agentParameters["logoURL"] = "";
             $agentParameters["fullName"] = $agent[0];
             $agents[] = new \SchemaAgent($agentParameters);
         }
     }
     $data_object_parameters["agents"] = $agents;
     //==========================================================================================
     $ref = array();
     $ref_records = @$rec["object_refs"];
     if (is_array($ref_records) || $ref_records instanceof \Traversable) {
         foreach ($ref_records as $r) {
             // a reference without text is not worth emitting
             if (empty($r["ref"])) {
                 continue;
             }
             $referenceParameters = array();
             $referenceParameters["fullReference"] = Functions::import_decode($r["ref"]);
             if (!empty($r["url"])) {
                 $referenceParameters["referenceIdentifiers"][] = new \SchemaReferenceIdentifier(array("label" => "url", "value" => trim($r["url"])));
             }
             $ref[] = new \SchemaReference($referenceParameters);
         }
     }
     $data_object_parameters["references"] = $ref;
     //==========================================================================================
     return $data_object_parameters;
 }
Пример #9
0
 /**
  * Cleans a string through Functions::import_decode() and turns tabs into spaces.
  *
  * @param string $str               Text to clean; trimmed before decoding.
  * @param bool   $remove_whitespace Forwarded as the second argument of Functions::import_decode().
  * @param bool   $decode            Forwarded as the third argument of Functions::import_decode().
  * @return string The cleaned text, trimmed again after the replacements.
  */
 private static function clean_string($str, $remove_whitespace = false, $decode = true)
 {
     $str = Functions::import_decode(trim($str), $remove_whitespace, $decode);
     // NOTE(review): as written here the search and replacement strings look identical
     // (a single space), which would make this call a no-op. Presumably the search string
     // was a non-breaking or doubled space in the original file; confirm against the repository.
     $str = str_replace(" ", " ", $str);
     $str = str_replace("\t", " ", $str);
     return trim($str);
 }
Пример #10
0
/**
 * Builds the parameter array for a "Functional adaptation" data object.
 *
 * The title and subject are fixed, overriding whatever the provider supplied;
 * the provider's own title is folded into the description in bold instead.
 *
 * @param object $do        Data object element; read for dataType, mimeType, agent,
 *                          created, modified, license, rights, mediaURL, thumbnailURL,
 *                          location, audience and reference.
 * @param object $t_dc2     Dublin Core children of the data object (identifier,
 *                          description, title, language, source).
 * @param object $t_dcterms DC terms children of the data object (rightsHolder).
 * @return array Data object parameters.
 */
function get_data_object($do, $t_dc2, $t_dcterms)
{
    $dataObjectParameters = array();
    $dataObjectParameters["identifier"] = $t_dc2->identifier;
    $dataObjectParameters["dataType"] = $do->dataType;
    $dataObjectParameters["mimeType"] = $do->mimeType;
    // strip the provider's own <strong> tags so only the title ends up bold
    $description = str_ireplace('<strong>', '', $t_dc2->description);
    $description = str_ireplace('</strong>', '', $description);
    $dataObjectParameters["description"] = "<strong>{$t_dc2->title}</strong>  <br>  {$description}";
    $dataObjectParameters["title"] = "Functional adaptation";
    $dataObjectParameters["language"] = $t_dc2->language;
    //this overwrites whatever is given by AskNature, currently it is GenDesc
    $dataObjectParameters["subjects"] = array();
    $subjectParameters = array();
    $subjectParameters["label"] = "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Evolution";
    $dataObjectParameters["subjects"][] = new \SchemaSubject($subjectParameters);
    $dataObjectParameters["additionalInformation"] = "<subject>http://www.eol.org/voc/table_of_contents#FunctionalAdaptations</subject>";
    ///////////////////////////////////////////////////////////////////////
    $agents = array();
    foreach ($do->agent as $agent) {
        $agentParameters = array();
        $agentParameters["role"] = $agent["role"];
        $agentParameters["homepage"] = $agent["homepage"];
        $agentParameters["logoURL"] = $agent["logoURL"];
        $agentParameters["fullName"] = Functions::import_decode($agent);
        $agents[] = new \SchemaAgent($agentParameters);
    }
    $dataObjectParameters["agents"] = $agents;
    ///////////////////////////////////////////////////////////////////////
    $dataObjectParameters["created"] = $do->created;
    $dataObjectParameters["modified"] = $do->modified;
    $dataObjectParameters["license"] = $do->license;
    $dataObjectParameters["rights"] = $do->rights;
    $dataObjectParameters["rightsHolder"] = Functions::import_decode($t_dcterms->rightsHolder);
    $dataObjectParameters["source"] = $t_dc2->source;
    $dataObjectParameters["mediaURL"] = $do->mediaURL;
    $dataObjectParameters["thumbnailURL"] = $do->thumbnailURL;
    $dataObjectParameters["location"] = Functions::import_decode($do->location);
    ///////////////////////////////////////////////////////////////////////
    $dataObjectParameters["audiences"] = array();
    $audienceParameters = array();
    $audienceParameters["label"] = $do->audience;
    $dataObjectParameters["audiences"][] = new \SchemaAudience($audienceParameters);
    ///////////////////////////////////////////////////////////////////////
    $refs = array();
    foreach ($do->reference as $ref) {
        $referenceParameters = array();
        $referenceParameters["fullReference"] = $ref;
        // The URL/ISBN attributes belong to the reference itself. They used to be read
        // from $agent, a leftover of the agent loop above, so they were never found.
        $ref_url = @$ref["URL"];
        $ref_isbn = @$ref["ISBN"];
        if ((string) $ref_url !== "") {
            $referenceParameters["referenceIdentifiers"][] = new \SchemaReferenceIdentifier(array("label" => "url", "value" => self::format($ref_url)));
        }
        if ((string) $ref_isbn !== "") {
            $referenceParameters["referenceIdentifiers"][] = new \SchemaReferenceIdentifier(array("label" => "isbn", "value" => self::format($ref_isbn)));
        }
        $refs[] = new \SchemaReference($referenceParameters);
    }
    $dataObjectParameters["references"] = $refs;
    ///////////////////////////////////////////////////////////////////////
    return $dataObjectParameters;
}
Пример #11
0
 /**
  * Validates an EOL schema XML document and collects per-taxon errors and warnings.
  *
  * The document is first checked with SchemaValidator::validate(); only when
  * that returns true is it streamed with XMLReader, one <taxon> element at a
  * time. Warnings cover missing identifiers, sources and licenses; errors cover
  * text objects without description or subject and non-text objects without a
  * media URL.
  *
  * @param string $uri Location of the XML document.
  * @return array|false false when $uri is empty, array() when
  *         SchemaValidator::validate() does not return true, otherwise
  *         array($errors, $warnings), each keyed by scientific name and then by message.
  */
 public static function eol_schema_validate($uri)
 {
     if (!$uri) {
         return false;
     }
     $valid = SchemaValidator::validate($uri);
     if ($valid !== true) {
         return array();
     }
     $errors = array();
     $warnings = array();
     $reader = new \XMLReader();
     // reading from a reader that failed to open raises an Error on PHP 8
     if (!$reader->open($uri)) {
         return array($errors, $warnings);
     }
     while (@$reader->read()) {
         if ($reader->nodeType == \XMLReader::ELEMENT && $reader->name == "taxon") {
             $taxon_xml = $reader->readOuterXML();
             $t = simplexml_load_string($taxon_xml, null, LIBXML_NOCDATA);
             if ($t === false) {
                 // fragment could not be parsed on its own; nothing to inspect
                 continue;
             }
             $t_dc = $t->children("http://purl.org/dc/elements/1.1/");
             $t_dwc = $t->children("http://rs.tdwg.org/dwc/dwcore/");
             $identifier = Functions::import_decode($t_dc->identifier);
             $source_url = Functions::import_decode($t_dc->source);
             $scientific_name = Functions::import_decode($t_dwc->ScientificName);
             if (!$identifier) {
                 $warnings[$scientific_name]["taxon without dc:identifier"] = true;
             }
             if (!$source_url) {
                 $warnings[$scientific_name]["taxon without dc:source"] = true;
             }
             foreach ($t->dataObject as $d) {
                 $d_dc = $d->children("http://purl.org/dc/elements/1.1/");
                 $identifier = Functions::import_decode($d_dc->identifier);
                 $data_type = Functions::import_decode($d->dataType);
                 $license = Functions::import_decode($d->license);
                 $source_url = Functions::import_decode($d_dc->source);
                 $description = Functions::import_decode($d_dc->description, 0, 0);
                 $object_url = Functions::import_decode($d->mediaURL);
                 $subjects = array();
                 foreach ($d->subject as $s) {
                     $subjects[] = trim((string) $s);
                 }
                 /* Checking requirements*/
                 if (!$identifier) {
                     $warnings[$scientific_name]["data object without dc:identifier"] = true;
                 }
                 if (!$license) {
                     $warnings[$scientific_name]["data object without license"] = true;
                 }
                 //if text: must have description
                 if ($data_type == "http://purl.org/dc/dcmitype/Text" && !$description) {
                     $errors[$scientific_name]["text without dc:description"] = true;
                 }
                 //if text: must have subject
                 if ($data_type == "http://purl.org/dc/dcmitype/Text" && !$subjects) {
                     $errors[$scientific_name]["text without subject"] = true;
                 }
                 //if image, movie or sound: must have object_url
                 if ($data_type != "http://purl.org/dc/dcmitype/Text" && !$object_url) {
                     $errors[$scientific_name]["media without mediaURL"] = true;
                 }
             }
             // The former "$xml->taxon[$i] = null" wrote to a variable that was never
             // defined in this method (an Error on PHP 8); each taxon is already a
             // separate fragment, so there is nothing to release.
         }
     }
     $reader->close();
     return array($errors, $warnings);
 }
Пример #12
0
 /**
  * Collects vernacular names from a list of name records.
  *
  * Each record's 'commonName' is passed through utf8_encode(); names accepted
  * by Functions::is_utf8() are returned together with the record's 'xml_lang',
  * any other name is reported on standard output.
  *
  * @param array|null $names Name records; an empty value yields an empty list.
  * @return array List of array("name" => ..., "language" => ...) entries.
  */
 function get_common_names($names)
 {
     $collected = array();
     if (!$names) {
         return $collected;
     }
     foreach ($names as $entry) {
         $encoded = utf8_encode($entry['commonName']);
         if (!Functions::is_utf8($encoded)) {
             echo "\n not utf8 common name: [" . $encoded . "]\n";
             continue;
         }
         $collected[] = array(
             "name" => Functions::import_decode($encoded),
             "language" => $entry['xml_lang'],
         );
     }
     return $collected;
 }
Пример #13
0
/**
 * Scrapes a bibliography page and the "cited taxa" page behind each reference.
 *
 * The bibliography HTML is split on <p class="biblio"> into one chunk per
 * reference. For every chunk that links to a cited-taxa page, that page is
 * downloaded from $GLOBALS['cited_taxa_prefix'] . <link target> and every
 * reMap('<taxon id>','<taxon name>') call found in it ties the reference to
 * that taxon. Progress is printed while running.
 *
 * @param string $file Location of the bibliography page.
 * @return array array(0 => reference number => citation text,
 *                     1 => taxon id => list of reference numbers).
 */
function get_references($file)
{
    $str = Functions::get_remote_file($file);
    $str = str_replace(array("\n", "\r", "\t", "\\o", "\\xOB"), '', $str);
    $str = str_replace(array("<nobr>", "</nobr>", "taxon_view.cfm?mode=bibliography&citation=", "&#776;"), '', $str);
    // typographic entities are flattened to plain ASCII
    $str = str_ireplace('&#8216;', "'", $str); // left single quotation mark
    $str = str_ireplace('&#8217;', "'", $str); // right single quotation mark
    $str = str_ireplace('&#8220;', '"', $str); // left double quotation mark
    $str = str_ireplace('&#8221;', '"', $str); // right double quotation mark
    $str = str_ireplace('&#8211;', '-', $str); // en dash
    $str = str_ireplace('&#769;', "'", $str);  // combining acute accent
    $str = str_ireplace('&amp;', "and", $str);
    $str = Functions::import_decode($str);
    // turn every reference paragraph into an "arr[]" query-string entry
    $str = str_ireplace('<p class="biblio">', 'xxx', $str);
    $str = str_ireplace('xxx', "&arr[]=", $str);
    // The one-argument form of parse_str() was removed in PHP 8 and let the remote
    // page overwrite local variables; parse into an array and pick out "arr" only.
    $parsed = array();
    parse_str($str, $parsed);
    $arr = (isset($parsed['arr']) && is_array($parsed['arr'])) ? $parsed['arr'] : array();
    $arr_ref = array();
    $arr_taxon_ref = array();
    $i = 1;
    $total = count($arr);
    foreach ($arr as $r) {
        print "\n {$i} of " . $total . "\n";
        if (stripos($r, '<a href="') === false) {
            continue;
        }
        // without a link target there is no cited-taxa page to fetch; skipping avoids
        // reusing the reference number of an earlier chunk
        if (!preg_match("/<a href=\"(.*?)\">/", $r, $matches)) {
            continue;
        }
        $ref_num = $matches[1];
        $cited_html = Functions::get_remote_file($GLOBALS['cited_taxa_prefix'] . $ref_num);
        //<input type="checkbox"
        $cited_html = str_ireplace('<input type="checkbox"', 'xxx<input type="checkbox"', $cited_html);
        $cited_html = str_ireplace('xxx', "&arr2[]=", $cited_html);
        $parsed_taxa = array();
        parse_str($cited_html, $parsed_taxa);
        $arr2 = (isset($parsed_taxa['arr2']) && is_array($parsed_taxa['arr2'])) ? $parsed_taxa['arr2'] : array();
        foreach ($arr2 as $r2) {
            //onclick="reMap('8418','Chlamys gemmulata')"
            if (!preg_match("/onclick=\"reMap(.*?)\"/", $r2, $matches)) {
                continue;
            }
            $sub_str = $matches[1];
            //('6577','Galiteuthis glacialis')
            if (!preg_match("/\\(\\'(.*?)\\'\\,/", $sub_str, $matches)) {
                // no taxon id: nothing to attach the reference to
                continue;
            }
            $taxon_id = $matches[1];
            $taxon = preg_match("/\\,\\'(.*?)\\'\\)/", $sub_str, $matches) ? $matches[1] : "";
            print "[{$taxon_id}][{$taxon}]\n";
            $arr_taxon_ref["{$taxon_id}"][] = $ref_num;
        }
        $arr_ref[$ref_num] = trim(str_ireplace('View cited taxa', "", strip_tags($r, "<em>")));
        $i++;
        //if($i == 3)break;//debug - to limit no. of records
    }
    return array(0 => $arr_ref, 1 => $arr_taxon_ref);
}
 /**
  * Looks up a dataset page and extracts the contributor name from its title.
  *
  * The name is the text between '"dc:title">' and '- Dataset detail', e.g.
  * <title property="dc:title">Herbarium Berolinense - Dataset detail</title>.
  * The script is terminated when that text is not valid UTF-8.
  *
  * @param string $url Dataset page URL.
  * @return string|null Decoded name, or null when the page cannot be fetched
  *                     or holds no such title.
  */
 private function get_contributor_name($url)
 {
     // 15mins timeout
     $options = array('resource_id' => 'gbif', 'expire_seconds' => false, 'download_wait_time' => 1000000, 'timeout' => 900);

     $html = Functions::lookup_with_cache($url, $options);
     if (!$html) {
         return;
     }
     if (!preg_match("/\"dc:title\">(.*?)\\- Dataset detail/ims", $html, $arr)) {
         return;
     }

     $name = $arr[1];
     if (!Functions::is_utf8($name)) {
         exit("\n culprit is contributor name \n");
     }
     return Functions::import_decode(trim($name));
 }
Пример #15
0
 /**
  * Writes one English text MediaResource for a taxon to the archive.
  *
  * Nothing is written when the decoded description is not valid UTF-8, or when
  * an object with the same identifier is already listed in $this->object_ids.
  *
  * @param array  $rec           Taxon record; only 'taxon_id' is read.
  * @param string $source_url    Page the text comes from; also the input for the identifier.
  * @param string $description   Text body.
  * @param string $subject       Value stored as CVterm.
  * @param array  $reference_ids Reference ids to attach, joined with "; ".
  */
 private function process_text($rec, $source_url, $description, $subject, $reference_ids = array())
 {
     $identifier = self::get_identifier($source_url);
     $text = Functions::import_decode($description);
     if (!Functions::is_utf8($text)) {
         return;
     }

     $media = new \eol_schema\MediaResource();
     if ($reference_ids) {
         $media->referenceID = implode("; ", $reference_ids);
     }
     if ($this->agent_ids) {
         $media->agentID = implode("; ", $this->agent_ids);
     }
     $media->taxonID = (string) $rec['taxon_id'];
     $media->identifier = $identifier;
     $media->type = "http://purl.org/dc/dcmitype/Text";
     $media->language = 'en';
     $media->format = "text/html";
     $media->furtherInformationURL = (string) $source_url;
     $media->CVterm = $subject;
     $media->Owner = $this->rights_holder;
     $media->title = "";
     $media->UsageTerms = "http://creativecommons.org/licenses/by-nc-sa/3.0/";
     $media->audience = 'Everyone';
     $media->description = (string) $text;

     // each identifier goes into the archive only once
     if (in_array($media->identifier, $this->object_ids)) {
         return;
     }
     $this->object_ids[] = $media->identifier;
     $this->archive_builder->write_object_to_file($media);
 }
 /**
  * Walks the taxa of the Mushroom Observer XML and processes the matching Wikipedia page of each.
  *
  * The XML is fetched from $this->mushroom_observer_eol; for every <taxon> the
  * canonical form of its ScientificName is turned into an en.wikipedia.org URL
  * and handed to self::get_triple(). Progress is echoed per taxon.
  *
  * @param mixed $wrong_urls Passed through unchanged to self::get_triple().
  */
 private function process_mushroom_observer_list($wrong_urls)
 {
     $file = Functions::lookup_with_cache($this->mushroom_observer_eol, $this->download_options);
     if (!$file) {
         return;
     }
     $xml = simplexml_load_string($file);
     if ($xml === false) {
         // a truncated or malformed download must not end in a fatal error below
         echo "\n cannot parse XML from: " . $this->mushroom_observer_eol;
         return;
     }
     $i = 0;
     $total = count($xml->taxon);
     foreach ($xml->taxon as $t) {
         $i++;
         // if($i > 40) break; //debug
         $t_dwc = $t->children("http://rs.tdwg.org/dwc/dwcore/");
         $sciname = Functions::import_decode($t_dwc->ScientificName);
         $sciname = Functions::canonical_form($sciname);
         echo "\n{$i} of {$total}: {$sciname}";
         $url = "http://en.wikipedia.org/wiki/" . str_replace(" ", "_", $sciname);
         self::get_triple($url, $wrong_urls);
     }
 }
Пример #17
0
 /**
  * Extracts the tag names listed in the page's tag cloud.
  *
  * Only the first <div id="tagCloud"> block is inspected; every 'aspx?q=...">'
  * link target inside it yields one tag.
  *
  * @param string $html Page markup.
  * @return array Tags, each passed through Functions::import_decode(); empty
  *               when there is no tag cloud.
  */
 public function get_synth_tags($html)
 {
     $synth_tags = array();
     if (!preg_match("/<div id=\"tagCloud\">(.*?)<\\/div>/ims", $html, $cloud)) {
         return $synth_tags;
     }
     if (preg_match_all("/aspx\\?q=(.*?)\">/", $cloud[1], $hits, PREG_SET_ORDER)) {
         foreach ($hits as $hit) {
             $synth_tags[] = Functions::import_decode($hit[1]);
         }
     }
     return $synth_tags;
 }
Пример #18
0
/**
 * Builds the parameter array for a data object from its XML parts.
 *
 * A subject is attached only when the data object has one, every <agent>
 * becomes a \SchemaAgent, and the audiences "Expert users" and
 * "General public" are always attached.
 *
 * @param object $do        Data object element; read for dataType, mimeType, subject,
 *                          agent, created, modified, license, mediaURL, thumbnailURL
 *                          and location.
 * @param object $t_dc2     Dublin Core children of the data object (identifier,
 *                          description, source).
 * @param object $t_dcterms DC terms children of the data object (rightsHolder).
 * @return array Data object parameters.
 */
function get_data_object($do, $t_dc2, $t_dcterms)
{
    $params = array(
        "identifier" => $t_dc2->identifier,
        "dataType" => $do->dataType,
        "mimeType" => $do->mimeType,
        "description" => $t_dc2->description,
    );

    if ($do->subject != "") {
        $params["subjects"] = array(new \SchemaSubject(array("label" => $do->subject)));
    }

    // role, homepage and logo are attributes of <agent>; its text is the name
    $agent_objects = array();
    foreach ($do->agent as $agent) {
        $agent_objects[] = new \SchemaAgent(array(
            "role" => $agent["role"],
            "homepage" => $agent["homepage"],
            "logoURL" => $agent["logoURL"],
            "fullName" => Functions::import_decode($agent),
        ));
    }
    $params["agents"] = $agent_objects;

    $params["created"] = $do->created;
    $params["modified"] = $do->modified;
    $params["license"] = $do->license;
    $params["rightsHolder"] = Functions::import_decode($t_dcterms->rightsHolder);
    $params["source"] = $t_dc2->source;
    $params["mediaURL"] = $do->mediaURL;
    $params["thumbnailURL"] = $do->thumbnailURL;
    $params["location"] = Functions::import_decode($do->location);

    $params["audiences"] = array();
    foreach (array("Expert users", "General public") as $audience_label) {
        $params["audiences"][] = new \SchemaAudience(array("label" => $audience_label));
    }

    return $params;
}