Esempio n. 1
1
 /**
  * Extracts the Darwin Core Archive at $this->dwca_file, hands each supported
  * row type (Taxon, media Document, Reference, Agent, VernacularName) to its
  * handler, then finalizes $this->archive_builder.
  *
  * The archive is unpacked into a temporary directory that is removed before
  * returning, including when the archive is rejected as invalid.
  *
  * @return false|null false when the archive exposes no taxon fields; nothing otherwise.
  */
 function get_all_taxa()
 {
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     $paths = $func->extract_archive_file($this->dwca_file, "meta.xml");
     $archive_path = $paths['archive_path'];
     $temp_dir = $paths['temp_dir'];
     $harvester = new ContentArchiveReader(NULL, $archive_path);
     $tables = $harvester->tables;
     if (!($this->fields["taxa"] = $tables["http://rs.tdwg.org/dwc/terms/taxon"][0]->fields)) {
         debug("Invalid archive file. Program will terminate.");
         // the extracted files are of no use any more; do not leave them behind
         recursive_rmdir($temp_dir);
         return false;
     }
     // the Taxon rows are read twice: first to build the rank array, then to create the taxa
     self::build_taxa_rank_array($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon'));
     self::create_instances_from_taxon_object($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon'));
     self::get_objects($harvester->process_row_type('http://eol.org/schema/media/Document'));
     self::get_references($harvester->process_row_type('http://rs.gbif.org/terms/1.0/Reference'));
     self::get_agents($harvester->process_row_type('http://eol.org/schema/agent/Agent'));
     self::get_vernaculars($harvester->process_row_type('http://rs.gbif.org/terms/1.0/VernacularName'));
     $this->archive_builder->finalize(TRUE);
     // remove temp dir
     recursive_rmdir($temp_dir);
     echo "\n temporary directory removed: " . $temp_dir;
     print_r($this->debug);
 }
 /**
  * Extracts the occurrence archive named by $params["dwca_file"], processes it
  * according to $params["dataset"] ("NMNH" or "NHM") and finalizes
  * $this->archive_builder.
  *
  * Any other dataset value processes no rows, but the builder is still finalized.
  * The temporary extraction directory is removed on every exit path.
  *
  * @param array $params reads "dwca_file", "dataset" and, for NHM, "location";
  *                      the whole array is also passed to get_uris() and process_row_type().
  * @return false|null false when the archive has no occurrence fields; nothing otherwise.
  */
 function export_gbif_to_eol($params)
 {
     $this->uris = self::get_uris($params);
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     $paths = $func->extract_archive_file($params["dwca_file"], "meta.xml", array("timeout" => 7200, "expire_seconds" => false));
     // "expire_seconds" -- false => won't expire; 0 => expires now
     $archive_path = $paths['archive_path'];
     $temp_dir = $paths['temp_dir'];
     $this->harvester = new ContentArchiveReader(NULL, $archive_path);
     if (!@$this->harvester->tables["http://rs.tdwg.org/dwc/terms/occurrence"][0]->fields) {
         debug("Invalid archive file. Program will terminate.");
         // clean up the extracted files before bailing out
         recursive_rmdir($temp_dir);
         return false;
     }
     if ($params["dataset"] == "NMNH") {
         self::process_row_type($params);
     } elseif ($params["dataset"] == "NHM") {
         // NHM is handled by a dedicated reader that is given the file path inside the temp dir
         self::process_row_type_from_NHM($temp_dir . "/" . $params['location']);
     }
     $this->archive_builder->finalize(TRUE);
     // remove temp dir
     recursive_rmdir($temp_dir);
     echo "\n temporary directory removed: " . $temp_dir;
     print_r($this->debug);
 }
Esempio n. 3
0
 /**
  * Extracts the archive named by $params["dwca_file"], creates taxa from its
  * occurrence rows and media objects from its Multimedia rows, then finalizes
  * $this->archive_builder.
  *
  * The temporary extraction directory is removed on every exit path.
  *
  * @param array $params reads "dwca_file".
  * @return false|null false when the archive has no occurrence fields; nothing otherwise.
  */
 function export_gbif_to_eol($params)
 {
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     $paths = $func->extract_archive_file($params["dwca_file"], "meta.xml");
     $archive_path = $paths['archive_path'];
     $temp_dir = $paths['temp_dir'];
     $harvester = new ContentArchiveReader(NULL, $archive_path);
     $tables = $harvester->tables;
     if (!($this->fields["occurrence"] = $tables["http://rs.tdwg.org/dwc/terms/occurrence"][0]->fields)) {
         debug("Invalid archive file. Program will terminate.");
         // clean up the extracted files before bailing out
         recursive_rmdir($temp_dir);
         return false;
     }
     // original author's note: $harvester->process_row_type() converts the rows into an array
     self::create_instances_from_taxon_object($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/occurrence'));
     self::get_media_objects($harvester->process_row_type('http://rs.gbif.org/terms/1.0/Multimedia'));
     $this->archive_builder->finalize(TRUE);
     // remove temp dir
     recursive_rmdir($temp_dir);
     echo "\n temporary directory removed: " . $temp_dir;
 }
Esempio n. 4
0
 /**
  * Builds $this->taxa_id_list from the biogeodb and myspecies lists, extracts the
  * XML archive described by $params and returns whatever update_xml() produces.
  *
  * @param array $params reads "eol_xml_file" and "filename"; "path" is set to the
  *                      extraction directory before the array is passed to update_xml().
  * @return mixed the return value of update_xml().
  */
 function process_xml($params)
 {
     // biogeodb entries come first, myspecies entries are merged after them
     $this->taxa_id_list = array_merge(self::get_taxa_list_from_biogeodb(), self::get_taxa_list_from_myspecies());

     require_library('connectors/INBioAPI');
     $extractor = new INBioAPI();
     $download_options = array("timeout" => 7200, "expire_seconds" => false);
     $extracted = $extractor->extract_archive_file($params["eol_xml_file"], $params["filename"], $download_options);
     print_r($extracted);

     $work_dir = $extracted["temp_dir"];
     $params["path"] = $work_dir;
     $updated_xml = self::update_xml($params);

     // the extracted files are no longer needed once the XML has been produced
     recursive_rmdir($work_dir);
     return $updated_xml;
 }
Esempio n. 5
0
 /**
  * Extracts the Darwin Core Archive at $this->dwca_file and processes every row
  * type that yields records: References, Taxa, optionally Occurrences, then
  * Distributions, Images, Descriptions and VernacularNames. Finishes by
  * finalizing $this->archive_builder.
  *
  * Occurrences are only processed when $this->params["process occurrence"] is truthy.
  * The temporary extraction directory is removed on every exit path.
  *
  * @return false|null false when the archive exposes no taxon fields; nothing otherwise.
  */
 function get_all_taxa()
 {
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     $paths = $func->extract_archive_file($this->dwca_file, "meta.xml");
     $archive_path = $paths['archive_path'];
     $harvester = new ContentArchiveReader(NULL, $archive_path);
     $tables = $harvester->tables;
     if (!($this->fields["taxa"] = $tables["http://rs.tdwg.org/dwc/terms/taxon"][0]->fields)) {
         debug("Invalid archive file. Program will terminate.");
         // clean up the extracted files before bailing out
         recursive_rmdir($paths['temp_dir']);
         return false;
     }
     if ($records = $harvester->process_row_type('http://rs.gbif.org/terms/1.0/Reference')) {
         self::get_references($records);
     }
     if ($records = $harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon')) {
         $taxa_id_list = self::get_taxa_id_list($records);
         self::create_instances_from_taxon_object($records, $taxa_id_list);
     }
     if ($this->params["process occurrence"]) {
         echo "\nProcessed OCCURRENCE\n";
         if ($records = $harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Occurrence')) {
             // the URI list is loaded only when there are occurrence rows to process
             $this->uris = self::get_uris();
             self::get_occurrences($records);
         }
     }
     if ($records = $harvester->process_row_type('http://rs.gbif.org/terms/1.0/Distribution')) {
         self::get_distributions($records);
     }
     if ($records = $harvester->process_row_type('http://rs.gbif.org/terms/1.0/Image')) {
         self::get_images($records);
     }
     //http://eol.org/content_partners/159/resources/345
     if ($records = $harvester->process_row_type('http://rs.gbif.org/terms/1.0/Description')) {
         self::get_descriptions($records);
     }
     //http://eol.org/content_partners/159/resources/332
     if ($records = $harvester->process_row_type('http://rs.gbif.org/terms/1.0/VernacularName')) {
         self::get_vernaculars($records);
     }
     $this->archive_builder->finalize(TRUE);
     // remove temp dir
     recursive_rmdir($paths['temp_dir']);
     echo "\n temporary directory removed: " . $paths['temp_dir'];
     print_r($this->debug);
 }
Esempio n. 6
0
 /**
  * Repairs doubled double-quotes in the anchor markup of an archive's media.txt,
  * then republishes the archive under CONTENT_RESOURCE_LOCAL_PATH both as
  * "<resource_id>.tar.gz" and as a "<resource_id>/" folder of the extracted files.
  *
  * Nothing is republished when the archive cannot be extracted or media.txt
  * cannot be read. The extracted directory is removed on every exit path that
  * follows a successful extraction.
  *
  * @param mixed  $resource_id used to name the tarball and the destination folder.
  * @param string $dwca_file   archive location passed to extract_archive_file().
  * @return void
  */
 function clean_media_extension($resource_id, $dwca_file)
 {
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     if ($paths = $func->extract_archive_file($dwca_file, "meta.xml")) {
         print_r($paths);
         if ($contents = Functions::get_remote_file($paths['archive_path'] . "media.txt", array('timeout' => 172800))) {
             $contents = str_ireplace('<a title=""', '<a title="', $contents);
             $contents = str_ireplace('"" href=""', '" href="', $contents);
             $contents = str_ireplace('"">', '">', $contents);
             //saving new media.txt
             if (!($WRITE = fopen($paths['archive_path'] . "media.txt", "w"))) {
                 debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $paths['archive_path'] . "media.txt");
                 // do not leave the extracted files behind when giving up
                 recursive_rmdir($paths['archive_path']);
                 return;
             }
             fwrite($WRITE, $contents);
             fclose($WRITE);
             // remove the archive file e.g. plazi.zip
             $info = pathinfo($dwca_file);
             unlink($paths['archive_path'] . $info["basename"]);
             // creating the archive file; both paths are quoted so that spaces or shell
             // metacharacters in them cannot break or alter the command
             $command_line = "tar -czf " . escapeshellarg(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".tar.gz")
                 . " --directory=" . escapeshellarg($paths['archive_path']) . " .";
             shell_exec($command_line);
             // moving files to /resources/ : start from an empty destination folder
             recursive_rmdir(CONTENT_RESOURCE_LOCAL_PATH . $resource_id);
             if (!file_exists(CONTENT_RESOURCE_LOCAL_PATH . $resource_id)) {
                 mkdir(CONTENT_RESOURCE_LOCAL_PATH . $resource_id);
             }
             $src = $paths['archive_path'];
             $dst = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "/";
             // only names containing a dot are matched by this pattern
             $files = glob($paths['archive_path'] . "*.*");
             foreach ($files as $file) {
                 $file_to_go = str_replace($src, $dst, $file);
                 copy($file, $file_to_go);
             }
         }
         // remove temp dir
         recursive_rmdir($paths['archive_path']);
         echo "\n temporary directory removed: " . $paths['archive_path'];
     }
 }
 /**
  * Loads the URI (and optional citation) mappings, extracts the occurrence
  * archive named by $params["dwca_file"], processes its occurrence rows for the
  * "GBIF" and "iDigBio" datasets and finalizes $this->archive_builder.
  *
  * Any other dataset value processes no rows, but the builder is still finalized.
  * The temporary extraction directory is removed on every exit path.
  *
  * @param array $params reads "uri_file", "dwca_file", "dataset" and the optional
  *                      "citation_file"; "uri_type", "row_type" and "location" are
  *                      set locally before the array is passed on.
  * @return false|null false when the archive has no occurrence fields; nothing otherwise.
  */
 function export_gbif_to_eol($params)
 {
     $this->uris = self::get_uris($params, $params["uri_file"]);
     // every get_uris() call from here on is made with the "citation" type
     $params["uri_type"] = "citation";
     if ($file = @$params["citation_file"]) {
         $this->citations = self::get_uris($params, $file);
     }
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     $paths = $func->extract_archive_file($params["dwca_file"], "meta.xml", array("timeout" => 7200, "expire_seconds" => 0));
     $archive_path = $paths['archive_path'];
     $temp_dir = $paths['temp_dir'];
     $this->harvester = new ContentArchiveReader(NULL, $archive_path);
     if (!@$this->harvester->tables["http://rs.tdwg.org/dwc/terms/occurrence"][0]->fields) {
         echo "\nInvalid archive file. Program will terminate.\n";
         // clean up the extracted files before bailing out
         recursive_rmdir($temp_dir);
         return false;
     }
     // both datasets are read from the same occurrence table and file
     if ($params["dataset"] == "GBIF" || $params["dataset"] == "iDigBio") {
         $params["row_type"] = "http://rs.tdwg.org/dwc/terms/occurrence";
         $params["location"] = "occurrence.txt";
         self::process_row_type($params);
     }
     $this->archive_builder->finalize(TRUE);
     // remove temp dir
     recursive_rmdir($temp_dir);
     print_r($this->debug);
 }
 /**
  * Converts an EOL XML resource and finalizes $this->archive_builder.
  *
  * With $xml_file_YN falsy the resource is treated as a compressed archive: it is
  * extracted, converted, and the extraction directory is removed. With
  * $xml_file_YN truthy the resource is saved locally as a plain XML file,
  * converted from DOC_ROOT . "tmp/", and the local copy is deleted.
  *
  * @param array $params      reads "eol_xml_file" and, for archives, "filename";
  *                           "path" (and "filename" for plain XML) are set before convert_xml().
  * @param bool  $xml_file_YN whether "eol_xml_file" is a plain XML file rather than an archive.
  * @return void
  */
 function export_xml_to_archive($params, $xml_file_YN = false)
 {
     if ($xml_file_YN) {
         // plain XML file: download it, convert it, delete the local copy
         $params['path'] = DOC_ROOT . "tmp/";
         //debug - cache should be 0 zero in normal operation
         $save_options = array('file_extension' => "xml", 'cache' => 0, "timeout" => 7200, "download_attempts" => 2, "delay_in_minutes" => 2);
         $saved_xml = Functions::save_remote_file_to_local($params['eol_xml_file'], $save_options);
         $params['filename'] = pathinfo($saved_xml, PATHINFO_BASENAME);
         self::convert_xml($params);
         $this->archive_builder->finalize(TRUE);
         unlink($saved_xml);
         return;
     }

     // compressed archive: extract it, convert it, remove the extraction directory
     require_library('connectors/INBioAPI');
     $extractor = new INBioAPI();
     // "expire_seconds" -- false => won't expire; 0 => expires now //debug
     $extract_options = array("timeout" => 7200, "expire_seconds" => 0);
     $extracted = $extractor->extract_archive_file($params["eol_xml_file"], $params["filename"], $extract_options);
     print_r($extracted);
     $work_dir = $extracted["temp_dir"];
     $params["path"] = $work_dir;
     self::convert_xml($params);
     $this->archive_builder->finalize(TRUE);
     recursive_rmdir($work_dir);
 }
 /**
  * Extracts the Darwin Core Archive at $this->dwca_file and, for every rowType
  * listed in its meta.xml, passes the rows to process_fields() together with the
  * list of fields allowed for that row type. Finishes by finalizing
  * $this->archive_builder.
  *
  * Taxon rows use a fixed field list; every other row type takes its fields from
  * the extension definition in $this->extensions, keyed by the rowType's basename.
  * The temporary extraction directory is removed on every exit path.
  *
  * @return false|null false when the archive exposes no taxon fields; nothing otherwise.
  */
 function get_all_taxa()
 {
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     $paths = $func->extract_archive_file($this->dwca_file, "meta.xml");
     $archive_path = $paths['archive_path'];
     $harvester = new ContentArchiveReader(NULL, $archive_path);
     $tables = $harvester->tables;
     if (!($this->fields["taxa"] = $tables["http://rs.tdwg.org/dwc/terms/taxon"][0]->fields)) {
         debug("Invalid archive file. Program will terminate.");
         // clean up the extracted files before bailing out
         recursive_rmdir($paths['temp_dir']);
         return false;
     }
     // NOTE(review): meta.xml is looked up in temp_dir while the reader is given archive_path;
     // confirm the two always point at the same directory
     $row_types = self::get_XML_fields($paths["temp_dir"] . "meta.xml", "rowType");
     /* manual assignment, use this if some referenceID in Measurements don't exist in References.tab
        $row_types = array("http://eol.org/schema/reference/Reference", "http://rs.tdwg.org/dwc/terms/Taxon", "http://rs.tdwg.org/dwc/terms/MeasurementOrFact", "http://rs.tdwg.org/dwc/terms/Occurrence");
        */
     print_r($row_types);
     foreach ($row_types as $row_type) {
         $basename = pathinfo($row_type, PATHINFO_BASENAME);
         if ($basename == "Taxon") {
             $allowed_fields = array("taxonID", "scientificName", "parentNameUsageID", "kingdom", "phylum", "class", "order", "family", "genus", "taxonRank", "furtherInformationURL", "taxonomicStatus", "taxonRemarks", "namePublishedIn", "referenceID");
         } else {
             $allowed_fields = self::get_XML_fields($this->extensions[$basename], "property name");
         }
         //manual adjustment: compare the basename, because $row_type is the full rowType URI
         //and therefore never equals the bare name "VernacularName"
         if ($basename == "VernacularName") {
             $allowed_fields[] = "taxonID";
         }
         self::process_fields($harvester->process_row_type($row_type), $basename, $allowed_fields);
         // e.g. self::process_fields($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon'), "Taxon");
     }
     $this->archive_builder->finalize(TRUE);
     // remove temp dir
     recursive_rmdir($paths['temp_dir']);
     echo "\n temporary directory removed: " . $paths['temp_dir'];
     print_r($this->debug);
 }
Esempio n. 10
0
namespace php_active_record;

/* connector for INBio
Partner provides DWC-A file
estimated execution time: 18 minutes
*/
// Loads the environment, fetches all taxa from the INBio archive, writes them as
// the XML document of resource 276 and flags the resource for harvesting.
include_once dirname(__FILE__) . "/../../config/environment.php";
$timestart = time_elapsed();
require_library('connectors/INBioAPI');
$resource_id = 276;
// $dwca_file = "http://localhost/~eolit/dwca-inbio-eol.zip"; //zip extracts directly to temp_dir
// $dwca_file = "http://localhost/~eolit/dwca_inbio.zip"; //zip extracts it within a folder inside temp_dir
// $dwca_file = "http://localhost/~eolit/dwca_inbio_small.zip";
// $dwca_file = "http://localhost/~eolit/dwca.tar.gz";
$dwca_file = "http://dl.dropbox.com/u/7597512/INBIO/dwca_inbio.zip";
$func = new INBioAPI();
// nothing is written when get_all_taxa() returns a falsy value
if ($taxa = $func->get_all_taxa($dwca_file)) {
    $xml = \SchemaDocument::get_taxon_xml($taxa);
    $xml = $func->assign_eol_subjects($xml);
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    if (!($OUT = fopen($resource_path, "w"))) {
        // __CLASS__ is an empty string outside a class, so identify the script by its file
        debug(__FILE__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        // stops this script; the elapsed-time report below is skipped
        return;
    }
    fwrite($OUT, $xml);
    fclose($OUT);
    Functions::set_resource_status_to_force_harvest($resource_id);
}
$elapsed_time_sec = time_elapsed() - $timestart;
echo "\nelapsed time = " . $elapsed_time_sec . " seconds";
echo "\nelapsed time = " . $elapsed_time_sec / 60 . " minutes";
Esempio n. 11
0
/* connector for National Museum of Natural History Image Collection
estimated execution time: 1.5 mins.
Connector reads the XML provided by partner and 
- sets the image rating.
- If needed ingests TypeInformation text dataObjects
*/
// Bootstrap the environment and the helper used to rate the data objects.
include_once dirname(__FILE__) . "/../../config/environment.php";
require_library('ResourceDataObjectElementsSetting');
$timestart = time_elapsed();

$resource_id = 120;
// NMNH Invertebrate Zoology resource: default location, used unless the database overrides it
$resource_path = "http://collections.mnh.si.edu/services/eol/nmnh-iz-response.xml.gz";

// A non-empty accesspoint_url stored for the resource takes precedence over the default.
$result = $GLOBALS['db_connection']->select("SELECT accesspoint_url FROM resources WHERE id={$resource_id}");
$row = $result->fetch_row();
$new_resource_path = $row[0];
if ($new_resource_path != '' && $new_resource_path != $resource_path) {
    $resource_path = $new_resource_path;
}
echo "\n processing resource:\n " . $resource_path . " \n\n";

// Rate the StillImage data objects, assign EOL subjects, then save the document.
$nmnh = new ResourceDataObjectElementsSetting($resource_id, $resource_path, 'http://purl.org/dc/dcmitype/StillImage', 2);
$xml = $nmnh->set_data_object_rating_on_xml_document();
require_library('connectors/INBioAPI');
$xml = INBioAPI::assign_eol_subjects($xml);
$nmnh->save_resource_document($xml);
Functions::set_resource_status_to_force_harvest($resource_id);

// Report the run time in seconds, minutes and hours.
$elapsed_time_sec = time_elapsed() - $timestart;
$elapsed_time_min = $elapsed_time_sec / 60;
$elapsed_time_hr = $elapsed_time_min / 60;
echo "\n"
    . "elapsed time = " . $elapsed_time_sec . " seconds             \n"
    . "elapsed time = " . $elapsed_time_min . " minutes  \n"
    . "elapsed time = " . $elapsed_time_hr . " hours \n";
echo "\n\n Done processing.";