示例#1
1
 /**
  * Extracts the archive referenced by $this->dwca_file and feeds its Taxon,
  * Document, Reference, Agent and VernacularName rows to the matching
  * handlers, then finalizes the archive builder and removes the temporary
  * extraction directory.
  *
  * @return false|null false when the archive has no usable taxon table;
  *                    otherwise nothing is returned explicitly.
  */
 function get_all_taxa()
 {
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     $paths = $func->extract_archive_file($this->dwca_file, "meta.xml");
     $archive_path = $paths['archive_path'];
     $temp_dir = $paths['temp_dir'];
     $harvester = new ContentArchiveReader(NULL, $archive_path);
     $tables = $harvester->tables;
     // The tables index is looked up with a lower-cased row type here, while
     // process_row_type() below is given the mixed-case URI.
     $taxon_key = "http://rs.tdwg.org/dwc/terms/taxon";
     // Guard the lookup so a missing taxon table yields NULL instead of
     // dereferencing a non-existent entry.
     $this->fields["taxa"] = isset($tables[$taxon_key][0]) ? $tables[$taxon_key][0]->fields : NULL;
     if (!$this->fields["taxa"]) {
         debug("Invalid archive file. Program will terminate.");
         // Do not leave the extracted files behind on the failure path.
         if ($temp_dir) {
             recursive_rmdir($temp_dir);
         }
         return false;
     }
     // The Taxon rows are read twice on purpose: once to build the rank
     // lookup and once to create the taxon instances.
     self::build_taxa_rank_array($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon'));
     self::create_instances_from_taxon_object($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon'));
     self::get_objects($harvester->process_row_type('http://eol.org/schema/media/Document'));
     self::get_references($harvester->process_row_type('http://rs.gbif.org/terms/1.0/Reference'));
     self::get_agents($harvester->process_row_type('http://eol.org/schema/agent/Agent'));
     self::get_vernaculars($harvester->process_row_type('http://rs.gbif.org/terms/1.0/VernacularName'));
     $this->archive_builder->finalize(TRUE);
     // remove temp dir
     recursive_rmdir($temp_dir);
     echo "\n temporary directory removed: " . $temp_dir;
     print_r($this->debug);
 }
示例#2
0
 /**
  * Extracts the archive named by $params["dwca_file"], turns its occurrence
  * rows into taxon instances and its Multimedia rows into media objects,
  * then finalizes the archive builder and removes the temporary extraction
  * directory.
  *
  * @param array $params Must contain the key "dwca_file" (passed to
  *                      INBioAPI::extract_archive_file()).
  * @return false|null false when the archive has no usable occurrence table;
  *                    otherwise nothing is returned explicitly.
  */
 function export_gbif_to_eol($params)
 {
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     $paths = $func->extract_archive_file($params["dwca_file"], "meta.xml");
     $archive_path = $paths['archive_path'];
     $temp_dir = $paths['temp_dir'];
     $harvester = new ContentArchiveReader(NULL, $archive_path);
     $tables = $harvester->tables;
     $occurrence_key = "http://rs.tdwg.org/dwc/terms/occurrence";
     // Guard the lookup so a missing occurrence table yields NULL instead of
     // dereferencing a non-existent entry.
     $this->fields["occurrence"] = isset($tables[$occurrence_key][0]) ? $tables[$occurrence_key][0]->fields : NULL;
     if (!$this->fields["occurrence"]) {
         debug("Invalid archive file. Program will terminate.");
         // Do not leave the extracted files behind on the failure path.
         if ($temp_dir) {
             recursive_rmdir($temp_dir);
         }
         return false;
     }
     /*
         $harvester->process_row_type() -  this will convert rows into array.
         To inspect a row type while debugging, print_r() its result, e.g. for
         'http://rs.tdwg.org/dwc/terms/occurrence' or
         'http://rs.gbif.org/terms/1.0/Multimedia'.
     */
     self::create_instances_from_taxon_object($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/occurrence'));
     self::get_media_objects($harvester->process_row_type('http://rs.gbif.org/terms/1.0/Multimedia'));
     // Currently not processed for this resource: Document, Reference, Agent
     // and VernacularName row types.
     $this->archive_builder->finalize(TRUE);
     // remove temp dir
     recursive_rmdir($temp_dir);
     echo "\n temporary directory removed: " . $temp_dir;
 }
示例#3
0
 /**
  * Reads the archive stored under CONTENT_RESOURCE_LOCAL_PATH/<resource_id>/
  * and passes the rows of every table, except the vernacular-name table, to
  * process_fields() together with the basename of the table's row type.
  *
  * @param mixed  $resource_id    Appended to CONTENT_RESOURCE_LOCAL_PATH to locate the archive.
  * @param string $file_extension Not used inside this method.
  */
 function check_unique_ids($resource_id, $file_extension = ".tab")
 {
     $reader = new ContentArchiveReader(NULL, CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "/");
     // Row types that are left out of the check (vernacular names only;
     // measurementorfact could be excluded the same way if needed).
     $excluded = array("http://rs.gbif.org/terms/1.0/vernacularname");
     $row_types = array_diff(array_keys($reader->tables), $excluded);
     print_r($row_types);
     foreach ($row_types as $row_type) {
         $records = $reader->process_row_type($row_type);
         self::process_fields($records, pathinfo($row_type, PATHINFO_BASENAME));
     }
 }
 /**
  * Extracts the archive referenced by $this->dwca_file, reads the list of
  * row types from its meta.xml and passes each row type's records to
  * process_fields() together with the fields allowed for it. Afterwards the
  * archive builder is finalized and the temporary directory removed.
  *
  * @return false|null false when the archive has no usable taxon table;
  *                    otherwise nothing is returned explicitly.
  */
 function get_all_taxa()
 {
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     $paths = $func->extract_archive_file($this->dwca_file, "meta.xml");
     $archive_path = $paths['archive_path'];
     $temp_dir = $paths['temp_dir'];
     $harvester = new ContentArchiveReader(NULL, $archive_path);
     $tables = $harvester->tables;
     $taxon_key = "http://rs.tdwg.org/dwc/terms/taxon";
     // Guard the lookup so a missing taxon table yields NULL instead of
     // dereferencing a non-existent entry.
     $this->fields["taxa"] = isset($tables[$taxon_key][0]) ? $tables[$taxon_key][0]->fields : NULL;
     if (!$this->fields["taxa"]) {
         debug("Invalid archive file. Program will terminate.");
         // Do not leave the extracted files behind on the failure path.
         if ($temp_dir) {
             recursive_rmdir($temp_dir);
         }
         return false;
     }
     $row_types = self::get_XML_fields($temp_dir . "meta.xml", "rowType");
     /* manual assignment, use this if some referenceID in Measurements don't exist in References.tab
        $row_types = array("http://eol.org/schema/reference/Reference", "http://rs.tdwg.org/dwc/terms/Taxon", "http://rs.tdwg.org/dwc/terms/MeasurementOrFact", "http://rs.tdwg.org/dwc/terms/Occurrence");
        */
     print_r($row_types);
     foreach ($row_types as $row_type) {
         $basename = pathinfo($row_type, PATHINFO_BASENAME);
         if ($basename == "Taxon") {
             $allowed_fields = array("taxonID", "scientificName", "parentNameUsageID", "kingdom", "phylum", "class", "order", "family", "genus", "taxonRank", "furtherInformationURL", "taxonomicStatus", "taxonRemarks", "namePublishedIn", "referenceID");
         } else {
             $allowed_fields = self::get_XML_fields($this->extensions[$basename], "property name");
         }
         // manual adjustment: compare the basename, not the row type itself.
         // The basename is derived from $row_type with pathinfo(), and the
         // sample values above are full URIs, which would never equal
         // "VernacularName".
         if ($basename == "VernacularName") {
             $allowed_fields[] = "taxonID";
         }
         self::process_fields($harvester->process_row_type($row_type), $basename, $allowed_fields);
         // e.g. self::process_fields($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon'), "Taxon");
     }
     $this->archive_builder->finalize(TRUE);
     // remove temp dir
     recursive_rmdir($temp_dir);
     echo "\n temporary directory removed: " . $temp_dir;
     print_r($this->debug);
 }
 /**
  * Extracts the archive referenced by $this->dwca_file and processes each
  * extension that actually yields records: references, taxa, (optionally)
  * occurrences, distributions, images, descriptions and vernacular names.
  * Afterwards the archive builder is finalized and the temporary directory
  * removed.
  *
  * Occurrences are only processed when $this->params["process occurrence"]
  * is set to a truthy value.
  *
  * @return false|null false when the archive has no usable taxon table;
  *                    otherwise nothing is returned explicitly.
  */
 function get_all_taxa()
 {
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     $paths = $func->extract_archive_file($this->dwca_file, "meta.xml");
     $archive_path = $paths['archive_path'];
     $temp_dir = $paths['temp_dir'];
     $harvester = new ContentArchiveReader(NULL, $archive_path);
     $tables = $harvester->tables;
     $taxon_key = "http://rs.tdwg.org/dwc/terms/taxon";
     // Guard the lookup so a missing taxon table yields NULL instead of
     // dereferencing a non-existent entry.
     $this->fields["taxa"] = isset($tables[$taxon_key][0]) ? $tables[$taxon_key][0]->fields : NULL;
     if (!$this->fields["taxa"]) {
         debug("Invalid archive file. Program will terminate.");
         // Do not leave the extracted files behind on the failure path.
         if ($temp_dir) {
             recursive_rmdir($temp_dir);
         }
         return false;
     }
     if ($records = $harvester->process_row_type('http://rs.gbif.org/terms/1.0/Reference')) {
         self::get_references($records);
     }
     if ($records = $harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon')) {
         $taxa_id_list = self::get_taxa_id_list($records);
         self::create_instances_from_taxon_object($records, $taxa_id_list);
     }
     // !empty() keeps the truthiness test but does not raise a notice when
     // the option was never configured.
     if (!empty($this->params["process occurrence"])) {
         echo "\nProcessed OCCURRENCE\n";
         if ($records = $harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Occurrence')) {
             $this->uris = self::get_uris();
             self::get_occurrences($records);
         }
     }
     if ($records = $harvester->process_row_type('http://rs.gbif.org/terms/1.0/Distribution')) {
         self::get_distributions($records);
     }
     if ($records = $harvester->process_row_type('http://rs.gbif.org/terms/1.0/Image')) {
         self::get_images($records);
     }
     //http://eol.org/content_partners/159/resources/345
     if ($records = $harvester->process_row_type('http://rs.gbif.org/terms/1.0/Description')) {
         self::get_descriptions($records);
     }
     //http://eol.org/content_partners/159/resources/332
     if ($records = $harvester->process_row_type('http://rs.gbif.org/terms/1.0/VernacularName')) {
         self::get_vernaculars($records);
     }
     $this->archive_builder->finalize(TRUE);
     // remove temp dir
     recursive_rmdir($temp_dir);
     echo "\n temporary directory removed: " . $temp_dir;
     print_r($this->debug);
 }
 /**
  * Creates an archive builder for "<resource_id>_working/" and replays every
  * table of the per-batch archives "<resource_id>_<n>_working/" (n = 1 ..
  * $batches) through process_fields(), then finalizes the builder.
  *
  * @param int   $batches     Highest batch number to read; batches are numbered from 1.
  * @param mixed $resource_id Used to build the working-directory paths.
  * @return false|null false as soon as a batch archive has no usable taxon
  *                    table (the builder is not finalized in that case);
  *                    otherwise nothing is returned explicitly.
  */
 private function combine_all_temp_archives($batches, $resource_id)
 {
     $this->path_to_archive_directory = CONTENT_RESOURCE_LOCAL_PATH . '/' . $resource_id . '_working/';
     $builder_options = array('directory_path' => $this->path_to_archive_directory);
     $this->archive_builder = new \eol_schema\ContentArchiveBuilder($builder_options);
     $batch = 1;
     while ($batch <= $batches) {
         $batch_dir = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "_" . $batch . "_working/";
         $reader = new ContentArchiveReader(NULL, $batch_dir);
         $batch_tables = $reader->tables;
         // take note the index key is all lower case
         $taxon_fields = $batch_tables["http://rs.tdwg.org/dwc/terms/taxon"][0]->fields;
         $this->fields["taxa"] = $taxon_fields;
         if (!$taxon_fields) {
             return false;
         }
         foreach (array_keys($batch_tables) as $row_type) {
             $records = $reader->process_row_type($row_type);
             self::process_fields($records, pathinfo($row_type, PATHINFO_BASENAME));
         }
         /* debug - uncomment in normal operation
            //delete temp dir and file.tar.gz
            if(is_dir($batch_dir)) recursive_rmdir($batch_dir);
            $file = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "_" . $batch . "_working.tar.gz";
            if(is_file($file)) unlink($file);
            */
         $batch++;
     }
     $this->archive_builder->finalize(TRUE);
 }
 /**
  * Reads the file at $this->text_path["obis"]["ranges_OBIS"] line by line.
  * The first line (line number 0) is stored as the column labels and used
  * to build a label => position lookup; every other line is split into
  * cells and handed to process_line_data(). A progress line is printed
  * whenever the line number is a multiple of 1000.
  */
 public function read_data()
 {
     $this->column_labels = array();
     $this->column_indices = array();
     $source_file = $this->text_path["obis"]["ranges_OBIS"];
     foreach (new FileIterator($source_file) as $row_no => $raw_line) {
         if (($row_no % 1000) == 0) {
             echo "{$row_no} :: " . time_elapsed() . " :: " . memory_get_usage() . "\n";
         }
         $cells = ContentArchiveReader::line_to_array($raw_line, ",", "\"");
         if ($row_no != 0) {
             // Ordinary data row.
             $this->process_line_data($cells);
             continue;
         }
         // Header row: remember the labels and where each one sits.
         $this->column_labels = $cells;
         foreach ($cells as $position => $label) {
             $this->column_indices[$label] = $position;
         }
     }
 }
示例#8
0
 /**
  * Reads the archive under DOC_ROOT . "temp/dwca_iabin", attaches the image,
  * reference, vernacular-name and PlinianCore rows to each Taxon row by
  * taxonID, converts every taxon with get_iabin_taxa() and returns the
  * merged result.
  *
  * @return array|false Merged output of get_iabin_taxa() for all taxa, or
  *                     false when the archive has no usable PlinianCore table.
  */
 public static function get_all_taxa()
 {
     self::$MAPPINGS = self::assign_mappings();
     $all_taxa = array();
     $final_taxa = array();
     $used_collection_ids = array();
     $harvester = new ContentArchiveReader(NULL, DOC_ROOT . "temp/dwca_iabin");
     $tables = $harvester->tables;
     $core_key = "http://www.pliniancore.org/plic/pcfcore/pliniancore2.3";
     // Guard the lookup so a missing core table yields NULL instead of
     // dereferencing a non-existent entry.
     $GLOBALS['fields'] = isset($tables[$core_key][0]) ? $tables[$core_key][0]->fields : NULL;
     if (!$GLOBALS['fields']) {
         debug("Invalid archive file. Program will terminate.");
         return false;
     }
     $images = self::get_images($harvester->process_row_type('http://rs.gbif.org/terms/1.0/image'));
     $references = self::get_references($harvester->process_row_type('http://rs.gbif.org/terms/1.0/reference'));
     $vernacular_names = self::get_vernacular_names($harvester->process_row_type('http://rs.gbif.org/terms/1.0/vernacularname'));
     $taxon_id_uri = 'http://rs.tdwg.org/dwc/terms/taxonID';
     // Group the PlinianCore rows by taxonID; rows without a taxonID are
     // collected under the empty-string key (the key NULL maps to).
     $taxon_media = array();
     $media = $harvester->process_row_type('http://www.pliniancore.org/plic/pcfcore/PlinianCore2.3');
     if ($media) {
         foreach ($media as $m) {
             $media_taxon_id = isset($m[$taxon_id_uri]) ? $m[$taxon_id_uri] : '';
             $taxon_media[$media_taxon_id][] = $m;
         }
     }
     $taxa = $harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon');
     if (!$taxa) {
         $taxa = array();
     }
     $i = 0;
     $total = sizeof($taxa);
     foreach ($taxa as $taxon) {
         $i++;
         debug(" {$i} of {$total}");
         $taxon_id = @$taxon[$taxon_id_uri];
         $taxon["id"] = $taxon_id;
         $taxon["image"] = @$images[$taxon_id];
         $taxon["reference"] = @$references[$taxon_id];
         $taxon["vernacular_name"] = @$vernacular_names[$taxon_id];
         // Not every taxon has PlinianCore rows; fall back to NULL instead of
         // reading an undefined index.
         $media_key = isset($taxon_id) ? $taxon_id : '';
         $taxon["media"] = isset($taxon_media[$media_key]) ? $taxon_media[$media_key] : NULL;
         $arr = self::get_iabin_taxa($taxon, $used_collection_ids);
         $page_taxa = $arr[0];
         $used_collection_ids = $arr[1];
         //do in batches to speed it up.
         if ($page_taxa) {
             $all_taxa = array_merge($all_taxa, $page_taxa);
         }
         // ">=" rather than "==": a merge that adds several entries can jump
         // past exactly 1000, after which "==" would never flush again.
         if (count($all_taxa) >= 1000) {
             $final_taxa = array_merge($final_taxa, $all_taxa);
             $all_taxa = array();
         }
     }
     //last writes
     $final_taxa = array_merge($final_taxa, $all_taxa);
     return $final_taxa;
 }
 /**
  * Builds the archive for $this->resource_id in two passes.
  *
  * Pass 1 reads self::TAXA_URL: the first line supplies the column labels,
  * every other line is turned into a taxon plus an occurrence, and the
  * occurrence is stored in $this->taxon_occurrences under the taxon's
  * taxonID.
  * Pass 2 reads self::DUMP_URL: the column labels are rebuilt from its first
  * line and every other line goes to process_line_data().
  *
  * Both passes print a progress line whenever the line number is a multiple
  * of 1000. The archive builder is finalized at the end.
  */
 public function build_archive()
 {
     $this->path_to_archive_directory = CONTENT_RESOURCE_LOCAL_PATH . "/{$this->resource_id}/";
     $builder_options = array('directory_path' => $this->path_to_archive_directory);
     $this->archive_builder = new \eol_schema\ContentArchiveBuilder($builder_options);

     // Pass 1: taxa and their occurrences.
     $this->column_labels = array();
     $this->column_indices = array();
     $this->taxon_occurrences = array();
     foreach (new FileIterator(self::TAXA_URL) as $row_no => $raw_line) {
         if (($row_no % 1000) == 0) {
             echo "{$row_no} :: " . time_elapsed() . " :: " . memory_get_usage() . "\n";
         }
         $cells = ContentArchiveReader::line_to_array($raw_line, ",", "\"");
         if ($row_no != 0) {
             $taxon = $this->add_taxon($cells);
             $this->taxon_occurrences[$taxon->taxonID] = $this->add_occurrence($cells, $taxon);
             continue;
         }
         // Header row: remember the labels and where each one sits.
         $this->column_labels = $cells;
         foreach ($cells as $position => $label) {
             $this->column_indices[$label] = $position;
         }
     }

     // Pass 2: the data dump, which has its own header row.
     $this->column_labels = array();
     $this->column_indices = array();
     foreach (new FileIterator(self::DUMP_URL) as $row_no => $raw_line) {
         if (($row_no % 1000) == 0) {
             echo "{$row_no} :: " . time_elapsed() . " :: " . memory_get_usage() . "\n";
         }
         $cells = ContentArchiveReader::line_to_array($raw_line, ",", "\"");
         if ($row_no != 0) {
             $this->process_line_data($cells);
             continue;
         }
         $this->column_labels = $cells;
         foreach ($cells as $position => $label) {
             $this->column_indices[$label] = $position;
         }
     }

     $this->archive_builder->finalize(true);
 }
示例#10
0
 /**
  * Extracts the given archive, attaches the image, reference,
  * vernacular-name and PlinianCore rows to each Taxon row by taxonID,
  * converts every taxon with get_inbio_taxa() and returns the merged result.
  * The temporary extraction directory is removed before returning.
  *
  * @param mixed $dwca_file Passed unchanged to extract_archive_file().
  * @return array|false Merged output of get_inbio_taxa() for all taxa, or
  *                     false when the archive has no usable PlinianCore table.
  */
 function get_all_taxa($dwca_file)
 {
     self::$MAPPINGS = self::assign_mappings();
     $all_taxa = array();
     $used_collection_ids = array();
     $paths = self::extract_archive_file($dwca_file, "meta.xml");
     $archive_path = $paths['archive_path'];
     $temp_dir = $paths['temp_dir'];
     $harvester = new ContentArchiveReader(NULL, $archive_path);
     $tables = $harvester->tables;
     $core_key = "http://www.pliniancore.org/plic/pcfcore/pliniancore2.3";
     // Guard the lookup so a missing core table yields NULL instead of
     // dereferencing a non-existent entry.
     $GLOBALS['fields'] = isset($tables[$core_key][0]) ? $tables[$core_key][0]->fields : NULL;
     if (!$GLOBALS['fields']) {
         debug("Invalid archive file. Program will terminate.");
         // Do not leave the extracted files behind on the failure path.
         if ($temp_dir) {
             shell_exec("rm -fr " . escapeshellarg($temp_dir));
         }
         return false;
     }
     $images = self::get_images($harvester->process_row_type('http://rs.gbif.org/terms/1.0/image'));
     $references = self::get_references($harvester->process_row_type('http://rs.gbif.org/terms/1.0/reference'));
     $vernacular_names = self::get_vernacular_names($harvester->process_row_type('http://rs.gbif.org/terms/1.0/vernacularname'));
     $taxon_id_uri = 'http://rs.tdwg.org/dwc/terms/taxonID';
     // Index the PlinianCore rows by taxonID. A later row with the same
     // taxonID replaces the earlier one.
     // NOTE(review): only one row per taxon is kept here - confirm this is intended.
     $taxon_media = array();
     $media = $harvester->process_row_type('http://www.pliniancore.org/plic/pcfcore/PlinianCore2.3');
     if ($media) {
         foreach ($media as $m) {
             $media_taxon_id = isset($m[$taxon_id_uri]) ? $m[$taxon_id_uri] : '';
             $taxon_media[$media_taxon_id] = $m;
         }
     }
     $taxa = $harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon');
     if (!$taxa) {
         $taxa = array();
     }
     $i = 0;
     $total = sizeof($taxa);
     foreach ($taxa as $taxon) {
         $i++;
         debug("{$i} of {$total}");
         $taxon_id = @$taxon[$taxon_id_uri];
         $taxon["id"] = $taxon_id;
         $taxon["image"] = @$images[$taxon_id];
         $taxon["reference"] = @$references[$taxon_id];
         $taxon["vernacular_name"] = @$vernacular_names[$taxon_id];
         // Not every taxon has a PlinianCore row; fall back to NULL instead
         // of reading an undefined index.
         $media_key = isset($taxon_id) ? $taxon_id : '';
         $taxon["media"] = isset($taxon_media[$media_key]) ? $taxon_media[$media_key] : NULL;
         $arr = self::get_inbio_taxa($taxon, $used_collection_ids);
         $page_taxa = $arr[0];
         $used_collection_ids = $arr[1];
         if ($page_taxa) {
             $all_taxa = array_merge($all_taxa, $page_taxa);
         }
     }
     // remove tmp dir; the path is escaped so spaces or shell metacharacters
     // in it cannot change what the command does.
     if ($temp_dir) {
         shell_exec("rm -fr " . escapeshellarg($temp_dir));
     }
     return $all_taxa;
 }