/**
 * Reads the archive referenced by $this->dwca_file and passes each supported
 * row type (taxa, media documents, references, agents, vernacular names) to
 * the matching handler, then finalizes the archive builder.
 *
 * @return false|null false when the archive exposes no taxon fields;
 *                    nothing is returned on the normal path.
 */
function get_all_taxa()
 {
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     $paths = $func->extract_archive_file($this->dwca_file, "meta.xml");
     $archive_path = $paths['archive_path'];
     $temp_dir = $paths['temp_dir'];
     $harvester = new ContentArchiveReader(NULL, $archive_path);
     $tables = $harvester->tables;
     if (!($this->fields["taxa"] = $tables["http://rs.tdwg.org/dwc/terms/taxon"][0]->fields)) {
         debug("Invalid archive file. Program will terminate.");
         // the archive has already been extracted at this point, so remove
         // the temporary directory before bailing out instead of leaking it
         recursive_rmdir($temp_dir);
         return false;
     }
     // the taxon rows are read twice: first to build the rank lookup, then to create the taxa
     self::build_taxa_rank_array($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon'));
     self::create_instances_from_taxon_object($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon'));
     self::get_objects($harvester->process_row_type('http://eol.org/schema/media/Document'));
     self::get_references($harvester->process_row_type('http://rs.gbif.org/terms/1.0/Reference'));
     self::get_agents($harvester->process_row_type('http://eol.org/schema/agent/Agent'));
     self::get_vernaculars($harvester->process_row_type('http://rs.gbif.org/terms/1.0/VernacularName'));
     $this->archive_builder->finalize(TRUE);
     // remove temp dir
     recursive_rmdir($temp_dir);
     echo "\n temporary directory removed: " . $temp_dir;
     print_r($this->debug);
 }
Beispiel #2
0
 /**
  * Reads the archive named by $params["dwca_file"], turns its occurrence rows
  * into taxa and its multimedia rows into media objects, then finalizes the
  * archive builder.
  *
  * @param array $params must contain "dwca_file"
  * @return false|null false when the archive exposes no occurrence fields;
  *                    nothing is returned on the normal path.
  */
 function export_gbif_to_eol($params)
 {
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     $paths = $func->extract_archive_file($params["dwca_file"], "meta.xml");
     $archive_path = $paths['archive_path'];
     $temp_dir = $paths['temp_dir'];
     $harvester = new ContentArchiveReader(NULL, $archive_path);
     $tables = $harvester->tables;
     if (!($this->fields["occurrence"] = $tables["http://rs.tdwg.org/dwc/terms/occurrence"][0]->fields)) {
         debug("Invalid archive file. Program will terminate.");
         // the archive has already been extracted, so remove the temporary
         // directory before bailing out instead of leaking it
         recursive_rmdir($temp_dir);
         return false;
     }
     /*
         $harvester->process_row_type() -  this will convert rows into array.
     */
     self::create_instances_from_taxon_object($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/occurrence'));
     self::get_media_objects($harvester->process_row_type('http://rs.gbif.org/terms/1.0/Multimedia'));
     // not used for this resource:
     // self::get_objects($harvester->process_row_type('http://eol.org/schema/media/Document'));
     // self::get_references($harvester->process_row_type('http://rs.gbif.org/terms/1.0/Reference'));
     // self::get_agents($harvester->process_row_type('http://eol.org/schema/agent/Agent'));
     // self::get_vernaculars($harvester->process_row_type('http://rs.gbif.org/terms/1.0/VernacularName'));
     $this->archive_builder->finalize(TRUE);
     // remove temp dir
     recursive_rmdir($temp_dir);
     echo "\n temporary directory removed: " . $temp_dir;
 }
 /**
  * Loads the "eol_env_annotations_noParentTerms" .tsv out of the zip at
  * $this->species_list_export, hands it to csv_to_array(), finalizes the
  * archive builder and removes the directories used along the way.
  */
 function generate_EnvEOL_data()
 {
     require_library('connectors/IUCNRedlistDataConnector');
     $connector = new IUCNRedlistDataConnector();

     // cached copy expires after 3 months
     $zip_options = $this->download_options;
     $zip_options['expire_seconds'] = 2592000 * 3;

     // basenames needed from the zip
     $wanted = array("eol_env_annotations_noParentTerms");
     $extracted = $connector->load_zip_contents($this->species_list_export, $zip_options, $wanted, ".tsv");
     print_r($extracted);

     $tsv_file = $extracted[$wanted[0]];
     self::csv_to_array($tsv_file);
     $this->archive_builder->finalize(TRUE);

     // remove the directory the .tsv was extracted into
     $tsv_info = pathinfo($tsv_file);
     recursive_rmdir($tsv_info["dirname"]);
     debug("\n temporary directory removed: " . $tsv_info["dirname"]);

     // comment this out if you want to inspect "need_to_check_tc_id.txt"
     recursive_rmdir($this->TEMP_DIR);

     /* utility: re-run problematic tc_ids with cache=0
        $tc_ids = self::get_dump();
        foreach($tc_ids as $tc_id)
        {
            $rec['taxon_id'] = $tc_id;
            self::prepare_taxon($rec);
        }
        exit("\n-exit-\n");
        */
 }
 /**
  * Builds a value => URI lookup from the spreadsheet named in
  * $params["uri_file"].
  *
  * Column pairs are only defined for the "NMNH" and "NHM" datasets; for any
  * other dataset, or when no spreadsheet is given or it cannot be converted,
  * the result is an empty array.
  *
  * @param array $params reads "dataset" and (optionally) "uri_file"
  * @return array
  */
 private function get_uris($params)
 {
     // value column => column holding the URI for that value
     $column_pairs = array();
     if (in_array($params["dataset"], array("NMNH", "NHM"))) {
         $column_pairs = array(
             "institutionCode" => "institutionCode_uri",
             "sex" => "sex_uri",
             "typeStatus" => "typeStatus_uri",
             "lifeStage" => "lifeStage_uri",
             "collectionCode" => "collectionCode_uri",
         );
     }

     require_library('connectors/LifeDeskToScratchpadAPI');
     $converter = new LifeDeskToScratchpadAPI();
     // expire_seconds: false => won't expire; 0 => expires now
     $options = array("cache" => 1, "timeout" => 3600, "file_extension" => "xlsx", 'download_attempts' => 2, 'delay_in_minutes' => 2, "expire_seconds" => 0);

     $uris = array();
     $spreadsheet = @$params["uri_file"];
     if (!$spreadsheet) {
         return $uris;
     }
     $sheet = $converter->convert_spreadsheet($spreadsheet, 0, $options);
     if (!$sheet) {
         return $uris;
     }

     foreach ($column_pairs as $value_column => $uri_column) {
         $row = 0;
         foreach ($sheet[$value_column] as $cell) {
             $cell = trim($cell);
             if ($cell) {
                 $uris[$cell] = $sheet[$uri_column][$row];
             }
             $row++;
         }
     }
     return $uris;
 }
 /**
  * Post-processes the EOL XML found at $this->text_path["eol_xml"]: blanks
  * every dc:source on taxa and data objects, strips tags from references,
  * saves the result as the resource document and writes a gzipped copy
  * next to it.
  *
  * @param string $lifedesk_name used as the resource name and file basename
  */
 private function update_eol_xml($lifedesk_name)
 {
     /*
     taxon = 434
     dwc:ScientificName = 434
     reference = 614
     synonym = 68
     commonName = 2
     dataObjects = 1705
     reference = 0
     texts = 1146
     images = 559
     videos = 0
     sounds = 0
     */
     require_library('ResourceDataObjectElementsSetting');
     $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $lifedesk_name . ".xml";
     $func = new ResourceDataObjectElementsSetting($lifedesk_name, $resource_path);
     $xml = file_get_contents($this->text_path["eol_xml"]);
     $xml = $func->replace_taxon_element_value("dc:source", "replace any existing value", "", $xml, false);
     $xml = $func->replace_data_object_element_value("dc:source", "replace any existing value", "", $xml, false);
     $xml = self::remove_tags_in_references($xml);
     $func->save_resource_document($xml);
     // zip the xml; both paths are escaped so that a name containing spaces
     // or shell metacharacters cannot break (or inject into) the command
     $command_line = "gzip -c " . escapeshellarg($resource_path) . " >" . escapeshellarg($resource_path . ".gz");
     shell_exec($command_line);
 }
/**
 * Loads resource 346 and saves it back without the data objects whose
 * mimeType is "image/x-adobe-dng".
 */
function remove_dataObject()
{
    require_library('ResourceDataObjectElementsSetting');
    $editor = new ResourceDataObjectElementsSetting(
        346,
        "http://localhost/eol_php_code/applications/content_server/resources/346.xml.gz"
    );
    $cleaned_xml = $editor->remove_data_object_of_certain_element_value(
        "mimeType",
        "image/x-adobe-dng",
        $editor->load_xml_string()
    );
    $editor->save_resource_document($cleaned_xml);
}
 /**
  * Walks every page of a Flickr photoset and adds each usable photo to the
  * archive, then finalizes the archive builder.
  *
  * A photo is skipped when no scientific name can be derived from its title,
  * when its details cannot be fetched, when it is not public or not
  * downloadable, or when its license is not listed in
  * $GLOBALS["flickr_licenses"].
  *
  * @param array $params reads "photoset_id" and "flickr_user_id"
  */
 function convert_to_dwca($params)
 {
     require_library('FlickrAPI');
     $auth_token = NULL;
     // if(FlickrAPI::valid_auth_token(FLICKR_AUTH_TOKEN)) $auth_token = FLICKR_AUTH_TOKEN;
     $per_page = 500;
     $photoset_url = $this->service['photosets'] . '&photoset_id=' . $params['photoset_id'] . '&user_id=' . $params['flickr_user_id'] . '&per_page=' . $per_page;

     // the first page is fetched up front only to learn the total number of pages
     if ($first_page = Functions::lookup_with_cache($photoset_url . '&page=1', $this->download_options)) {
         $summary = json_decode(str_replace("\\'", "'", $first_page));
         $total_pages = ceil($summary->photoset->total / $per_page);
         echo "\ntotal_pages = {$total_pages}\n";

         for ($page = 1; $page <= $total_pages; $page++) {
             $json = Functions::lookup_with_cache($photoset_url . '&page=' . $page, $this->download_options);
             if (!$json) {
                 continue;
             }
             $listing = json_decode(str_replace("\\'", "'", $json));
             $total_photos = count($listing->photoset->photo);
             $photo_no = 0;
             foreach ($listing->photoset->photo as $rec) {
                 $photo_no++;
                 echo "\n{$page} of {$total_pages} - {$photo_no} of {$total_photos}";
                 $sciname = self::get_sciname_from_title($rec->title);
                 if (!$sciname) {
                     continue;
                 }
                 $photo_response = FlickrAPI::photos_get_info($rec->id, $rec->secret, $auth_token, $this->download_options);
                 $photo = @$photo_response->photo;
                 $unusable = !$photo
                     || $photo->visibility->ispublic != 1
                     || $photo->usage->candownload != 1
                     || @(!$GLOBALS["flickr_licenses"][$photo->license]);
                 if ($unusable) {
                     continue;
                 }
                 foreach (FlickrAPI::get_data_objects($photo, $params['flickr_user_id']) as $data_object) {
                     self::create_archive($sciname, $data_object);
                 }
             }
         }
     }
     $this->archive_builder->finalize(TRUE);
 }
 /**
  * Runs the Top Images process under a HarvestProcessLog entry: calls
  * top_concept_images() with true and then with false, and queues a
  * 'denormalize_tables' job before marking the log as finished.
  */
 public static function top_images()
 {
     require_library('TopImages');
     $process_log = HarvestProcessLog::create(array('process_name' => 'Top Images'));

     $builder = new TopImages();
     $builder->begin_process();
     foreach (array(true, false) as $flag) {
         $builder->top_concept_images($flag);
     }

     \Resque::enqueue('harvesting', 'CodeBridge', array('cmd' => 'denormalize_tables'));
     $process_log->finished();
 }
Beispiel #9
0
 /**
  * Binds the shared DB connection, selects the connection stored as
  * mysqli_slave, and creates the name parser client.
  *
  * A dedicated 'slave' connection is loaded only in production when such an
  * environment is defined; otherwise mysqli_slave aliases the main connection.
  */
 public function __construct()
 {
     $this->mysqli =& $GLOBALS['db_connection'];

     $has_slave = $GLOBALS['ENV_NAME'] == 'production' && environment_defined('slave');
     if (!$has_slave) {
         $this->mysqli_slave =& $this->mysqli;
     } else {
         $this->mysqli_slave = load_mysql_environment('slave');
     }

     require_library('RubyNameParserClient');
     $this->name_parser = new RubyNameParserClient();
 }
 /**
  * Downloads a spreadsheet and processes each of its worksheets.
  *
  * With $temp_path set, every worksheet is only converted with the
  * worksheet title and that path passed as parameters. Otherwise each valid
  * worksheet is read into memory: the first cell of every column is collected
  * as that column's URI, and every row with index greater than 7 is passed to
  * create_archive(). The downloaded files are deleted afterwards.
  *
  * @param string       $spreadsheet spreadsheet location
  * @param string|false $temp_path   optional path handed to the parser
  */
 private function start($spreadsheet, $temp_path = false)
 {
     require_library('XLSParser');
     $parser = new XLSParser();
     $doc = self::download_file_accordingly($spreadsheet);
     $download_options = $this->download_options;
     $download_options['file_extension'] = self::get_extension($doc);

     $path = Functions::save_remote_file_to_local($doc, $download_options);
     if (!$path) {
         echo "\n [{$doc}] unavailable! \n";
         return;
     }

     $worksheets = self::get_worksheets($path, $parser, $temp_path);
     print_r($worksheets);
     foreach ($worksheets as $index => $worksheet_title) {
         echo "\nProcessing worksheet: [{$worksheet_title}]";

         if ($temp_path) {
             $parser->convert_sheet_to_array($path, $index, NULL, array("worksheet_title" => $worksheet_title, "path" => $temp_path));
             continue;
         }

         $sheet = $parser->convert_sheet_to_array($path, $index);
         if (!self::sheet_is_valid($sheet, $worksheet_title)) {
             echo " - invalid worksheet\n";
             continue;
         }

         $fields = array_keys($sheet);
         $uris = array();
         foreach ($fields as $field) {
             $uris[] = $sheet[$field][0];
         }

         // the first column only drives the row count
         $row_index = -1;
         foreach ($sheet[$fields[0]] as $unused_cell) {
             $row_index++;
             if ($row_index <= 7) {
                 continue; // rows 0..7 are not data records
             }
             $rec = array();
             foreach ($fields as $field) {
                 $rec[$field] = $sheet[$field][$row_index];
             }
             if ($rec) {
                 self::create_archive($worksheet_title, $fields, $uris, $rec);
             }
         }
     }

     unlink($path);
     if (file_exists($doc)) {
         unlink($doc);
     }
 }
 /**
  * Reads the institutions spreadsheet and returns a map of institution code
  * to URI, pairing the "Code" and "URI" columns by position.
  *
  * @return array code => URI
  */
 private function get_institution_uris()
 {
     require_library('connectors/LifeDeskToScratchpadAPI');
     $converter = new LifeDeskToScratchpadAPI();
     $sheet = $converter->convert_spreadsheet($this->institutions_xls, 0);

     $institutions = array();
     $row = 0;
     foreach ($sheet["Code"] as $code) {
         $institutions[$code] = $sheet["URI"][$row++];
     }

     echo "\nInstitutions from spreadsheet [{$this->institutions_xls}]:", count($institutions), "\n";
     return $institutions;
 }
Beispiel #12
0
 /**
  * Downloads the .xls data dump and returns its records after the missing
  * names and parent ids have been filled in.
  *
  * Note that $this->data_dump_url is overwritten with the result of the
  * download (the value later passed to the parser).
  *
  * @return array|null the records, or nothing when the download fails
  */
 private function parse_xls()
 {
     $download_options = array('download_wait_time' => 1000000, 'timeout' => 600, 'download_attempts' => 5, 'file_extension' => 'xls');
     $this->data_dump_url = Functions::save_remote_file_to_local($this->data_dump_url, $download_options);
     if (!$this->data_dump_url) {
         return;
     }

     require_library('XLSParser');
     $parser = new XLSParser();
     debug("\n reading: " . $this->data_dump_url . "\n");
     $sheet = $parser->convert_sheet_to_array($this->data_dump_url);
     $records = $parser->prepare_data($sheet, "single", "SCIENTIFIC NAME", "SCIENTIFIC NAME", "CATEGORY", "ENGLISH NAME", "RANGE", "ORDER", "FAMILY", "EXTINCT", "EXTINCT_YEAR");
     $records = self::fill_in_parent_id(self::fill_in_missing_names($records));
     debug("\n" . count($records));
     return $records;
 }
Beispiel #13
0
 /**
  * Collects the taxa id lists from both sources into $this->taxa_id_list,
  * extracts the EOL XML archive, runs update_xml() on it and returns the
  * resulting XML.
  *
  * @param array $params reads "eol_xml_file" and "filename"; "path" is set
  *                      before the call to update_xml()
  * @return mixed whatever update_xml() returns
  */
 function process_xml($params)
 {
     $biogeodb_taxa = self::get_taxa_list_from_biogeodb();
     $myspecies_taxa = self::get_taxa_list_from_myspecies();
     $this->taxa_id_list = array_merge($biogeodb_taxa, $myspecies_taxa);

     require_library('connectors/INBioAPI');
     $extractor = new INBioAPI();
     $extract_options = array("timeout" => 7200, "expire_seconds" => false);
     $paths = $extractor->extract_archive_file($params["eol_xml_file"], $params["filename"], $extract_options);
     print_r($paths);

     $params["path"] = $paths["temp_dir"];
     $updated_xml = self::update_xml($params);

     // remove temp dir
     recursive_rmdir($paths["temp_dir"]);
     return $updated_xml;
 }
Beispiel #14
0
 /**
  * Downloads the .xlsx data dump (cached) and returns the records of its
  * first worksheet after the uppercase fields have been added and the missing
  * names and parent ids filled in.
  *
  * Note that $this->data_dump_url is overwritten with the result of the
  * download (the value later passed to the parser).
  *
  * @return array|null the records, or nothing when the download fails
  */
 private function parse_xls()
 {
     $download_options = array('cache' => 1, 'download_wait_time' => 1000000, 'timeout' => 600, 'download_attempts' => 5, 'file_extension' => 'xlsx');
     $this->data_dump_url = Functions::save_remote_file_to_local($this->data_dump_url, $download_options);
     if (!$this->data_dump_url) {
         return;
     }

     require_library('XLSParser');
     $parser = new XLSParser();
     debug("\n reading: " . $this->data_dump_url . "\n");
     $sheet = $parser->convert_sheet_to_array($this->data_dump_url, 0);
     $records = $parser->prepare_data($sheet, "single", "Scientific name", "Scientific name", "Category", "English name", "Range", "Order", "Family", "Extinct", "Extinction Year");
     $records = self::add_uppercase_fields($records);
     $records = self::fill_in_parent_id(self::fill_in_missing_names($records));
     debug("\n" . count($records));
     return $records;
 }
Beispiel #15
0
 /**
  * Bootstraps Doctrine: registers its autoloader, opens the connection for
  * the `dsn` constant with utf8 settings, attaches a Memcache-backed query
  * cache and loads the model classes under library_root.
  */
 function model_boostrap()
 {
     require_library("/vendor/doctrine/Doctrine.php");
     spl_autoload_register(array('doctrine', 'autoload'));

     $connection = Doctrine_Manager::connection(dsn);
     $connection->setCollate('utf8_general_ci');
     $connection->setCharset('utf8');
     $connection->setAttribute(Doctrine::ATTR_AUTO_ACCESSOR_OVERRIDE, true);
     $connection->setAttribute(Doctrine::ATTR_AUTOLOAD_TABLE_CLASSES, true);

     // query cache served by memcached
     $query_cache = new Doctrine_Cache_Memcache(array(
         'servers' => array('host' => 'localhost', 'port' => 11211, 'persistent' => true),
         'compression' => false,
     ));
     $connection->setAttribute(Doctrine::ATTR_QUERY_CACHE, $query_cache);

     foreach (array('/models/dal', '/models') as $models_dir) {
         Doctrine::loadModels(library_root . $models_dir);
     }
 }
Beispiel #16
0
 /**
  * Downloads every spreadsheet listed in $this->spreadsheets and, for each
  * row of its "Species" column, builds a trimmed record, derives an md5
  * taxon_id from the cleaned scientific name and creates the taxon, its
  * images and its data. The archive builder is finalized at the end.
  */
 function get_all_taxa()
 {
     require_library('XLSParser');
     $total_docs = count($this->spreadsheets);
     $doc_no = 0;
     $download_options = array("cache" => 1, "timeout" => 3600, "file_extension" => "xls", 'download_attempts' => 2, 'delay_in_minutes' => 2);

     foreach ($this->spreadsheets as $doc) {
         $doc_no++;
         echo "\n processing [{$doc}]...\n";

         $path = Functions::save_remote_file_to_local($this->url_path . $doc, $download_options);
         if (!$path) {
             echo "\n [{$doc}] unavailable! \n";
             continue;
         }

         $parser = new XLSParser();
         $sheet = $parser->convert_sheet_to_array($path);
         $fields = array_keys($sheet);
         $total_rows = count($sheet["Species"]);
         echo "\n total {$path}: {$total_rows} \n";

         for ($row = 0; $row < $total_rows; $row++) {
             // assemble the record for this row from every column
             $rec = array();
             foreach ($fields as $field) {
                 $rec[$field] = $sheet[$field][$row];
             }
             $rec = array_map('trim', $rec);

             print "\n [{$doc_no} of {$total_docs}][" . ($row + 1) . " of {$total_rows}] " . $rec["Species"] . "\n";
             $rec = self::clean_taxon_name($rec);

             // taxon_id: sciname without parenthesised parts, spaces turned into underscores, hashed
             $id_source = trim(preg_replace('/\\s*\\([^)]*\\)/', '', $rec["sciname"]));
             $rec["taxon_id"] = md5(str_replace(" ", "_", $id_source));

             self::create_instances_from_taxon_object($rec);
             self::prepare_images($rec);
             self::prepare_data($rec);
         }
         unlink($path);
     }
     $this->archive_builder->finalize(TRUE);
 }
Beispiel #17
0
/**
 * Fetches the taxa for Vimeo user "user1632860" and writes them as taxon XML
 * to the "temp_vimeo_to_tamborine2" resource file.
 */
function get_videos_from_vimeo()
{
    echo "\n -- start access to vimeo ";
    require_library('connectors/VimeoAPI');

    // Peter Kuttner's id
    $vimeo_users = array("user1632860");
    $xml = \SchemaDocument::get_taxon_xml(VimeoAPI::get_all_taxa($vimeo_users));

    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . "temp_vimeo_to_tamborine2" . ".xml";
    $handle = fopen($resource_path, "w");
    if (!$handle) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($handle, $xml);
    fclose($handle);
    echo " -- end.\n";
}
 /**
  * Builds the resource XML for the DiscoverLife ID keys.
  *
  * Names from the keys spreadsheet that have no EOL page are written to the
  * DiscoverLife text file and skipped; the remaining names are turned into
  * taxa, which are written as taxon XML to the resource file. A summary of
  * the counts is printed at the end.
  *
  * @param mixed $resource_id basename of the resource XML file
  */
 public function get_all_taxa_keys($resource_id)
 {
     require_library('connectors/DiscoverLifeAPIv2');
     $lookup = new DiscoverLifeAPIv2();
     $taxa_objects = self::process_keys_spreadsheet();

     // initialize text file for DiscoverLife: save names without a page in EOL
     self::initialize_text_file(DOC_ROOT . self::TEXT_FILE_FOR_DL . "_" . "id_keys" . ".txt");

     $all_taxa = array();
     $used_collection_ids = array();
     $total = 0;
     $missing = 0;
     foreach ($taxa_objects as $name => $fields) {
         $total++;

         // only names that already have an EOL page are processed; the others are reported back to DiscoverLife
         $taxon = $lookup->with_eol_page($name);
         if (!$taxon) {
             print "\n {$total} - no EOL page ({$name})";
             $missing++;
             self::store_name_to_text_file($name, "ID_Keys");
             continue;
         }

         $taxon["keys"] = array();
         foreach ($fields as $key_field) {
             $taxon["keys"][] = $key_field;
         }
         print "\n {$total} -- " . $taxon['orig_sciname'];

         $result = self::get_discoverlife_taxa($taxon, $used_collection_ids);
         $used_collection_ids = $result[1];
         if ($result[0]) {
             $all_taxa = array_merge($all_taxa, $result[0]);
         }
         unset($result);
     }

     $xml = \SchemaDocument::get_taxon_xml($all_taxa);
     $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
     $handle = fopen($resource_path, "w");
     if (!$handle) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
         return;
     }
     fwrite($handle, $xml);
     fclose($handle);

     $with_eol_page = $total - $missing;
     print "\n\n total = {$total} \n With EOL page = {$with_eol_page} \n No EOL page = {$missing} \n\n";
 }
 /**
  * Repairs doubled quotes in the media.txt of an archive and republishes the
  * archive under CONTENT_RESOURCE_LOCAL_PATH, both as <resource_id>.tar.gz
  * and as an unpacked <resource_id>/ directory.
  *
  * @param mixed  $resource_id basename for the generated archive and folder
  * @param string $dwca_file   location of the source archive
  */
 function clean_media_extension($resource_id, $dwca_file)
 {
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     if ($paths = $func->extract_archive_file($dwca_file, "meta.xml")) {
         print_r($paths);
         $media_file = $paths['archive_path'] . "media.txt";
         if ($contents = Functions::get_remote_file($media_file, array('timeout' => 172800))) {
             $contents = str_ireplace('<a title=""', '<a title="', $contents);
             $contents = str_ireplace('"" href=""', '" href="', $contents);
             $contents = str_ireplace('"">', '">', $contents);
             //saving new media.txt
             if (!($WRITE = fopen($media_file, "w"))) {
                 debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $media_file);
                 // do not leave the extracted archive behind on failure
                 recursive_rmdir($paths['archive_path']);
                 return;
             }
             fwrite($WRITE, $contents);
             fclose($WRITE);
             // remove the archive file e.g. plazi.zip
             $info = pathinfo($dwca_file);
             unlink($paths['archive_path'] . $info["basename"]);
             // creating the archive file; paths are escaped so that spaces or
             // shell metacharacters cannot break (or inject into) the command
             $command_line = "tar -czf " . escapeshellarg(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".tar.gz") . " --directory=" . escapeshellarg($paths['archive_path']) . " .";
             shell_exec($command_line);
             // moving files to /resources/
             recursive_rmdir(CONTENT_RESOURCE_LOCAL_PATH . $resource_id);
             if (!file_exists(CONTENT_RESOURCE_LOCAL_PATH . $resource_id)) {
                 mkdir(CONTENT_RESOURCE_LOCAL_PATH . $resource_id);
             }
             $src = $paths['archive_path'];
             $dst = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "/";
             $files = glob($paths['archive_path'] . "*.*");
             foreach ($files as $file) {
                 $file_to_go = str_replace($src, $dst, $file);
                 copy($file, $file_to_go);
             }
         }
         // remove temp dir
         recursive_rmdir($paths['archive_path']);
         echo "\n temporary directory removed: " . $paths['archive_path'];
     }
 }
 /**
  * Reads the triple-URIs spreadsheet and returns, per Wikipedia triple, its
  * measurement type ("mtype") and the two measurement values ("v1", "v2").
  * Rows whose measurement type is "EXCLUDE" are left out.
  *
  * Spreadsheet headers: Wikipedia triple - Measurement Type -
  * Measurement Value1 - Measurement Value2
  *
  * @return array triple => array("mtype" => ..., "v1" => ..., "v2" => ...)
  */
 private function get_uris()
 {
     require_library('connectors/LifeDeskToScratchpadAPI');
     $converter = new LifeDeskToScratchpadAPI();
     // add "expire_seconds" => 0 to force a refresh (false => won't expire; 0 => expires now)
     $options = array("cache" => 1, "timeout" => 3600, "file_extension" => "xlsx", 'download_attempts' => 2, 'delay_in_minutes' => 2);
     $sheet = $converter->convert_spreadsheet($this->triple_uris_spreadsheet, 0, $options);

     $uris = array();
     $row = 0;
     foreach ($sheet["Wikipedia triple"] as $triple) {
         $current = $row++;
         if ($sheet["Measurement Type"][$current] == "EXCLUDE") {
             continue;
         }
         $uris[$triple]["mtype"] = $sheet["Measurement Type"][$current];
         $uris[$triple]["v1"] = @$sheet["Measurement Value1"][$current];
         $uris[$triple]["v2"] = @$sheet["Measurement Value2"][$current];
     }
     return $uris;
 }
 /**
  * Converts an EOL XML resource into an archive.
  *
  * @param array $params      reads "eol_xml_file" (and "filename" for
  *                           archives); "path" and, for plain XML files,
  *                           "filename" are set before convert_xml() runs
  * @param bool   $xml_file_YN true when "eol_xml_file" is a plain XML file
  *                           rather than an archive/zip file
  */
 function export_xml_to_archive($params, $xml_file_YN = false)
 {
     if ($xml_file_YN) {
         // plain XML file: download it locally and convert from there
         $params['path'] = DOC_ROOT . "tmp/";
         // debug - cache should be 0 zero in normal operation
         $download_options = array('file_extension' => "xml", 'cache' => 0, "timeout" => 7200, "download_attempts" => 2, "delay_in_minutes" => 2);
         $local_xml_file = Functions::save_remote_file_to_local($params['eol_xml_file'], $download_options);
         $params['filename'] = pathinfo($local_xml_file, PATHINFO_BASENAME);
         self::convert_xml($params);
         $this->archive_builder->finalize(TRUE);
         unlink($local_xml_file);
         return;
     }

     // archive file: extract it first, then convert from the temp dir
     require_library('connectors/INBioAPI');
     $extractor = new INBioAPI();
     // "expire_seconds" -- false => won't expire; 0 => expires now //debug
     $extract_options = array("timeout" => 7200, "expire_seconds" => 0);
     $paths = $extractor->extract_archive_file($params["eol_xml_file"], $params["filename"], $extract_options);
     print_r($paths);
     $params["path"] = $paths["temp_dir"];
     self::convert_xml($params);
     $this->archive_builder->finalize(TRUE);
     // remove temp dir
     recursive_rmdir($paths["temp_dir"]);
 }
 /**
  * Reads the archive referenced by $this->dwca_file and, for every rowType
  * listed in its meta.xml, passes the rows to process_fields() together with
  * the fields allowed for that row type. The archive builder is finalized
  * and the temporary directory removed afterwards.
  *
  * @return false|null false when the archive exposes no taxon fields;
  *                    nothing is returned on the normal path.
  */
 function get_all_taxa()
 {
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     $paths = $func->extract_archive_file($this->dwca_file, "meta.xml");
     $archive_path = $paths['archive_path'];
     $harvester = new ContentArchiveReader(NULL, $archive_path);
     $tables = $harvester->tables;
     if (!($this->fields["taxa"] = $tables["http://rs.tdwg.org/dwc/terms/taxon"][0]->fields)) {
         debug("Invalid archive file. Program will terminate.");
         // the archive has already been extracted, so remove the temporary
         // directory before bailing out instead of leaking it
         recursive_rmdir($paths['temp_dir']);
         return false;
     }
     $row_types = self::get_XML_fields($paths["temp_dir"] . "meta.xml", "rowType");
     /* manual assignment, use this if some referenceID in Measurements don't exist in References.tab
        $row_types = array("http://eol.org/schema/reference/Reference", "http://rs.tdwg.org/dwc/terms/Taxon", "http://rs.tdwg.org/dwc/terms/MeasurementOrFact", "http://rs.tdwg.org/dwc/terms/Occurrence");
        */
     print_r($row_types);
     foreach ($row_types as $row_type) {
         // row types are full URIs; the last path segment identifies the extension
         $basename = pathinfo($row_type, PATHINFO_BASENAME);
         if ($basename == "Taxon") {
             $allowed_fields = array("taxonID", "scientificName", "parentNameUsageID", "kingdom", "phylum", "class", "order", "family", "genus", "taxonRank", "furtherInformationURL", "taxonomicStatus", "taxonRemarks", "namePublishedIn", "referenceID");
         } else {
             $allowed_fields = self::get_XML_fields($this->extensions[$basename], "property name");
         }
         // manual adjustment: compare the basename, since the full row type
         // URI can never equal the bare string "VernacularName"
         if ($basename == "VernacularName") {
             $allowed_fields[] = "taxonID";
         }
         self::process_fields($harvester->process_row_type($row_type), $basename, $allowed_fields);
         // e.g. self::process_fields($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon'), "Taxon");
     }
     $this->archive_builder->finalize(TRUE);
     recursive_rmdir($paths['temp_dir']);
     // remove temp dir
     echo "\n temporary directory removed: " . $paths['temp_dir'];
     print_r($this->debug);
 }
 /**
  * Entry point of the BOLDSys batch run.
  *
  * When neither the work-in-progress list nor the initial-process status
  * holds a task, the master list is created (bracketed by an
  * "Initial process start" task). The work list is then processed, and once
  * the work-in-progress list is empty the archive is finalized, the resource
  * is flagged for harvest and the temporary batch files are deleted.
  *
  * @param mixed $resource_id
  * @param mixed $call_multiple_instance stored on the instance
  * @param int   $connectors_to_run      stored on the instance
  */
 function start_process($resource_id, $call_multiple_instance, $connectors_to_run = 1)
 {
     require_library('connectors/BOLDSysAPI');
     $this->func = new BOLDSysAPI();
     $this->resource_id = $resource_id;
     $this->call_multiple_instance = $call_multiple_instance;
     $this->connectors_to_run = $connectors_to_run;

     $nothing_started = !trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))
         && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS));
     if ($nothing_started) {
         // Divide the big list of ids into small files
         $marker = "Initial process start";
         Functions::add_a_task($marker, $this->INITIAL_PROCESS_STATUS);
         $this->func->create_master_list();
         Functions::delete_a_task($marker, $this->INITIAL_PROCESS_STATUS);
     }

     Functions::process_work_list($this);

     if (trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
         return; // tasks are still pending
     }
     $this->archive_builder->finalize(true);
     // Set to force harvest
     Functions::set_resource_status_to_force_harvest($resource_id);
     // Delete temp files
     Functions::delete_temp_files($this->TEMP_FILE_PATH . "sl_batch_", "txt");
 }
Beispiel #24
0
<?php

namespace php_active_record;

/* Connector for Photosynth
estimated execution time: 29 secs.

This connector uses an unofficial service to search the Photosynth server.
It also scrapes the Photosynth site to get the tags entered by owners, as the tags were not included in the service.
*/
$timestart = microtime(true);
include_once __DIR__ . "/../../config/environment.php";
require_library('connectors/PhotosynthAPI');
$GLOBALS['ENV_DEBUG'] = false;

// build the taxon XML for all Photosynth taxa
$taxa = PhotosynthAPI::get_all_taxa();
$xml = \SchemaDocument::get_taxon_xml($taxa);

// write it to the resource file; stop the script when the file cannot be opened
$resource_path = CONTENT_RESOURCE_LOCAL_PATH . "119.xml";
$OUT = fopen($resource_path, "w+");
if (!$OUT) {
    debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
    return;
}
fwrite($OUT, $xml);
fclose($OUT);

// report the elapsed time in seconds, minutes and hours
$elapsed_time_sec = microtime(true) - $timestart;
$elapsed_time_min = $elapsed_time_sec / 60;
echo "\n";
echo "elapsed time = {$elapsed_time_sec} sec              \n";
echo "elapsed time = " . $elapsed_time_min . " min   \n";
echo "elapsed time = " . $elapsed_time_min / 60 . " hr \n";
echo "\n\n Done processing.";
Beispiel #25
0
<?php

namespace php_active_record;

/*
NCBI Bio Projects
partner provides an XML dump
estimated execution time:
*/
include_once __DIR__ . "/../../config/environment.php";
require_library('connectors/NCBIProjectsAPI');
$timestart = time_elapsed();
$resource_id = 173;
$func = new NCBIProjectsAPI($resource_id);
$func->get_all_taxa();

// publish the working folder only when its taxon.tab is larger than 1000 bytes
$resource_dir = CONTENT_RESOURCE_LOCAL_PATH . $resource_id;
if (filesize($resource_dir . "_working/taxon.tab") > 1000) {
    if (is_dir($resource_dir)) {
        // keep the current folder as "_previous", replacing any older backup
        recursive_rmdir($resource_dir . "_previous");
        Functions::file_rename($resource_dir, $resource_dir . "_previous");
    }
    Functions::file_rename($resource_dir . "_working", $resource_dir);
    Functions::set_resource_status_to_force_harvest($resource_id);
}

$elapsed_time_sec = time_elapsed() - $timestart;
$elapsed_time_min = $elapsed_time_sec / 60;
echo "\n\n elapsed time = " . $elapsed_time_min . " minutes";
echo "\n elapsed time = " . $elapsed_time_min / 60 . " hours";
echo "\n Done processing.\n";
Beispiel #26
0
<?php

namespace php_active_record;

/* SciELO connector (resource 487)
estimated execution time:   */
include_once __DIR__ . "/../../config/environment.php";
require_library('connectors/SciELOAPI');
$timestart = time_elapsed();
$resource_id = "487";

// harvest all taxa, then finalize the generated archive
$scielo_connector = new SciELOAPI($resource_id);
$scielo_connector->get_all_taxa();
Functions::finalize_dwca_resource($resource_id);

$elapsed_time_sec = time_elapsed() - $timestart;
$elapsed_time_min = $elapsed_time_sec / 60;
echo "\n\n elapsed time = " . $elapsed_time_min . " minutes";
echo "\n elapsed time = " . $elapsed_time_min / 60 . " hours";
exit("\n Done processing.");
 /**
  * Builds a lookup of canonical scientific name => taxon rank from the WoRMS
  * taxon file at $this->text_path["worms"]["worms_taxon"].
  *
  * The first record returned by make_array() is discarded (presumably the
  * header row — confirm against FishBaseAPI::make_array()).
  *
  * @return array canonical name => taxonRank; empty when there are no records
  */
 private function get_worms_taxa()
 {
     require_library('connectors/FishBaseAPI');
     $func = new FishBaseAPI();
     $fields = array();
     $excluded_fields = array("taxonID", "furtherInformationURL", "referenceID", "namePublishedIn", "taxonomicStatus", "taxonRemarks", "rightsHolder", "parentNameUsageID");
     $taxa = $func->make_array($this->text_path["worms"]["worms_taxon"], $fields, "", $excluded_fields);
     array_shift($taxa);
     // initialise the result so an empty file yields an empty array instead
     // of an undefined variable
     $final = array();
     foreach ($taxa as $taxon) {
         $final[Functions::canonical_form($taxon["scientificName"])] = $taxon["taxonRank"];
     }
     unset($taxa);
     return $final;
 }
Beispiel #28
0
<?php

namespace php_active_record;

/* http://eol.org/content_partners/585/resources/831 - DATA-1622
This is a generic script that will convert EOL XML to EOL DWC-A
*/
include_once dirname(__FILE__) . "/../../config/environment.php";
require_library('connectors/ConvertEOLtoDWCaAPI');
$timestart = time_elapsed();
$resource_id = 831;
// Build the parameter set explicitly in one place: $params was previously used without being
// initialised, and "eol_xml_file" was first set to "" and then immediately overwritten.
$params = array(
    "eol_xml_file" => Functions::get_accesspoint_url_if_available($resource_id, "http://jhr.pensoft.net/lib/eol_exports/JHR.xml"),
    "filename" => "no need to mention here.xml",
    "dataset" => "Pensoft XML files",
    "resource_id" => $resource_id,
);
$func = new ConvertEOLtoDWCaAPI($resource_id);
// true => means it is an XML file, not an archive file nor a zip file
$func->export_xml_to_archive($params, true);
Functions::finalize_dwca_resource($resource_id);
// Report the total run time, first in minutes and then in hours.
$elapsed_time_sec = time_elapsed() - $timestart;
echo "\n\n";
echo "elapsed time = " . $elapsed_time_sec / 60 . " minutes \n";
echo "elapsed time = " . $elapsed_time_sec / 60 / 60 . " hours \n";
echo "\nDone processing.\n";
 /**
  * Processes the text files made available by self::load_zip_contents().
  *
  * Reads the taxon rows from $this->text_path[1], passes every row whose RECTYPE is one of
  * "Species", "Genus/Subgenus", "Species Group" or "Super-Generic" to
  * create_instances_from_taxon_object(), prints a few counters and then runs the
  * free-text, geo-data, ecology, pollen-visitation and predation processors.
  *
  * @return bool false when self::load_zip_contents() fails, true otherwise
  */
 private function process_text_files()
 {
     if (!self::load_zip_contents()) {
         return false;
     }
     print_r($this->text_path);
     require_library('connectors/FishBaseAPI');
     $func = new FishBaseAPI();
     $fields = array("IDNUM", "TAXONID", "TAXID1", "SUBORDER", "TAXID2", "DIVISION", "TAXID5", "SUPERFAM", "TAXID9", "FAMILY", "TAXID13", "SUBFAM", "TAXID17", "TRIBE", "TAXID20", "SUBTRIBE", "GENUS", "SUBGENUS", "SPECIES", "SUBSPECS", "RECTYPCD", "RECTYPE", "UPDATE", "UPDUSER", "LOCGENUS", "LOCSUBGN", "SPCGROUP", "LOCSPECS", "LOCSUBSP", "INFRASUB", "CAUTHOR", "ORIGAUTH", "AUTHOR", "PUBYEAR", "JOURNAL", "COLLATN", "INTROADV", "TYPEDEP", "TYPMATTX", "TYPELOC", "SEXTX", "TSGENUS", "TSSUBGEN", "TSSPECS", "TSSUBSP", "TSAUTHOR", "TSKIND", "INFRATYP", "LOCVAR", "SEQUENCE", "UPSTAT");
     // "^" is passed as the last argument - presumably the field separator; TODO confirm against make_array().
     $taxa = $func->make_array($this->text_path[1], $fields, "", array(), "^");
     /* to get the breakdown of taxa levels
        foreach($taxa as $rec)
        {
            if(@$temp[$rec["RECTYPE"]]) $temp[$rec["RECTYPE"]]++;
            else $temp[$rec["RECTYPE"]] = 1;
        }
        print_r($temp);
        // [Super-Generic ]        => 614 xxx
        // [Genus/Subgenus ]       => 2540 xxx
        // [Genus/Subgenus Synon]  => 3459
        // [Species ]              => 19970 xxx
        // [Species Synonym ]      => 11083
        // [Unplaced Taxon ]       => 445 xxx
        // [Nomen Nudum ]          => 177
        // [Species Group ]        => 440 xxx
        */
     // NOTE(review): the breakdown above lists RECTYPE keys with a trailing space ("Species ");
     // confirm make_array() trims values, otherwise the comparison below would not match them.
     $accepted_types = array("Species", "Genus/Subgenus", "Species Group", "Super-Generic");
     // Initialise both accumulators so the counters below work even when no row is accepted
     // (count() on an undefined variable is an error on PHP 8).
     $temp = array();
     $link = array();
     foreach ($taxa as $rec) {
         if (!in_array($rec["RECTYPE"], $accepted_types)) {
             continue;
         }
         // Species groups are named by SPCGROUP; everything else by "GENUS SPECIES".
         if ($rec["RECTYPE"] == "Species Group") {
             $sciname = $rec["SPCGROUP"];
         } else {
             $sciname = trim($rec["GENUS"] . " " . $rec["SPECIES"]);
         }
         // Used as a set: only the number of distinct names is reported.
         $temp[$sciname] = 1;
         $link = $this->create_instances_from_taxon_object($rec, $link);
     }
     echo "\n\n total rows: " . count($taxa);
     echo "\n total taxa: " . count($temp);
     echo "\n link: " . count($link) . "\n";
     // The processors are run for their side effects; their return values were never used.
     self::process_free_text($func, $link);
     self::process_geo_data($link);
     self::process_ecology($func, $link);
     self::process_pollen_visitation($func, $link);
     self::process_predation($func, $link);
     return true;
     /* Waiting from SPG on items below this line */
     // some sort of references
     // $fields = array("IDNUM", "REVSNTX", "TAXNTX");
     // $references = $func->make_array($this->text_path[4], $fields, "", array(), "^");
     // references for biology and morphology
     // $fields = array("IDNUM", "BIOLGYTX", "MORPHTX");
     // $texts = $func->make_array($this->text_path[5], $fields, "", array(), "^");
     // $ref_ids = array();
     // $agent_ids = array();
     // foreach($texts as $text)
     // {
     //     $biology = $text["BIOLGYTX"];
     //     $morphology = $text["MORPHTX"];
     //     $taxon_id = @$link[$text["IDNUM"]];
     //     if($biology) $ref_ids = array_merge($ref_ids, get_object_reference_ids($biology));
     //     if($morphology) $ref_ids = array_merge($ref_ids, get_object_reference_ids($morphology));
     // }
     // comma-separated taxon names
     // $fields = array("IDNUM", "AHOSTTX", "PHOSTTX");
     // $taxon_remarks = $func->make_array($this->text_path[7], $fields, "", array(), "^");
     // comma-separated taxon names
     // $fields = array("IDNUM", "PARATX", "SECHOSTX");
     // $taxon_remarks2 = $func->make_array($this->text_path[8], $fields, "", array(), "^");
     // wait for SPG to classify
     // $fields = array("IDNUM", "HYPATX", "SYNTX");
     // $comments = $func->make_array($this->text_path[10], $fields, "", array(), "^");
 }
Beispiel #30
0
<?php

/* (C) 2009 Netology Joy Web Framework v.0.2, All rights reserved.
 *
 * Author(s):
 *   Hasan Ozgan (meddah@netology.org)
 * 
 * For the full copyright and license information, 
 * please view the LICENSE file that was distributed 
 * with this source code.
 */
require_library("/vendor/adodb/adodb.inc.php");
require_library("/validation.php");
class model
{
    public $db;
    public $error;
    public $is_valid;
    public function __construct($conn)
    {
        $this->error = array();
        $this->is_valid = new validation();
        if (is_object($conn) == false) {
            $this->db = database::connection($conn);
        } else {
            if (get_parent_class($conn) == "ADOConnection") {
                $this->db = $conn;
            } else {
                die("Database Object Not Found");
            }
        }