/**
 * Fetches a gzip-compressed resource document, decompresses it inside a
 * temporary directory and returns the decompressed contents.
 *
 * Only paths ending in ".gz" or ".gzip" are handled. For any other path nothing
 * is fetched and the initial empty string is returned.
 * NOTE(review): confirm that returning "" for uncompressed paths is intended.
 *
 * Side effect: $this->xml_path is repointed at the decompressed file inside the
 * temporary directory; that directory is removed again before returning.
 *
 * @return mixed Whatever Functions::get_remote_file() yields for the
 *               decompressed file, "" when the path is not gzip-compressed,
 *               or false when the download or the temp-file write fails.
 */
function load_xml_string()
 {
     $file_contents = "";
     debug("Please wait, downloading resource document...");
     if (preg_match("/^(.*)\\.(gz|gzip)\$/", $this->xml_path, $arr)) {
         // The suffix that actually matched (".gz" or ".gzip").
         $suffix = "." . $arr[2];
         $path_parts = pathinfo($this->xml_path);
         $filename = $path_parts['basename'];
         $temp_dir = create_temp_dir() . "/";
         debug("temp file path: " . $temp_dir);
         $file_contents = Functions::get_remote_file($this->xml_path, array('timeout' => 172800));
         if (!$file_contents) {
             // Do not leave the empty temp directory behind on failure.
             recursive_rmdir($temp_dir);
             debug("Connector terminated. Remote files are not ready.");
             return false;
         }
         $temp_file_path = $temp_dir . $filename;
         if (!($TMP = fopen($temp_file_path, "w"))) {
             debug("Couldn't open file: " . $temp_file_path);
             recursive_rmdir($temp_dir);
             return false;
         }
         fwrite($TMP, $file_contents);
         fclose($TMP);
         // -S tells gunzip which suffix to strip, so ".gzip" files are handled
         // as well; both arguments are quoted so unusual file names survive.
         shell_exec("gunzip -f -S " . escapeshellarg($suffix) . " " . escapeshellarg($temp_file_path));
         // Strip only the trailing suffix (not every ".gz" inside the name).
         $this->xml_path = $temp_dir . substr($filename, 0, -strlen($suffix));
         debug("xml path: " . $this->xml_path);
         echo "\n {$temp_dir} \n";
         $file_contents = Functions::get_remote_file($this->xml_path, array('timeout' => 172800));
         recursive_rmdir($temp_dir);
         // remove temp dir
         echo "\n temporary directory removed: [{$temp_dir}]\n";
     }
     return $file_contents;
 }
 /**
  * Downloads the FishBase archive into a fresh temporary directory, extracts
  * it and records the paths of the extracted text files in $this->text_path.
  *
  * The files are looked for directly in the temporary directory first and
  * then in a "fishbase" sub-folder. When taxon.txt is found in neither place,
  * or the download / temp-file write fails, the method returns without
  * populating $this->text_path.
  *
  * @return void
  */
 function load_zip_contents()
 {
     $this->TEMP_FILE_PATH = create_temp_dir() . "/";
     if ($file_contents = Functions::get_remote_file($this->fishbase_data, array('timeout' => 172800))) {
         $temp_file_path = $this->TEMP_FILE_PATH . "/fishbase.zip";
         // fopen() returns false on failure; writing to it would raise an error.
         if (!($TMP = fopen($temp_file_path, "w"))) {
             echo "\n\n Couldn't open file: " . $temp_file_path . "\n\n";
             return;
         }
         fwrite($TMP, $file_contents);
         fclose($TMP);
         // NOTE(review): the file is named .zip but is unpacked as a gzipped
         // tarball -- confirm the remote file really is a tar.gz.
         shell_exec("tar -xzf " . escapeshellarg($temp_file_path) . " -C " . escapeshellarg($this->TEMP_FILE_PATH));
         if (!file_exists($this->TEMP_FILE_PATH . "/taxon.txt")) {
             // Fall back to a sub-folder named after the archive.
             $this->TEMP_FILE_PATH = str_ireplace(".zip", "", $temp_file_path);
             if (!file_exists($this->TEMP_FILE_PATH . "/taxon.txt")) {
                 return;
             }
         }
         $this->text_path['TAXON_PATH'] = $this->TEMP_FILE_PATH . "/taxon.txt";
         $this->text_path['TAXON_COMNAMES_PATH'] = $this->TEMP_FILE_PATH . "/taxon_comnames.txt";
         $this->text_path['TAXON_DATAOBJECT_PATH'] = $this->TEMP_FILE_PATH . "/taxon_dataobject.txt";
         $this->text_path['TAXON_DATAOBJECT_AGENT_PATH'] = $this->TEMP_FILE_PATH . "/taxon_dataobject_agent.txt";
         $this->text_path['TAXON_DATAOBJECT_REFERENCE_PATH'] = $this->TEMP_FILE_PATH . "/taxon_dataobject_reference.txt";
         $this->text_path['TAXON_REFERENCES_PATH'] = $this->TEMP_FILE_PATH . "/taxon_references.txt";
         $this->text_path['TAXON_SYNONYMS_PATH'] = $this->TEMP_FILE_PATH . "/taxon_synonyms.txt";
     } else {
         echo "\n\n Connector terminated. Remote files are not ready.\n\n";
         return;
     }
 }
Пример #3
0
 /**
  * Initialises the connector's configuration: download options, the partner
  * service endpoints (NCBI, GGBN, GBIF, BHL, BOLDS, EOL) and the dump-file
  * locations inside a fresh temporary directory.
  *
  * @param string|null $folder Resource folder name; archive-building state is
  *                            only prepared when this is truthy.
  * @param mixed       $query  Stored on the instance when $folder is given.
  */
 function __construct($folder = null, $query = null)
 {
     if ($folder) {
         $working_dir = CONTENT_RESOURCE_LOCAL_PATH . '/' . $folder . '_working/';
         $this->query = $query;
         $this->taxa = array();
         $this->path_to_archive_directory = $working_dir;
         $this->archive_builder = new \eol_schema\ContentArchiveBuilder(array('directory_path' => $this->path_to_archive_directory));
         $this->occurrence_ids = array();
         $this->measurement_ids = array();
     }

     // expire_seconds: 2 months
     $this->download_options = array(
         'expire_seconds' => 5184000,
         'download_wait_time' => 2000000,
         'timeout' => 10800,
         'download_attempts' => 1
     );
     // $this->download_options['expire_seconds'] = false; //debug

     // Families list: the local copy is immediately overridden by the remote one.
     $this->families_list = "http://localhost/cp/NCBIGGI/falo2.in";
     $this->families_list = "https://dl.dropboxusercontent.com/u/7597512/NCBI_GGI/falo2.in";

     // NCBI service
     $this->family_service_ncbi = "http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=nucleotide&usehistory=y&term=";
     // $this->family_service_ncbi = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=nucleotide&usehistory=y&term=";
     /* to page through all Id's without overwhelming the server use: &retmax=10&retstart=0 */

     // GGBN data portal: the original endpoint is overridden by the one
     // "Dröge, Gabriele" <*****@*****.**> advised to use instead, Apr 17, 2014
     $this->family_service_ggbn = "http://www.dnabank-network.org/Query.php?family=";
     $this->family_service_ggbn = "http://data.ggbn.org/Query.php?family=";

     // GBIF services, e.g. http://api.gbif.org/v1/species/match?name=felidae&kingdom=Animalia
     $gbif_api = "http://api.gbif.org/v1/";
     $this->gbif_taxon_info = $gbif_api . "species/match?name=";
     $this->gbif_record_count = $gbif_api . "occurrence/count?taxonKey=";

     // BHL services
     $bhl_site = "http://www.biodiversitylibrary.org/";
     $this->bhl_taxon_page = $bhl_site . "name/";
     $this->bhl_taxon_in_csv = $bhl_site . "namelistdownload/?type=c&name=";
     $this->bhl_taxon_in_xml = $bhl_site . "api2/httpquery.ashx?op=NameGetDetail&apikey=deabdd14-65fb-4cde-8c36-93dc2a5de1d8&name=";

     // BOLDS portal
     $bolds_site = "http://www.boldsystems.org/index.php/";
     $this->bolds_taxon_page = $bolds_site . "Taxbrowser_Taxonpage?searchTax=&taxon=";
     $this->bolds_taxon_page_id = $bolds_site . "Taxbrowser_Taxonpage?taxid=";
     $this->bolds["TaxonSearch"] = $bolds_site . "API_Tax/TaxonSearch?taxName=";
     $this->bolds["TaxonData"] = $bolds_site . "API_Tax/TaxonData?dataTypes=basic,stats&taxId=";

     // stats: dump files live in a per-instance temporary directory
     $this->TEMP_DIR = create_temp_dir() . "/";
     $this->names_no_entry_from_partner_dump_file = $this->TEMP_DIR . "names_no_entry_from_partner.txt";
     $this->name_from_eol_api_dump_file = $this->TEMP_DIR . "name_from_eol_api.txt";
     $this->names_dae_to_nae_dump_file = $this->TEMP_DIR . "names_dae_to_nae.txt";
     /* // FALO report
        $this->names_in_falo_but_not_in_irmng = $this->TEMP_DIR . "families_in_falo_but_not_in_irmng.txt";
        $this->names_in_irmng_but_not_in_falo = $this->TEMP_DIR . "families_in_irmng_but_not_in_falo.txt";
        */

     $this->ggi_databases = array("ncbi", "ggbn", "gbif", "bhl", "bolds");
     // $this->ggi_databases = array("gbif"); //debug
     $this->ggi_path = DOC_ROOT . "temp/GGI/";

     // EOL API endpoints; the page URL is assembled from the two "page" parts.
     $this->eol_api["search"] = "http://eol.org/api/search/1.0.json?page=1&exact=true&filter_by_taxon_concept_id=&filter_by_hierarchy_entry_id=&filter_by_string=&cache_ttl=&q=";
     $this->eol_api["page"][0] = "http://eol.org/api/pages/1.0/";
     $this->eol_api["page"][1] = ".json?images=0&videos=0&sounds=0&maps=0&text=0&iucn=false&subjects=overview&licenses=all&details=true&common_names=false&synonyms=false&references=false&vetted=1&cache_ttl=";

     // Label associated with each database key.
     $labels = array(
         "ncbi" => "NCBI Taxonomy",
         "gbif" => "GBIF Nub Taxonomy",
         "ggbn" => "ITIS Catalogue of Life",
         "bolds" => "-BOLDS-"
     );
     foreach ($labels as $database => $label) {
         $this->databases_to_check_eol_api[$database] = $label;
     }

     $this->temp_family_table_file = DOC_ROOT . "tmp/family_table.txt";
 }
Пример #4
0
 /**
  * Generates the taxa list for the given letters and saves the data to text,
  * using a fresh temporary directory as the working path.
  *
  * The processed text files still need to be consolidated before proceeding.
  *
  * @param mixed $letters Passed through to generate_taxa_list() and echoed
  *                       in the completion message.
  * @return void
  */
 function get_all_taxa_v2($letters)
 {
     $work_dir = create_temp_dir();
     $this->TEMP_FILE_PATH = $work_dir . "/";

     self::generate_taxa_list($letters);
     self::save_data_to_text();

     echo "\n\n finished processing: [" . $letters . "]\n\n";
 }
 /**
  * Saves the remote spreadsheet locally, converts it to a tab-delimited text
  * file in a new temporary directory and repoints $this->source_file_path at
  * that text file. Does nothing when the download yields a falsy result.
  *
  * @return void
  */
 private function prepare_files()
 {
     $options = array("cache" => 1, "timeout" => 3600, "file_extension" => "xlsx", 'download_attempts' => 2, 'delay_in_minutes' => 2);
     $input_file = Functions::save_remote_file_to_local($this->source_file_path, $options);
     if (!$input_file) {
         return;
     }

     $this->source_file_path = create_temp_dir() . "/" . "spg_falo.txt";
     self::convert_xlsx_to_tab($input_file, $this->source_file_path);
     // The downloaded spreadsheet is no longer needed once converted.
     unlink($input_file);
 }
Пример #6
0
 /**
  * Initialises the MycoBank connector: archive builder, search-service URLs,
  * download options, the list of name prefixes that must not be searched, and
  * the dump-file locations inside a fresh temporary directory.
  *
  * @param string $folder Resource folder name used to build the working
  *                       archive directory path.
  */
 function __construct($folder)
 {
     $this->taxa = array();
     $this->path_to_archive_directory = CONTENT_RESOURCE_LOCAL_PATH . '/' . $folder . '_working/';
     $this->archive_builder = new \eol_schema\ContentArchiveBuilder(array('directory_path' => $this->path_to_archive_directory));
     $this->synonym_ids = array();
     $this->name_id = array();
     $this->invalid_statuses = array("Orthographic variant", "Invalid", "Illegitimate", "Uncertain", "Unavailable", "Deleted");
     // Search-service URL prefixes; the search term is appended by the caller.
     $this->service_search["startswith_legitimate"] = 'http://www.mycobank.org/Services/Generic/SearchService.svc/rest/xml?layout=14682616000000161&limit=0&filter=NameStatus_="Legitimate" AND Name STARTSWITH ';
     $this->service_search["startswith"] = 'http://www.mycobank.org/Services/Generic/SearchService.svc/rest/xml?layout=14682616000000161&limit=0&filter=Name STARTSWITH ';
     $this->service_search["exact"] = 'http://www.mycobank.org/Services/Generic/SearchService.svc/rest/xml?layout=14682616000000161&limit=0&filter=Name=';
     $this->download_options = array('download_wait_time' => 5000000, 'expire_seconds' => 5184000, 'timeout' => 7200, 'delay_in_minutes' => 3);
     // 2 months expire_seconds -- overridden right below
     $this->download_options['expire_seconds'] = false;
     // $this->download_options['cache_path'] = "/Volumes/Eli blue/eol_cache/"; -- no longer used
     // /*
     $this->mycobank_taxa_list = "http://localhost/cp/MycoBank/mycobank_taxon.tab";
     $this->not_found_from_previous_harvest = "http://localhost/cp/MycoBank/not_found_from_previous_harvest.txt";
     // alias names_not_yet_entered.txt
     // */
     // $this->mycobank_taxa_list              = "https://dl.dropboxusercontent.com/u/7597512/MycoBank/mycobank_taxon.tab";
     // $this->not_found_from_previous_harvest = "https://dl.dropboxusercontent.com/u/7597512/MycoBank/not_found_from_previous_harvest.txt";
     // Name prefixes that must not be searched. The "Camarosporium p" entry
     // previously had a raw line break inside its literal, so it could never
     // match any name.
     $this->dont_search_more_than_5h = array(
         "Phoma ", "Uredo ", "Entoloma ", "Lichen ", "Patellaria ", "Hygrophorus ", "Mollisia ", "Omphalia ",
         "Cordyceps ", "Gloeosporium ", "Collema ", "Pholiota ", "Sticta ", "Placodium ", "Biatora ", "Thelephora ",
         "Lycoperdon ", "Thelotrema ", "Peltigera ", "Hydnum ", "Passalora ", "Pestalotia ", "Trametes ", "Pluteus ",
         "Peniophora ", "Candida ", "Valsa ", "Coprinus ", "Psilocybe ", "Diaporthe ", "Uromyces ", "Puccinia ",
         "Agaricus ", "Metasphaeria ", "Aspicilia ", "Poria ", "Pyrenula ", "Pleurotus ", "Acarospora ", "Catillaria ",
         "Alternaria ", "Sphaeropsis ", "Coniothyrium ", "Helminthosporium ", "Cetraria ", "Calicium ", "Cytospora ", "Phyllosticta ",
         "Macrophoma ", "Hymenoscyphus ", "Aspergillus ", "Colletotrichum ", "Rhodophyllus ", "Mucor ", "Peronospora ", "Porina ",
         "Cladosporium ", "Stereocaulon ", "Stereum ", "Rhizocarpon ", "Rhabdospora ", "Laboulbenia ", "Lentinus ", "Naucoria ",
         "Xanthoparmelia ", "Xylaria ", "Crepidotus ", "Dasyscyphus ", "Hebeloma ", "Dicaeoma ", "Fomes ", "Arthopyrenia ",
         "Ramaria ", "Hygrocybe ", "Graphina ", "Saccharomyces ", "Physarum ", "Merulius ", "Tremella ", "Dothidea ",
         "Camarosporium ", "Cercospora ", "Fusarium ", "Sphaerella ", "Parmelia ", "Lecanora ", "Verrucaria ", "Lecidea ",
         "Sphaeria ", "Ascochyta ", "Hendersonia ", "Physcia ", "Helotium ", "Boletus ", "Buellia ", "Diplodia ",
         "Peziza ", "Nectria ", "Lepiota ", "Asterina ", "Collybia ", "Leptosphaeria ", "Pleospora ", "Erysiphe ",
         "Arthonia ", "Hypoxylon ", "Clitocybe ", "Graphis ", "Opegrapha ", "Rinodina ", "Mycosphaerella ", "Phomopsis ",
         "Phyllachora ", "Pseudocercospora ", "Marasmius ", "Usnea ", "Ustilago ", "Clavaria ", "Bacidia ", "Polystictus ",
         "Aecidium ", "Psathyrella ", "Ramularia ", "Corticium ", "Polyporus ", "Ramalina ", "Amanita ", "Tricholoma ",
         "Lactarius ", "Penicillium ", "Septoria ", "Russula ", "Cladonia ", "Inocybe ", "Meliola ", "Caloplaca ",
         "Cortinarius ", "Agaricus p", "Agaricus c", "Camarosporium p", "Pertusaria ", "Sphaeronaema ", "Parmelia c", "Parmelia p",
         "Parmelia s", "Lecanora c", "Lecanora s", "Puccinia a", "Puccinia c", "Puccinia p", "Agaricus a", "Agaricus m",
         "Lecidea a", "Lecidea c", "Lecidea p", "Lecidea s", "Sphaeria c", "Oidium ", "Stagonospora ", "Didymosphaeria ",
         "Diplodina ", "Didymella ", "Mycena ", "Agaricus s", "Montagnellaceae ", "Cantharellus ", "Conocybe ", "Lachnum ",
         "Allantoporthe ", "Eccilia ", "Phaeangium ", "Hypochnus ", "Hypocline ", "Hypocopra ", "Melaspilea ", "Pseudomicrocera ",
         "Pseudonectria ", "Hypocrea ", "Asteridiella ", "Fungus ", "Cortinarius c", "Cortinarius p", "Cortinarius s"
     );
     $this->dont_search_these_strings_as_well = array("");
     $this->dump_no = 0;
     $this->dump_no2 = 1;
     //for stats
     $this->TEMP_DIR = create_temp_dir() . "/";
     // $this->TEMP_DIR = DOC_ROOT . '/public/tmp/mycobank/'; //debug
     $this->dump_file = $this->TEMP_DIR . "mycobank_dump.txt";
     $this->names_with_error_dump_file = $this->TEMP_DIR . "names_with_error.txt";
     // stores names when API timesout or has errors
     $this->more_than_1k = $this->TEMP_DIR . "more_than_1k.txt";
     $this->more_than_5h = $this->TEMP_DIR . "more_than_5h.txt";
     $this->taxa_dump_file = $this->TEMP_DIR . "dump_taxa.txt";
     $this->names_not_yet_entered = array();
     $this->no_entry_parent = array();
     $this->no_entry_current = array();
     $this->no_entry_synonym = array();
     $this->names_not_yet_entered_dump_file = $this->TEMP_DIR . "names_not_yet_entered.txt";
     // stores names that are missing, not yet searched, not yet cached
     $this->no_entry_parent_dump_file = $this->TEMP_DIR . "no_entry_parent.txt";
     $this->no_entry_current_dump_file = $this->TEMP_DIR . "no_entry_current.txt";
     $this->no_entry_synonym_dump_file = $this->TEMP_DIR . "no_entry_synonym.txt";
     $this->debug = array();
     /*  as of Apr 28 2014
             type:
                 [Basionym] => 239143
                 [Combination] => 122550
                 [Nomen novum] => 4203
             status:
                 [Legitimate] => 365896
         */
 }
Пример #7
0
 /**
  * Initialises bookkeeping arrays, the ids list file inside a fresh temporary
  * directory, and the download options (including a fixed cache path).
  *
  * @param mixed $folder Not used by this constructor.
  */
 function __construct($folder)
 {
     // Vocabulary URI.
     $this->SPM = 'http://rs.tdwg.org/ontology/voc/SPMInfoItems';

     // Id registries, all initially empty.
     $this->resource_reference_ids = array();
     $this->resource_agent_ids = array();
     $this->occurrence_ids = array();
     $this->taxon_ids = array();

     // Scratch space for the list of Tropicos ids.
     $scratch = create_temp_dir();
     $this->TEMP_DIR = $scratch . "/";
     $this->tropicos_ids_list_file = $this->TEMP_DIR . "tropicos_ids.txt";

     // A 'delay_in_minutes' => 1 entry is deliberately left out.
     $this->download_options = array(
         'resource_id' => 218,
         'cache_path' => '/Volumes/Eli black/eol_cache_tropicos/',
         'expire_seconds' => false,
         'download_wait_time' => 1000000,
         'timeout' => 10800,
         'download_attempts' => 1
     );
 }
 /**
  * Initialises the archive builder for the resource's working directory,
  * the bookkeeping arrays, the ids list file inside a fresh temporary
  * directory, and the download options.
  *
  * @param string $folder Resource folder name used to build the working
  *                       archive directory path.
  */
 function __construct($folder)
 {
     $working_dir = CONTENT_RESOURCE_LOCAL_PATH . '/' . $folder . '_working/';
     $this->path_to_archive_directory = $working_dir;
     $this->archive_builder = new \eol_schema\ContentArchiveBuilder(array('directory_path' => $this->path_to_archive_directory));

     // Vocabulary URI.
     $this->SPM = 'http://rs.tdwg.org/ontology/voc/SPMInfoItems';

     // Id registries, all initially empty.
     $this->resource_reference_ids = array();
     $this->resource_agent_ids = array();
     $this->occurrence_ids = array();
     $this->taxon_ids = array();

     // Scratch space for the list of Tropicos ids.
     $scratch = create_temp_dir();
     $this->TEMP_DIR = $scratch . "/";
     $this->tropicos_ids_list_file = $this->TEMP_DIR . "tropicos_ids.txt";

     $this->download_options = array(
         'expire_seconds' => false,
         'download_wait_time' => 1000000,
         'timeout' => 10800,
         'download_attempts' => 1,
         'delay_in_minutes' => 1
     );
 }
Пример #9
0
 /**
  * Resolves the directory that output should be written to.
  *
  * When no output path was specified, a new temporary directory is created.
  * Otherwise the specified path is returned if it already is a directory or
  * can be created; on failure an error is recorded in $this->errors.
  *
  * @return mixed The specified path, the result of create_temp_dir('dwca'),
  *               or null when the specified directory could not be created.
  */
 public function output_directory()
 {
     // No explicit target: fall back to a temporary directory.
     if (!$this->path_to_specified_output_path) {
         return create_temp_dir('dwca');
     }

     // Use the target if it exists already, or as soon as it has been created.
     if (is_dir($this->path_to_specified_output_path) || mkdir($this->path_to_specified_output_path)) {
         return $this->path_to_specified_output_path;
     }

     $this->errors[] = "Unable to extract contents from Excel file";
     return null;
 }
 /**
  * Initialises the archive builder, the species-list export URL, download
  * options and the dump file inside a fresh temporary directory.
  *
  * @param string|null $folder Resource folder name used to build the working
  *                            archive directory path.
  */
 function __construct($folder = null)
 {
     $working_dir = CONTENT_RESOURCE_LOCAL_PATH . '/' . $folder . '_working/';
     $this->path_to_archive_directory = $working_dir;
     $this->archive_builder = new \eol_schema\ContentArchiveBuilder(array('directory_path' => $this->path_to_archive_directory));

     $this->taxon_ids = array();
     $this->occurrence_ids = array();

     // $this->species_list_export = "http://localhost/~eolit/cp/Environments/eol_env_annotations_noParentTerms.tar.gz"; //local
     $this->species_list_export = "http://download.jensenlab.org/EOL/eol_env_annotations_noParentTerms.tar.gz";

     // expire_seconds is false: since taxon_concept_id and hierarchy_entry_id
     // won't change, the resulting API response won't change either.
     $this->download_options = array(
         'timeout' => 3600,
         'download_attempts' => 1,
         'delay_in_minutes' => 1,
         'expire_seconds' => false
     );
     // $this->download_options['cache_path'] = "/Volumes/Eli blue/eol_cache/";
     // $this->download_options['expire_seconds'] = 0;

     // stats
     $scratch = create_temp_dir();
     $this->TEMP_DIR = $scratch . "/";
     $this->need_to_check_tc_id_dump_file = $this->TEMP_DIR . "need_to_check_tc_id.txt";
 }
Пример #11
0
 /**
  * Initialises the BOLD work-list file locations, the taxon data service URL,
  * the stats dump files inside a fresh temporary directory and the download
  * options.
  */
 public function __construct()
 {
     // All work-list files live in the same BOLD directory ("hl" = higher level taxa).
     $bold_dir = DOC_ROOT . "/update_resources/connectors/files/BOLD/";
     $this->TEMP_FILE_PATH = $bold_dir;
     $this->WORK_LIST = $bold_dir . "hl_work_list.txt";
     $this->WORK_IN_PROGRESS_LIST = $bold_dir . "hl_work_in_progress_list.txt";
     $this->INITIAL_PROCESS_STATUS = $bold_dir . "hl_initial_process_status.txt";
     $this->MASTER_LIST = $bold_dir . "hl_master_list.txt";
     // $this->MASTER_LIST            = DOC_ROOT . "/update_resources/connectors/files/BOLD/hl_master_list_small.txt"; // debug

     $this->service["id"] = "http://www.boldsystems.org/index.php/API_Tax/TaxonData?dataTypes=basic,stats,geo&includeTree=true&taxId=";

     // for stats
     $scratch = create_temp_dir();
     $this->TEMP_DIR = $scratch . "/";
     $this->erroneous_ids = $this->TEMP_DIR . "erroneous_ids.txt";
     $this->does_not_exist_anymore = $this->TEMP_DIR . "does_not_exist_anymore.txt";

     $this->download_options = array(
         'expire_seconds' => 7776000,
         'download_wait_time' => 500000,
         'timeout' => 1200,
         'download_attempts' => 2
     );
     // $this->download_options['cache_path'] = "/Volumes/Eli blue/eol_cache/";
 }
Пример #12
0
 /**
  * Reads taxa and traits from the spreadsheet described by $params, writes
  * the per-taxon text files into a temporary directory, packs that directory
  * into <DOC_ROOT>/public/tmp/trait_request/<name>.tar.gz and finally removes
  * the temporary directory.
  *
  * @param array $params Spreadsheet access parameters; $params["name"] is
  *                      used as the base name of the resulting archive.
  * @return void
  */
 function generate_traits_for_taxa($params)
 {
     $this->temp_dir = create_temp_dir() . "/";
     $taxa = self::access_spreadsheet($params, 0);
     $this->traits = self::access_spreadsheet($params, 1);
     // First pass collects the measurement fields, second pass writes the data.
     self::process_taxa($taxa, "get_measurement_fields");
     self::initialize_text_files();
     self::process_taxa($taxa, "process_taxa");
     self::delete_blank_text_files();
     // compress text files, delete temp dir
     $trait_request_dir_path = DOC_ROOT . "/public/tmp/trait_request/";
     if (!is_dir($trait_request_dir_path)) {
         mkdir($trait_request_dir_path);
     }
     // The archive name comes from the caller, so quote every shell argument:
     // names with spaces or shell metacharacters must not break (or hijack)
     // the command line.
     $archive_file = $trait_request_dir_path . $params["name"] . ".tar.gz";
     $command_line = "tar -czf " . escapeshellarg($archive_file) . " --directory=" . escapeshellarg($this->temp_dir) . " .";
     shell_exec($command_line);
     recursive_rmdir($this->temp_dir);
 }
Пример #13
0
 /**
  * Initialises the archive builder, the site domain and URL exclusion list,
  * the offline-URL list locations and the download options.
  *
  * @param string $folder Resource folder name used to build the working
  *                       archive directory path.
  */
 function __construct($folder)
 {
     $working_dir = CONTENT_RESOURCE_LOCAL_PATH . '/' . $folder . '_working/';
     $this->path_to_archive_directory = $working_dir;
     $this->archive_builder = new \eol_schema\ContentArchiveBuilder(array('directory_path' => $this->path_to_archive_directory));

     $this->taxon_ids = array();
     $this->object_ids = array();
     $this->debug = array();

     $this->domain = "http://americaninsects.net/";
     $this->to_exclude = array(
         "index.", "glossary.", "maps.", "acknowledgment", "about.html", "faq.html",
         "works-consulted", "23", "24", "periplaneta-americana", "/http:"
     );

     // Stored list of offline URLs: the local copy is immediately overridden
     // by the remote one.
     $this->stored_offline_urls_dump_file = "http://localhost/~eolit/cp/AmericanInsects/offline_urls.txt";
     $this->stored_offline_urls_dump_file = "https://dl.dropboxusercontent.com/u/7597512/AmericanInsects/offline_urls.txt";

     // Not set here: "expire_seconds" => 0 , "delay_in_minutes" => 2
     $this->download_options = array(
         "download_wait_time" => 1000000,
         "timeout" => 1800,
         "download_attempts" => 1,
         "cache" => 1
     );

     //for stats
     $scratch = create_temp_dir();
     $this->TEMP_DIR = $scratch . "/";
     $this->current_offline_urls_dump_file = $this->TEMP_DIR . "offline_urls.txt";
     $this->current_offline_urls_dump_file2 = $this->TEMP_DIR . "offline_urls_unique.txt";
 }
Пример #14
0
 /**
  * Downloads an archive (.tar.gz, .tgz or .zip), extracts it into a fresh
  * temporary directory and locates the extracted content.
  *
  * The content is considered found when $check_file_or_folder_name exists
  * either directly in the temporary directory or inside a sub-folder named
  * after the archive (archive file name without its extension).
  *
  * NOTE(review): on the failure paths the temporary directory is left on
  * disk, as before; consider cleaning it up.
  *
  * @param string $dwca_file                 URL or path of the archive.
  * @param string $check_file_or_folder_name File or folder expected after extraction.
  * @param array  $download_options          Options passed to Functions::lookup_with_cache().
  * @return array|null array('archive_path' => ..., 'temp_dir' => ...) on
  *                    success, null on any failure.
  */
 function extract_archive_file($dwca_file, $check_file_or_folder_name, $download_options = array('timeout' => 172800, 'expire_seconds' => 0))
 {
     debug("Please wait, downloading resource document...");
     $path_parts = pathinfo($dwca_file);
     $filename = $path_parts['basename'];
     $temp_dir = create_temp_dir() . "/";
     debug($temp_dir);
     if ($file_contents = Functions::lookup_with_cache($dwca_file, $download_options)) {
         $temp_file_path = $temp_dir . "" . $filename;
         debug("temp_dir: {$temp_dir}");
         debug("Extracting... {$temp_file_path}");
         if (!($TMP = Functions::file_open($temp_file_path, "w"))) {
             return;
         }
         fwrite($TMP, $file_contents);
         fclose($TMP);
         sleep(5);
         // The file name is derived from the URL, so it is quoted before it
         // reaches the shell.
         if (preg_match("/^(.*)\\.(tar\\.gz|tgz)\$/", $dwca_file, $arr)) {
             $cur_dir = getcwd();
             chdir($temp_dir);
             shell_exec("tar -zxvf " . escapeshellarg($temp_file_path));
             chdir($cur_dir);
             // Strip whichever tarball suffix is present (.tar.gz or .tgz);
             // previously ".tgz" was left in place, so the sub-folder check
             // below could never succeed for .tgz archives.
             $archive_path = preg_replace("/\\.(tar\\.gz|tgz)\$/i", "", $temp_file_path);
         } elseif (preg_match("/^(.*)\\.(zip)\$/", $dwca_file, $arr) || preg_match("/mcz_for_eol(.*?)/ims", $dwca_file, $arr)) {
             shell_exec("unzip -ad " . escapeshellarg($temp_dir) . " " . escapeshellarg($temp_file_path));
             $archive_path = str_ireplace(".zip", "", $temp_file_path);
         } else {
             debug("-- archive not gzip or zip. [{$dwca_file}]");
             return;
         }
         debug("archive path: [" . $archive_path . "]");
     } else {
         debug("Connector terminated. Remote files are not ready.");
         return;
     }
     if (file_exists($temp_dir . $check_file_or_folder_name)) {
         // Content was extracted straight into the temporary directory.
         return array('archive_path' => $temp_dir, 'temp_dir' => $temp_dir);
     } elseif (file_exists($archive_path . "/" . $check_file_or_folder_name)) {
         // Content was extracted into a sub-folder named after the archive.
         return array('archive_path' => $archive_path, 'temp_dir' => $temp_dir);
     } else {
         debug("Can't extract archive file. Program will terminate.");
         return;
     }
 }
Пример #15
0
 /**
  * Initialises the archive builder, the site URLs, rights holder and compiler
  * agents, vocabulary URIs and the dump files inside a fresh temporary
  * directory.
  *
  * @param string $folder Resource folder name used to build the working
  *                       archive directory path.
  */
 function __construct($folder)
 {
     $working_dir = CONTENT_RESOURCE_LOCAL_PATH . '/' . $folder . '_working/';
     $this->path_to_archive_directory = $working_dir;
     $this->archive_builder = new \eol_schema\ContentArchiveBuilder(array('directory_path' => $this->path_to_archive_directory));

     // Registries, all initially empty.
     $this->taxa = array();
     $this->resource_reference_ids = array();
     $this->resource_agent_ids = array();
     $this->taxon_ids = array();
     $this->object_ids = array();

     $this->domain = "http://turbellaria.umaine.edu";
     $this->taxa_url = $this->domain . "/turb3.php?action=1&code=";
     $this->rights_holder = "National Science Foundation - Turbellarian Taxonomic Database";

     // Every agent is a compiler sharing the same homepage.
     $this->agents = array();
     $compilers = array("Seth Tyler", "Steve Schilling", "Matt Hooge", "Louise Bush");
     foreach ($compilers as $compiler) {
         $this->agents[] = array("role" => "compiler", "homepage" => "http://turbellaria.umaine.edu/", "name" => $compiler);
     }

     $this->SPM = "http://rs.tdwg.org/ontology/voc/SPMInfoItems";
     $this->EOL = 'http://www.eol.org/voc/table_of_contents';

     $scratch = create_temp_dir();
     $this->TEMP_DIR = $scratch . "/";
     $this->dump_file = $this->TEMP_DIR . "turbellarian_dump.txt";
     $this->dump_file_hierarchy = $this->TEMP_DIR . "turbellarian_hierarchy_dump.txt";
     $this->dump_file_synonyms = $this->TEMP_DIR . "turbellarian_synonyms_dump.txt";
 }
Пример #16
0
 /**
  * Fetches the FishBase zip (through the cache), unzips it into a fresh
  * temporary directory and records the extracted text-file paths in
  * $this->text_path.
  *
  * The files are looked for directly in the temporary directory first and
  * then in a "fishbase" sub-folder; if taxon.txt is in neither, the method
  * returns without populating $this->text_path.
  *
  * @return void
  */
 function load_zip_contents()
 {
     $this->TEMP_FILE_PATH = create_temp_dir() . "/";

     // expire_seconds = 20 days in normal operation 1728000
     $options = $this->download_options;
     $options['expire_seconds'] = 1728000;

     $file_contents = Functions::lookup_with_cache($this->fishbase_data, $options);
     if (!$file_contents) {
         echo "\n\n Connector terminated. Remote files are not ready.\n\n";
         return;
     }

     $zip_file = $this->TEMP_FILE_PATH . "/fishbase.zip";
     $handle = Functions::file_open($zip_file, "w");
     if (!$handle) {
         return;
     }
     fwrite($handle, $file_contents);
     fclose($handle);
     $output = shell_exec("unzip {$zip_file} -d {$this->TEMP_FILE_PATH}");

     if (!file_exists($this->TEMP_FILE_PATH . "/taxon.txt")) {
         // Fall back to a sub-folder named after the archive.
         $this->TEMP_FILE_PATH = str_ireplace(".zip", "", $zip_file);
         if (!file_exists($this->TEMP_FILE_PATH . "/taxon.txt")) {
             return;
         }
     }

     // Key in $this->text_path => file name inside the extracted folder.
     $text_files = array(
         'TAXON_PATH' => "taxon.txt",
         'TAXON_COMNAMES_PATH' => "taxon_comnames.txt",
         'TAXON_DATAOBJECT_PATH' => "taxon_dataobject.txt",
         'TAXON_DATAOBJECT_AGENT_PATH' => "taxon_dataobject_agent.txt",
         'TAXON_DATAOBJECT_REFERENCE_PATH' => "taxon_dataobject_reference.txt",
         'TAXON_REFERENCES_PATH' => "taxon_references.txt",
         'TAXON_SYNONYMS_PATH' => "taxon_synonyms.txt"
     );
     foreach ($text_files as $key => $file_name) {
         $this->text_path[$key] = $this->TEMP_FILE_PATH . "/" . $file_name;
     }
 }
 /**
  * Fetches the archive at $this->zip_path (through the cache), extracts it
  * as a gzipped tarball into a fresh temporary directory and records the
  * taxa and media text-file paths in $this->text_path.
  *
  * @return bool TRUE when europ_taxa_berlin.txt was found after extraction,
  *              FALSE when the download, the temp-file write or the
  *              extraction failed.
  */
 function load_zip_contents()
 {
     $this->TEMP_FILE_PATH = create_temp_dir() . "/";
     if ($file_contents = Functions::lookup_with_cache($this->zip_path, array('download_wait_time' => 1000000, 'timeout' => 7200, 'download_attempts' => 1, 'delay_in_minutes' => 2))) {
         $parts = pathinfo($this->zip_path);
         $temp_file_path = $this->TEMP_FILE_PATH . "/" . $parts["basename"];
         // fopen() returns false on failure; writing to it would raise an error.
         if (!($TMP = fopen($temp_file_path, "w"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $temp_file_path);
             return FALSE;
         }
         fwrite($TMP, $file_contents);
         fclose($TMP);
         // The file name comes from $this->zip_path, so quote it for the shell.
         shell_exec("tar -xzf " . escapeshellarg($temp_file_path) . " -C " . escapeshellarg($this->TEMP_FILE_PATH));
         if (file_exists($this->TEMP_FILE_PATH . "/europ_taxa_berlin.txt")) {
             $this->text_path["taxa"] = $this->TEMP_FILE_PATH . "/europ_taxa_berlin.txt";
             $this->text_path["media"] = $this->TEMP_FILE_PATH . "/europ_media_berlin.txt";
             return TRUE;
         } else {
             return FALSE;
         }
     } else {
         debug("\n\n Connector terminated. Remote files are not ready.\n\n");
         return FALSE;
     }
 }
 /**
  * Downloads the zip at $this->zip_path, unzips it into a fresh temporary
  * directory and records the species / prokaryota / eukaryota CSV paths in
  * $this->text_path.
  *
  * @return bool TRUE when all_species.csv was found after extraction, FALSE
  *              when the download, the temp-file write or the extraction
  *              failed.
  */
 function load_zip_contents()
 {
     $this->TEMP_FILE_PATH = create_temp_dir() . "/";
     if ($file_contents = Functions::get_remote_file($this->zip_path, array('timeout' => 172800, 'download_attempts' => 2))) {
         $parts = pathinfo($this->zip_path);
         $temp_file_path = $this->TEMP_FILE_PATH . "/" . $parts["basename"];
         if (!($TMP = fopen($temp_file_path, "w"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $temp_file_path);
             // Every other exit of this method returns a boolean; do the same here.
             return FALSE;
         }
         fwrite($TMP, $file_contents);
         fclose($TMP);
         // The file name comes from $this->zip_path and may contain spaces or
         // parentheses, so quote it for the shell.
         shell_exec("unzip " . escapeshellarg($temp_file_path) . " -d " . escapeshellarg($this->TEMP_FILE_PATH));
         if (file_exists($this->TEMP_FILE_PATH . "/all_species.csv")) {
             $this->text_path["species"] = $this->TEMP_FILE_PATH . "/all_species.csv";
             $this->text_path["prokaryota"] = $this->TEMP_FILE_PATH . "/prokaryota.csv";
             $this->text_path["eukaryota"] = $this->TEMP_FILE_PATH . "/eukaryota (2).csv";
             print_r($this->text_path);
             return TRUE;
         } else {
             return FALSE;
         }
     } else {
         debug("\n\n Connector terminated. Remote files are not ready.\n\n");
         return FALSE;
     }
 }
Пример #19
0
 /**
  * Downloads a zip archive, unzips it into a fresh temporary directory and
  * returns the paths of the expected files.
  *
  * @param string $zip_path         URL or path of the zip archive.
  * @param array  $download_options Options passed to Functions::get_remote_file().
  * @param array  $files            Base names (without extension) of the
  *                                 expected files; the first one is used to
  *                                 verify the extraction.
  * @param string $extension        Extension appended to every base name.
  * @return array|null Map of base name => extracted file path; an empty
  *                    array when the download fails; null when the temp file
  *                    cannot be opened or the first expected file is missing.
  */
 private function load_zip_contents($zip_path, $download_options, $files, $extension)
 {
     $text_path = array();
     $temp_path = create_temp_dir();
     if ($file_contents = Functions::get_remote_file($zip_path, $download_options)) {
         $parts = pathinfo($zip_path);
         $temp_file_path = $temp_path . "/" . $parts["basename"];
         if (!($TMP = Functions::file_open($temp_file_path, "w"))) {
             return;
         }
         fwrite($TMP, $file_contents);
         fclose($TMP);
         // The archive name is caller-supplied, so quote it for the shell.
         shell_exec("unzip " . escapeshellarg($temp_file_path) . " -d " . escapeshellarg($temp_path));
         if (file_exists($temp_path . "/" . $files[0] . $extension)) {
             foreach ($files as $file) {
                 $text_path[$file] = $temp_path . "/" . $file . $extension;
             }
         } else {
             return;
         }
     } else {
         debug("\n\n Connector terminated. Remote files are not ready.\n\n");
     }
     return $text_path;
 }
Пример #20
0
 /**
  * For every archive in $this->debug_archives: unzips it into a temporary
  * directory, copies each listed image that is not yet present into the
  * target directory, then removes the temporary directory.
  *
  * Copy failures are collected and printed; the names of images that could
  * not be copied are written as JSON to $this->missing_filenames_text_file.
  * $this->debug_copied / $this->debug_exists count copied and pre-existing
  * images respectively.
  *
  * @return void
  */
 private function unzip_then_move_images_to_temp_folder()
 {
     echo "\n count: " . count($this->debug_archives);
     $i = 0;
     $source_dir = "/Volumes/Time_Machine_Backups/dir_nbii_zip/";
     $target_dir = "/Volumes/Time_Machine_Backups/dir_nbii_images/";
     $not_ok = array();
     $failed = array();
     $failed2 = array();
     $missing_filenames = array();
     foreach ($this->debug_archives as $zip => $images) {
         $temp_dir = create_temp_dir() . "/";
         // $zip = "Animals_Amphibians_FrogsToads.zip"; //debug
         $zip_file = $source_dir . $zip;
         $parts = pathinfo($zip);
         echo "\n opening {$zip_file} ...";
         // Archive names are data, not code: quote them for the shell.
         shell_exec("unzip " . escapeshellarg($zip_file) . " -d " . escapeshellarg($temp_dir));
         // $images[0] =  "Animals_Amphibians_FrogsToads_00035.jpg"; //debug
         foreach ($images as $image) {
             $i++;
             // Normalise once, so that source and destination use the same
             // file name (previously only the source path was trimmed).
             $image = trim($image);
             $file = $temp_dir . $parts["filename"] . "/" . $image;
             $newfile = $target_dir . "" . $image;
             if (!file_exists($newfile)) {
                 echo "\n {$i}. {$newfile}";
                 if (!copy($file, $newfile)) {
                     echo "\n failed to copy [{$file}] [{$zip_file}]...\n";
                     $failed[$zip_file] = 1;
                     $failed2[$zip_file][] = $file;
                     $missing_filenames[$image] = 1;
                 } else {
                     $this->debug_copied++;
                 }
             } else {
                 $this->debug_exists++;
             }
         }
         recursive_rmdir($temp_dir);
     }
     echo "\n\n not ok zip files: \n";
     print_r($not_ok);
     print_r($failed);
     print_r($failed2);
     if ($missing_filenames) {
         if (!($WRITE = fopen($this->missing_filenames_text_file, "w"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $this->missing_filenames_text_file);
             return;
         }
         $missing_filenames = array_keys($missing_filenames);
         echo "\n\n count missing filenames: " . count($missing_filenames);
         fwrite($WRITE, json_encode($missing_filenames));
         fclose($WRITE);
     }
 }
 /**
  * Exports the taxonomy of a LifeDesk as a tab-delimited "parent_child" text
  * file and packs it into a <name>_parent_child.tar.gz archive under
  * DOC_ROOT/public/tmp/lifedesk_exports/.
  *
  * Terminates the script via exit() when load_xml() returns a falsy value.
  *
  * @param array $params Reads "lifedesk" (handed to load_zip_contents()) and
  *                      "name" (prefix of the archive file name).
  * @return void
  */
 function export_lifedesk_taxonomy($params)
 {
     $this->text_path = array();
     if (self::load_zip_contents($params["lifedesk"])) {
         // initialize
         $type = "parent_child";
         self::initialize_dump_file($this->text_path[$type]);
         $headers = self::get_column_headers($this->file_importer_xls[$type]);
         print_r($headers);
         $this->lifedesk_fields[$type] = $headers;
         self::save_to_dump(implode("\t", $headers), $this->text_path[$type]);
         /* fields from template
            [0] => Identifier
            [1] => Parent
            [2] => Child
            [3] => Rank
            [4] => Synonyms
            [5] => Vernaculars
            [6] => VernacularsLanguage
            [7] => Description
            */
         // loop xml
         if (!($xml = self::load_xml())) {
             exit("\nLifeDesk XML is invalid\n\n");
         }
         // Ranks are tried from most to least specific; the first non-empty one becomes the parent.
         $parent_ranks = array("Genus", "Family", "Order", "Class", "Phylum", "Kingdom");
         foreach ($xml->taxon as $t) {
             $dwc = $t->children("http://rs.tdwg.org/dwc/dwcore/");
             $dc = $t->children("http://purl.org/dc/elements/1.1/");
             $rec = array();
             // The "_xxx" suffix is appended only so the lazy (.*?) has a terminator to stop at.
             if (preg_match("/tid:(.*?)_xxx/ims", (string) $dc->identifier . "_xxx", $arr)) {
                 $rec["Identifier"] = $arr[1];
             }
             foreach ($parent_ranks as $rank) {
                 if ($val = $dwc->{$rank}) {
                     $rec["Parent"] = (string) $val;
                     break;
                 }
             }
             $rec["Child"] = (string) $dwc->ScientificName;
             $rec["Rank"] = "";
             // ???
             $synonyms = array();
             foreach ($t->synonym as $name) {
                 $synonyms[] = (string) $name;
             }
             $rec["Synonyms"] = implode("|", $synonyms);
             $vernaculars = array();
             foreach ($t->commonName as $name) {
                 $vernaculars[] = (string) $name;
             }
             $rec["Vernaculars"] = implode(",", $vernaculars);
             $rec["VernacularsLanguage"] = "";
             $rec["Description"] = "";
             self::save_to_template($rec, $this->text_path[$type], $type);
         }
         // compress
         $destination_folder = create_temp_dir() . "/";
         // move file to temp folder for compressing
         if ($path = $this->text_path[$type]) {
             copy($path, $destination_folder . basename($path));
         }
         // compress export files; both paths are shell-escaped because
         // $params["name"] is caller-supplied and may contain spaces or metacharacters
         $archive = DOC_ROOT . "/public/tmp/lifedesk_exports/" . $params["name"] . "_parent_child.tar.gz";
         $command_line = "tar -czf " . escapeshellarg($archive) . " --directory=" . escapeshellarg($destination_folder) . " .";
         shell_exec($command_line);
         recursive_rmdir($destination_folder);
     }
     // remove temp dir; "eol_xml" is not set by this method, so skip the
     // cleanup when it is missing instead of calling recursive_rmdir() on an empty path
     if (isset($this->text_path["eol_xml"])) {
         $parts = pathinfo($this->text_path["eol_xml"]);
         recursive_rmdir($parts["dirname"]);
         //debug - comment if you want to see: images_not_in_xls.txt
         debug("\n temporary directory removed: " . $parts["dirname"]);
     }
 }
 /**
  * Fetches the archive at $this->zip_path via Functions::lookup_with_cache(),
  * writes it into a fresh temp directory, unzips it there and records the
  * locations of the three "sd_*" text files in $this->text_path.
  *
  * Every path returns without a value; on failure the "sd_*" entries of
  * $this->text_path are simply not set by this call.
  *
  * @return void
  */
 private function load_zip_contents()
 {
     $this->TEMP_FILE_PATH = create_temp_dir() . "/";
     $file_contents = Functions::lookup_with_cache($this->zip_path, array('timeout' => 3600, 'download_attempts' => 2, 'delay_in_minutes' => 1));
     if (!$file_contents) {
         debug("\n\n Connector terminated. Remote files are not ready.\n\n");
         return;
     }
     $parts = pathinfo($this->zip_path);
     $temp_file_path = $this->TEMP_FILE_PATH . "/" . $parts["basename"];
     if (!($TMP = fopen($temp_file_path, "w"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $temp_file_path);
         return;
     }
     fwrite($TMP, $file_contents);
     fclose($TMP);
     // Paths are escaped: the archive name comes from $this->zip_path and may
     // contain spaces or shell metacharacters.
     shell_exec("unzip " . escapeshellarg($temp_file_path) . " -d " . escapeshellarg($this->TEMP_FILE_PATH));
     if (!file_exists($this->TEMP_FILE_PATH . "/sd_specimen.txt")) {
         // Fall back to a sub-folder named after the archive without ".zip".
         $this->TEMP_FILE_PATH = str_ireplace(".zip", "", $temp_file_path);
         if (!file_exists($this->TEMP_FILE_PATH . "/sd_specimen.txt")) {
             return;
         }
     }
     $this->text_path["sd_specimen"] = $this->TEMP_FILE_PATH . "/sd_specimen.txt";
     $this->text_path["sd_specimen_identifier"] = $this->TEMP_FILE_PATH . "/sd_specimen_identifier.txt";
     $this->text_path["sd_macro_locality"] = $this->TEMP_FILE_PATH . "/sd_macro_locality.txt";
 }
Пример #23
0
 /**
  * Fetches the archive at $this->zip_path via Functions::get_remote_file()
  * (using $this->download_options with the timeout overridden), unzips it in
  * a fresh temp directory and records the two IRMNG CSV paths in
  * $this->text_path.
  *
  * @return bool true when IRMNG_DWC_20140131.csv was found after extraction,
  *              false when the fetch, the temp-file open or the lookup fails.
  */
 private function load_zip_contents()
 {
     $this->TEMP_FILE_PATH = create_temp_dir() . "/";
     $options = $this->download_options;
     $options['timeout'] = 999999;
     $file_contents = Functions::get_remote_file($this->zip_path, $options);
     if (!$file_contents) {
         debug("\n\n Connector terminated. Remote files are not ready.\n\n");
         return false;
     }
     $parts = pathinfo($this->zip_path);
     $temp_file_path = $this->TEMP_FILE_PATH . "/" . $parts["basename"];
     if (!($TMP = fopen($temp_file_path, "w"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $temp_file_path);
         // Return false (not null) so every failure path yields the same boolean.
         return false;
     }
     fwrite($TMP, $file_contents);
     fclose($TMP);
     // Paths are escaped: the archive name comes from $this->zip_path and may
     // contain spaces or shell metacharacters.
     shell_exec("unzip " . escapeshellarg($temp_file_path) . " -d " . escapeshellarg($this->TEMP_FILE_PATH));
     if (!file_exists($this->TEMP_FILE_PATH . "/IRMNG_DWC_20140131.csv")) {
         // Fall back to a sub-folder named after the archive without ".zip".
         $this->TEMP_FILE_PATH = str_ireplace(".zip", "", $temp_file_path);
         if (!file_exists($this->TEMP_FILE_PATH . "/IRMNG_DWC_20140131.csv")) {
             return false;
         }
     }
     $this->text_path["IRMNG_DWC"] = $this->TEMP_FILE_PATH . "/IRMNG_DWC_20140131.csv";
     $this->text_path["IRMNG_DWC_SP_PROFILE"] = $this->TEMP_FILE_PATH . "/IRMNG_DWC_SP_PROFILE_20140131.csv";
     return true;
 }
Пример #24
0
 /**
  * Fetches the archive at $this->zip_path via Functions::get_remote_file(),
  * unzips it in a fresh temp directory and records the path of each expected
  * text file in $this->text_path.
  *
  * Every path returns without a value; on failure the entries of
  * $this->text_path are simply not set by this call.
  *
  * @return void
  */
 function load_zip_contents()
 {
     $this->TEMP_FILE_PATH = create_temp_dir() . "/";
     $file_contents = Functions::get_remote_file($this->zip_path, array('timeout' => 172800, 'download_attempts' => 2));
     if (!$file_contents) {
         debug("\n\n Connector terminated. Remote files are not ready.\n\n");
         return;
     }
     $parts = pathinfo($this->zip_path);
     $temp_file_path = $this->TEMP_FILE_PATH . "/" . $parts["basename"];
     if (!($TMP = fopen($temp_file_path, "w"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $temp_file_path);
         return;
     }
     fwrite($TMP, $file_contents);
     fclose($TMP);
     // Paths are escaped: the archive name comes from $this->zip_path and may
     // contain spaces or shell metacharacters.
     shell_exec("unzip " . escapeshellarg($temp_file_path) . " -d " . escapeshellarg($this->TEMP_FILE_PATH));
     if (!file_exists($this->TEMP_FILE_PATH . "/species.txt")) {
         // Fall back to a sub-folder named after the archive without ".zip".
         $this->TEMP_FILE_PATH = str_ireplace(".zip", "", $temp_file_path);
         if (!file_exists($this->TEMP_FILE_PATH . "/species.txt")) {
             return;
         }
     }
     // text_path key => file name inside the extracted folder
     $files = array(
         "statuses" => "statuses.txt",
         "classification" => "classification.txt",
         "species" => "species.txt",
         "class" => "class.txt",
         "subclass" => "subclass.txt",
         "superorder" => "superorder.txt",
         "order" => "order.txt",
         "subgenus" => "subgenus.txt",
         "genus" => "genus.txt",
         "family" => "family.txt",
         "specimen" => "specimen.txt",
         "distribution" => "distribution.txt",
         "specimen_images" => "specimen_images_v2.txt",
         "species_images" => "species_images.txt",
         "references" => "references.txt",
         "image_references" => "image_references.txt",
     );
     foreach ($files as $key => $filename) {
         $this->text_path[$key] = $this->TEMP_FILE_PATH . "/" . $filename;
     }
 }
Пример #25
0
 /**
  * Fetches the archive at $this->zip_path via Functions::lookup_with_cache(),
  * unzips it in a fresh temp directory and records the Hymenoptera text-file
  * paths in $this->text_path under the integer keys 1..11.
  *
  * @return bool true when Hds1-Hymenoptera-Final.txt was found after
  *              extraction, false when the fetch, the temp-file open or the
  *              lookup fails.
  */
 function load_zip_contents()
 {
     $this->TEMP_FILE_PATH = create_temp_dir() . "/";
     $file_contents = Functions::lookup_with_cache($this->zip_path, array('timeout' => 172800, 'download_attempts' => 5));
     if (!$file_contents) {
         debug("\n\n Connector terminated. Remote files are not ready.\n\n");
         return false;
     }
     $parts = pathinfo($this->zip_path);
     $temp_file_path = $this->TEMP_FILE_PATH . "/" . $parts["basename"];
     if (!($TMP = fopen($temp_file_path, "w"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $temp_file_path);
         // Return false (not null) so every failure path yields the same boolean.
         return false;
     }
     fwrite($TMP, $file_contents);
     fclose($TMP);
     // Paths are escaped: the archive name comes from $this->zip_path and may
     // contain spaces or shell metacharacters.
     shell_exec("unzip " . escapeshellarg($temp_file_path) . " -d " . escapeshellarg($this->TEMP_FILE_PATH));
     if (!file_exists($this->TEMP_FILE_PATH . "/Hds1-Hymenoptera-Final.txt")) {
         // Fall back to a sub-folder named after the archive without ".zip".
         $this->TEMP_FILE_PATH = str_ireplace(".zip", "", $temp_file_path);
         if (!file_exists($this->TEMP_FILE_PATH . "/Hds1-Hymenoptera-Final.txt")) {
             return false;
         }
     }
     // Keys 1..10 map to Hds<N>-Hymenoptera-Final.txt; key 11 is the separate HymEcoParDone file.
     for ($i = 1; $i <= 10; $i++) {
         $this->text_path[$i] = $this->TEMP_FILE_PATH . "/Hds" . $i . "-Hymenoptera-Final.txt";
     }
     $this->text_path[11] = $this->TEMP_FILE_PATH . "/HymEcoParDone.txt";
     return true;
 }
Пример #26
0
 /**
  * Fetches the archive at $this->zip_path via Functions::get_remote_file(),
  * extracts it with "tar -xzf" in a fresh temp directory and records the
  * Hymenoptera text-file paths in $this->text_path under the integer keys 1..11.
  *
  * @return bool TRUE when Hds1-Hymenoptera-Final.txt was found after
  *              extraction, FALSE when the fetch, the temp-file open or the
  *              lookup fails.
  */
 function load_zip_contents()
 {
     $this->TEMP_FILE_PATH = create_temp_dir() . "/";
     $file_contents = Functions::get_remote_file($this->zip_path, array('timeout' => 172800, 'download_attempts' => 5));
     if (!$file_contents) {
         debug("\n\n Connector terminated. Remote files are not ready.\n\n");
         return FALSE;
     }
     $parts = pathinfo($this->zip_path);
     $temp_file_path = $this->TEMP_FILE_PATH . "/" . $parts["basename"];
     // Bail out when the temp file cannot be opened; passing a failed handle
     // to fwrite()/fclose() raises a TypeError on PHP 8.
     if (!($TMP = fopen($temp_file_path, "w"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $temp_file_path);
         return FALSE;
     }
     fwrite($TMP, $file_contents);
     fclose($TMP);
     // Paths are escaped: the archive name comes from $this->zip_path and may
     // contain spaces or shell metacharacters.
     shell_exec("tar -xzf " . escapeshellarg($temp_file_path) . " -C " . escapeshellarg($this->TEMP_FILE_PATH));
     if (!file_exists($this->TEMP_FILE_PATH . "/Hds1-Hymenoptera-Final.txt")) {
         // Fall back to a sub-folder named after the archive without ".zip".
         $this->TEMP_FILE_PATH = str_ireplace(".zip", "", $temp_file_path);
         if (!file_exists($this->TEMP_FILE_PATH . "/Hds1-Hymenoptera-Final.txt")) {
             return FALSE;
         }
     }
     // Keys 1..10 map to Hds<N>-Hymenoptera-Final.txt; key 11 is the separate HymEcoParDone file.
     for ($i = 1; $i <= 10; $i++) {
         $this->text_path[$i] = $this->TEMP_FILE_PATH . "/Hds" . $i . "-Hymenoptera-Final.txt";
     }
     $this->text_path[11] = $this->TEMP_FILE_PATH . "/HymEcoParDone.txt";
     return TRUE;
 }