/**
 * Drives one harvesting run for the given resource.
 *
 * Stores the run parameters on the instance, seeds the master list when
 * neither the work-in-progress list nor the initial-process status holds a
 * task, hands the instance to Functions::process_work_list(), and finishes the
 * resource once the work-in-progress list no longer yields a task.
 *
 * @param mixed $resource_id             Resource identifier; stored on the instance and passed to the force-harvest call.
 * @param mixed $call_multiple_instance  Stored on the instance as-is (read by code outside this method).
 * @param int   $connectors_to_run       Stored on the instance as-is; defaults to 1.
 */
function start_process($resource_id, $call_multiple_instance, $connectors_to_run = 1)
{
    require_library('connectors/BOLDSysAPI');
    $this->func = new BOLDSysAPI();

    $this->resource_id = $resource_id;
    $this->call_multiple_instance = $call_multiple_instance;
    $this->connectors_to_run = $connectors_to_run;

    // Seed only when no work is pending AND no initial process is flagged.
    // The second lookup is skipped (short-circuit) when work is already pending.
    if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))
        && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))
    ) {
        // Flag the initial step while the big list of ids is divided into small files.
        Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
        $this->func->create_master_list();
        Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
    }

    Functions::process_work_list($this);

    // Work-in-progress list still yields a task: leave finalisation for a later call.
    $pending_task = trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST));
    if ($pending_task) {
        return;
    }

    $this->archive_builder->finalize(true);

    // Set to force harvest.
    Functions::set_resource_status_to_force_harvest($resource_id);

    // Delete temp files.
    Functions::delete_temp_files($this->TEMP_FILE_PATH . "sl_batch_", "txt");
}
<?php
namespace php_active_record;
/*
NCBI Bio Projects connector runner.
The partner provides an XML dump.
estimated execution time:
*/
include_once __DIR__ . "/../../config/environment.php";
require_library('connectors/NCBIProjectsAPI');

$timestart = time_elapsed();
$resource_id = 173;

$func = new NCBIProjectsAPI($resource_id);
$func->get_all_taxa();

// Locations derived from the resource id: live copy, fresh build, and backup.
$live_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id;
$working_path = $live_path . "_working";
$previous_path = $live_path . "_previous";

// Publish the fresh build only when its taxon.tab is larger than 1000 bytes.
if (filesize($working_path . "/taxon.tab") > 1000) {
    if (is_dir($live_path)) {
        // Replace the old backup with the current live copy.
        recursive_rmdir($previous_path);
        Functions::file_rename($live_path, $previous_path);
    }
    Functions::file_rename($working_path, $live_path);
    Functions::set_resource_status_to_force_harvest($resource_id);
}

// Report how long the run took.
$elapsed_time_sec = time_elapsed() - $timestart;
$elapsed_minutes = $elapsed_time_sec / 60;
$elapsed_hours = $elapsed_minutes / 60;
echo "\n\n elapsed time = " . $elapsed_minutes . " minutes";
echo "\n elapsed time = " . $elapsed_hours . " hours";
echo "\n Done processing.\n";
/**
 * Drives one harvesting run for the given resource.
 *
 * Stores the run parameters on the instance, builds the master list when
 * neither the work-in-progress list nor the initial-process status holds a
 * task, hands the instance to Functions::process_work_list(), and — once the
 * work-in-progress list no longer yields a task — combines the batch XML
 * files, flags the resource for force harvest and removes the temp files.
 *
 * @param mixed $resource_id             Resource identifier; stored on the instance and passed to the combine / force-harvest calls.
 * @param mixed $call_multiple_instance  Stored on the instance as-is (read by code outside this method).
 * @param int   $connectors_to_run       Stored on the instance as-is; defaults to 1.
 */
function start_process($resource_id, $call_multiple_instance, $connectors_to_run = 1)
{
    $this->resource_id = $resource_id;
    $this->call_multiple_instance = $call_multiple_instance;
    $this->connectors_to_run = $connectors_to_run;

    // Build the master list only when no work is pending AND no initial process is flagged.
    // The second lookup is skipped (short-circuit) when work is already pending.
    if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))
        && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))
    ) {
        // Flag the initial step while the big list of ids is divided into small files.
        Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
        self::create_master_list();
        Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
    }

    Functions::process_work_list($this);

    // Work-in-progress list still yields a task: leave finalisation for a later call.
    $pending_task = trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST));
    if ($pending_task) {
        return;
    }

    $batch_prefix = $this->TEMP_FILE_PATH . "sl_batch_";

    // Combine all XML files.
    Functions::combine_all_eol_resource_xmls($resource_id, $batch_prefix . "*.xml");

    // Set to force harvest.
    Functions::set_resource_status_to_force_harvest($resource_id);

    // Delete temp files.
    Functions::delete_temp_files($batch_prefix, "txt");
    // debug: skip the XML cleanup below to inspect subsets of the resource XML.
    Functions::delete_temp_files($batch_prefix, "xml");
}
include_once dirname(__FILE__) . "/../../config/environment.php";
require_library('connectors/I3InteractiveAPI');

/*
Runner for the I3Interactive connector (resource 332): builds the archive from
the DwC-A zip, then promotes the "_working" build to the live location and
prints diagnostics.
*/
$timestart = time_elapsed();

$params["resource_id"] = 332;
$params["process occurrence"] = false;
// An alternate source, "http://localhost/cp/3IInteractive/DwCArchive_Cicadellinae.zip", used to be
// assigned here and immediately overwritten; only the effective value is kept.
$params["dwca_file"] = "http://dmitriev.speciesfile.org/Export/DwCArchive_Cicadellinae.zip";

$func = new I3InteractiveAPI($params);
$func->get_all_taxa();

// Live location of the resource; the fresh build and the backup hang off it by suffix.
$resource_path = CONTENT_RESOURCE_LOCAL_PATH . $params["resource_id"];

// Publish the fresh build only when its taxon.tab is larger than 1000 bytes.
if (filesize($resource_path . "_working/taxon.tab") > 1000) {
    if (is_dir($resource_path)) {
        // Replace the old backup with the current live copy.
        recursive_rmdir($resource_path . "_previous");
        Functions::file_rename($resource_path, $resource_path . "_previous");
    }
    Functions::file_rename($resource_path . "_working", $resource_path);
    Functions::file_rename($resource_path . "_working.tar.gz", $resource_path . ".tar.gz");
    Functions::set_resource_status_to_force_harvest($params["resource_id"]);
    Functions::count_resource_tab_files($params["resource_id"]);

    if ($undefined_uris = Functions::get_undefined_uris_from_resource($params["resource_id"])) {
        print_r($undefined_uris);
    }
    // The helper's result may be falsy (see the check above). count() on a non-countable
    // value throws a TypeError on PHP 8 and mis-reports 1 for false on old PHP, so only
    // count real collections and report 0 otherwise.
    $undefined_uri_count = (is_array($undefined_uris) || $undefined_uris instanceof \Countable)
        ? count($undefined_uris)
        : 0;
    echo "\nUndefined URIs: " . $undefined_uri_count . "\n";

    require_library('connectors/DWCADiagnoseAPI');
    $func = new DWCADiagnoseAPI();
    $func->check_unique_ids($params["resource_id"]);
}

// Report how long the run took.
$elapsed_time_sec = time_elapsed() - $timestart;
echo "\n\n";
echo "\n elapsed time = " . ($elapsed_time_sec / 60) . " minutes";
echo "\n elapsed time = " . ($elapsed_time_sec / 60 / 60) . " hours";
echo "\n Done processing.\n";
/**
 * Drives one harvesting run for the given resource.
 *
 * Stores the run parameters on the instance, prepares the id list and splits
 * it into batch files when neither the work-in-progress list nor the
 * initial-process status holds a task, hands the instance to
 * Functions::process_work_list(), and — once the work-in-progress list no
 * longer yields a task — combines the batch XML files, removes the temp files
 * and flags the resource for force harvest.
 *
 * @param mixed $resource_id             Resource identifier; stored on the instance and passed to the combine / force-harvest calls.
 * @param mixed $call_multiple_instance  Stored on the instance as-is (read by code outside this method).
 * @param int   $connectors_to_run       Stored on the instance as-is; defaults to 1.
 */
function start_process($resource_id, $call_multiple_instance, $connectors_to_run = 1)
{
    $this->resource_id = $resource_id;
    $this->call_multiple_instance = $call_multiple_instance;
    $this->connectors_to_run = $connectors_to_run;

    // Prepare the batches only when no work is pending AND no initial process is flagged.
    // The second lookup is skipped (short-circuit) when work is already pending.
    if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))
        && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))
    ) {
        Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);

        // Prepares a list of all species ids (original note: about 13 minutes to run).
        self::build_id_list();

        // Divides the big list of ids into small files.
        // debug: original value is 10000; use 5 for testing.
        self::divide_text_file(10000);

        // Remove the task from the task list again.
        Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
    }

    Functions::process_work_list($this);

    // Work-in-progress list still yields a task: leave the final step for a later call.
    $pending_task = trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST));
    if ($pending_task) {
        return;
    }

    // Step 3: meant to run only when all instances of step 2 are done.
    sleep(10); // debug: original value is 10

    $xml_batch_prefix = $this->TEMP_FILE_PATH . "temp_tropicos_batch_";
    Functions::combine_all_eol_resource_xmls($resource_id, $xml_batch_prefix . "*.xml");

    // debug: comment out the next line to keep the XML batches for checking encoding problems.
    Functions::delete_temp_files($xml_batch_prefix, "xml");
    Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");

    Functions::set_resource_status_to_force_harvest($resource_id);
}
/**
 * Promotes a freshly built "<resource_id>_working" archive to the live
 * location and runs post-publish diagnostics.
 *
 * Nothing happens unless "<resource_id>_working/taxon.tab" is larger than
 * 200 bytes. When it is, an existing live directory is moved to
 * "<resource_id>_previous" (after the old backup is removed), the working
 * directory and its .tar.gz are renamed into place, the resource is flagged
 * for force harvest, and its tab files are counted.
 *
 * @param mixed $resource_id  Resource identifier; appended to CONTENT_RESOURCE_LOCAL_PATH to build every path.
 * @param bool  $big_file     When true, the undefined-URI report and the unique-id check are skipped.
 * @return void
 */
public static function finalize_dwca_resource($resource_id, $big_file = false)
{
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id;

    if (filesize($resource_path . "_working/taxon.tab") > 200) {
        if (is_dir($resource_path)) {
            // Replace the old backup with the current live copy.
            recursive_rmdir($resource_path . "_previous");
            Functions::file_rename($resource_path, $resource_path . "_previous");
        }
        Functions::file_rename($resource_path . "_working", $resource_path);
        Functions::file_rename($resource_path . "_working.tar.gz", $resource_path . ".tar.gz");
        Functions::set_resource_status_to_force_harvest($resource_id);
        Functions::count_resource_tab_files($resource_id);

        if (!$big_file) {
            if ($undefined_uris = Functions::get_undefined_uris_from_resource($resource_id)) {
                print_r($undefined_uris);
            }
            // The helper's result may be falsy (see the check above). count() on a
            // non-countable value throws a TypeError on PHP 8 and mis-reports 1 for
            // false on old PHP, so only count real collections and report 0 otherwise.
            $undefined_uri_count = (is_array($undefined_uris) || $undefined_uris instanceof \Countable)
                ? count($undefined_uris)
                : 0;
            echo "\nUndefined URIs: " . $undefined_uri_count . "\n";

            require_library('connectors/DWCADiagnoseAPI');
            $func = new DWCADiagnoseAPI();
            $func->check_unique_ids($resource_id);
        }
    }
}