/**
 * Drives one connector run for a resource.
 *
 * On a fresh start (no task in the work-in-progress list and no initial-process
 * marker) the id list is divided into small files. The work list is then
 * processed. Once no task remains in progress, the per-batch XML files are
 * combined, the resource is flagged for force harvest when its XML file is
 * non-empty, and the temporary batch files are removed.
 *
 * @param mixed $resource_id            Resource identifier; used in the resource XML file name and in the UPDATE statement.
 * @param mixed $call_multiple_instance Stored on the object unchanged.
 */
function start_process($resource_id, $call_multiple_instance)
{
    $this->resource_id = $resource_id;
    $this->call_multiple_instance = $call_multiple_instance;
    $this->connectors_to_run = 1;

    // The initial-process marker is only consulted when nothing is in progress.
    $nothing_in_progress = !trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST));
    if ($nothing_in_progress && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
        // Divide the big list of ids into small files, guarded by a marker task.
        Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
        self::divide_text_file(10000); // original value: 10000
        Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
    }

    Functions::process_work_list($this);

    // Finalisation only happens when the work-in-progress list is empty.
    if (trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
        return;
    }

    // Combine all XML files.
    $xml_batch_prefix = $this->TEMP_FILE_PATH . "temp_DiscoverLife_batch_";
    Functions::combine_all_eol_resource_xmls($resource_id, $xml_batch_prefix . "*.xml");

    // Set to force harvest, but only when the combined file has content.
    if (filesize(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml")) {
        $sql = "UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $resource_id;
        $GLOBALS['db_connection']->update($sql);
    }

    // Delete temp files.
    Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
    Functions::delete_temp_files($xml_batch_prefix, "xml");
}
/**
 * Drives one connector run for a resource.
 *
 * On a fresh start (no task in the work-in-progress list and no initial-process
 * marker) a work list is created from the master file. The work list is then
 * processed. Once no task remains in progress, the per-batch XML files are
 * combined and the temporary batch files are removed.
 *
 * @param mixed $resource_id            Resource identifier passed to the XML combiner.
 * @param mixed $call_multiple_instance Stored on the object unchanged.
 */
function start_process($resource_id, $call_multiple_instance)
{
    $this->resource_id = $resource_id;
    $this->call_multiple_instance = $call_multiple_instance;
    $this->connectors_to_run = 1;

    // $batch is only produced on a fresh start. Initialise it so that runs which
    // skip the branch below do not read an undefined variable; the value handed
    // to process_work_list() in that case (null) is unchanged.
    $batch = null;

    if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
        if (!trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
            // Divide the big list of ids into small files, guarded by a marker task.
            Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
            $batch = Functions::create_work_list_from_master_file(
                $this->MASTER_LIST,
                5000, // original value: 5000
                $this->TEMP_FILE_PATH,
                "batch_",
                $this->WORK_LIST
            );
            Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
        }
    }

    Functions::process_work_list($this, $batch);

    // Finalisation only happens when the work-in-progress list is empty.
    if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
        // Combine all XML files.
        Functions::combine_all_eol_resource_xmls($resource_id, $this->TEMP_FILE_PATH . "temp_Bolds_batch_*.xml");

        // Delete temp files.
        Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
        Functions::delete_temp_files($this->TEMP_FILE_PATH . "temp_Bolds_" . "batch_", "xml");
    }
}
/**
 * Drives one connector run for a resource, using a BOLDSysAPI helper.
 *
 * On a fresh start (no task in the work-in-progress list and no initial-process
 * marker) the helper builds the master list. The work list is then processed.
 * Once no task remains in progress, the archive builder is finalized, the
 * resource is set to force harvest and the temporary text files are removed.
 *
 * @param mixed $resource_id            Resource identifier.
 * @param mixed $call_multiple_instance Stored on the object unchanged.
 * @param int   $connectors_to_run      Stored on the object unchanged; defaults to 1.
 */
function start_process($resource_id, $call_multiple_instance, $connectors_to_run = 1)
{
    require_library('connectors/BOLDSysAPI');
    $this->func = new BOLDSysAPI();

    $this->resource_id = $resource_id;
    $this->call_multiple_instance = $call_multiple_instance;
    $this->connectors_to_run = $connectors_to_run;

    // The initial-process marker is only consulted when nothing is in progress.
    $nothing_in_progress = !trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST));
    if ($nothing_in_progress && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
        // Build the master list, guarded by a marker task.
        Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
        $this->func->create_master_list();
        Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
    }

    Functions::process_work_list($this);

    // Finalisation only happens when the work-in-progress list is empty.
    if (trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
        return;
    }

    $this->archive_builder->finalize(true);

    // Set to force harvest.
    Functions::set_resource_status_to_force_harvest($resource_id);

    // Delete temp files.
    Functions::delete_temp_files($this->TEMP_FILE_PATH . "sl_batch_", "txt");
}
/**
 * Drives one connector run for a resource.
 *
 * On a fresh start (no task in the work-in-progress list and no initial-process
 * marker) the master list is created. The work list is then processed. Once no
 * task remains in progress, the per-batch XML files are combined, the resource
 * is set to force harvest and the temporary batch files are removed.
 *
 * @param mixed $resource_id            Resource identifier.
 * @param mixed $call_multiple_instance Stored on the object unchanged.
 * @param int   $connectors_to_run      Stored on the object unchanged; defaults to 1.
 */
function start_process($resource_id, $call_multiple_instance, $connectors_to_run = 1)
{
    $this->resource_id = $resource_id;
    $this->call_multiple_instance = $call_multiple_instance;
    $this->connectors_to_run = $connectors_to_run;

    // The initial-process marker is only consulted when nothing is in progress.
    $nothing_in_progress = !trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST));
    if ($nothing_in_progress && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
        // Build the master list, guarded by a marker task.
        Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
        self::create_master_list();
        Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
    }

    Functions::process_work_list($this);

    // Finalisation only happens when the work-in-progress list is empty.
    if (trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
        return;
    }

    $batch_prefix = $this->TEMP_FILE_PATH . "sl_batch_";

    // Combine all XML files.
    Functions::combine_all_eol_resource_xmls($resource_id, $batch_prefix . "*.xml");

    // Set to force harvest.
    Functions::set_resource_status_to_force_harvest($resource_id);

    // Delete temp files.
    Functions::delete_temp_files($batch_prefix, "txt");
    // Debug: skip the next call to keep the per-batch XML subsets for inspection.
    Functions::delete_temp_files($batch_prefix, "xml");
}
/**
 * Drives one connector run for a resource.
 *
 * On a fresh start (no task in the work-in-progress list and no initial-process
 * marker) the species id list is built and divided into small files. The work
 * list is then processed. Once no task remains in progress, the method waits
 * 10 seconds, combines the per-batch XML files, removes the temporary batch
 * files and sets the resource to force harvest.
 *
 * @param mixed $resource_id            Resource identifier.
 * @param mixed $call_multiple_instance Stored on the object unchanged.
 * @param int   $connectors_to_run      Stored on the object unchanged; defaults to 1.
 */
function start_process($resource_id, $call_multiple_instance, $connectors_to_run = 1)
{
    $this->resource_id = $resource_id;
    $this->call_multiple_instance = $call_multiple_instance;
    $this->connectors_to_run = $connectors_to_run;

    // The initial-process marker is only consulted when nothing is in progress.
    $nothing_in_progress = !trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST));
    if ($nothing_in_progress && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
        Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
        // Prepare the list of all species ids (original note: about 13 minutes to run).
        self::build_id_list();
        // Divide the big list of ids into small files (original: 10000; use 5 for testing).
        self::divide_text_file(10000);
        // Remove the marker task from the task list.
        Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
    }

    Functions::process_work_list($this);

    // Step 3 should only run when all instances of step 2 are done.
    if (trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
        return;
    }

    sleep(10); // original value: 10

    $xml_batch_prefix = $this->TEMP_FILE_PATH . "temp_tropicos_batch_";
    Functions::combine_all_eol_resource_xmls($resource_id, $xml_batch_prefix . "*.xml");
    // Debug: skip the next call to keep a source for checking encoding problems in the XML.
    Functions::delete_temp_files($xml_batch_prefix, "xml");
    Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
    Functions::set_resource_status_to_force_harvest($resource_id);
}
/**
 * Collects the "id" attribute of every <taxdetail> element found in the WORMS
 * yearly XML files.
 *
 * The 2007-2010 files are read from DOC_ROOT, the 2011 and 2012 files from
 * Dropbox, and generate_url_list() extends the list for the current year onward
 * (per the original comment). Progress is echoed, there is a 30-second pause
 * after each URL, and temporary "xmlcontent_*.xml" files are removed at the end.
 *
 * @return array De-duplicated ids exactly as read from the XML (array_unique preserves the original keys).
 */
function get_id_list()
{
    $local_dir = DOC_ROOT . "/update_resources/connectors/files/WORMS/";
    $remote_dir = "http://dl.dropbox.com/u/7597512/WORMS/";

    // Debug: use only $local_dir . "2011_small.xml" for a quick run.
    $urls = array();
    foreach (array("2007", "2008", "2009", "2010") as $year) {
        $urls[] = $local_dir . $year . ".xml";
    }
    foreach (array("2011", "2012") as $year) {
        $urls[] = $remote_dir . $year . ".xml";
    }

    // Append the current year and onwards.
    $urls = self::generate_url_list($urls);
    // Debug: to process a single file, reduce $urls to array($urls[0]) here.

    echo "\n URLs = " . count($urls) . "\n";
    print_r($urls);

    $download_options = array('timeout' => 240, 'download_attempts' => 5);
    $ids = array();
    foreach ($urls as $url) {
        echo "\n Processing: {$url} \n";
        $xml = Functions::get_hashed_response($url, $download_options);
        if ($xml) {
            foreach ($xml->taxdetail as $taxdetail) {
                $ids[] = @$taxdetail["id"];
            }
        }
        sleep(30); // original value: 30
    }

    // Delete temp XML files.
    Functions::delete_temp_files($this->TEMP_FILE_PATH . "xmlcontent_", "xml");

    $ids = array_unique($ids);
    $id_total = count($ids);
    echo "\n total ids: " . $id_total;
    echo "\n" . count($urls) . " URLs | taxid count = " . $id_total . "\n";

    // Debug: to test with a subset, replace $ids with its first 20 entries or with
    // a single known id (e.g. 246718); to search for specific ids, check
    // in_array($id, $ids) for each candidate here.
    return $ids;
}
/**
 * Splits the CSV file at $this->OBIS_DATA_FILE into chunk files named
 * temp_1.csv, temp_2.csv, ... under $this->OBIS_DATA_PATH.
 *
 * The first line of the source file is treated as the header row and is written
 * at the top of every chunk. Existing temp_*.csv files are deleted first.
 *
 * Only data rows count toward the chunk size. Previously the header row was
 * counted too, which made the first chunk one row short and meant a $divisor
 * of 1 never produced a split.
 *
 * @param int $divisor Number of data rows per chunk file. A value that no row count
 *                     ever equals (e.g. 0 or negative) yields a single chunk with all rows.
 * @return int|null Number of chunk files written (0 when there are no data rows),
 *                  or null when a chunk file could not be opened.
 */
private function divide_big_csv_file($divisor)
{
    $chunk_prefix = $this->OBIS_DATA_PATH . "temp_";
    Functions::delete_temp_files($chunk_prefix, "csv");

    // Writes one chunk (header followed by rows); returns false when the file cannot be opened.
    $write_chunk = function ($file_number, $header, $rows) use ($chunk_prefix) {
        if (!($OUT = Functions::file_open($chunk_prefix . $file_number . ".csv", "w"))) {
            return false;
        }
        fwrite($OUT, $header);
        fwrite($OUT, $rows);
        fclose($OUT);
        return true;
    };

    $labels = null;      // header row, captured from the first line
    $rows = "";          // data rows buffered for the current chunk
    $rows_in_chunk = 0;  // number of data rows currently buffered
    $file_count = 0;

    foreach (new FileIterator($this->OBIS_DATA_FILE) as $linex) {
        $record = $linex . "\n"; // FileIterator removes the carriage-return

        if ($labels === null) {
            // The header is kept aside and deliberately not counted as a data row.
            $labels = $record;
            continue;
        }

        $rows .= $record;
        $rows_in_chunk++;

        if ($rows_in_chunk == $divisor) {
            $file_count++;
            if (!$write_chunk($file_count, $labels, $rows)) {
                return;
            }
            $rows = "";
            $rows_in_chunk = 0;
        }
    }

    // Flush the remaining rows that did not fill a whole chunk.
    if ($rows) {
        $file_count++;
        if (!$write_chunk($file_count, $labels, $rows)) {
            return;
        }
    }

    return $file_count;
}