Пример #1
0
 /**
  * Drives one connector run: prepares the batch files when nothing is queued,
  * processes the work list, and finalizes the resource once no task remains.
  *
  * @param mixed $resource_id            Resource identifier; used for the combined XML file name
  *                                      and for the status UPDATE (expected to be numeric).
  * @param mixed $call_multiple_instance Stored on the instance; presumably read by
  *                                      Functions::process_work_list() -- confirm against that helper.
  * @return void
  */
 function start_process($resource_id, $call_multiple_instance)
 {
     $this->resource_id = $resource_id;
     $this->call_multiple_instance = $call_multiple_instance;
     $this->connectors_to_run = 1;

     // Split the big id list only when the work-in-progress list yields no task
     // and no "Initial process start" marker is present.
     if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))
         && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
         Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
         // Divide the big list of ids into small files (original chunk size: 10000).
         self::divide_text_file(10000);
         Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
     }

     Functions::process_work_list($this);

     // Finalize only when the work-in-progress list is empty.
     if (trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
         return;
     }

     // Combine all XML files.
     Functions::combine_all_eol_resource_xmls($resource_id, $this->TEMP_FILE_PATH . "temp_DiscoverLife_batch_*.xml");

     // Set to force harvest, but only when a non-empty resource XML exists.
     // is_file() first: filesize() raises a warning on a missing path.
     $resource_xml = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
     if (is_file($resource_xml) && filesize($resource_xml)) {
         // intval() keeps anything non-numeric out of the SQL text.
         $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . intval($resource_id));
     }

     // Delete temp files
     Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
     Functions::delete_temp_files($this->TEMP_FILE_PATH . "temp_DiscoverLife_" . "batch_", "xml");
 }
Пример #2
0
 /**
  * Drives one connector run: creates the work list from the master file when
  * nothing is queued, processes it, and combines the batch XML files once no
  * task remains.
  *
  * @param mixed $resource_id            Resource identifier passed to the XML combiner.
  * @param mixed $call_multiple_instance Stored on the instance; presumably read by
  *                                      Functions::process_work_list() -- confirm against that helper.
  * @return void
  */
 function start_process($resource_id, $call_multiple_instance)
 {
     $this->resource_id = $resource_id;
     $this->call_multiple_instance = $call_multiple_instance;
     $this->connectors_to_run = 1;

     // $batch is only produced by the initial split below. Initialize it so the
     // call to process_work_list() does not read an undefined variable when the
     // split is skipped (null is what the undefined variable evaluated to).
     $batch = null;

     if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))
         && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
         Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
         // Divide the big list of ids into small files (original chunk size: 5000).
         $batch = Functions::create_work_list_from_master_file($this->MASTER_LIST, 5000, $this->TEMP_FILE_PATH, "batch_", $this->WORK_LIST);
         Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
     }

     Functions::process_work_list($this, $batch);

     // Finalize only when the work-in-progress list is empty.
     if (trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
         return;
     }

     // Combine all XML files.
     Functions::combine_all_eol_resource_xmls($resource_id, $this->TEMP_FILE_PATH . "temp_Bolds_batch_*.xml");

     // Delete temp files
     Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
     Functions::delete_temp_files($this->TEMP_FILE_PATH . "temp_Bolds_" . "batch_", "xml");
 }
Пример #3
0
 /**
  * Drives one connector run backed by BOLDSysAPI: builds the master list when
  * nothing is queued, processes the work list, and finalizes the archive once
  * no task remains.
  *
  * @param mixed $resource_id            Resource identifier handed to the force-harvest helper.
  * @param mixed $call_multiple_instance Stored on the instance for later use.
  * @param int   $connectors_to_run      Stored on the instance; defaults to 1.
  * @return void
  */
 function start_process($resource_id, $call_multiple_instance, $connectors_to_run = 1)
 {
     require_library('connectors/BOLDSysAPI');
     $this->func = new BOLDSysAPI();

     $this->resource_id = $resource_id;
     $this->call_multiple_instance = $call_multiple_instance;
     $this->connectors_to_run = $connectors_to_run;

     // The master list is built only when neither the work-in-progress list
     // nor the initial-process marker yields a task.
     $nothing_in_progress = !trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST));
     if ($nothing_in_progress && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
         Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
         $this->func->create_master_list();
         Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
     }

     Functions::process_work_list($this);

     // Something is still queued: leave finalization to a later call.
     if (trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
         return;
     }

     $this->archive_builder->finalize(true);
     // Mark the resource for a forced harvest.
     Functions::set_resource_status_to_force_harvest($resource_id);
     // Remove the temporary batch text files.
     Functions::delete_temp_files($this->TEMP_FILE_PATH . "sl_batch_", "txt");
 }
Пример #4
0
 /**
  * Drives one connector run: builds the master list when nothing is queued,
  * processes the work list, and once no task remains combines the batch XML
  * files, flags the resource for harvest and removes the temp files.
  *
  * @param mixed $resource_id            Resource identifier passed to the combine and harvest helpers.
  * @param mixed $call_multiple_instance Stored on the instance for later use.
  * @param int   $connectors_to_run      Stored on the instance; defaults to 1.
  * @return void
  */
 function start_process($resource_id, $call_multiple_instance, $connectors_to_run = 1)
 {
     $this->resource_id = $resource_id;
     $this->call_multiple_instance = $call_multiple_instance;
     $this->connectors_to_run = $connectors_to_run;

     // The master list is built only when neither the work-in-progress list
     // nor the initial-process marker yields a task.
     $nothing_in_progress = !trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST));
     if ($nothing_in_progress && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
         Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
         self::create_master_list();
         Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
     }

     Functions::process_work_list($this);

     // Something is still queued: leave finalization to a later call.
     if (trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
         return;
     }

     $batch_prefix = $this->TEMP_FILE_PATH . "sl_batch_";

     // Merge every batch XML into the resource file.
     Functions::combine_all_eol_resource_xmls($resource_id, $this->TEMP_FILE_PATH . "sl_batch_*.xml");
     // Mark the resource for a forced harvest.
     Functions::set_resource_status_to_force_harvest($resource_id);
     // Remove temp files. Debug: skip the xml deletion to inspect subsets of
     // the resource XML.
     Functions::delete_temp_files($batch_prefix, "txt");
     Functions::delete_temp_files($batch_prefix, "xml");
 }
Пример #5
0
 /**
  * Drives one connector run in three steps: (1) build and split the id list
  * when nothing is queued, (2) process the work list, (3) once no task remains,
  * combine the batch XML files, clean up and flag the resource for harvest.
  *
  * @param mixed $resource_id            Resource identifier passed to the combine and harvest helpers.
  * @param mixed $call_multiple_instance Stored on the instance for later use.
  * @param int   $connectors_to_run      Stored on the instance; defaults to 1.
  * @return void
  */
 function start_process($resource_id, $call_multiple_instance, $connectors_to_run = 1)
 {
     $this->resource_id = $resource_id;
     $this->call_multiple_instance = $call_multiple_instance;
     $this->connectors_to_run = $connectors_to_run;

     // Step 1 runs only when neither the work-in-progress list nor the
     // initial-process marker yields a task.
     $nothing_in_progress = !trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST));
     if ($nothing_in_progress && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
         Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
         // Prepare the list of all species ids (noted as ~13 minutes of execution).
         self::build_id_list();
         // Split the big id list into small files; 10000 is the original
         // value, use something like 5 when testing.
         self::divide_text_file(10000);
         // Remove the marker task from the task list.
         Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
     }

     // Step 2.
     Functions::process_work_list($this);

     // Step 3 should only run when all instances of step 2 are done.
     if (trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
         return;
     }

     sleep(10); // original delay: 10 seconds
     $xml_prefix = $this->TEMP_FILE_PATH . "temp_tropicos_batch_";
     Functions::combine_all_eol_resource_xmls($resource_id, $this->TEMP_FILE_PATH . "temp_tropicos_batch_*.xml");
     // Debug: comment out the xml deletion to keep a source for checking
     // encoding problems in the XML.
     Functions::delete_temp_files($xml_prefix, "xml");
     Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
     Functions::set_resource_status_to_force_harvest($resource_id);
 }
Пример #6
0
 /**
  * Collects the unique taxon ids found in the yearly WORMS XML files.
  *
  * Each source (local file or URL) is fetched through
  * Functions::get_hashed_response(); the "id" attribute of every <taxdetail>
  * entry is gathered, then duplicates are removed.
  *
  * Ids are stored as plain strings and entries without an "id" attribute are
  * skipped. This assumes the response is a SimpleXMLElement-like object, as
  * the property and array access on it suggest -- confirm against
  * Functions::get_hashed_response().
  *
  * @return array Unique ids; keys are those left by array_unique(), so they
  *               are not necessarily consecutive.
  */
 function get_id_list()
 {
     // For a quick debug run, replace this list with a single small file,
     // e.g. DOC_ROOT . "/update_resources/connectors/files/WORMS/2011_small.xml".
     $urls = array(
         DOC_ROOT . "/update_resources/connectors/files/WORMS/2007.xml",
         DOC_ROOT . "/update_resources/connectors/files/WORMS/2008.xml",
         DOC_ROOT . "/update_resources/connectors/files/WORMS/2009.xml",
         DOC_ROOT . "/update_resources/connectors/files/WORMS/2010.xml",
         "http://dl.dropbox.com/u/7597512/WORMS/2011.xml",
         "http://dl.dropbox.com/u/7597512/WORMS/2012.xml",
     );
     // append year (the current year) and onwards
     $urls = self::generate_url_list($urls);

     echo "\n URLs = " . sizeof($urls) . "\n";
     print_r($urls);

     $ids = array();
     foreach ($urls as $url) {
         echo "\n Processing: {$url} \n";
         $xml = Functions::get_hashed_response($url, array('timeout' => 240, 'download_attempts' => 5));
         if ($xml) {
             foreach ($xml->taxdetail as $taxdetail) {
                 // Skip entries without an id instead of silencing the
                 // notice and appending an empty id.
                 if (!isset($taxdetail["id"])) {
                     continue;
                 }
                 // Store the value itself, not the XML node object.
                 $ids[] = (string) $taxdetail["id"];
             }
         }
         // Pause between sources (original value: 30 seconds).
         sleep(30);
     }

     // delete temp XML files
     Functions::delete_temp_files($this->TEMP_FILE_PATH . "xmlcontent_", "xml");

     $ids = array_unique($ids);
     echo "\n total ids: " . sizeof($ids);
     echo "\n" . sizeof($urls) . " URLs | taxid count = " . sizeof($ids) . "\n";

     // Debug hint: to test on a subset, slice $ids here (e.g. the first 20
     // entries) or check for specific ids with in_array() before returning.
     return $ids;
 }
Пример #7
0
 /**
  * Splits the big OBIS CSV file into numbered chunk files
  * ("temp_1.csv", "temp_2.csv", ...) under OBIS_DATA_PATH, each starting with
  * the header line of the source file.
  *
  * The header line is not counted toward the chunk size, so every chunk
  * (except possibly the last) holds exactly $divisor data rows. A $divisor
  * below 1 disables splitting: all rows go into a single file.
  *
  * @param int $divisor Maximum number of data rows per chunk file.
  * @return int|null Number of chunk files written, or null when a chunk file
  *                  could not be opened (chunks written before the failure
  *                  are left in place).
  */
 private function divide_big_csv_file($divisor)
 {
     // Start clean: remove chunk files left over from a previous run.
     Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_", "csv");

     $rows_in_chunk = 0;
     $chunk = "";
     $file_count = 0;
     $labels = "";

     foreach (new FileIterator($this->OBIS_DATA_FILE) as $linex) {
         // FileIterator removes the carriage-return, so it is re-added here.
         if (!$labels) {
             // First line is the header; keep it aside and do not count it.
             $labels = $linex . "\n";
             continue;
         }

         $chunk .= $linex . "\n";
         $rows_in_chunk++;

         if ($divisor > 0 && $rows_in_chunk >= $divisor) {
             $file_count++;
             if (!($OUT = Functions::file_open($this->OBIS_DATA_PATH . "temp_" . $file_count . ".csv", "w"))) {
                 return;
             }
             fwrite($OUT, $labels);
             fwrite($OUT, $chunk);
             fclose($OUT);
             $rows_in_chunk = 0;
             $chunk = "";
         }
     }

     // Last write: rows remaining after the final full chunk.
     if ($chunk) {
         $file_count++;
         if (!($OUT = Functions::file_open($this->OBIS_DATA_PATH . "temp_" . $file_count . ".csv", "w"))) {
             return;
         }
         fwrite($OUT, $labels);
         fwrite($OUT, $chunk);
         fclose($OUT);
     }

     return $file_count;
 }