예제 #1
0
 /**
  * Drives one harvesting pass: optional first-time setup, processing of the
  * work list, and - once no task remains in the work-in-progress list -
  * assembly of the final resource XML plus cleanup of the temporary files.
  *
  * @param mixed $resource_id            Identifier of the resource; used for the combined
  *                                      XML file name and in the status UPDATE statement.
  * @param mixed $call_multiple_instance Stored on the object as-is; it is not interpreted
  *                                      inside this method.
  * @return void
  */
 function start_process($resource_id, $call_multiple_instance)
 {
     $this->resource_id = $resource_id;
     $this->call_multiple_instance = $call_multiple_instance;
     $this->connectors_to_run = 1;

     // First-time setup runs only when nothing is in progress AND no setup marker exists.
     // The second lookup is skipped when the first one already finds a task.
     $nothing_in_progress = !trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST));
     if ($nothing_in_progress && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
         // The marker task brackets the split of the big id list into small files.
         Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
         // 10000 is the original value; lower it only when debugging.
         self::divide_text_file(10000);
         Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
     }

     Functions::process_work_list($this);

     // Anything still listed as in progress means finalisation must wait.
     if (trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
         return;
     }

     // Merge every per-batch XML file into the resource document.
     $batch_xml_pattern = $this->TEMP_FILE_PATH . "temp_DiscoverLife_batch_*.xml";
     Functions::combine_all_eol_resource_xmls($resource_id, $batch_xml_pattern);

     // Flag the resource for a forced harvest only when the combined file is non-empty.
     $resource_file = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
     if (filesize($resource_file)) {
         $sql = "UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $resource_id;
         $GLOBALS['db_connection']->update($sql);
     }

     // Remove the temporary batch inputs (txt) and outputs (xml).
     Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
     Functions::delete_temp_files($this->TEMP_FILE_PATH . "temp_DiscoverLife_" . "batch_", "xml");
 }
예제 #2
0
 /**
  * Drives one harvesting pass: optional first-time creation of the work list
  * from the master file, processing of the work list, and - once no task
  * remains in the work-in-progress list - assembly of the final resource XML
  * plus cleanup of the temporary files.
  *
  * @param mixed $resource_id            Identifier of the resource; used when combining
  *                                      the per-batch XML files.
  * @param mixed $call_multiple_instance Stored on the object as-is; it is not interpreted
  *                                      inside this method.
  * @return void
  */
 function start_process($resource_id, $call_multiple_instance)
 {
     $this->resource_id = $resource_id;
     $this->call_multiple_instance = $call_multiple_instance;
     $this->connectors_to_run = 1;

     // $batch is only produced by the first-time setup below. Initialise it so that
     // runs which skip the setup pass an explicit null to process_work_list()
     // instead of reading an undefined variable (same value, no warning).
     $batch = null;

     if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
         if (!trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
             // Divide the big list of ids into small files; the marker task brackets this step.
             Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
             // 5000 is the original value; lower it only when debugging.
             $batch = Functions::create_work_list_from_master_file($this->MASTER_LIST, 5000, $this->TEMP_FILE_PATH, "batch_", $this->WORK_LIST);
             Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
         }
     }

     Functions::process_work_list($this, $batch);

     // Finalise only when nothing is left in the work-in-progress list.
     if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
         // Combine all XML files.
         Functions::combine_all_eol_resource_xmls($resource_id, $this->TEMP_FILE_PATH . "temp_Bolds_batch_*.xml");
         // Delete temp files
         Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
         Functions::delete_temp_files($this->TEMP_FILE_PATH . "temp_Bolds_" . "batch_", "xml");
     }
 }
예제 #3
0
 /**
  * Builds the resource XML from the big CSV file: the CSV is split into
  * chunks, each chunk is converted to a temporary XML file, the temporary
  * XML files are combined into the resource document, and the temporary
  * files are removed.
  *
  * @param mixed $resource_id Identifier of the resource; used for the combined XML
  *                           file name and in the status UPDATE statement.
  * @return false|null false when the CSV could not be split or a temporary XML file
  *                    could not be opened for writing; null after a completed run.
  */
 public function get_all_taxa($resource_id)
 {
     // Delete temp files, possible remnants from interrupted runs
     Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_obis_", "xml");
     Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_", "csv");

     // Divide the big file into more consumable chunks (40000 is the original value).
     $file_count = self::divide_big_csv_file(40000);
     if ($file_count === false) {
         return false;
     }

     // Carried from chunk to chunk: each call receives the ids returned by the previous one.
     $used_collection_ids = array();
     for ($i = 1; $i <= $file_count; $i++) {
         echo "\nprocessing {$i} => \n";
         $arr = self::get_obis_taxa($this->OBIS_DATA_PATH . "temp_" . $i . ".csv", $used_collection_ids);
         $page_taxa = $arr[0];
         $used_collection_ids = $arr[1];
         $xml = \SchemaDocument::get_taxon_xml($page_taxa);
         $resource_path = $this->OBIS_DATA_PATH . "temp_obis_" . $i . ".xml";
         if (!($OUT = Functions::file_open($resource_path, "w"))) {
             // Report the failure the same way as the split failure above, so a caller
             // can tell an aborted run from a completed one.
             return false;
         }
         fwrite($OUT, $xml);
         fclose($OUT);
     }

     // Combine all XML files.
     Functions::combine_all_eol_resource_xmls($resource_id, $this->OBIS_DATA_PATH . "temp_obis_*.xml");

     // Set to force harvest, but only when the combined file is non-empty.
     if (filesize(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml")) {
         $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $resource_id);
     }

     // Delete temp files
     Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_obis_", "xml");
     Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_", "csv");
 }
예제 #4
0
 /**
  * Drives one harvesting pass: optional first-time creation of the master
  * list, processing of the work list, and - once no task remains in the
  * work-in-progress list - assembly of the final resource XML, the
  * force-harvest flag, and cleanup of the temporary files.
  *
  * @param mixed $resource_id            Identifier of the resource being built.
  * @param mixed $call_multiple_instance Stored on the object as-is; it is not interpreted
  *                                      inside this method.
  * @param int   $connectors_to_run      Stored on the object as-is; defaults to 1.
  * @return void
  */
 function start_process($resource_id, $call_multiple_instance, $connectors_to_run = 1)
 {
     $this->resource_id = $resource_id;
     $this->call_multiple_instance = $call_multiple_instance;
     $this->connectors_to_run = $connectors_to_run;

     // First-time setup runs only when nothing is in progress AND no setup marker exists.
     // The second lookup is skipped when the first one already finds a task.
     $nothing_in_progress = !trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST));
     if ($nothing_in_progress && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
         // The marker task brackets the step that divides the big id list into small files.
         Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
         self::create_master_list();
         Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
     }

     Functions::process_work_list($this);

     // Anything still listed as in progress means finalisation must wait.
     if (trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
         return;
     }

     $batch_prefix = $this->TEMP_FILE_PATH . "sl_batch_";

     // Merge every per-batch XML file into the resource document.
     Functions::combine_all_eol_resource_xmls($resource_id, $batch_prefix . "*.xml");
     Functions::set_resource_status_to_force_harvest($resource_id);

     // Remove the temporary batch files. When debugging, skip the xml deletion
     // to keep the per-batch subsets of the resource XML for inspection.
     Functions::delete_temp_files($batch_prefix, "txt");
     Functions::delete_temp_files($batch_prefix, "xml");
 }
예제 #5
0
 /**
  * Drives one harvesting pass: optional first-time build and split of the
  * id list, processing of the work list, and - once no task remains in the
  * work-in-progress list - assembly of the final resource XML, cleanup of
  * the temporary files, and the force-harvest flag.
  *
  * @param mixed $resource_id            Identifier of the resource being built.
  * @param mixed $call_multiple_instance Stored on the object as-is; it is not interpreted
  *                                      inside this method.
  * @param int   $connectors_to_run      Stored on the object as-is; defaults to 1.
  * @return void
  */
 function start_process($resource_id, $call_multiple_instance, $connectors_to_run = 1)
 {
     $this->resource_id = $resource_id;
     $this->call_multiple_instance = $call_multiple_instance;
     $this->connectors_to_run = $connectors_to_run;

     // First-time setup runs only when nothing is in progress AND no setup marker exists.
     // The second lookup is skipped when the first one already finds a task.
     $nothing_in_progress = !trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST));
     if ($nothing_in_progress && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
         Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
         // Prepares the list of all species ids (original author's note: about 13 minutes).
         self::build_id_list();
         // Splits the big id list into small files; 10000 is the original value
         // (the original author suggests 5 for testing).
         self::divide_text_file(10000);
         // Setup finished: take the marker task off the task list.
         Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
     }

     Functions::process_work_list($this);

     // Step 3 should only run when all instances of step 2 are done.
     if (trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
         return;
     }

     // Original value is 10 seconds.
     sleep(10);

     $xml_batch_prefix = $this->TEMP_FILE_PATH . "temp_tropicos_batch_";
     Functions::combine_all_eol_resource_xmls($resource_id, $xml_batch_prefix . "*.xml");
     // When debugging encoding problems in the XML, skip this deletion to keep a source to check.
     Functions::delete_temp_files($xml_batch_prefix, "xml");
     Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
     Functions::set_resource_status_to_force_harvest($resource_id);
 }
예제 #6
0
// Connector script for resource 106: builds the resource XML through ConabioAPI,
// patches one taxon element, appends the Vimeo-derived XML, then flags the
// resource for harvest and reports the elapsed time.
require_library('connectors/ConabioAPI');
$resource_id = 106;
$func = new ConabioAPI();
$func->combine_all_xmls($resource_id);
$resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
// WEB-5509 (reported as working well by the original author): replaces dwc:Class
// 'Insecta' with 'Reptilia' when dwc:Order is 'Squamata'.
// NOTE: $func is reused here for a different object than the ConabioAPI instance above.
require_library('ResourceDataObjectElementsSetting');
$func = new ResourceDataObjectElementsSetting($resource_id, $resource_path);
$xml = file_get_contents($resource_path);
$xml = $func->replace_taxon_element_value_with_condition("dwc:Class", "Insecta", "Reptilia", $xml, "dwc:Order", "Squamata");
$func->save_resource_document($xml);
// End of the WEB-5509 patch.
// DATA-1592 start: get the Tamborine videos from Vimeo and append them to the main
// resource 106.xml. The main XML is renamed to temp file 1, get_videos_from_vimeo()
// uses the id "temp_vimeo_to_tamborine2" (presumably producing temp file 2 - its body
// is not fully visible here), and both are then combined back into the resource file.
Functions::file_rename($resource_path, CONTENT_RESOURCE_LOCAL_PATH . "temp_vimeo_to_tamborine1.xml");
get_videos_from_vimeo();
Functions::combine_all_eol_resource_xmls($resource_id, CONTENT_RESOURCE_LOCAL_PATH . "temp_vimeo_to_tamborine*.xml");
unlink(CONTENT_RESOURCE_LOCAL_PATH . "temp_vimeo_to_tamborine1.xml");
unlink(CONTENT_RESOURCE_LOCAL_PATH . "temp_vimeo_to_tamborine2.xml");
// DATA-1592 end
// Only flag for harvest and gzip when the resource file is larger than 1000 bytes.
if (filesize($resource_path) > 1000) {
    Functions::set_resource_status_to_force_harvest($resource_id);
    Functions::gzip_resource_xml($resource_id);
}
// NOTE(review): $timestart is not assigned anywhere in the visible part of this
// script - confirm it is set (e.g. before the first require) or the report is wrong.
$elapsed_time_sec = time_elapsed() - $timestart;
echo "\n";
echo "elapsed time = " . $elapsed_time_sec / 60 . " minutes \n";
echo "elapsed time = " . $elapsed_time_sec / 60 / 60 . " hours \n";
function get_videos_from_vimeo()
{
    echo "\n -- start access to vimeo ";
    $resource_id = "temp_vimeo_to_tamborine2";
예제 #7
0
 /**
  * Builds the resource XML from the species, image, common-name and synonym
  * spreadsheets. Taxa are accumulated and flushed to numbered XML files in
  * the FishWisePro files directory; those files are then combined into the
  * resource document and deleted.
  *
  * @param mixed $resource_id Identifier of the resource the combined XML is written for.
  * @return void Also returns early, without combining, when a batch file cannot be opened.
  */
 public function get_all_taxa($resource_id)
 {
     $output_dir = DOC_ROOT . "/update_resources/connectors/files/FishWisePro/";
     $pending_taxa = array();
     $this->used_collection_ids = array();

     // More than one source of the species spreadsheet may be listed here.
     $species_sources = array(FWP_SPECIES_DOC_PATH);
     $taxa_arr = self::compile_taxa($species_sources);

     require_library('XLSParser');
     $parser = new XLSParser();
     $images = self::prepare_table($parser->convert_sheet_to_array(FWP_IMAGES_DOC_PATH), "multiple", "SId", "SId", "PictureId", "dbo_Picture_PictureNote", "PictureType", "IsLegal", "Location", "PicComments", "IsAvailable", "LifeStage", "CollectionName", "CollectionAcronym", "PictureSource", "Surname", "Firstname", "DisplayName", "FileName");
     $comnames = self::prepare_table($parser->convert_sheet_to_array(FWP_COMNAMES_DOC_PATH), "multiple", "SId", "CommonName", "Language");
     $synonyms = self::prepare_table($parser->convert_sheet_to_array(FWP_SYNONYMS_DOC_PATH), "multiple", "SId", "SynGenusSpecies", "SynStatus");

     $total = count($taxa_arr);
     $position = 1; // 1-based progress counter; incremented before the flush check below
     $file_no = 0;  // sequence number of the last batch file written

     foreach ($taxa_arr as $taxon_arr) {
         $taxon_id = $taxon_arr['SId'];
         echo "\n {$position} of {$total} -- " . $taxon_id;
         $position++;

         // Missing images / common names / synonyms for a taxon are passed as null.
         $page_taxa = self::get_fishwise_taxa($taxon_arr, @$images[$taxon_id], @$comnames[$taxon_id], @$synonyms[$taxon_id]);
         $pending_taxa = array_merge($pending_taxa, $page_taxa);

         if ($position % 10000 != 0) {
             continue;
         }

         // Flush the accumulated taxa to the next numbered file and start a new batch.
         $file_no++;
         $xml = \SchemaDocument::get_taxon_xml($pending_taxa);
         $resource_path = $output_dir . Functions::format_number_with_leading_zeros($file_no, 3) . ".xml";
         if (!($OUT = fopen($resource_path, "w+"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
             return;
         }
         fwrite($OUT, $xml);
         fclose($OUT);
         $pending_taxa = array();
     }

     // Write whatever is left over after the last full batch.
     if ($pending_taxa) {
         $file_no++;
         $xml = \SchemaDocument::get_taxon_xml($pending_taxa);
         $resource_path = $output_dir . Functions::format_number_with_leading_zeros($file_no, 3) . ".xml";
         if (!($OUT = fopen($resource_path, "w+"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
             return;
         }
         fwrite($OUT, $xml);
         fclose($OUT);
     }

     // Merge the numbered files into the resource document, then remove them.
     Functions::combine_all_eol_resource_xmls($resource_id, $output_dir . "*.xml");
     self::delete_files($output_dir . "*.xml");
 }
예제 #8
0
 /**
  * Builds the resource XML from the prepared FishBase data. Taxa are
  * converted one by one, flushed to numbered "FB_<n>.xml" files every
  * $batch taxa, combined into the resource document, and the temporary
  * files and directory are removed afterwards.
  *
  * @param mixed $resource_id Identifier of the resource the combined XML is written for.
  * @return false|array|null false when prepare_data() fails; the taxa of the last
  *                          (unflushed) batch when $this->test_run is set; null otherwise,
  *                          including when a batch file cannot be opened.
  */
 function get_all_taxa($resource_id)
 {
     $data = self::prepare_data();
     if ($data === false) {
         return false;
     }
     $taxa = $data["taxon"];
     $taxon_comnames = $data["taxon_comnames"];
     $taxon_references = $data["taxon_references"];
     $taxon_synonyms = $data["taxon_synonyms"];
     $taxon_dataobject = $data["taxon_dataobject"];
     // Published as globals; they are not read again inside this method.
     $GLOBALS['taxon_dataobject_agent'] = $data["taxon_dataobject_agent"];
     $GLOBALS['taxon_dataobject_reference'] = $data["taxon_dataobject_reference"];

     $all_taxa = array();
     $i = 0;
     $total = count($taxa);
     $batch = 1000; // original value is 1000; lower it only when debugging
     $batch_count = 0;

     foreach ($taxa as $taxon) {
         $i++;
         debug("\n{$i} of {$total} " . $taxon["dwc_ScientificName"]);

         // Related rows are looked up by the taxon's integer id; missing entries become null.
         $taxon_id = $taxon["int_id"];
         $taxon_record = array(
             "taxon"        => $taxon,
             "common_names" => @$taxon_comnames[$taxon_id],
             "references"   => @$taxon_references[$taxon_id],
             "synonyms"     => @$taxon_synonyms[$taxon_id],
             "dataobjects"  => @$taxon_dataobject[$taxon_id],
         );

         $arr = self::get_FishBase_taxa($taxon_record);
         $page_taxa = $arr[0];
         if ($page_taxa) {
             $all_taxa = array_merge($all_taxa, $page_taxa);
         }
         unset($page_taxa);

         // Flush a full batch to its own temporary XML file and start a new one.
         if ($i % $batch == 0) {
             $batch_count++;
             $xml = \SchemaDocument::get_taxon_xml($all_taxa);
             $resource_path = $this->TEMP_FILE_PATH . "FB_" . $batch_count . ".xml";
             if (!($OUT = fopen($resource_path, "w"))) {
                 debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
                 return;
             }
             fwrite($OUT, $xml);
             fclose($OUT);
             $all_taxa = array();
         }
     }

     // Last batch: written even when no taxa are left over.
     $batch_count++;
     $xml = \SchemaDocument::get_taxon_xml($all_taxa);
     $resource_path = $this->TEMP_FILE_PATH . "FB_" . $batch_count . ".xml";
     if (!($OUT = fopen($resource_path, "w"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
         return;
     }
     fwrite($OUT, $xml);
     fclose($OUT);

     Functions::combine_all_eol_resource_xmls($resource_id, $this->TEMP_FILE_PATH . "FB_*.xml");
     self::delete_temp_files($this->TEMP_FILE_PATH . "FB_*.xml");

     // Remove the tmp dir: strip "/fishbase" from the stored path and delete what remains.
     $this->TEMP_FILE_PATH = str_ireplace("/fishbase", "", $this->TEMP_FILE_PATH);
     if ($this->TEMP_FILE_PATH) {
         // Quote the path so spaces or shell metacharacters in it cannot change
         // what the rm command operates on.
         shell_exec("rm -fr " . escapeshellarg($this->TEMP_FILE_PATH));
     }

     // Used in testing.
     if ($this->test_run) {
         return $all_taxa;
     }
 }