/**
 * Drives the DiscoverLife batch workflow: optionally splits the master ID list,
 * processes the work list, and, once no work-in-progress task remains, merges
 * the batch XML files into the resource file and cleans up.
 *
 * @param mixed $resource_id            Resource identifier; used for the output file name and the UPDATE query.
 * @param mixed $call_multiple_instance Stored on the instance as-is.
 */
function start_process($resource_id, $call_multiple_instance)
{
    $this->resource_id = $resource_id;
    $this->call_multiple_instance = $call_multiple_instance;
    $this->connectors_to_run = 1;

    // The split step runs only when both the work-in-progress list and the
    // initial-process status list come back empty.
    if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
        if (!trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
            // Divide the big list of ids into small files
            Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
            self::divide_text_file(10000); //orig value 10000 debug
            Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
        }
    }

    Functions::process_work_list($this);

    if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
        // Combine all XML files.
        Functions::combine_all_eol_resource_xmls($resource_id, $this->TEMP_FILE_PATH . "temp_DiscoverLife_batch_*.xml");

        // Set to force harvest, but only when a non-empty resource file exists.
        // is_file() is checked first because filesize() raises a warning on a missing file.
        $resource_file = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
        if (is_file($resource_file) && filesize($resource_file)) {
            $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $resource_id);
        }

        // Delete temp files
        Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
        Functions::delete_temp_files($this->TEMP_FILE_PATH . "temp_DiscoverLife_" . "batch_", "xml");
    }
}
/**
 * Drives the Bolds batch workflow: optionally builds the work list from the
 * master file, processes the work list, and, once no work-in-progress task
 * remains, merges the batch XML files and removes the temporary files.
 *
 * @param mixed $resource_id            Resource identifier handed to the XML combiner.
 * @param mixed $call_multiple_instance Stored on the instance as-is.
 */
function start_process($resource_id, $call_multiple_instance)
{
    $this->resource_id = $resource_id;
    $this->call_multiple_instance = $call_multiple_instance;
    $this->connectors_to_run = 1;

    // Defined up front: the split step below is conditional, but $batch is always
    // passed to process_work_list(). Null is what an unset variable evaluated to,
    // so the value passed is unchanged; only the undefined-variable warning goes away.
    $batch = null;

    if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
        if (!trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
            // Divide the big list of ids into small files
            Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
            $batch = Functions::create_work_list_from_master_file($this->MASTER_LIST, 5000, $this->TEMP_FILE_PATH, "batch_", $this->WORK_LIST); //debug orig value 5000
            Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
        }
    }

    Functions::process_work_list($this, $batch);

    if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
        // Combine all XML files.
        Functions::combine_all_eol_resource_xmls($resource_id, $this->TEMP_FILE_PATH . "temp_Bolds_batch_*.xml");

        // Delete temp files
        Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
        Functions::delete_temp_files($this->TEMP_FILE_PATH . "temp_Bolds_" . "batch_", "xml");
    }
}
/**
 * Splits the OBIS CSV into chunks, converts each chunk to a temporary taxon XML
 * file, merges those files into the resource XML, and removes the temporaries.
 *
 * @param mixed $resource_id Resource identifier; used for the output file name and the UPDATE query.
 *
 * @return false|null False when the CSV split reports failure or a chunk XML file
 *                    cannot be opened for writing; no value otherwise.
 */
public function get_all_taxa($resource_id)
{
    // Delete temp files, possible remnants from interrupted runs
    Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_obis_", "xml");
    Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_", "csv");

    //divide big file to a more consumable chunks
    $file_count = self::divide_big_csv_file(40000); //debug orig is 40000
    if ($file_count === false) {
        return false;
    }

    // Threaded through every chunk: each call returns the updated list as its second element.
    $used_collection_ids = array();
    for ($i = 1; $i <= $file_count; $i++) {
        echo "\nprocessing {$i} => \n";
        $arr = self::get_obis_taxa($this->OBIS_DATA_PATH . "temp_" . $i . ".csv", $used_collection_ids);
        $page_taxa = $arr[0];
        $used_collection_ids = $arr[1];

        $xml = \SchemaDocument::get_taxon_xml($page_taxa);
        $resource_path = $this->OBIS_DATA_PATH . "temp_obis_" . $i . ".xml";
        if (!($OUT = Functions::file_open($resource_path, "w"))) {
            // Report failure the same way as the split step above, so callers can
            // tell an aborted run from a completed one.
            return false;
        }
        fwrite($OUT, $xml);
        fclose($OUT);
    }

    // Combine all XML files.
    Functions::combine_all_eol_resource_xmls($resource_id, $this->OBIS_DATA_PATH . "temp_obis_*.xml");

    // Set to force harvest, but only when a non-empty resource file exists.
    // is_file() is checked first because filesize() raises a warning on a missing file.
    $resource_file = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    if (is_file($resource_file) && filesize($resource_file)) {
        $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $resource_id);
    }

    // Delete temp files
    Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_obis_", "xml");
    Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_", "csv");
}
/**
 * Entry point for the batch run: records the run settings, builds the master
 * list when neither task list holds an entry, processes the work list, and
 * finalises the resource once no work-in-progress task is left.
 *
 * @param mixed $resource_id            Resource identifier used for combining and status update.
 * @param mixed $call_multiple_instance Stored on the instance as-is.
 * @param int   $connectors_to_run      Stored on the instance as-is (default 1).
 */
function start_process($resource_id, $call_multiple_instance, $connectors_to_run = 1)
{
    $this->resource_id = $resource_id;
    $this->call_multiple_instance = $call_multiple_instance;
    $this->connectors_to_run = $connectors_to_run;

    // Short-circuit keeps the original lookup order: the status list is only
    // consulted when the work-in-progress list is empty.
    $nothing_started = !trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))
        && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS));
    if ($nothing_started) {
        // Divide the big list of ids into small files
        Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
        self::create_master_list();
        Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
    }

    Functions::process_work_list($this);

    $pending_task = trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST));
    if ($pending_task) {
        // Work is still in progress; finalisation is skipped on this call.
        return;
    }

    $batch_prefix = $this->TEMP_FILE_PATH . "sl_batch_";

    // Combine all XML files.
    Functions::combine_all_eol_resource_xmls($resource_id, $batch_prefix . "*.xml");

    // Set to force harvest
    Functions::set_resource_status_to_force_harvest($resource_id);

    // Delete temp files
    Functions::delete_temp_files($batch_prefix, "txt");
    Functions::delete_temp_files($batch_prefix, "xml"); //debug Don't delete it if you want to check subsets of the resource XML.
}
/**
 * Entry point for the batch run: records the run settings, prepares the ID list
 * and its batch files when neither task list holds an entry, processes the work
 * list, and finalises the resource once no work-in-progress task is left.
 *
 * @param mixed $resource_id            Resource identifier used for combining and status update.
 * @param mixed $call_multiple_instance Stored on the instance as-is.
 * @param int   $connectors_to_run      Stored on the instance as-is (default 1).
 */
function start_process($resource_id, $call_multiple_instance, $connectors_to_run = 1)
{
    $this->resource_id = $resource_id;
    $this->call_multiple_instance = $call_multiple_instance;
    $this->connectors_to_run = $connectors_to_run;

    // Short-circuit keeps the original lookup order: the status list is only
    // consulted when the work-in-progress list is empty.
    $nothing_started = !trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))
        && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS));
    if ($nothing_started) {
        Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
        // this will prepare a list of all species id; 13 mins. execution
        self::build_id_list();
        // divides the big list of ids into small files
        self::divide_text_file(10000); //debug orig 10000, for testing use 5
        Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS); //remove a task from task list
    }

    Functions::process_work_list($this);

    $pending_task = trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST));
    if ($pending_task) {
        // Work is still in progress; finalisation is skipped on this call.
        return;
    }

    // step 3: this should only run when all of instances of step 2 are done
    sleep(10); //debug orig 10

    $xml_batch_prefix = $this->TEMP_FILE_PATH . "temp_tropicos_batch_";
    Functions::combine_all_eol_resource_xmls($resource_id, $xml_batch_prefix . "*.xml");
    Functions::delete_temp_files($xml_batch_prefix, "xml"); //debug comment this line if u want to have a source for checking encoding probs in the XML
    Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
    Functions::set_resource_status_to_force_harvest($resource_id);
}
// Connector script for resource 106: combines the Conabio XMLs, patches one
// taxon element, appends the Vimeo-derived resource, then flags the result for harvest.
require_library('connectors/ConabioAPI');
$resource_id = 106;
$func = new ConabioAPI();
$func->combine_all_xmls($resource_id);
$resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";

// /* working well - replaces Class='Insecta' to 'Reptilia' if Order=='Squamata' --- WEB-5509
// NOTE(review): the "// /*" and "// */" markers look like a comment toggle around this section — confirm it is meant to be active.
require_library('ResourceDataObjectElementsSetting');
$func = new ResourceDataObjectElementsSetting($resource_id, $resource_path); // $func is reused; the ConabioAPI instance is no longer referenced
$xml = file_get_contents($resource_path);
$xml = $func->replace_taxon_element_value_with_condition("dwc:Class", "Insecta", "Reptilia", $xml, "dwc:Order", "Squamata");
$func->save_resource_document($xml);
// */

// start - this will get Tamborines videos from Vimeo and append it with the main resource 106.xml (DATA-1592)
// The main resource is renamed to ...tamborine1.xml so the wildcard below picks it up together with
// the second file; presumably get_videos_from_vimeo() writes ...tamborine2.xml — verify in its body.
Functions::file_rename($resource_path, CONTENT_RESOURCE_LOCAL_PATH . "temp_vimeo_to_tamborine1.xml");
get_videos_from_vimeo();
Functions::combine_all_eol_resource_xmls($resource_id, CONTENT_RESOURCE_LOCAL_PATH . "temp_vimeo_to_tamborine*.xml");
unlink(CONTENT_RESOURCE_LOCAL_PATH . "temp_vimeo_to_tamborine1.xml");
unlink(CONTENT_RESOURCE_LOCAL_PATH . "temp_vimeo_to_tamborine2.xml");
// end

// Only flag for harvest and gzip when the combined file is larger than 1000 bytes.
if (filesize($resource_path) > 1000) {
    Functions::set_resource_status_to_force_harvest($resource_id);
    Functions::gzip_resource_xml($resource_id);
}

// NOTE(review): $timestart is not assigned in the visible lines; presumably set earlier in the script — confirm.
$elapsed_time_sec = time_elapsed() - $timestart;
echo "\n";
echo "elapsed time = " . $elapsed_time_sec / 60 . " minutes \n";
echo "elapsed time = " . $elapsed_time_sec / 60 / 60 . " hours \n";

// NOTE(review): the body of this function is not closed within the visible chunk; it is left exactly as found.
function get_videos_from_vimeo() {
    echo "\n -- start access to vimeo ";
    $resource_id = "temp_vimeo_to_tamborine2";
/**
 * Builds the FishWisePro resource: compiles the taxa, attaches images, common
 * names and synonyms looked up by SId, writes the taxa out as numbered batch
 * XML files, combines those into the resource XML and deletes the batch files.
 *
 * Returns early (no value) if a batch file cannot be opened for writing.
 *
 * @param mixed $resource_id Resource identifier handed to the XML combiner.
 */
public function get_all_taxa($resource_id)
{
    $all_taxa = array();
    $this->used_collection_ids = array();
    $urls = array(FWP_SPECIES_DOC_PATH); // you can have multiple sources of the species.xls
    $taxa_arr = self::compile_taxa($urls);

    require_library('XLSParser');
    $parser = new XLSParser();
    $images = self::prepare_table($parser->convert_sheet_to_array(FWP_IMAGES_DOC_PATH), "multiple", "SId", "SId", "PictureId", "dbo_Picture_PictureNote", "PictureType", "IsLegal", "Location", "PicComments", "IsAvailable", "LifeStage", "CollectionName", "CollectionAcronym", "PictureSource", "Surname", "Firstname", "DisplayName", "FileName");
    $comnames = self::prepare_table($parser->convert_sheet_to_array(FWP_COMNAMES_DOC_PATH), "multiple", "SId", "CommonName", "Language");
    $synonyms = self::prepare_table($parser->convert_sheet_to_array(FWP_SYNONYMS_DOC_PATH), "multiple", "SId", "SynGenusSpecies", "SynStatus");

    $batch_dir = DOC_ROOT . "/update_resources/connectors/files/FishWisePro/";

    $i = 1;
    $total = sizeof($taxa_arr);
    $j = 0;
    foreach ($taxa_arr as $taxon_arr) {
        echo "\n {$i} of {$total} -- " . $taxon_arr['SId'];
        $i++;
        $taxon_id = $taxon_arr['SId'];

        // A taxon may have no row in a lookup table; null is passed in that case
        // (the same value the former @-suppressed lookups produced).
        $page_taxa = self::get_fishwise_taxa(
            $taxon_arr,
            isset($images[$taxon_id]) ? $images[$taxon_id] : null,
            isset($comnames[$taxon_id]) ? $comnames[$taxon_id] : null,
            isset($synonyms[$taxon_id]) ? $synonyms[$taxon_id] : null
        );
        $all_taxa = array_merge($all_taxa, $page_taxa);

        // $i was already incremented above, so the first flush happens after
        // 9,999 taxa and every later one after a further 10,000.
        if ($i % 10000 == 0) {
            $j++;
            if (!$this->write_fishwise_batch_xml($all_taxa, $j, $batch_dir)) {
                return;
            }
            $all_taxa = array();
        }
    }

    // Flush whatever is left after the last full batch.
    if ($all_taxa) {
        $j++;
        if (!$this->write_fishwise_batch_xml($all_taxa, $j, $batch_dir)) {
            return;
        }
    }

    Functions::combine_all_eol_resource_xmls($resource_id, $batch_dir . "*.xml");
    self::delete_files($batch_dir . "*.xml");
    return;
}

/**
 * Writes one batch of taxa as a taxon XML file named with a zero-padded,
 * three-digit batch number.
 *
 * @param array  $taxa         Taxa to serialise.
 * @param int    $batch_number Sequence number used for the file name.
 * @param string $batch_dir    Target directory, with trailing slash.
 *
 * @return bool False when the file could not be opened (a debug message is logged), true otherwise.
 */
private function write_fishwise_batch_xml($taxa, $batch_number, $batch_dir)
{
    $xml = \SchemaDocument::get_taxon_xml($taxa);
    $file_name = Functions::format_number_with_leading_zeros($batch_number, 3);
    $resource_path = $batch_dir . $file_name . ".xml";
    if (!($OUT = fopen($resource_path, "w+"))) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return false;
    }
    fwrite($OUT, $xml);
    fclose($OUT);
    return true;
}
/**
 * Builds the FishBase resource: converts every prepared taxon (with its common
 * names, references, synonyms and data objects) to taxa, writes them out as
 * batch XML files, combines those into the resource XML, and removes the
 * temporary files and directory.
 *
 * @param mixed $resource_id Resource identifier handed to the XML combiner.
 *
 * @return mixed False when prepare_data() reports failure; no value when a batch
 *               file cannot be opened or on a normal run; when $this->test_run is
 *               set, the taxa accumulated since the last batch flush.
 */
function get_all_taxa($resource_id)
{
    $data = self::prepare_data();
    if ($data === false) {
        return false;
    }

    $taxa = $data["taxon"];
    $taxon_comnames = $data["taxon_comnames"];
    $taxon_references = $data["taxon_references"];
    $taxon_synonyms = $data["taxon_synonyms"];
    $taxon_dataobject = $data["taxon_dataobject"];
    $GLOBALS['taxon_dataobject_agent'] = $data["taxon_dataobject_agent"];
    $GLOBALS['taxon_dataobject_reference'] = $data["taxon_dataobject_reference"];

    $all_taxa = array();
    $taxon_record = array();
    $i = 0;
    $total = count($taxa);
    $batch = 1000; //debug orig 1000
    $batch_count = 0;
    foreach ($taxa as $taxon) {
        // if($taxon["dc_identifier"] != "FB-47873") continue; // debug
        $i++;
        debug("\n{$i} of {$total} " . $taxon["dwc_ScientificName"]);

        $taxon_id = $taxon["int_id"];
        // A taxon may have no entry in a lookup table; null is stored in that case
        // (the same value the former @-suppressed lookups produced).
        $taxon_record["taxon"] = $taxon;
        $taxon_record["common_names"] = isset($taxon_comnames[$taxon_id]) ? $taxon_comnames[$taxon_id] : null;
        $taxon_record["references"] = isset($taxon_references[$taxon_id]) ? $taxon_references[$taxon_id] : null;
        $taxon_record["synonyms"] = isset($taxon_synonyms[$taxon_id]) ? $taxon_synonyms[$taxon_id] : null;
        $taxon_record["dataobjects"] = isset($taxon_dataobject[$taxon_id]) ? $taxon_dataobject[$taxon_id] : null;

        $arr = self::get_FishBase_taxa($taxon_record);
        $page_taxa = $arr[0];
        if ($page_taxa) {
            $all_taxa = array_merge($all_taxa, $page_taxa);
        }
        unset($page_taxa);

        if ($i % $batch == 0) {
            $batch_count++;
            if (!$this->write_fishbase_batch_xml($all_taxa, $batch_count)) {
                return;
            }
            $all_taxa = array();
        }
    }

    //last batch (written even when it holds no taxa, as before)
    $batch_count++;
    if (!$this->write_fishbase_batch_xml($all_taxa, $batch_count)) {
        return;
    }

    Functions::combine_all_eol_resource_xmls($resource_id, $this->TEMP_FILE_PATH . "FB_*.xml");
    self::delete_temp_files($this->TEMP_FILE_PATH . "FB_*.xml");

    // remove tmp dir
    $this->TEMP_FILE_PATH = str_ireplace("/fishbase", "", $this->TEMP_FILE_PATH);
    if ($this->TEMP_FILE_PATH) {
        // Quote the path so spaces or shell metacharacters in it cannot change
        // what the rm command operates on.
        shell_exec("rm -fr " . escapeshellarg($this->TEMP_FILE_PATH));
    }

    if ($this->test_run) {
        return $all_taxa; //used in testing
    }
}

/**
 * Writes one batch of taxa as "FB_<n>.xml" under the temp file path.
 *
 * @param array $taxa        Taxa to serialise.
 * @param int   $batch_count Sequence number used in the file name.
 *
 * @return bool False when the file could not be opened (a debug message is logged), true otherwise.
 */
private function write_fishbase_batch_xml($taxa, $batch_count)
{
    $xml = \SchemaDocument::get_taxon_xml($taxa);
    $resource_path = $this->TEMP_FILE_PATH . "FB_" . $batch_count . ".xml";
    if (!($OUT = fopen($resource_path, "w"))) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return false;
    }
    fwrite($OUT, $xml);
    fclose($OUT);
    return true;
}