/**
 * Entry point of the connector run for one resource.
 *
 * Splits the id list into batch files (only when no batch is in progress and
 * no other instance is already splitting), hands the work list to
 * Functions::process_work_list(), and, once the work-in-progress list is
 * empty, combines the per-batch XML files, flags the resource for harvesting
 * and removes the temporary files.
 *
 * @param mixed $resource_id            id of the resource; used for the combined
 *                                      "<id>.xml" file name and in the UPDATE statement
 * @param mixed $call_multiple_instance stored on the instance; presumably read by
 *                                      Functions::process_work_list() - TODO confirm
 */
function start_process($resource_id, $call_multiple_instance) {
    $this->resource_id = $resource_id;
    $this->call_multiple_instance = $call_multiple_instance;
    $this->connectors_to_run = 1;

    if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
        if (!trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
            // Divide the big list of ids into small files. The status entry is
            // added before and removed after the split, so the check above
            // fails while a split is underway.
            Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
            self::divide_text_file(10000); //orig value 10000 debug
            Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
        }
    }

    Functions::process_work_list($this);

    // Finalize only when nothing is left in the work-in-progress list.
    if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
        // Combine all XML files.
        Functions::combine_all_eol_resource_xmls($resource_id, $this->TEMP_FILE_PATH . "temp_DiscoverLife_batch_*.xml");

        // Set to force harvest. The file must exist and be larger than 600 bytes:
        // other force-harvest checks in this project note that the bare XML
        // response declaration is 516 bytes, so a non-zero size alone does not
        // prove the file has content.
        $resource_file = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
        clearstatcache(true, $resource_file); // the file was just (re)written; avoid a stale cached size
        if (file_exists($resource_file) && filesize($resource_file) > 600) {
            $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $resource_id);
        }

        // Delete temp files
        Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
        Functions::delete_temp_files($this->TEMP_FILE_PATH . "temp_DiscoverLife_" . "batch_", "xml");
    }
}
/**
 * Flags this connector's resource for a forced harvest, but only when its
 * generated XML file exists and actually carries content.
 *
 * Reads $this->resource_id; writes to the `resources` table through
 * $GLOBALS['db_connection'].
 */
private function set_resource_status_to_force_harvest() {
    $resource_file = CONTENT_RESOURCE_LOCAL_PATH . $this->resource_id . ".xml";

    // Drop any cached stat result so a file written earlier in this run is
    // measured at its current size.
    clearstatcache(true, $resource_file);

    // A missing file means there is nothing to harvest; checking first also
    // avoids the warning filesize() raises for a non-existent path.
    if (!file_exists($resource_file)) {
        return;
    }

    // the resource XML response declaration is 516 bytes, so we're checking for something
    // slightly larger than that to make sure we don't have a file with a response
    // and no content
    if (filesize($resource_file) > 600) {
        $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $this->resource_id);
    }
}
/**
 * Entry point of the DiscoverLife connector run (static-property variant).
 *
 * Sets up the working file paths, splits the id list into batch files when no
 * batch is in progress, then pulls tasks off the work list one at a time until
 * it is empty. When the work-in-progress list is empty afterwards, combines
 * the XML files, flags the resource for harvesting and deletes the temp files.
 *
 * @param mixed $resource_id            id of the resource; used for the "<id>.xml"
 *                                      file name and in the UPDATE statement
 * @param mixed $call_multiple_instance truthy to start one additional connector
 *                                      instance when the first task is picked up
 */
function start_process($resource_id, $call_multiple_instance) {
    self::$TEMP_FILE_PATH         = DOC_ROOT . "/update_resources/connectors/files/DiscoverLife/";
    self::$WORK_LIST              = DOC_ROOT . "/update_resources/connectors/files/DiscoverLife/work_list.txt";
    self::$WORK_IN_PROGRESS_LIST  = DOC_ROOT . "/update_resources/connectors/files/DiscoverLife/work_in_progress_list.txt";
    self::$INITIAL_PROCESS_STATUS = DOC_ROOT . "/update_resources/connectors/files/DiscoverLife/initial_process_status.txt";
    self::$TEXT_FILE_FOR_DL       = DOC_ROOT . "/update_resources/connectors/files/DiscoverLife/names_without_pages_in_eol.txt"; //report back to DiscoverLife

    if (!trim(Functions::get_a_task(self::$WORK_IN_PROGRESS_LIST))) {
        if (!trim(Functions::get_a_task(self::$INITIAL_PROCESS_STATUS))) {
            // Divide the big list of ids into small files
            Functions::add_a_task("Initial process start", self::$INITIAL_PROCESS_STATUS);
            self::divide_text_file(10000); //orig value 10000
            Functions::delete_a_task("Initial process start", self::$INITIAL_PROCESS_STATUS);
        }
    }

    // Run multiple instances, for DiscoverLife ideally a total of 2
    while (true) {
        $task = Functions::get_a_task(self::$WORK_LIST); //get a task to work on
        if (!$task) {
            print "\n\n [{$task}] Work list done --- " . date('Y-m-d h:i:s a', time()) . "\n";
            break;
        }

        print "\n Process this: {$task}";
        // Move the task from the work list to the work-in-progress list before working on it.
        Functions::delete_a_task($task, self::$WORK_LIST);
        Functions::add_a_task($task, self::$WORK_IN_PROGRESS_LIST);
        $task = str_ireplace("\n", "", $task); //remove carriage return got from text file

        if ($call_multiple_instance) {
            Functions::run_another_connector_instance($resource_id, 1); //call 1 other instance for a total of 2 instances running
            $call_multiple_instance = 0; // only spawn the extra instance once
        }

        self::get_all_taxa($task);
        print "\n Task {$task} is done. \n";
        // The newline stripped above is re-appended so the entry matches what was added.
        Functions::delete_a_task("{$task}\n", self::$WORK_IN_PROGRESS_LIST); //remove a task from task list
    }

    // Finalize only when nothing is left in the work-in-progress list.
    if (!trim(Functions::get_a_task(self::$WORK_IN_PROGRESS_LIST))) {
        // Combine all XML files.
        self::combine_all_xmls($resource_id);

        // Set to force harvest. The file must exist and be larger than 600 bytes:
        // other force-harvest checks in this project note that the bare XML
        // response declaration is 516 bytes, so a non-zero size alone does not
        // prove the file has content.
        $resource_file = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
        clearstatcache(true, $resource_file); // the file was just (re)written; avoid a stale cached size
        if (file_exists($resource_file) && filesize($resource_file) > 600) {
            $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::insert('Force Harvest') . " WHERE id=" . $resource_id);
        }

        // Delete temp files
        self::delete_temp_files(self::$TEMP_FILE_PATH . "batch_", "txt");
        self::delete_temp_files(CONTENT_RESOURCE_LOCAL_PATH . "DiscoverLife/temp_DiscoverLife_" . "batch_", "xml");
    }
}
/**
 * Builds the resource XML for OBIS from the big CSV file.
 *
 * The CSV is split into chunks, each chunk is converted to a temporary XML
 * file, the temporary XML files are combined into the resource file, the
 * resource is flagged for harvesting, and the temporary files are removed.
 *
 * @param mixed $resource_id id of the resource; used for the "<id>.xml" file
 *                           name and in the UPDATE statement
 * @return false|null false when the CSV could not be divided or a temporary
 *                    XML file could not be opened; null otherwise
 */
public function get_all_taxa($resource_id) {
    // Delete temp files, possible remnants from interrupted runs
    Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_obis_", "xml");
    Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_", "csv");

    //divide big file to a more consumable chunks
    $file_count = self::divide_big_csv_file(40000); //debug orig is 40000
    if ($file_count === false) {
        return false;
    }

    // Carried from chunk to chunk: get_obis_taxa() receives it and returns the updated copy.
    $used_collection_ids = array();
    for ($i = 1; $i <= $file_count; $i++) {
        echo "\nprocessing {$i} => \n";
        $arr = self::get_obis_taxa($this->OBIS_DATA_PATH . "temp_" . $i . ".csv", $used_collection_ids);
        $page_taxa = $arr[0];
        $used_collection_ids = $arr[1];

        $xml = \SchemaDocument::get_taxon_xml($page_taxa);
        $resource_path = $this->OBIS_DATA_PATH . "temp_obis_" . $i . ".xml";
        if (!($OUT = Functions::file_open($resource_path, "w"))) {
            // Report the failure the same way as the divide step above, so
            // callers can tell an aborted run from a completed one.
            return false;
        }
        fwrite($OUT, $xml);
        fclose($OUT);
    }

    // Combine all XML files.
    Functions::combine_all_eol_resource_xmls($resource_id, $this->OBIS_DATA_PATH . "temp_obis_*.xml");

    // Set to force harvest. The file must exist and be larger than 600 bytes:
    // other force-harvest checks in this project note that the bare XML
    // response declaration is 516 bytes, so a non-zero size alone does not
    // prove the file has content.
    $resource_file = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    clearstatcache(true, $resource_file); // the file was just (re)written; avoid a stale cached size
    if (file_exists($resource_file) && filesize($resource_file) > 600) {
        $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $resource_id);
    }

    // Delete temp files
    Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_obis_", "xml");
    Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_", "csv");
}
/**
 * Entry point of the connector run for one resource.
 *
 * Step 1 splits the id list into batch files (only when no batch is in
 * progress and no other instance is already splitting). The work list is then
 * handed to Functions::process_work_list(). Step 3 runs once the
 * work-in-progress list is empty: it combines the XML files, flags the
 * resource for harvesting and removes the temporary files. The bad ids are
 * saved at the end of every run.
 *
 * @param mixed $resource_id            id of the resource; used for the "<id>.xml"
 *                                      file name and in the UPDATE statement
 * @param mixed $call_multiple_instance stored on the instance; presumably read by
 *                                      Functions::process_work_list() - TODO confirm
 */
function start_process($resource_id, $call_multiple_instance) {
    $this->resource_id = $resource_id;
    $this->call_multiple_instance = $call_multiple_instance;
    $this->connectors_to_run = 1;

    if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
        if (!trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
            Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
            // step 1: divides the big list of ids into small files
            $ids = self::get_id_list();
            self::divide_text_file(10000, $ids); //debug original value 10000
            Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS); //removes a task from task list
        }
    }

    Functions::process_work_list($this);

    if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
        // step 3: Combine all XML files. This only runs when all of instances of step 2 are done
        self::combine_all_xmls($resource_id);

        // set to force harvest. The file must exist and be larger than 600 bytes:
        // other force-harvest checks in this project note that the bare XML
        // response declaration is 516 bytes, so a non-zero size alone does not
        // prove the file has content.
        $resource_file = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
        clearstatcache(true, $resource_file); // the file was just (re)written; avoid a stale cached size
        if (file_exists($resource_file) && filesize($resource_file) > 600) {
            $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $resource_id);
        }

        // delete temp files
        Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
        Functions::delete_temp_files($this->TEMP_FILE_PATH . "temp_worms_" . "batch_", "xml");
    }

    self::save_bad_ids_to_txt();
}
/**
 * Flags a resource for a forced harvest when its generated output has content.
 *
 * The resource's "<id>.xml" file is looked at first; the "<id>/taxon.tab"
 * file is considered only when the XML file does not exist. Whichever file is
 * picked must be larger than 600 bytes for the status to be updated.
 *
 * @param mixed $resource_id id of the resource, used for the file names and
 *                           in the UPDATE statement
 */
public static function set_resource_status_to_force_harvest($resource_id) {
    $xml_file = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";

    // Pick the single file whose size decides the outcome. An existing XML
    // file always wins, even when it later turns out to be too small.
    if (file_exists($xml_file)) {
        $file_to_measure = $xml_file;
    } else {
        $tab_file = CONTENT_RESOURCE_LOCAL_PATH . "/{$resource_id}/taxon.tab";
        if (!file_exists($tab_file)) {
            return;
        }
        $file_to_measure = $tab_file;
    }

    if (filesize($file_to_measure) > 600) {
        $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $resource_id);
    }
}
} } foreach ($used_taxa as $taxon_parameters) { $schema_taxa[] = new \SchemaTaxon($taxon_parameters); } $new_resource_xml = \SchemaDocument::get_taxon_xml($schema_taxa); $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml"; if (!($OUT = fopen($old_resource_path, "w+"))) { debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $old_resource_path); return; } fwrite($OUT, $new_resource_xml); fclose($OUT); // set MorphBank to force harvest if (filesize(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml") > 600) { $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $resource_id); } $elapsed_time_sec = time_elapsed() - $timestart; echo "\n"; echo "elapsed time = " . $elapsed_time_sec / 60 . " minutes \n"; echo "elapsed time = " . $elapsed_time_sec / 60 / 60 . " hours \n"; echo "\n\n Done processing."; function get_data_object($id, $created, $modified, $rightsHolder, $license, $agent, $description, $type) { $dataObjectParameters = array(); if ($type == "text") { $dataObjectParameters["identifier"] = "txt_" . $id; $dataObjectParameters["title"] = "Specimen Info"; $dataObjectParameters["subjects"] = array(); $subjectParameters = array(); $subjectParameters["label"] = "http://rs.tdwg.org/ontology/voc/SPMInfoItems#GeneralDescription";
<?php
namespace php_active_record;
/* connector for Learning + Education Group
Partner provides RSS feed.
estimated execution time: just a few seconds
*/

include_once dirname(__FILE__) . "/../../config/environment.php";
$timestart = time_elapsed();

require_library('connectors/LearningEducationAPI');
$xml = \SchemaDocument::get_taxon_xml(LearningEducationAPI::get_all_taxa());

// The fresh export goes to a temporary file first, so the current resource
// file stays untouched until the new one has been checked.
$temp_path = CONTENT_RESOURCE_LOCAL_PATH . "257_temp.xml";
$handle = fopen($temp_path, "w+");
if (!$handle) {
    debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $temp_path);
    return;
}
fwrite($handle, $xml);
fclose($handle);

// Only an export larger than 600 bytes replaces the current file; the current
// file is kept as "257_previous.xml" and the resource is flagged for harvesting.
if (filesize($temp_path) > 600) {
    $current_path = CONTENT_RESOURCE_LOCAL_PATH . "257.xml";
    Functions::file_rename($current_path, CONTENT_RESOURCE_LOCAL_PATH . "257_previous.xml");
    Functions::file_rename($temp_path, $current_path);
    $force_harvest_status = ResourceStatus::find_or_create_by_translated_label('Force Harvest');
    $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . $force_harvest_status->id . " WHERE id=257");
}

$elapsed_time_sec = time_elapsed() - $timestart;
$elapsed_time_min = $elapsed_time_sec / 60;
echo "\n";
echo "elapsed time = {$elapsed_time_sec} seconds \n";
echo "elapsed time = " . $elapsed_time_min . " minutes \n";
echo "\n\n Done processing.";
} if ($resource->service_type_id != ServiceType::find_or_create_by_translated_label("EOL Transfer Schema")->id) { continue; } if (!in_array($resource, $resources)) { $resources[] = $resource; } } foreach ($resources as $resource) { // check the file's modified date and when it was last harvested if (!$resource->ready_to_update() && !$resource->ready_to_harvest(10)) { continue; } if ($resource->id == 11) { continue; } //biolib.cz if ($resource->id == 42) { continue; } //fishbase // if($resource->id!=59) continue; if ($resource->accesspoint_url) { echo "{$resource->id} {$resource->accesspoint_url}\n"; $new_resource_path = $manager->grab_file($resource->accesspoint_url, "resource", array('resource_id' => $resource->id, 'timeout' => 600)); if (!$new_resource_path) { $mysqli->update("UPDATE resources SET resource_status_id=" . ResourceStatus::find_or_create_by_translated_label("Upload Failed")->id . " WHERE id={$resource->id}"); } } } $log->finished();