/**
 * Runs the DiscoverLife batch workflow for one resource.
 *
 * Unless a run is already under way, the id list is first divided into batch
 * files. The work list is then handed to Functions::process_work_list(). When
 * no task remains in progress afterwards, the per-batch XML files are merged
 * into the resource file, the resource is flagged for force harvest and the
 * temporary batch files are deleted.
 *
 * @param mixed $resource_id            Resource identifier; used in the resource file name and in the SQL WHERE clause.
 * @param mixed $call_multiple_instance Stored on the instance. NOTE(review): presumably read by
 *                                      Functions::process_work_list() - confirm.
 * @return void
 */
function start_process($resource_id, $call_multiple_instance)
 {
     $this->resource_id = $resource_id;
     $this->call_multiple_instance = $call_multiple_instance;
     $this->connectors_to_run = 1;
     // Only divide the id list when nothing is in progress and no other run has
     // already announced that it is dividing.
     if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST)) && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
         // The marker is present only while the list is being divided
         // (presumably so a concurrently started instance skips this step - confirm).
         Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
         // Divide the big list of ids into small files (orig value 10000)
         self::divide_text_file(10000);
         Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
     }
     Functions::process_work_list($this);
     // Finalise only when no task is left in the in-progress list.
     if (trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
         return;
     }
     // Combine all XML files.
     Functions::combine_all_eol_resource_xmls($resource_id, $this->TEMP_FILE_PATH . "temp_DiscoverLife_batch_*.xml");
     // Set to force harvest, but only if the combined file exists and is not empty.
     // The existence check avoids the warning filesize() raises for a missing file.
     $resource_file = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
     if (file_exists($resource_file) && filesize($resource_file)) {
         $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $resource_id);
     }
     // Delete temp files
     Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
     Functions::delete_temp_files($this->TEMP_FILE_PATH . "temp_DiscoverLife_" . "batch_", "xml");
 }
Beispiel #2
0
 /**
  * Flags this connector's resource for force harvest when its XML file holds
  * more than an empty response.
  *
  * Does nothing when the resource file is missing or is 600 bytes or smaller.
  *
  * @return void
  */
 private function set_resource_status_to_force_harvest()
 {
     $resource_file = CONTENT_RESOURCE_LOCAL_PATH . $this->resource_id . ".xml";
     // A missing file means there is nothing to harvest; checking first also
     // avoids the warning filesize() raises when the file does not exist.
     if (!file_exists($resource_file)) {
         return;
     }
     // the resource XML response declaration is 516 bytes, so we're checking for something
     // slightly larger than that to make sure we don't have a file with a response
     // and no content
     if (filesize($resource_file) > 600) {
         $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $this->resource_id);
     }
 }
 /**
  * Runs the DiscoverLife connector for one resource.
  *
  * Sets the working file paths, divides the id list into batch files unless a
  * run is already under way, then takes tasks from the work list one at a time
  * until it is empty. When no task remains in progress, the batch XML files
  * are combined, the resource is flagged for force harvest and the temporary
  * files are deleted.
  *
  * @param mixed $resource_id            Resource identifier; used in the resource file name and in the SQL WHERE clause.
  * @param mixed $call_multiple_instance Truthy to start one additional connector instance when the first task is picked up.
  * @return void
  */
 function start_process($resource_id, $call_multiple_instance)
 {
     self::$TEMP_FILE_PATH = DOC_ROOT . "/update_resources/connectors/files/DiscoverLife/";
     self::$WORK_LIST = DOC_ROOT . "/update_resources/connectors/files/DiscoverLife/work_list.txt";
     self::$WORK_IN_PROGRESS_LIST = DOC_ROOT . "/update_resources/connectors/files/DiscoverLife/work_in_progress_list.txt";
     self::$INITIAL_PROCESS_STATUS = DOC_ROOT . "/update_resources/connectors/files/DiscoverLife/initial_process_status.txt";
     //report back to DiscoverLife
     self::$TEXT_FILE_FOR_DL = DOC_ROOT . "/update_resources/connectors/files/DiscoverLife/names_without_pages_in_eol.txt";
     // Only divide the id list when nothing is in progress and no other run has
     // already announced that it is dividing.
     if (!trim(Functions::get_a_task(self::$WORK_IN_PROGRESS_LIST)) && !trim(Functions::get_a_task(self::$INITIAL_PROCESS_STATUS))) {
         Functions::add_a_task("Initial process start", self::$INITIAL_PROCESS_STATUS);
         // Divide the big list of ids into small files (orig value 10000)
         self::divide_text_file(10000);
         Functions::delete_a_task("Initial process start", self::$INITIAL_PROCESS_STATUS);
     }
     // Run multiple instances, for DiscoverLife ideally a total of 2.
     // Keep taking tasks until the work list yields nothing.
     while ($task = Functions::get_a_task(self::$WORK_LIST)) {
         print "\n Process this: {$task}";
         // Move the task from the work list to the in-progress list.
         Functions::delete_a_task($task, self::$WORK_LIST);
         Functions::add_a_task($task, self::$WORK_IN_PROGRESS_LIST);
         // strip the line feed that came with the task from the text file
         $task = str_ireplace("\n", "", $task);
         if ($call_multiple_instance) {
             // call 1 other instance for a total of 2 instances running
             Functions::run_another_connector_instance($resource_id, 1);
             // make sure only the first task triggers the extra instance
             $call_multiple_instance = 0;
         }
         self::get_all_taxa($task);
         print "\n Task {$task} is done. \n";
         // the in-progress entry was stored with its trailing line feed
         Functions::delete_a_task("{$task}\n", self::$WORK_IN_PROGRESS_LIST);
     }
     print "\n\n [{$task}] Work list done --- " . date('Y-m-d h:i:s a', time()) . "\n";
     // Finalise only when no task is left in the in-progress list.
     if (!trim(Functions::get_a_task(self::$WORK_IN_PROGRESS_LIST))) {
         // Combine all XML files.
         self::combine_all_xmls($resource_id);
         // Set to force harvest, but only if the combined file exists and is not empty.
         // The existence check avoids the warning filesize() raises for a missing file.
         $resource_file = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
         if (file_exists($resource_file) && filesize($resource_file)) {
             $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::insert('Force Harvest') . " WHERE id=" . $resource_id);
         }
         // Delete temp files
         self::delete_temp_files(self::$TEMP_FILE_PATH . "batch_", "txt");
         self::delete_temp_files(CONTENT_RESOURCE_LOCAL_PATH . "DiscoverLife/temp_DiscoverLife_" . "batch_", "xml");
     }
 }
Beispiel #4
0
 /**
  * Builds the resource XML for the given resource from the OBIS CSV data.
  *
  * The big CSV file is divided into chunks, each chunk is converted to a
  * temporary taxon XML file, the temporary XML files are combined into the
  * resource file, and the resource is flagged for force harvest when that
  * file exists and is not empty. Temporary files are deleted before and
  * after a complete run.
  *
  * @param mixed $resource_id Resource identifier; used in the resource file name and in the SQL WHERE clause.
  * @return false|null false when the CSV file could not be divided; null otherwise, including
  *                    the case where a temporary XML file could not be opened for writing.
  */
 public function get_all_taxa($resource_id)
 {
     // Delete temp files, possible remnants from interrupted runs
     Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_obis_", "xml");
     Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_", "csv");
     //divide big file to a more consumable chunks (debug orig is 40000)
     $file_count = self::divide_big_csv_file(40000);
     if ($file_count === false) {
         return false;
     }
     // Carried from chunk to chunk: each call receives the ids collected so far
     // and hands back the updated set.
     $used_collection_ids = array();
     for ($i = 1; $i <= $file_count; $i++) {
         echo "\nprocessing {$i} => \n";
         $arr = self::get_obis_taxa($this->OBIS_DATA_PATH . "temp_" . $i . ".csv", $used_collection_ids);
         $page_taxa = $arr[0];
         $used_collection_ids = $arr[1];
         $xml = \SchemaDocument::get_taxon_xml($page_taxa);
         $resource_path = $this->OBIS_DATA_PATH . "temp_obis_" . $i . ".xml";
         if (!($OUT = Functions::file_open($resource_path, "w"))) {
             // NOTE(review): this aborts with null and leaves the temp files in place;
             // they are only removed at the start of the next run.
             return;
         }
         fwrite($OUT, $xml);
         fclose($OUT);
     }
     // Combine all XML files.
     Functions::combine_all_eol_resource_xmls($resource_id, $this->OBIS_DATA_PATH . "temp_obis_*.xml");
     // Set to force harvest, but only if the combined file exists and is not empty.
     // The existence check avoids the warning filesize() raises for a missing file.
     $combined_file = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
     if (file_exists($combined_file) && filesize($combined_file)) {
         $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $resource_id);
     }
     // Delete temp files
     Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_obis_", "xml");
     Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_", "csv");
 }
Beispiel #5
0
 /**
  * Runs the three-step batch workflow for one resource.
  *
  * Step 1 divides the id list into batch files, unless a run is already under
  * way. Step 2 hands the work list to Functions::process_work_list(). Step 3
  * runs only when no task remains in progress: it combines the batch XML
  * files, flags the resource for force harvest and deletes the temporary
  * files. The bad ids are saved to a text file at the end in every case.
  *
  * @param mixed $resource_id            Resource identifier; used in the resource file name and in the SQL WHERE clause.
  * @param mixed $call_multiple_instance Stored on the instance. NOTE(review): presumably read by
  *                                      Functions::process_work_list() - confirm.
  * @return void
  */
 function start_process($resource_id, $call_multiple_instance)
 {
     $this->resource_id = $resource_id;
     $this->call_multiple_instance = $call_multiple_instance;
     $this->connectors_to_run = 1;
     // Only divide the id list when nothing is in progress and no other run has
     // already announced that it is dividing.
     if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST)) && !trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
         Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
         // step 1: divides the big list of ids into small files (debug original value 10000)
         $ids = self::get_id_list();
         self::divide_text_file(10000, $ids);
         // removes the marker again now that the list has been divided
         Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
     }
     // step 2: process the work list
     Functions::process_work_list($this);
     if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
         // step 3: Combine all XML files. This only runs when all of instances of step 2 are done
         self::combine_all_xmls($resource_id);
         // Set to force harvest, but only if the combined file exists and is not empty.
         // The existence check avoids the warning filesize() raises for a missing file.
         $resource_file = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
         if (file_exists($resource_file) && filesize($resource_file)) {
             $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $resource_id);
         }
         // delete temp files
         Functions::delete_temp_files($this->TEMP_FILE_PATH . "batch_", "txt");
         Functions::delete_temp_files($this->TEMP_FILE_PATH . "temp_worms_" . "batch_", "xml");
     }
     self::save_bad_ids_to_txt();
 }
Beispiel #6
0
 /**
  * Flags a resource for force harvest when its data file is larger than 600 bytes.
  *
  * The resource XML file is inspected when it exists; the taxon.tab file in
  * the resource's own directory is inspected only when the XML file does not
  * exist. When neither exists, nothing happens.
  *
  * @param mixed $resource_id Resource identifier; used in both file paths and in the SQL WHERE clause.
  * @return void
  */
 public static function set_resource_status_to_force_harvest($resource_id)
 {
     $xml_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
     $taxon_tab_path = CONTENT_RESOURCE_LOCAL_PATH . "/{$resource_id}/taxon.tab";
     // Pick the one file whose size decides; the XML file takes precedence.
     if (file_exists($xml_path)) {
         $candidate_path = $xml_path;
     } elseif (file_exists($taxon_tab_path)) {
         $candidate_path = $taxon_tab_path;
     } else {
         return;
     }
     if (filesize($candidate_path) > 600) {
         $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $resource_id);
     }
 }
Beispiel #7
0
    }
}
// Final stage of the connector script: turn the collected taxa into the
// resource XML, write it to disk, flag the resource for force harvest and
// report the elapsed time.
// NOTE(review): $used_taxa, $schema_taxa, $resource_id and $timestart are
// expected to be set earlier in the script, outside this excerpt - confirm
// that $schema_taxa is initialised even when $used_taxa is empty.
foreach ($used_taxa as $taxon_parameters) {
    // wrap each parameter set in a SchemaTaxon object
    $schema_taxa[] = new \SchemaTaxon($taxon_parameters);
}
$new_resource_xml = \SchemaDocument::get_taxon_xml($schema_taxa);
$old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
// "w+" truncates the existing resource file before the new XML is written
if (!($OUT = fopen($old_resource_path, "w+"))) {
    // NOTE(review): this statement appears to sit outside any class, where
    // __CLASS__ evaluates to an empty string - confirm.
    debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $old_resource_path);
    return;
}
fwrite($OUT, $new_resource_xml);
fclose($OUT);
// set MorphBank to force harvest, but only when the written file is larger than 600 bytes
if (filesize(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml") > 600) {
    $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::force_harvest()->id . " WHERE id=" . $resource_id);
}
// report how long the run took, in minutes and in hours
$elapsed_time_sec = time_elapsed() - $timestart;
echo "\n";
echo "elapsed time = " . $elapsed_time_sec / 60 . " minutes   \n";
echo "elapsed time = " . $elapsed_time_sec / 60 / 60 . " hours  \n";
echo "\n\n Done processing.";
function get_data_object($id, $created, $modified, $rightsHolder, $license, $agent, $description, $type)
{
    $dataObjectParameters = array();
    if ($type == "text") {
        $dataObjectParameters["identifier"] = "txt_" . $id;
        $dataObjectParameters["title"] = "Specimen Info";
        $dataObjectParameters["subjects"] = array();
        $subjectParameters = array();
        $subjectParameters["label"] = "http://rs.tdwg.org/ontology/voc/SPMInfoItems#GeneralDescription";
Beispiel #8
0
<?php

namespace php_active_record;

/* connector for Learning + Education Group
Partner provides RSS feed.
estimated execution time: just a few seconds
*/
include_once dirname(__FILE__) . "/../../config/environment.php";
// Build the resource XML for resource 257 from the taxa returned by
// LearningEducationAPI, write it to a temporary file, and promote it to the
// live resource file only when it is larger than 600 bytes.
$timestart = time_elapsed();
require_library('connectors/LearningEducationAPI');
$taxa = LearningEducationAPI::get_all_taxa();
$xml = \SchemaDocument::get_taxon_xml($taxa);
// Write to a temporary file first so the current 257.xml stays untouched
// until the new content has passed the size check below.
$resource_path = CONTENT_RESOURCE_LOCAL_PATH . "257_temp.xml";
if (!($OUT = fopen($resource_path, "w+"))) {
    // This script is not inside a class, so __CLASS__ would be empty here;
    // report the script file name instead.
    debug(basename(__FILE__) . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
    return;
}
fwrite($OUT, $xml);
fclose($OUT);
if (filesize($resource_path) > 600) {
    // Keep the previous resource file as a backup, then promote the new one.
    Functions::file_rename(CONTENT_RESOURCE_LOCAL_PATH . "257.xml", CONTENT_RESOURCE_LOCAL_PATH . "257_previous.xml");
    Functions::file_rename($resource_path, CONTENT_RESOURCE_LOCAL_PATH . "257.xml");
    $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . ResourceStatus::find_or_create_by_translated_label('Force Harvest')->id . " WHERE id=257");
}
// report how long the run took, in seconds and in minutes
$elapsed_time_sec = time_elapsed() - $timestart;
echo "\n";
echo "elapsed time = {$elapsed_time_sec} seconds              \n";
echo "elapsed time = " . $elapsed_time_sec / 60 . " minutes   \n";
echo "\n\n Done processing.";
    }
    if ($resource->service_type_id != ServiceType::find_or_create_by_translated_label("EOL Transfer Schema")->id) {
        continue;
    }
    if (!in_array($resource, $resources)) {
        $resources[] = $resource;
    }
}
// Download a fresh copy of every selected resource that has an access point
// URL, and mark the resource as "Upload Failed" when the download yields no file.
foreach ($resources as $resource) {
    // check the file's modified date and when it was last harvested
    if (!($resource->ready_to_update() || $resource->ready_to_harvest(10))) {
        continue;
    }
    // resources that are never refreshed here: 11 = biolib.cz, 42 = fishbase
    if (in_array($resource->id, array(11, 42))) {
        continue;
    }
    // nothing to download without an access point URL
    if (!$resource->accesspoint_url) {
        continue;
    }
    echo "{$resource->id} {$resource->accesspoint_url}\n";
    $download_options = array('resource_id' => $resource->id, 'timeout' => 600);
    $new_resource_path = $manager->grab_file($resource->accesspoint_url, "resource", $download_options);
    if ($new_resource_path) {
        continue;
    }
    // the download produced no file: record the failure on the resource
    $upload_failed_status_id = ResourceStatus::find_or_create_by_translated_label("Upload Failed")->id;
    $mysqli->update("UPDATE resources SET resource_status_id=" . $upload_failed_status_id . " WHERE id={$resource->id}");
}
$log->finished();