/**
 * Reads the tab-separated identification-keys file and groups its key values by taxon name.
 *
 * Column 0 of each row is the taxon name; columns 1 to 3, when present and
 * non-empty, are appended in that order to the list kept for that name.
 *
 * @return array|null map of name => list of keys; null when the file could not be
 *                    downloaded or produced at most one name
 */
private function process_keys_spreadsheet()
 {
     $taxa_objects = array();
     $filename = Functions::save_remote_file_to_local(self::ID_KEYS_FILE, array('timeout' => 4800, 'download_attempts' => 5));
     print "\n[{$filename}]\n";
     if (!$filename) {
         // Nothing was downloaded, so there is nothing to iterate over.
         echo "\n [" . self::ID_KEYS_FILE . "] unavailable! \n";
         return;
     }
     // NOTE(review): the second FileIterator argument presumably asks it to delete the file when done - confirm.
     foreach (new FileIterator($filename, true) as $line_number => $line) {
         $fields = explode("\t", trim($line));
         $name = trim($fields[0]);
         print "\n name: {$name}";
         // A short row simply carries fewer keys; missing columns are treated as empty.
         for ($column = 1; $column <= 3; $column++) {
             $id_key = isset($fields[$column]) ? trim($fields[$column]) : '';
             if ($id_key) {
                 $taxa_objects[$name][] = $id_key;
             }
         }
     }
     if (count($taxa_objects) <= 1) {
         echo "\n\nInvalid text file. Program will terminate.\n";
         return;
     }
     return $taxa_objects;
 }
 /**
  * Processes every region listed in the search-strings file and finalizes the archive.
  *
  * The file is read line by line. An empty line moves on to the next record type
  * (country, US state, Canadian province, in that order). For each region the
  * "present" records are fetched; for countries the "endemic" records are fetched too.
  */
 function get_all_taxa()
 {
     //TODO: [next] button is not processed
     ini_set("auto_detect_line_endings", true);
     //resource_id here is just to have the cache stored in that folder
     $filename = Functions::save_remote_file_to_local($this->strings_to_search, array('cache' => 1, 'resource_id' => '959'));
     if (!$filename) {
         // Without the region list there is nothing to harvest; do not finalize an empty archive.
         echo "\n [{$this->strings_to_search}] unavailable! \n";
         return;
     }
     $types = array(1 => 'country records', 2 => 'US state records', 3 => 'Canadian province records');
     $i = 1;
     foreach (new FileIterator($filename) as $line_number => $region) {
         if ($region == "") {
             $i++;
             continue;
         }
         if (!isset($types[$i])) {
             // More blank separators than known groups: there is no page to query for this line.
             continue;
         }
         // $region = 'China'; //debug
         $type = $types[$i];
         $url = $this->pages[$type] . $region;
         if ($records = self::process_html($url, 'pre')) {
             self::create_archive($records, $region, 'present');
         }
         if ($type == 'country records') {
             $url = $this->pages['endemic'] . $region;
             if ($records = self::process_html($url, 'end')) {
                 self::create_archive($records, $region, 'endemic');
             }
         }
     }
     unlink($filename);
     $this->archive_builder->finalize(TRUE);
 }
示例#3
0
 /**
  * Downloads the data dump, turns every non-empty line into a record and builds the archive.
  *
  * Rows that do not yield exactly 80 fields are reported and counted but not parsed.
  * Nothing happens when the download fails.
  *
  * @param string|false $data_dump_url when truthy, replaces $this->data_dump_url before downloading
  */
 function get_all_taxa($data_dump_url = false)
 {
     $labels = self::get_headers();
     if ($data_dump_url) {
         $this->data_dump_url = $data_dump_url;
     }
     $temp_filepath = Functions::save_remote_file_to_local($this->data_dump_url, array('timeout' => 4800, 'download_attempts' => 5));
     if (!$temp_filepath) {
         return;
     }
     $invalid_rows = 0;
     $taxa_seen = 0;
     foreach (new FileIterator($temp_filepath, true) as $line_number => $line) {
         if (!$line) {
             continue;
         }
         $record = self::prepare_row_data(trim($line), $labels);
         $field_count = count($record);
         if ($field_count != 80) {
             // means invalid CSV row, needs attention by provider
             $invalid_rows++;
             echo "\n investigate: invalid CSV row, needs attention by provider [" . $field_count . "]";
             print_r($record);
             continue;
         }
         if (!@$record['SCIENTIFIC_NAME']) {
             continue;
         }
         $taxa_seen++;
         debug("{$taxa_seen}. " . $record['SCIENTIFIC_NAME'] . " [" . $field_count . "]\n");
         self::parse_record_element($record);
     }
     debug("\n not 80: {$invalid_rows} \n");
     $this->create_archive();
 }
 /**
  * Downloads the source spreadsheet and converts it to a tab-delimited text file.
  *
  * On success $this->source_file_path is repointed to "spg_falo.txt" inside a newly
  * created temp dir and the downloaded spreadsheet is deleted. On a failed download
  * nothing is changed.
  */
 private function prepare_files()
 {
     $download_options = array("cache" => 1, "timeout" => 3600, "file_extension" => "xlsx", 'download_attempts' => 2, 'delay_in_minutes' => 2);
     $input_file = Functions::save_remote_file_to_local($this->source_file_path, $download_options);
     if (!$input_file) {
         return;
     }
     $this->source_file_path = create_temp_dir() . "/" . "spg_falo.txt";
     self::convert_xlsx_to_tab($input_file, $this->source_file_path);
     unlink($input_file);
 }
 /**
  * Fetches the data at $this->source_url into a local file and stores the resulting
  * path in $this->source_file_path. The elapsed time is passed to $this->profile().
  *
  * @throws \Exception when no file exists at the returned path
  */
 private function download_source_data_file()
 {
     $start = microtime(true);
     debug("Downloading source file.");
     $this->source_file_path = Functions::save_remote_file_to_local($this->source_url, array(
         'file_extension' => pathinfo($this->source_url, PATHINFO_EXTENSION),
         'cache' => true,
         'timeout' => 172800
     ));
     $downloaded = file_exists($this->source_file_path);
     if (!$downloaded) {
         throw new \Exception('Error downloading source file.');
     }
     $this->profile($start);
 }
示例#6
0
 /**
  * Downloads the .xls data dump and converts its sheet into a list of records.
  *
  * Note that $this->data_dump_url is overwritten with the result of the download
  * (the local path, or a falsy value on failure).
  *
  * @return mixed the prepared records, or null when the download failed
  */
 private function parse_xls()
 {
     $download_options = array('download_wait_time' => 1000000, 'timeout' => 600, 'download_attempts' => 5, 'file_extension' => 'xls');
     $local_path = Functions::save_remote_file_to_local($this->data_dump_url, $download_options);
     $this->data_dump_url = $local_path;
     if (!$local_path) {
         return;
     }
     require_library('XLSParser');
     $parser = new XLSParser();
     debug("\n reading: " . $local_path . "\n");
     $sheet = $parser->convert_sheet_to_array($local_path);
     $records = $parser->prepare_data($sheet, "single", "SCIENTIFIC NAME", "SCIENTIFIC NAME", "CATEGORY", "ENGLISH NAME", "RANGE", "ORDER", "FAMILY", "EXTINCT", "EXTINCT_YEAR");
     $records = self::fill_in_parent_id(self::fill_in_missing_names($records));
     debug("\n" . count($records));
     return $records;
 }
示例#7
0
 /**
  * Downloads the .xlsx data dump (cached) and converts its first sheet into a list of records.
  *
  * Note that $this->data_dump_url is overwritten with the result of the download
  * (the local path, or a falsy value on failure).
  *
  * @return mixed the prepared records, or null when the download failed
  */
 private function parse_xls()
 {
     $download_options = array('cache' => 1, 'download_wait_time' => 1000000, 'timeout' => 600, 'download_attempts' => 5, 'file_extension' => 'xlsx');
     $local_path = Functions::save_remote_file_to_local($this->data_dump_url, $download_options);
     $this->data_dump_url = $local_path;
     if (!$local_path) {
         return;
     }
     require_library('XLSParser');
     $parser = new XLSParser();
     debug("\n reading: " . $local_path . "\n");
     $sheet = $parser->convert_sheet_to_array($local_path, 0);
     $records = $parser->prepare_data($sheet, "single", "Scientific name", "Scientific name", "Category", "English name", "Range", "Order", "Family", "Extinct", "Extinction Year");
     $records = self::add_uppercase_fields($records);
     $records = self::fill_in_parent_id(self::fill_in_missing_names($records));
     debug("\n" . count($records));
     return $records;
 }
示例#8
0
 /**
  * Processes every spreadsheet in $this->spreadsheets and finalizes the archive.
  *
  * Each spreadsheet is downloaded and converted to a column => values array. One record
  * is built per entry of the "Species" column, with every value trimmed, and handed
  * to the taxon, image and data helpers. Unavailable spreadsheets are reported and skipped.
  */
 function get_all_taxa()
 {
     require_library('XLSParser');
     $download_options = array("cache" => 1, "timeout" => 3600, "file_extension" => "xls", 'download_attempts' => 2, 'delay_in_minutes' => 2);
     $docs = count($this->spreadsheets);
     $doc_count = 0;
     foreach ($this->spreadsheets as $doc) {
         $doc_count++;
         echo "\n processing [{$doc}]...\n";
         $path = Functions::save_remote_file_to_local($this->url_path . $doc, $download_options);
         if (!$path) {
             echo "\n [{$doc}] unavailable! \n";
             continue;
         }
         $parser = new XLSParser();
         $sheet = $parser->convert_sheet_to_array($path);
         $columns = array_keys($sheet);
         $rows = count($sheet["Species"]);
         echo "\n total {$path}: {$rows} \n";
         $row = 0;
         foreach ($sheet["Species"] as $Species) {
             // Assemble the record for this row from every column, trimming each value.
             $rec = array();
             foreach ($columns as $column) {
                 $rec[$column] = trim($sheet[$column][$row]);
             }
             $row++;
             // (A range filter on the row number can be added here to process the sheet in slices when caching.)
             print "\n [{$doc_count} of {$docs}][" . $row . " of {$rows}] " . $rec["Species"] . "\n";
             $rec = self::clean_taxon_name($rec);
             // remove parenthesis, then use underscores instead of spaces before hashing
             $without_parenthesis = trim(preg_replace('/\\s*\\([^)]*\\)/', '', $rec["sciname"]));
             $rec["taxon_id"] = md5(str_replace(" ", "_", $without_parenthesis));
             self::create_instances_from_taxon_object($rec);
             self::prepare_images($rec);
             self::prepare_data($rec);
         }
         unlink($path);
     }
     $this->archive_builder->finalize(TRUE);
 }
 /**
  * Converts an EOL XML resource into an archive.
  *
  * @param array $params      reads "eol_xml_file" (and "filename" in archive mode); "path" and
  *                           "filename" are filled in here before calling convert_xml()
  * @param bool  $xml_file_YN true when "eol_xml_file" is a plain XML file to download;
  *                           false when it is an archive to extract
  */
 function export_xml_to_archive($params, $xml_file_YN = false)
 {
     if ($xml_file_YN) {
         // Plain XML: download it, convert, then drop the local copy.
         $params['path'] = DOC_ROOT . "tmp/";
         //debug - cache should be 0 zero in normal operation
         $download_options = array('file_extension' => "xml", 'cache' => 0, "timeout" => 7200, "download_attempts" => 2, "delay_in_minutes" => 2);
         $local_xml_file = Functions::save_remote_file_to_local($params['eol_xml_file'], $download_options);
         $params['filename'] = pathinfo($local_xml_file, PATHINFO_BASENAME);
         self::convert_xml($params);
         $this->archive_builder->finalize(TRUE);
         unlink($local_xml_file);
         return;
     }
     // Archive: extract it into a temp dir, convert, then remove the temp dir.
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     // "expire_seconds" -- false => won't expire; 0 => expires now //debug
     $paths = $func->extract_archive_file($params["eol_xml_file"], $params["filename"], array("timeout" => 7200, "expire_seconds" => 0));
     print_r($paths);
     $params["path"] = $paths["temp_dir"];
     self::convert_xml($params);
     $this->archive_builder->finalize(TRUE);
     recursive_rmdir($paths["temp_dir"]);
 }
 /**
  * Downloads a spreadsheet and returns it converted to an array.
  *
  * @param string $spreadsheet location passed to the downloader
  * @param mixed  $worksheet   worksheet selector forwarded to the parser
  * @return mixed the parser's result, or false when the spreadsheet could not be downloaded
  */
 public function convert_spreadsheet($spreadsheet, $worksheet = null)
 {
     require_library('XLSParser');
     $parser = new XLSParser();
     $path = Functions::save_remote_file_to_local($spreadsheet, $this->spreadsheet_options);
     if (!$path) {
         echo "\n [{$spreadsheet}] unavailable! \n";
         return false;
     }
     $converted = $parser->convert_sheet_to_array($path, $worksheet);
     unlink($path);
     return $converted;
 }
示例#11
0
 /**
  * Downloads the CSV for the given service type and processes it row by row.
  *
  * The first line is taken as the header. Every later line is parsed as CSV; rows whose
  * field count differs from the expected number are printed and skipped, all others are
  * turned into a field => value record (double quotes stripped) and dispatched to the
  * handler for $type.
  *
  * @param string $type  "collection", "occurrence" or "taxon"
  * @param array  $taxon for "collection"/"occurrence": reads "rank", "orig_no" and "taxon_name";
  *                      only ranks "species" and "subspecies" are processed
  */
 private function parse_csv_file($type, $taxon = array())
 {
     echo "\n Processing {$type}...\n";
     $taxon_id = null;
     $url = null;
     if ($type == "collection" || $type == "occurrence") {
         $no_of_fields = ($type == "collection") ? 68 : 25;
         $rank = isset($taxon["rank"]) ? $taxon["rank"] : null;
         if (!in_array($rank, array("species", "subspecies"))) {
             return;
         }
         $taxon_id = $taxon["orig_no"];
         $url = $this->service[$type] . $taxon["taxon_name"];
         $path = Functions::save_remote_file_to_local($url, $this->download_options);
     } elseif ($type == "taxon") {
         $no_of_fields = 32;
         // debug cache should be 0; only when debugging should be 1
         $path = Functions::save_remote_file_to_local($this->service["taxon"], array("timeout" => 999999, "cache" => 0));
     } else {
         // No service is defined for any other type, so there is nothing to download.
         echo "\n investigate: unknown type [{$type}]";
         return;
     }
     if (!$path) {
         echo "\n [{$type}] unavailable! \n";
         return;
     }
     $fields = array();
     $j = 0;
     foreach (new FileIterator($path) as $line_number => $line) {
         $j++;
         if ($j % 25000 == 0) {
             echo "\n{$j}. [{$type}]";
         }
         // if($j >= 1000) break; //debug
         if (!$line) {
             continue;
         }
         $line = trim($line);
         if ($j == 1) {
             // Header row: strip the quotes once here instead of once per data cell.
             $fields = str_replace('"', '', explode(",", $line));
             continue;
         }
         $values = str_getcsv($line);
         if (count($values) != $no_of_fields) {
             print_r($values);
             echo "\n investigate rec is not {$no_of_fields}";
             continue;
         }
         $rec = array();
         foreach ($values as $i => $value) {
             $rec[$fields[$i]] = str_replace('"', '', $value);
         }
         if ($type == "collection") {
             self::process_taxon_collection($rec, $taxon_id, $url);
         } elseif ($type == "occurrence") {
             self::process_taxon_occurrence($rec, $taxon_id, $url);
         } else {
             self::process_taxon($rec);
         }
     }
     unlink($path);
 }
示例#12
0
 /**
  * Splits the downloaded species list into batch files of $divisor lines each and
  * writes a work list naming every batch.
  *
  * Batches are written to $this->TEMP_FILE_PATH as batch_NNN.txt; a final, shorter
  * batch holds any remaining lines. work_list.txt then lists "batch_NNN", one per line.
  * The method returns early when the download fails or a batch file cannot be opened.
  *
  * @param int $divisor number of non-empty lines per batch file
  */
 private function divide_text_file($divisor)
 {
     $source_path = Functions::save_remote_file_to_local(self::DL_MAP_SPECIES_LIST, array('timeout' => 4800, 'download_attempts' => 5));
     if (!$source_path) {
         echo "\n\nExternal file not available. Program will terminate.\n";
         return;
     }
     $lines_in_batch = 0;
     $batch_total = 0;
     $buffer = "";
     print "\n";
     foreach (new FileIterator($source_path, true) as $line_number => $line) {
         if (!$line) {
             continue;
         }
         // FileIterator removes the carriage-return char
         $line .= "\n";
         $lines_in_batch++;
         $buffer .= $line;
         print "{$lines_in_batch}. {$line}\n";
         if ($lines_in_batch != $divisor) {
             continue;
         }
         // The batch is full: flush it to its own file and start over.
         print "\n";
         $batch_total++;
         $batch_path = $this->TEMP_FILE_PATH . "batch_" . Functions::format_number_with_leading_zeros($batch_total, 3) . ".txt";
         $OUT = Functions::file_open($batch_path, "w");
         if (!$OUT) {
             return;
         }
         fwrite($OUT, $buffer);
         fclose($OUT);
         $buffer = "";
         $lines_in_batch = 0;
     }
     //last writes
     if ($buffer) {
         $batch_total++;
         $batch_path = $this->TEMP_FILE_PATH . "batch_" . Functions::format_number_with_leading_zeros($batch_total, 3) . ".txt";
         $OUT = Functions::file_open($batch_path, "w");
         if (!$OUT) {
             return;
         }
         fwrite($OUT, $buffer);
         fclose($OUT);
     }
     //create work_list
     $work_list = "";
     $batch_number = 1;
     while ($batch_number <= $batch_total) {
         $work_list .= "batch_" . Functions::format_number_with_leading_zeros($batch_number, 3) . "\n";
         $batch_number++;
     }
     $fp = Functions::file_open($this->TEMP_FILE_PATH . "work_list.txt", "w");
     if ($fp) {
         fwrite($fp, $work_list);
         fclose($fp);
     }
 }
示例#13
0
 /**
  * Builds a measurement => value map from the URI list file.
  *
  * Each usable line has the form "<measurement>--<value>". Both sides have ":" and "-"
  * removed and are trimmed and lower-cased. Lines without "--" are ignored.
  *
  * @return array the map; empty when the list could not be downloaded
  */
 private function get_uris()
 {
     $uris = array();
     $options = $this->download_options;
     $options["cache"] = 1;
     // $options["expire_seconds"] = 0;
     $filename = Functions::save_remote_file_to_local($this->uri_list, $options);
     if (!$filename) {
         return $uris;
     }
     $stripped = array(":", "-");
     foreach (new FileIterator($filename) as $line_number => $line) {
         if (!$line) {
             continue;
         }
         $pair = explode("--", $line);
         if (count($pair) <= 1) {
             continue;
         }
         $measurement = strtolower(trim(str_ireplace($stripped, "", $pair[0])));
         $uris[$measurement] = strtolower(trim(str_ireplace($stripped, "", $pair[1])));
     }
     unlink($filename);
     return $uris;
 }
 /**
  * Reads the tab-separated vernacular-names file and writes one VernacularName per
  * distinct (taxon, name, language) combination to the archive.
  *
  * Column 0 is the scientific name, column 1 the common name and column 3 the language.
  * Rows are skipped when the common name or canonical scientific name is empty, or when
  * $this->taxa_all holds no 'Identifier' for that scientific name.
  */
 private function get_vernacular_names()
 {
     $temp_filepath = Functions::save_remote_file_to_local($this->vernacular_path, array('timeout' => 4800, 'download_attempts' => 5));
     if (!$temp_filepath) {
         // Nothing was downloaded, so there are no names to add.
         echo "\n [{$this->vernacular_path}] unavailable! \n";
         return;
     }
     foreach (new FileIterator($temp_filepath, true) as $line_number => $line) {
         if (!$line) {
             continue;
         }
         //trims all array values in the array
         $fields = array_map('trim', explode("\t", trim($line)));
         $common_name = isset($fields[1]) ? $fields[1] : '';
         $sciname = Functions::canonical_form($fields[0]);
         if ($common_name == '' || $sciname == '') {
             continue;
         }
         $taxon_id = isset($this->taxa_all[$sciname]['Identifier']) ? $this->taxa_all[$sciname]['Identifier'] : '';
         if ($taxon_id == '') {
             continue;
         }
         $vernacular = new \eol_schema\VernacularName();
         $vernacular->taxonID = $taxon_id;
         $vernacular->vernacularName = (string) $common_name;
         $vernacular->language = self::get_language(isset($fields[3]) ? $fields[3] : null);
         // The hash of the three fields identifies a name already written to the archive.
         $vernacular_id = md5("{$vernacular->taxonID}|{$vernacular->vernacularName}|{$vernacular->language}");
         if (!isset($this->vernacular_name_ids[$vernacular_id])) {
             $this->archive_builder->write_object_to_file($vernacular);
             $this->vernacular_name_ids[$vernacular_id] = 1;
         }
     }
 }
 /**
  * Downloads a dump file and returns its distinct non-empty lines.
  *
  * @param string $url location of the dump file
  * @return array map whose keys are the lines and whose values are all "";
  *               empty when the file could not be downloaded
  */
 private function get_rows_from_dump_file($url)
 {
     $urls = array();
     $path = Functions::save_remote_file_to_local($url, $this->download_options);
     if (!$path) {
         // No local copy exists, so there is nothing to read or delete.
         return $urls;
     }
     foreach (new FileIterator($path) as $line_number => $line) {
         if ($line) {
             $urls[$line] = "";
         }
     }
     unlink($path);
     return $urls;
 }
 /**
  * Downloads Dropbox-hosted files to a local path; any other path is returned untouched.
  *
  * For a Dropbox URL, "dl=0" is switched to "dl=1" and the file extension is derived
  * from the basename without its query string.
  *
  * @param string $path file location
  * @return string the local path after a successful download; otherwise $path
  *                (with "dl=1" applied if it was a Dropbox URL)
  */
 private function download_file_accordingly($path)
 {
     $pathinfo = pathinfo($path);
     $is_dropbox = stripos($pathinfo['dirname'], "https://www.dropbox.com/") !== false;
     if (!$is_dropbox) {
         return $path;
     }
     $basename_parts = explode("?", $pathinfo['basename']);
     $extension = self::get_extension($basename_parts[0]);
     $download_options = $this->download_options;
     $download_options['file_extension'] = $extension;
     $path = str_ireplace("dl=0", "dl=1", $path);
     $newpath = Functions::save_remote_file_to_local($path, $download_options);
     if ($newpath) {
         echo "\nnewpath: [{$newpath}]\n";
         return $newpath;
     }
     return $path;
 }
示例#17
0
 /**
  * Reads the habitat mapping spreadsheet.
  *
  * @param string $spreadsheet location of the .xlsx file
  * @return array 'subsections' => subsection => array('section' => ..., 'habitats' => list of source texts),
  *               'habitats'    => source text => term;
  *               falsy entries are filtered out of both maps
  */
 private function get_spreadsheet($spreadsheet)
 {
     require_library('connectors/LifeDeskToScratchpadAPI');
     $func = new LifeDeskToScratchpadAPI();
     $subsections = array();
     $habitats = array();
     //we don't want to cache spreadsheet
     $spreadsheet_options = array("cache" => 0, "timeout" => 3600, "file_extension" => "xlsx", 'download_attempts' => 2, 'delay_in_minutes' => 1);
     $filename = Functions::save_remote_file_to_local($spreadsheet, $spreadsheet_options);
     if ($filename) {
         $arr = $func->convert_spreadsheet($filename, 0, $spreadsheet_options);
         if ($arr) {
             $row = 0;
             foreach ($arr['subsection'] as $subsection) {
                 $source_text = $arr['source text'][$row];
                 if ($subsection) {
                     $subsections[$subsection]['section'] = $arr['section'][$row];
                     $subsections[$subsection]['habitats'][] = $source_text;
                 }
                 $habitats[$source_text] = $arr['term'][$row];
                 $row++;
             }
         }
         unlink($filename);
     }
     //remove null arrays
     return array('subsections' => array_filter($subsections), 'habitats' => array_filter($habitats));
 }
示例#18
0
 /**
  * Loads the dump file of names with a blank status but an EOL page.
  *
  * @return array map whose keys are the trimmed, non-empty lines of the file and whose
  *               values are all ""; empty when the file could not be downloaded
  */
 private function get_names_with_blank_status_but_with_eol_page()
 {
     $names = array();
     $options = $this->download_options;
     $options['cache'] = 1;
     $filename = Functions::save_remote_file_to_local($this->taxa_with_blank_status_but_with_eol_page_dump_file, $options);
     if (!$filename) {
         return $names;
     }
     foreach (new FileIterator($filename) as $line_number => $line) {
         $name = trim($line);
         if ($name) {
             $names[$name] = "";
         }
     }
     unlink($filename);
     return $names;
 }
/**
 * Exports the titles listed in a tab-separated file to the "exported_titles" folder.
 *
 * Column 0 is the title id and column 3 the title. Rows without a title are ignored;
 * the first row that has one is treated as the header and skipped. Every remaining row
 * is saved twice via save_title_to_text(), once with its extra flag set.
 *
 * @param string $text_file location of the tab-separated titles file
 */
function generate_text_files($text_file)
{
    $temp_path = Functions::save_remote_file_to_local($text_file, array('cache' => 1, 'download_timeout_seconds' => 4800, 'download_wait_time' => 300000, 'expire_seconds' => false));
    if (!$temp_path) {
        return;
    }
    $folder = "exported_titles";
    initialize_text_files($folder);
    $file = Functions::file_open($temp_path, "r");
    if (!$file) {
        // feof() on an invalid handle never ends the loop (or raises a TypeError on newer PHP).
        return;
    }
    $first_row = true;
    // Reading until fgets() fails ends the loop at end-of-file and on read errors alike.
    while (($row = fgets($file)) !== false) {
        $cols = explode("\t", $row);
        $title_id = trim(isset($cols[0]) ? $cols[0] : '');
        $title = trim(isset($cols[3]) ? $cols[3] : '');
        if (!$title) {
            continue;
        }
        if ($first_row) {
            $first_row = false;
            continue;
        }
        echo "[{$title_id}]";
        save_title_to_text($title_id, $title, $folder);
        save_title_to_text($title_id, $title, $folder, true);
    }
    fclose($file);
}
 /**
  * Downloads a dump file and returns its distinct non-empty lines.
  *
  * @param string $fname location of the dump file
  * @return array list of unique lines in order of first appearance; empty when the
  *               file could not be downloaded
  */
 private function get_urls_from_dump($fname)
 {
     $seen = array();
     $filename = Functions::save_remote_file_to_local($fname, $this->download_options);
     if (!$filename) {
         return array_keys($seen);
     }
     foreach (new FileIterator($filename) as $line_number => $line) {
         if (!$line) {
             continue;
         }
         // Keying by the line collapses duplicates.
         $seen[$line] = '';
     }
     unlink($filename);
     return array_keys($seen);
 }
示例#21
0
moves the <agent>s with role = 'source' to bibliographicCitation
*/
// Pull in config/environment.php and the class used below to save the resource document.
include_once dirname(__FILE__) . "/../../config/environment.php";
require_library('ResourceDataObjectElementsSetting');
$timestart = time_elapsed();
$resource_id = 20;
// Default location of the partner XML; replaced below when the resources table holds a
// different, non-empty accesspoint_url for this resource.
$resource_path = "http://www.pensoft.net/J_FILES/EoLData/ZooKeys.xml";
$result = $GLOBALS['db_connection']->select("SELECT accesspoint_url FROM resources WHERE id={$resource_id}");
if ($result && ($row = $result->fetch_row())) {
    $resource_path_from_registry = $row[0];
    if ($resource_path != $resource_path_from_registry && $resource_path_from_registry != '') {
        $resource_path = $resource_path_from_registry;
    }
}
echo "\n processing resource: {$resource_path} \n";
// Everything below runs only when the XML could be downloaded to a local temp file.
if ($local_path = Functions::save_remote_file_to_local($resource_path, array('download_wait_time' => 1000000, 'timeout' => 600, 'download_attempts' => 5))) {
    $func = new ResourceDataObjectElementsSetting($resource_id, $local_path);
    // get_values(), remove_elements() and fill_up_values() are defined outside this excerpt.
    $dataObjects = get_values($local_path);
    $xml = remove_elements($local_path);
    $func->save_resource_document($xml);
    // From here on $resource_path points at the locally saved resource document,
    // which is re-read, filled with the collected values and saved again.
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    $xml = fill_up_values($resource_path, $dataObjects);
    $func->save_resource_document($xml);
    Functions::set_resource_status_to_force_harvest($resource_id);
    // remove tmp file
    unlink($local_path);
    debug("\n temporary file removed: [{$local_path}]");
}
// Report how long the script ran.
$elapsed_time_sec = time_elapsed() - $timestart;
echo "\n";
echo "elapsed time = {$elapsed_time_sec} seconds             \n";
示例#22
0
 /**
  * Appends to the master list every taxon from the old master list that is missing
  * from $hl_taxa.
  *
  * Each row of the old list is tab-separated: id in column 0, scientific name in column 1.
  * A row is appended as "id<TAB>name<TAB>" when $hl_taxa[name]["taxon_id"] is not set.
  *
  * @param array $hl_taxa taxa keyed by scientific name
  */
 private function reconcile_with_old_master_list($hl_taxa)
 {
     if (!($write = fopen($this->MASTER_LIST, "a"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $this->MASTER_LIST);
         return;
     }
     $options = $this->download_options;
     $options['expire_seconds'] = false;
     $temp_filepath = Functions::save_remote_file_to_local($this->OLD_MASTER_LIST, $options);
     if (!$temp_filepath) {
         // Release the handle opened above before giving up.
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't download file: " . $this->OLD_MASTER_LIST);
         fclose($write);
         return;
     }
     foreach (new FileIterator($temp_filepath, true) as $line_number => $line) {
         $split = explode("\t", trim($line));
         $sciname = isset($split[1]) ? $split[1] : '';
         if (!$sciname) {
             continue;
         }
         $id = $split[0];
         if (!isset($hl_taxa[$sciname]["taxon_id"])) {
             // echo "\n to be added: [$sciname - $id]";
             fwrite($write, $id . "\t" . $sciname . "\t" . "" . "\n");
         }
     }
     fclose($write);
 }
示例#23
0
 /**
  * Collects distinct names from the 4th column of a tab-separated file.
  *
  * For $type "genus", the first word of every multi-word value is collected; for any
  * other $type, every single-word value is collected. The first collected name is
  * dropped before returning.
  *
  * @param string $fname location of the file
  * @param string $type  "genus" or anything else
  * @return array list of names; empty when the file could not be downloaded
  */
 private function get_names_list($fname, $type)
 {
     $names = array();
     $options = $this->download_options;
     $options['cache'] = 1;
     // debug orig should be 1
     if ($filename = Functions::save_remote_file_to_local($fname, $options)) {
         foreach (new FileIterator($filename) as $line_number => $line) {
             if (!$line) {
                 continue;
             }
             $values = explode("\t", trim($line));
             // scientificName is 4th column thus index key = 3
             if (!isset($values[3])) {
                 // A row with fewer than 4 columns has no name to offer.
                 continue;
             }
             $parts = explode(" ", $values[3]);
             if ($type == "genus") {
                 if ($parts[0] && count($parts) > 1) {
                     $names[$parts[0]] = '';
                 }
             } elseif (count($parts) == 1) {
                 $names[$parts[0]] = '';
             }
         }
         unlink($filename);
     }
     $names = array_keys($names);
     // NOTE(review): presumably this drops the header row's value. For "genus" a one-word
     // header is never collected, so a real name would be dropped instead - confirm.
     array_shift($names);
     return $names;
 }
示例#24
0
s.avg_length_sp, s.range_length, s.range_length_sp, s.avg_weight, s.avg_weight_sp, s.range_weight, s.range_weight_sp, s.conservation_status_notes, 
s.conservation_status_notes_sp, s.common_name, s.common_name_sp, s.other_names, s.other_names_sp, s.refs, s.refs_sp, s.links, s.links_sp, 
s.dimorphism, s.dimorphism_sp, s.legend, s.legend_sp, s.refs, s.refs_sp, s.adaptation, s.adaptation_sp, cs.conservation_status_id, 
cs.conservation_status_title, cs.conservation_status_title_sp, cs.conservation_status_abbrev 
From nam_species s 
LEFT JOIN nam_genus g ON s.genus_id = g.genus_id 
LEFT Join nam_family f ON g.family_id = f.Family_ID 
LEFT Join nam_orders o ON f.order_id = o.order_id 
LEFT Join nam_conservation_status cs ON s.conservation_status_id = cs.id";

We've requested the partner to provide us with just a text dump of the result of the query above, but it seems they'll just
continue providing us with the Access MDB.
*/
$remote_file = "https://dl.dropboxusercontent.com/u/7597512/NorthAmericanMammals/data_from_sql_export.txt";
// $text_file = DOC_ROOT . "/update_resources/connectors/files/NorthAmericanMammals/data_from_sql_export.txt";
$text_file = Functions::save_remote_file_to_local($remote_file, array('download_wait_time' => 1000000, 'timeout' => 600));
require_library('connectors/FishBaseAPI');
$fields = array("species_id", "genus_name", "sci_name", "family_name", "order_name", "avg_length", "avg_length_sp", "range_length", "range_length_sp", "avg_weight", "avg_weight_sp", "range_weight", "range_weight_sp", "conservation_status_notes", "conservation_status_notes_sp", "common_name", "common_name_sp", "other_names", "other_names_sp", "refs", "refs_sp", "links", "links_sp", "dimorphism", "dimorphism_sp", "legend", "legend_sp", "refs(2)", "refs_sp(2)", "adaptation", "adaptation_sp", "conservation_status_id", "conservation_status_title", "conservation_status_title_sp", "conservation_status_abbrev");
$taxa = FishBaseAPI::make_array($text_file, $fields, "", array());
$resource_id = 85;
//for North American Mammals
$schema_taxa = array();
$used_taxa = array();
$ctr = 0;
foreach ($taxa as $row) {
    $ctr++;
    print "{$ctr} - ";
    $dwc_Kingdom = "Animalia";
    $dwc_Order = trim($row["order_name"]);
    $dwc_Family = trim($row["family_name"]);
    $dwc_Genus = trim($row["genus_name"]);
 /**
  * Collects the distinct values of the 2nd column (double quotes removed) from every
  * 80-column row of the MCZ images TSV and saves them as JSON to First40k.txt.
  *
  * Nothing is written when the TSV cannot be downloaded.
  */
 function get_mediaURL_for_first_40k_images()
 {
     require_library('connectors/BOLDSysAPI');
     $func = new BOLDSysAPI();
     $source = "http://localhost/eol_php_code/update_resources/connectors/files/MCZ_Harvard/MCZimages_still40k.tsv";
     $destination = DOC_ROOT . "/update_resources/connectors/files/MCZ_Harvard/First40k.txt";
     $temp_filepath = Functions::save_remote_file_to_local($source, array('timeout' => 4800, 'download_attempts' => 2));
     if (!$temp_filepath) {
         // Without the source there are no records; leave any existing destination file untouched.
         echo "\n [{$source}] unavailable! \n";
         return;
     }
     $records = array();
     foreach (new FileIterator($temp_filepath, true) as $line_number => $line) {
         if (!$line) {
             continue;
         }
         $cols = explode("\t", $line);
         if (count($cols) == 80) {
             // Keying by the value collapses duplicates.
             $records[str_replace('"', '', $cols[1])] = 1;
         }
     }
     $func::save_to_json_file(array_keys($records), $destination);
 }
示例#26
0
 /**
  * Reads the family list from the first available spreadsheet.
  *
  * Every non-empty, non-numeric entry of the "FAMILY" column (with the word "Family" and
  * double quotes removed) becomes a family. For each one, the values of the higher-rank
  * columns in the same row are stored in a family table, which is saved to
  * $this->temp_family_table_file for later use when generating the spreadsheet.
  *
  * @return array list of distinct family names
  */
 private function get_families_xlsx()
 {
     require_library('XLSParser');
     $parser = new XLSParser();
     $families = array();
     // for family table
     $family_table = array();
     $fields = array("SpK", "K", "SbK", "IK", "SpP", "P", "SbP", "IP", "PvP", "SpC", "C", "SbC", "IC", "SpO", "O");
     $dropbox_xlsx = array();
     // $dropbox_xlsx[] = "http://tiny.cc/FALO"; // from Cyndy's Dropbox
     // from Eli's Dropbox
     $dropbox_xlsx[] = "https://dl.dropboxusercontent.com/u/7597512/NCBI_GGI/ALF2015.xlsx";
     // $dropbox_xlsx[] = "http://localhost/cp/NCBIGGI/FALO.xlsx"; // local
     // $dropbox_xlsx[] = "http://localhost/cp/NCBIGGI/ALF2015.xlsx"; // local
     foreach ($dropbox_xlsx as $doc) {
         echo "\n processing [{$doc}]...\n";
         $path = Functions::save_remote_file_to_local($doc, array("timeout" => 3600, "file_extension" => "xlsx", 'download_attempts' => 2, 'delay_in_minutes' => 2, 'cache' => 1));
         if (!$path) {
             echo "\n [{$doc}] unavailable! \n";
             continue;
         }
         $arr = $parser->convert_sheet_to_array($path);
         $i = -1;
         foreach ($arr["FAMILY"] as $family) {
             // Advance for every row, skipped ones included, so $i always indexes the
             // current row in the other columns.
             $i++;
             $family = trim(str_ireplace(array("Family", '"'), "", $family));
             if (is_numeric($family) || !$family) {
                 continue;
             }
             $families[$family] = '';
             // for family table
             foreach ($fields as $field) {
                 $family_table[$family][$field] = $arr[$field][$i];
             }
         }
         unlink($path);
         // The first spreadsheet that downloads is the only one used.
         break;
     }
     //save $family_table as json to text file, to be accessed later when generating the spreadsheet
     self::initialize_dump_file($this->temp_family_table_file);
     self::save_to_dump($family_table, $this->temp_family_table_file);
     echo "\n count family rows: " . count($family_table) . "\n";
     unset($family_table);
     return array_keys($families);
 }
示例#27
0
 /**
  * Downloads a remote file to a local path.
  *
  * @param string|false $file location to download; $this->data_dump_url is used when falsy
  *                           (the parameter exists so the function can be called from elsewhere)
  * @return mixed whatever the downloader returns for the local copy
  */
 function download_and_extract_remote_file($file = false)
 {
     $remote = $file ? $file : $this->data_dump_url;
     $temp_path = Functions::save_remote_file_to_local($remote, DOWNLOAD_WAIT_TIME, 999999, 5, "xml");
     echo "\n [{$temp_path}] \n";
     // shell_exec("gzip -d " . $temp_path);
     // return str_ireplace(".xml.gz", ".xml", $temp_path);
     return $temp_path;
 }
示例#28
0
 /**
  * Runs main_loop() for the scientific name (first tab-separated column) of each
  * non-empty line in the downloaded species list.
  *
  * Only lines numbered 1 to 9999 are processed; later lines are still read but skipped.
  * Adjust the range check to work through the list in other slices.
  */
 private function process_DL_taxon_list()
 {
     $temp_filepath = Functions::save_remote_file_to_local(self::DL_MAP_SPECIES_LIST, array('timeout' => 4800, 'download_attempts' => 5));
     if (!$temp_filepath) {
         echo "\n\nExternal file not available. Program will terminate.\n";
         return;
     }
     $m = 10000;
     $line_count = 0;
     foreach (new FileIterator($temp_filepath, true) as $line_number => $line) {
         $line_count++;
         if (!$line) {
             continue;
         }
         // Current slice: the first $m - 1 lines. Other slices would be
         // [$m, $m*2), [$m*2, $m*3), and so on.
         $in_slice = $line_count >= 1 && $line_count < $m;
         if (!$in_slice) {
             continue;
         }
         $columns = explode("\t", $line);
         $sciname = trim($columns[0]);
         echo "\n[{$sciname}]\n";
         self::main_loop($sciname);
         // if($line_count >= 5) break; //debug
     }
 }
示例#29
0
<?php

namespace php_active_record;

/* connector for ZooKeys
estimated execution time: 1 minute
Connector reads the XML provided by partner and 
moves the <agent>s with role = 'source' to bibliographicCitation
*/
// Pull in config/environment.php and the class used below to save the resource document.
include_once dirname(__FILE__) . "/../../config/environment.php";
require_library('ResourceDataObjectElementsSetting');
$timestart = time_elapsed();
$resource_id = 20;
// The second argument is the partner's default XML location.
// NOTE(review): presumably the helper returns the registry's accesspoint URL when one is set - confirm.
$resource_path = Functions::get_accesspoint_url_if_available($resource_id, "http://www.pensoft.net/J_FILES/EoLData/ZooKeys.xml");
echo "\n processing resource: {$resource_path} \n";
// Everything inside the if runs only when the XML could be downloaded to a local temp file.
if ($local_path = Functions::save_remote_file_to_local($resource_path, array('cache' => 1, 'download_wait_time' => 1000000, 'timeout' => 86400, 'download_attempts' => 3, 'delay_in_minutes' => 2))) {
    $func = new ResourceDataObjectElementsSetting($resource_id, $local_path);
    // get_values(), remove_elements() and fill_up_values() are defined outside this excerpt.
    $dataObjects = get_values($local_path);
    $xml = remove_elements($local_path);
    $func->save_resource_document($xml);
    // From here on $resource_path points at the locally saved resource document,
    // which is re-read, filled with the collected values and saved again.
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    $xml = fill_up_values($resource_path, $dataObjects);
    $func->save_resource_document($xml);
    // remove the downloaded temp file
    unlink($local_path);
}
//start creating the archive file using the generated EOL XML file above
require_library('connectors/ConvertEOLtoDWCaAPI');
$resource_id = 20;
// Parameters for the XML-to-archive conversion; the input is the resource document saved above.
$params["eol_xml_file"] = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
$params["filename"] = "no need to mention here.xml";
$params["dataset"] = "Pensoft XML files";