/**
 * Downloads the ID-keys file (tab-separated text) and groups the keys by taxon name.
 *
 * Column 0 of each line is the taxon name; columns 1 to 3 each hold an
 * optional key. Empty cells are ignored.
 *
 * @return array|null array(name => array(key, ...)), or null when the file
 *                    could not be downloaded or produced one name or fewer.
 */
private function process_keys_spreadsheet()
{
    $taxa_objects = array();
    $filename = Functions::save_remote_file_to_local(self::ID_KEYS_FILE, array('timeout' => 4800, 'download_attempts' => 5));
    // A failed download yields a falsy path; never hand that to FileIterator.
    if (!$filename) {
        echo "\n\nExternal file not available. Program will terminate.\n";
        return;
    }
    print "\n[{$filename}]\n";
    foreach (new FileIterator($filename, true) as $line_number => $line) {
        $fields = explode("\t", trim($line));
        $name = trim($fields[0]);
        print "\n name: {$name}";
        // Same truthiness test as before, so a cell containing "0" is still skipped.
        for ($column = 1; $column <= 3; $column++) {
            $id_key = isset($fields[$column]) ? trim($fields[$column]) : '';
            if ($id_key) {
                $taxa_objects[$name][] = $id_key;
            }
        }
    }
    if (count($taxa_objects) <= 1) {
        echo "\n\nInvalid text file. Program will terminate.\n";
        return;
    }
    return $taxa_objects;
}
/**
 * Reads the list of regions to search and builds the archive from the
 * records returned for each region.
 *
 * The list is split into sections by empty lines: section 1 holds countries,
 * section 2 US states, section 3 Canadian provinces. Every region gets a
 * 'present' lookup; countries additionally get an 'endemic' lookup.
 */
function get_all_taxa()
{
    // TODO: [next] button is not processed
    ini_set("auto_detect_line_endings", true);
    $filename = Functions::save_remote_file_to_local($this->strings_to_search, array('cache' => 1, 'resource_id' => '959')); // resource_id here is just to have the cache stored in that folder
    $types = array(
        1 => 'country records',
        2 => 'US state records',
        3 => 'Canadian province records',
    );
    if ($filename) {
        $i = 1;
        foreach (new FileIterator($filename) as $line_number => $region) {
            if ($region == "") {
                $i++; // an empty line starts the next section
                continue;
            }
            // More separators than known sections: nothing below can be mapped
            // to a record type, so stop instead of building URLs from an undefined type.
            if (!isset($types[$i])) {
                echo "\n investigate: region [{$region}] is beyond the last known section\n";
                break;
            }
            // $region = 'China'; //debug
            $type = $types[$i];
            $url = $this->pages[$type] . $region;
            if ($records = self::process_html($url, 'pre')) {
                self::create_archive($records, $region, 'present');
            }
            if ($type == 'country records') {
                $url = $this->pages['endemic'] . $region;
                if ($records = self::process_html($url, 'end')) {
                    self::create_archive($records, $region, 'endemic');
                }
            }
        }
        unlink($filename);
    } else {
        // Download failed: there is no file to iterate or remove.
        echo "\n [{$this->strings_to_search}] unavailable! \n";
    }
    // Finalize unconditionally, as the original did.
    $this->archive_builder->finalize(TRUE);
}
/**
 * Downloads the data dump, parses each row against the known headers and
 * hands every valid row with a scientific name to parse_record_element(),
 * then writes the archive.
 *
 * @param string|false $data_dump_url optional override for $this->data_dump_url
 */
function get_all_taxa($data_dump_url = false)
{
    $labels = self::get_headers();
    if ($data_dump_url) {
        $this->data_dump_url = $data_dump_url;
    }

    $download_options = array('timeout' => 4800, 'download_attempts' => 5);
    $temp_filepath = Functions::save_remote_file_to_local($this->data_dump_url, $download_options);
    if (!$temp_filepath) {
        return;
    }

    $invalid_rows = 0;
    $processed = 0;
    foreach (new FileIterator($temp_filepath, true) as $line_number => $line) {
        if (!$line) {
            continue;
        }
        $record = self::prepare_row_data(trim($line), $labels);
        if (count($record) != 80) {
            // means invalid CSV row, needs attention by provider
            $invalid_rows++;
            echo "\n investigate: invalid CSV row, needs attention by provider [" . count($record) . "]";
            print_r($record);
            continue;
        }
        if (@$record['SCIENTIFIC_NAME']) {
            $processed++;
            debug("{$processed}. " . $record['SCIENTIFIC_NAME'] . " [" . count($record) . "]\n");
            self::parse_record_element($record);
        }
    }
    debug("\n not 80: {$invalid_rows} \n");
    $this->create_archive();
}
/**
 * Downloads the source spreadsheet and converts it to a tab-delimited text
 * file in a fresh temp directory. On success $this->source_file_path is
 * repointed at the converted file; on a failed download it is left untouched.
 */
private function prepare_files()
{
    $download_options = array("cache" => 1, "timeout" => 3600, "file_extension" => "xlsx", 'download_attempts' => 2, 'delay_in_minutes' => 2);
    $input_file = Functions::save_remote_file_to_local($this->source_file_path, $download_options);
    if (!$input_file) {
        return;
    }

    $temp_dir = create_temp_dir() . "/";
    $this->source_file_path = $temp_dir . "spg_falo.txt";
    self::convert_xlsx_to_tab($input_file, $this->source_file_path);
    unlink($input_file); // the downloaded spreadsheet is no longer needed
}
/**
 * Fetch the source data from $this->source_url into a local file and record
 * its path in $this->source_file_path.
 *
 * @throws \Exception when no file exists at the returned path
 */
private function download_source_data_file()
{
    $start = microtime(true);
    debug("Downloading source file.");

    $extension = pathinfo($this->source_url, PATHINFO_EXTENSION);
    $download_options = array('file_extension' => $extension, 'cache' => true, 'timeout' => 172800);
    $local_path = Functions::save_remote_file_to_local($this->source_url, $download_options);
    $this->source_file_path = $local_path;

    if (file_exists($local_path)) {
        $this->profile($start);
        return;
    }
    throw new \Exception('Error downloading source file.');
}
/**
 * Downloads the XLS data dump and turns its rows into records with missing
 * names and parent ids filled in.
 *
 * Note that $this->data_dump_url is overwritten with the download result.
 *
 * @return array|null the prepared records, or null when the download failed
 */
private function parse_xls()
{
    $download_options = array('download_wait_time' => 1000000, 'timeout' => 600, 'download_attempts' => 5, 'file_extension' => 'xls');
    $this->data_dump_url = Functions::save_remote_file_to_local($this->data_dump_url, $download_options);
    if (!$this->data_dump_url) {
        return;
    }

    require_library('XLSParser');
    $parser = new XLSParser();
    debug("\n reading: " . $this->data_dump_url . "\n");

    $sheet = $parser->convert_sheet_to_array($this->data_dump_url);
    $records = $parser->prepare_data($sheet, "single", "SCIENTIFIC NAME", "SCIENTIFIC NAME", "CATEGORY", "ENGLISH NAME", "RANGE", "ORDER", "FAMILY", "EXTINCT", "EXTINCT_YEAR");
    $records = self::fill_in_parent_id(self::fill_in_missing_names($records));

    debug("\n" . count($records));
    return $records;
}
/**
 * Downloads the XLSX data dump (cached), reads its first sheet and turns the
 * rows into records with upper-case field aliases, missing names and parent
 * ids filled in.
 *
 * Note that $this->data_dump_url is overwritten with the download result.
 *
 * @return array|null the prepared records, or null when the download failed
 */
private function parse_xls()
{
    $download_options = array('cache' => 1, 'download_wait_time' => 1000000, 'timeout' => 600, 'download_attempts' => 5, 'file_extension' => 'xlsx');
    $this->data_dump_url = Functions::save_remote_file_to_local($this->data_dump_url, $download_options);
    if (!$this->data_dump_url) {
        return;
    }

    require_library('XLSParser');
    $parser = new XLSParser();
    debug("\n reading: " . $this->data_dump_url . "\n");

    $sheet = $parser->convert_sheet_to_array($this->data_dump_url, 0);
    $records = $parser->prepare_data($sheet, "single", "Scientific name", "Scientific name", "Category", "English name", "Range", "Order", "Family", "Extinct", "Extinction Year");
    $records = self::add_uppercase_fields($records);
    $records = self::fill_in_missing_names($records);
    $records = self::fill_in_parent_id($records);

    debug("\n" . count($records));
    return $records;
}
/**
 * Processes every configured spreadsheet: each row of the sheet becomes one
 * record that is cleaned, given an md5 taxon id and passed on to the taxon,
 * image and data writers. The archive is finalized once all spreadsheets
 * have been handled.
 */
function get_all_taxa()
{
    require_library('XLSParser');
    $docs = count($this->spreadsheets);
    $doc_count = 0;

    foreach ($this->spreadsheets as $doc) {
        $doc_count++;
        echo "\n processing [{$doc}]...\n";

        $download_options = array("cache" => 1, "timeout" => 3600, "file_extension" => "xls", 'download_attempts' => 2, 'delay_in_minutes' => 2);
        $path = Functions::save_remote_file_to_local($this->url_path . $doc, $download_options);
        if (!$path) {
            echo "\n [{$doc}] unavailable! \n";
            continue;
        }

        $parser = new XLSParser();
        $arr = $parser->convert_sheet_to_array($path);
        $fields = array_keys($arr);
        $rows = count($arr["Species"]);
        echo "\n total {$path}: {$rows} \n";

        // The sheet comes back column-wise; $row_index picks the same row out of every column.
        $row_index = -1;
        foreach ($arr["Species"] as $unused_cell) {
            $row_index++;

            $rec = array();
            foreach ($fields as $field) {
                $rec[$field] = $arr[$field][$row_index];
            }
            $rec = array_map('trim', $rec);

            /* breakdown when caching: restrict $row_index to a window, e.g.
            $cont = false;
            // if($row_index >= 1 && $row_index < 6000) $cont = true;
            // if($row_index >= 6000 && $row_index < 9000) $cont = true;
            if(!$cont) continue;
            */

            print "\n [{$doc_count} of {$docs}][" . ($row_index + 1) . " of {$rows}] " . $rec["Species"] . "\n";

            $rec = self::clean_taxon_name($rec);
            $name_without_parens = trim(preg_replace('/\\s*\\([^)]*\\)/', '', $rec["sciname"])); // remove parenthesis
            $rec["taxon_id"] = md5(str_replace(" ", "_", $name_without_parens));

            self::create_instances_from_taxon_object($rec);
            self::prepare_images($rec);
            self::prepare_data($rec);
        }
        unlink($path);
    }

    $this->archive_builder->finalize(TRUE);
}
/**
 * Converts an EOL XML resource into an archive.
 *
 * @param array $params     reads 'eol_xml_file' and 'filename'; 'path' (and,
 *                          for a plain XML file, 'filename') are set here
 *                          before the array is passed to convert_xml()
 * @param bool $xml_file_YN true when 'eol_xml_file' is a plain XML file,
 *                          false when it is an archive to be extracted first
 */
function export_xml_to_archive($params, $xml_file_YN = false)
{
    if ($xml_file_YN) {
        // Plain XML: download it and convert it in place.
        $params['path'] = DOC_ROOT . "tmp/";
        $download_options = array('file_extension' => "xml", 'cache' => 0, "timeout" => 7200, "download_attempts" => 2, "delay_in_minutes" => 2); // debug - cache should be 0 zero in normal operation
        $local_xml_file = Functions::save_remote_file_to_local($params['eol_xml_file'], $download_options);
        $params['filename'] = pathinfo($local_xml_file, PATHINFO_BASENAME);
        self::convert_xml($params);
        $this->archive_builder->finalize(TRUE);
        unlink($local_xml_file);
        return;
    }

    // Archive: extract it to a temp dir, convert, then remove the temp dir.
    require_library('connectors/INBioAPI');
    $func = new INBioAPI();
    // "expire_seconds" -- false => won't expire; 0 => expires now
    $extract_options = array("timeout" => 7200, "expire_seconds" => 0);
    $paths = $func->extract_archive_file($params["eol_xml_file"], $params["filename"], $extract_options);
    // debug: print_r($paths);
    $params["path"] = $paths["temp_dir"];
    self::convert_xml($params);
    $this->archive_builder->finalize(TRUE);
    recursive_rmdir($paths["temp_dir"]); // remove temp dir
}
/**
 * Downloads a spreadsheet and returns its content as an array.
 *
 * @param string $spreadsheet location passed to the downloader
 * @param mixed  $worksheet   worksheet selector forwarded to the parser
 * @return array|false whatever the parser produced, or false when the
 *                     spreadsheet could not be downloaded
 */
public function convert_spreadsheet($spreadsheet, $worksheet = null)
{
    require_library('XLSParser');
    $parser = new XLSParser();

    $path = Functions::save_remote_file_to_local($spreadsheet, $this->spreadsheet_options);
    if (!$path) {
        echo "\n [{$spreadsheet}] unavailable! \n";
        return false;
    }

    $sheet = $parser->convert_sheet_to_array($path, $worksheet);
    unlink($path);
    return $sheet;
}
/**
 * Downloads one of the CSV services and dispatches each data row to the
 * matching process_taxon* handler.
 *
 * The first line is treated as the header. Rows whose column count differs
 * from the expected count for $type are printed for investigation and skipped.
 *
 * @param string $type  "collection", "occurrence" or "taxon"
 * @param array  $taxon for "collection"/"occurrence": must carry "rank",
 *                      "orig_no" and "taxon_name"; only species and
 *                      subspecies are processed
 */
private function parse_csv_file($type, $taxon = array())
{
    echo "\n Processing {$type}...\n";

    $expected_field_counts = array("collection" => 68, "occurrence" => 25, "taxon" => 32);
    if (!isset($expected_field_counts[$type])) {
        // Previously an unknown type fell through with $path undefined.
        echo "\n investigate: unknown type [{$type}]\n";
        return;
    }
    $no_of_fields = $expected_field_counts[$type];

    $taxon_id = null;
    $url = null;
    if ($type == "taxon") {
        $path = Functions::save_remote_file_to_local($this->service["taxon"], array("timeout" => 999999, "cache" => 0)); // debug cache should be 0; only when debugging should be 1
    } else {
        // "collection" and "occurrence" share the same per-taxon download logic.
        $rank = isset($taxon["rank"]) ? $taxon["rank"] : null;
        if (!in_array($rank, array("species", "subspecies"))) {
            return;
        }
        $taxon_id = $taxon["orig_no"];
        $url = $this->service[$type] . $taxon["taxon_name"];
        $path = Functions::save_remote_file_to_local($url, $this->download_options);
    }
    if (!$path) {
        echo "\n investigate: download failed for [{$type}]\n";
        return;
    }

    $fields = array();
    $j = 0;
    foreach (new FileIterator($path) as $line_number => $line) {
        $rec = array();
        $j++;
        if ($j % 25000 == 0) {
            echo "\n{$j}. [{$type}]";
        }
        // if($j >= 1000) break; //debug
        if (!$line) {
            continue;
        }
        $line = trim($line);
        if ($j == 1) {
            $fields = explode(",", $line); // header row
            continue;
        }

        // str_getcsv honours quoted commas; the former explode() result was discarded anyway.
        $values = str_getcsv($line);
        if (count($values) == $no_of_fields) {
            $i = 0;
            foreach ($values as $value) {
                $field = str_replace('"', '', $fields[$i]);
                $rec[$field] = str_replace('"', '', $value);
                $i++;
            }
        } else {
            print_r($values);
            echo "\n investigate rec is not {$no_of_fields}";
        }

        if ($rec) {
            if ($type == "collection") {
                self::process_taxon_collection($rec, $taxon_id, $url);
            } elseif ($type == "occurrence") {
                self::process_taxon_occurrence($rec, $taxon_id, $url);
            } else {
                self::process_taxon($rec);
            }
        }
    }
    unlink($path);
}
/**
 * Splits the downloaded species list into batch files of $divisor lines each
 * (batch_001.txt, batch_002.txt, ...) under $this->TEMP_FILE_PATH and writes
 * work_list.txt naming every batch created.
 *
 * @param int $divisor number of non-empty lines per batch file
 */
private function divide_text_file($divisor)
{
    $temp_filepath = Functions::save_remote_file_to_local(self::DL_MAP_SPECIES_LIST, array('timeout' => 4800, 'download_attempts' => 5));
    if (!$temp_filepath) {
        echo "\n\nExternal file not available. Program will terminate.\n";
        return;
    }

    $lines_in_batch = 0;
    $batch_count = 0;
    $buffer = "";
    print "\n";
    foreach (new FileIterator($temp_filepath, true) as $line_number => $line) {
        //if($lines_in_batch >= 5) break; //debug
        if (!$line) {
            continue;
        }
        $line .= "\n"; // FileIterator removes the carriage-return char
        $lines_in_batch++;
        $buffer .= $line;
        print "{$lines_in_batch}. {$line}\n";
        if ($lines_in_batch != $divisor) {
            continue;
        }

        // Batch is full: flush it to its own file and start over.
        print "\n";
        $batch_count++;
        $batch_label = Functions::format_number_with_leading_zeros($batch_count, 3);
        $OUT = Functions::file_open($this->TEMP_FILE_PATH . "batch_" . $batch_label . ".txt", "w");
        if (!$OUT) {
            return;
        }
        fwrite($OUT, $buffer);
        fclose($OUT);
        $buffer = "";
        $lines_in_batch = 0;
    }

    // last writes: whatever did not fill a whole batch
    if ($buffer) {
        $batch_count++;
        $batch_label = Functions::format_number_with_leading_zeros($batch_count, 3);
        $OUT = Functions::file_open($this->TEMP_FILE_PATH . "batch_" . $batch_label . ".txt", "w");
        if (!$OUT) {
            return;
        }
        fwrite($OUT, $buffer);
        fclose($OUT);
    }

    // create work_list
    $work_list = "";
    for ($batch_no = 1; $batch_no <= $batch_count; $batch_no++) {
        $work_list .= "batch_" . Functions::format_number_with_leading_zeros($batch_no, 3) . "\n";
    }
    $fp = Functions::file_open($this->TEMP_FILE_PATH . "work_list.txt", "w");
    if ($fp) {
        fwrite($fp, $work_list);
        fclose($fp);
    }
}
/**
 * Builds a lookup from the URI list file. Each line of the form
 * "left -- right" contributes one entry; both sides are stripped of ":" and
 * "-", trimmed and lower-cased. Lines without "--" are ignored.
 *
 * @return array array(left => right); empty when the download fails
 */
private function get_uris()
{
    $options = $this->download_options;
    $options["cache"] = 1;
    // $options["expire_seconds"] = 0;

    $uris = array();
    $filename = Functions::save_remote_file_to_local($this->uri_list, $options);
    if (!$filename) {
        return $uris;
    }

    $strip = array(":", "-");
    foreach (new FileIterator($filename) as $line_number => $line) {
        if (!$line) {
            continue;
        }
        $parts = explode("--", $line);
        if (count($parts) <= 1) {
            continue;
        }
        $measurement = strtolower(trim(str_ireplace($strip, "", $parts[0])));
        $value = strtolower(trim(str_ireplace($strip, "", $parts[1])));
        $uris[$measurement] = $value;
    }
    unlink($filename);
    return $uris;
}
/**
 * Downloads the tab-separated vernacular-names file and writes one
 * VernacularName per distinct (taxon, name, language) combination.
 *
 * Column 0 is the scientific name, column 1 the common name and column 3 the
 * language. Rows are skipped when the common name is empty or the canonical
 * scientific name has no 'Identifier' in $this->taxa_all.
 */
private function get_vernacular_names()
{
    $temp_filepath = Functions::save_remote_file_to_local($this->vernacular_path, array('timeout' => 4800, 'download_attempts' => 5));
    // A failed download yields a falsy path; never hand that to FileIterator.
    if (!$temp_filepath) {
        echo "\n [{$this->vernacular_path}] unavailable! \n";
        return;
    }

    foreach (new FileIterator($temp_filepath, true) as $line_number => $line) {
        if (!$line) {
            continue;
        }
        $fields = array_map('trim', explode("\t", trim($line))); // trims all array values in the array
        $common_name = isset($fields[1]) ? $fields[1] : '';
        $sciname = Functions::canonical_form(isset($fields[0]) ? $fields[0] : '');
        $taxon_id = isset($this->taxa_all[$sciname]['Identifier']) ? $this->taxa_all[$sciname]['Identifier'] : '';
        if ($common_name == '' || $taxon_id == '' || $sciname == '') {
            continue;
        }

        $vernacular = new \eol_schema\VernacularName();
        $vernacular->taxonID = $taxon_id;
        $vernacular->vernacularName = (string) $common_name;
        $vernacular->language = self::get_language(isset($fields[3]) ? $fields[3] : null);

        // Write each (taxon, name, language) combination only once.
        $vernacular_id = md5("{$vernacular->taxonID}|{$vernacular->vernacularName}|{$vernacular->language}");
        if (!isset($this->vernacular_name_ids[$vernacular_id])) {
            $this->archive_builder->write_object_to_file($vernacular);
            $this->vernacular_name_ids[$vernacular_id] = 1;
        }
    }
}
/**
 * Downloads a dump file and returns its non-empty lines as array keys
 * (values are empty strings), so duplicate lines collapse into one entry.
 *
 * @param string $url location of the dump file
 * @return array array(line => ""); empty when the download fails
 */
private function get_rows_from_dump_file($url)
{
    $urls = array();
    $path = Functions::save_remote_file_to_local($url, $this->download_options);
    // A failed download yields a falsy path: nothing to iterate or remove.
    if (!$path) {
        return $urls;
    }
    foreach (new FileIterator($path) as $line_number => $line) {
        if ($line) {
            $urls[$line] = "";
        }
    }
    unlink($path);
    return $urls;
}
/**
 * For Dropbox share links, downloads the file (forcing "dl=1") and returns
 * the local copy; any other path is returned unchanged.
 *
 * @param string $path file location
 * @return string the downloaded path on success; otherwise $path, which for a
 *                failed Dropbox download already carries the "dl=1" rewrite
 */
private function download_file_accordingly($path)
{
    $pathinfo = pathinfo($path);
    if (stripos($pathinfo['dirname'], "https://www.dropbox.com/") === false) {
        return $path;
    }

    // The basename still carries the query string; the extension comes from the part before "?".
    $basename_parts = explode("?", $pathinfo['basename']);
    $download_options = $this->download_options;
    $download_options['file_extension'] = self::get_extension($basename_parts[0]);

    $path = str_ireplace("dl=0", "dl=1", $path);
    $newpath = Functions::save_remote_file_to_local($path, $download_options);
    if ($newpath) {
        echo "\nnewpath: [{$newpath}]\n";
        return $newpath;
    }
    return $path;
}
/**
 * Reads the mapping spreadsheet and returns two lookups built from its
 * 'subsection', 'section', 'source text' and 'term' columns.
 *
 * @param string $spreadsheet location of the spreadsheet
 * @return array array('subsections' => array(subsection => array('section' => ..., 'habitats' => array(...))),
 *                     'habitats'    => array(source text => term)),
 *               both with empty entries filtered out
 */
private function get_spreadsheet($spreadsheet)
{
    require_library('connectors/LifeDeskToScratchpadAPI');
    $func = new LifeDeskToScratchpadAPI();

    $subsections = array();
    $habitats = array();
    $spreadsheet_options = array("cache" => 0, "timeout" => 3600, "file_extension" => "xlsx", 'download_attempts' => 2, 'delay_in_minutes' => 1); // we don't want to cache spreadsheet

    $filename = Functions::save_remote_file_to_local($spreadsheet, $spreadsheet_options);
    if ($filename) {
        $arr = $func->convert_spreadsheet($filename, 0, $spreadsheet_options);
        if ($arr) {
            // Columns are parallel arrays; $row selects the same row in each.
            $row = 0;
            foreach ($arr['subsection'] as $subsection) {
                $source_text = $arr['source text'][$row];
                if ($subsection) {
                    $subsections[$subsection]['section'] = $arr['section'][$row];
                    $subsections[$subsection]['habitats'][] = $source_text;
                }
                $habitats[$source_text] = $arr['term'][$row];
                $row++;
            }
        }
        unlink($filename);
    }

    return array(
        'subsections' => array_filter($subsections), // remove null arrays
        'habitats' => array_filter($habitats), // remove null arrays
    );
}
/**
 * Loads the dump file of names and returns them as array keys, one per
 * non-blank trimmed line.
 *
 * @return array array(name => ""); empty when the download fails
 */
private function get_names_with_blank_status_but_with_eol_page()
{
    $options = $this->download_options;
    $options['cache'] = 1;

    $names = array();
    $filename = Functions::save_remote_file_to_local($this->taxa_with_blank_status_but_with_eol_page_dump_file, $options);
    if (!$filename) {
        return $names;
    }

    foreach (new FileIterator($filename) as $line_number => $line) {
        $name = trim($line);
        if ($name) {
            $names[$name] = "";
        }
    }
    unlink($filename);
    return $names;
}
/**
 * Downloads a tab-separated titles file and exports every title to text
 * files under the "exported_titles" folder.
 *
 * Column 0 is the title id and column 3 the title. Rows without a title are
 * skipped; the first row that has one is treated as the header and skipped too.
 *
 * @param string $text_file location of the tab-separated file
 */
function generate_text_files($text_file)
{
    // NOTE(review): this passes 'download_timeout_seconds' - confirm the downloader honours that key.
    $temp_path = Functions::save_remote_file_to_local($text_file, array('cache' => 1, 'download_timeout_seconds' => 4800, 'download_wait_time' => 300000, 'expire_seconds' => false));
    if (!$temp_path) {
        return;
    }

    $folder = "exported_titles";
    initialize_text_files($folder);

    $file = Functions::file_open($temp_path, "r");
    if (!$file) {
        // Without this guard the read loop below would be handed an invalid handle.
        return;
    }

    $first_row = true;
    // Loop on fgets() itself so EOF and read errors both end the loop.
    while (($row = fgets($file)) !== false) {
        $cols = explode("\t", $row);
        $title_id = isset($cols[0]) ? trim($cols[0]) : '';
        $title = isset($cols[3]) ? trim($cols[3]) : '';
        if (!$title) {
            continue;
        }
        if ($first_row) {
            $first_row = false; // header row
            continue;
        }
        // print_r($cols); exit;
        echo "[{$title_id}]";
        save_title_to_text($title_id, $title, $folder);
        save_title_to_text($title_id, $title, $folder, true);
    }
    fclose($file);
}
/**
 * Downloads a dump file and returns its distinct non-empty lines.
 *
 * @param string $fname location of the dump file
 * @return array list of unique lines in first-seen order; empty when the
 *               download fails
 */
private function get_urls_from_dump($fname)
{
    $filename = Functions::save_remote_file_to_local($fname, $this->download_options);
    if (!$filename) {
        return array_keys(array());
    }

    // Collect lines as keys so duplicates collapse.
    $seen = array();
    foreach (new FileIterator($filename) as $line_number => $line) {
        if (!$line) {
            continue;
        }
        $seen[$line] = '';
    }
    unlink($filename);
    return array_keys($seen);
}
moves the <agent>s with role = 'source' to bibliographicCitation */ include_once dirname(__FILE__) . "/../../config/environment.php"; require_library('ResourceDataObjectElementsSetting'); $timestart = time_elapsed(); $resource_id = 20; $resource_path = "http://www.pensoft.net/J_FILES/EoLData/ZooKeys.xml"; $result = $GLOBALS['db_connection']->select("SELECT accesspoint_url FROM resources WHERE id={$resource_id}"); if ($result && ($row = $result->fetch_row())) { $resource_path_from_registry = $row[0]; if ($resource_path != $resource_path_from_registry && $resource_path_from_registry != '') { $resource_path = $resource_path_from_registry; } } echo "\n processing resource: {$resource_path} \n"; if ($local_path = Functions::save_remote_file_to_local($resource_path, array('download_wait_time' => 1000000, 'timeout' => 600, 'download_attempts' => 5))) { $func = new ResourceDataObjectElementsSetting($resource_id, $local_path); $dataObjects = get_values($local_path); $xml = remove_elements($local_path); $func->save_resource_document($xml); $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml"; $xml = fill_up_values($resource_path, $dataObjects); $func->save_resource_document($xml); Functions::set_resource_status_to_force_harvest($resource_id); // remove tmp file unlink($local_path); debug("\n temporary file removed: [{$local_path}]"); } $elapsed_time_sec = time_elapsed() - $timestart; echo "\n"; echo "elapsed time = {$elapsed_time_sec} seconds \n";
/**
 * Appends to the master list every entry of the old master list whose
 * scientific name has no "taxon_id" in $hl_taxa.
 *
 * The old list is tab-separated: column 0 is the id, column 1 the scientific
 * name. Appended lines have the form "id<TAB>sciname<TAB><NEWLINE>".
 *
 * @param array $hl_taxa array(sciname => array("taxon_id" => ...))
 */
private function reconcile_with_old_master_list($hl_taxa)
{
    if (!($write = fopen($this->MASTER_LIST, "a"))) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $this->MASTER_LIST);
        return;
    }

    $options = $this->download_options;
    $options['expire_seconds'] = false;
    $temp_filepath = Functions::save_remote_file_to_local($this->OLD_MASTER_LIST, $options);
    if (!$temp_filepath) {
        // Nothing to reconcile; release the append handle before leaving.
        fclose($write);
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't download file: " . $this->OLD_MASTER_LIST);
        return;
    }

    foreach (new FileIterator($temp_filepath, true) as $line_number => $line) {
        $split = explode("\t", trim($line));
        if (empty($split[1])) {
            continue; // no scientific name on this line
        }
        $id = $split[0];
        $sciname = $split[1];
        if (!isset($hl_taxa[$sciname]["taxon_id"])) {
            // echo "\n to be added: [$sciname - $id]";
            fwrite($write, $id . "\t" . $sciname . "\t" . "" . "\n");
        }
    }
    fclose($write);
}
/**
 * Extracts names from the 4th tab-separated column (scientificName) of a
 * names file.
 *
 * For $type "genus" the first word of every multi-word name is collected;
 * for any other type only single-word names are collected. The first
 * collected name is dropped before returning.
 *
 * @param string $fname location of the names file
 * @param string $type  "genus" or anything else
 * @return array list of distinct names minus the first one collected
 */
private function get_names_list($fname, $type)
{
    $options = $this->download_options;
    $options['cache'] = 1; // debug orig should be 1

    $collected = array();
    $filename = Functions::save_remote_file_to_local($fname, $options);
    if ($filename) {
        foreach (new FileIterator($filename) as $line_number => $line) {
            if (!$line) {
                continue;
            }
            $values = explode("\t", trim($line));
            $parts = explode(" ", $values[3]); // scientificName is 4th column thus index key = 3
            $word_count = count($parts);
            if ($type == "genus") {
                if ($word_count > 1 && @$parts[0]) {
                    $collected[$parts[0]] = '';
                }
            } elseif ($word_count == 1) {
                $collected[$parts[0]] = '';
            }
        }
        unlink($filename);
    }

    $names = array_keys($collected);
    array_shift($names);
    return $names;
}
s.avg_length_sp, s.range_length, s.range_length_sp, s.avg_weight, s.avg_weight_sp, s.range_weight, s.range_weight_sp, s.conservation_status_notes, s.conservation_status_notes_sp, s.common_name, s.common_name_sp, s.other_names, s.other_names_sp, s.refs, s.refs_sp, s.links, s.links_sp, s.dimorphism, s.dimorphism_sp, s.legend, s.legend_sp, s.refs, s.refs_sp, s.adaptation, s.adaptation_sp, cs.conservation_status_id, cs.conservation_status_title, cs.conservation_status_title_sp, cs.conservation_status_abbrev From nam_species s LEFT JOIN nam_genus g ON s.genus_id = g.genus_id LEFT Join nam_family f ON g.family_id = f.Family_ID LEFT Join nam_orders o ON f.order_id = o.order_id LEFT Join nam_conservation_status cs ON s.conservation_status_id = cs.id"; We've requested the partner to provide us with just a text dump of the result of the query above, but it seems they'll just continue providing us with the Access MDB. */ $remote_file = "https://dl.dropboxusercontent.com/u/7597512/NorthAmericanMammals/data_from_sql_export.txt"; // $text_file = DOC_ROOT . 
"/update_resources/connectors/files/NorthAmericanMammals/data_from_sql_export.txt"; $text_file = Functions::save_remote_file_to_local($remote_file, array('download_wait_time' => 1000000, 'timeout' => 600)); require_library('connectors/FishBaseAPI'); $fields = array("species_id", "genus_name", "sci_name", "family_name", "order_name", "avg_length", "avg_length_sp", "range_length", "range_length_sp", "avg_weight", "avg_weight_sp", "range_weight", "range_weight_sp", "conservation_status_notes", "conservation_status_notes_sp", "common_name", "common_name_sp", "other_names", "other_names_sp", "refs", "refs_sp", "links", "links_sp", "dimorphism", "dimorphism_sp", "legend", "legend_sp", "refs(2)", "refs_sp(2)", "adaptation", "adaptation_sp", "conservation_status_id", "conservation_status_title", "conservation_status_title_sp", "conservation_status_abbrev"); $taxa = FishBaseAPI::make_array($text_file, $fields, "", array()); $resource_id = 85; //for North American Mammals $schema_taxa = array(); $used_taxa = array(); $ctr = 0; foreach ($taxa as $row) { $ctr++; print "{$ctr} - "; $dwc_Kingdom = "Animalia"; $dwc_Order = trim($row["order_name"]); $dwc_Family = trim($row["family_name"]); $dwc_Genus = trim($row["genus_name"]);
/**
 * Reads the tab-separated MCZ image listing and saves, as JSON, the distinct
 * values of column 1 (double quotes removed) from every 80-column row.
 *
 * When the listing cannot be downloaded an empty list is saved.
 */
function get_mediaURL_for_first_40k_images()
{
    require_library('connectors/BOLDSysAPI');
    $func = new BOLDSysAPI();
    $source = "http://localhost/eol_php_code/update_resources/connectors/files/MCZ_Harvard/MCZimages_still40k.tsv";
    $destination = DOC_ROOT . "/update_resources/connectors/files/MCZ_Harvard/First40k.txt";

    // Defined up front: it was previously only created inside the
    // successful-download branch but used unconditionally below.
    $records = array();
    if ($temp_filepath = Functions::save_remote_file_to_local($source, array('timeout' => 4800, 'download_attempts' => 2))) {
        foreach (new FileIterator($temp_filepath, true) as $line_number => $line) {
            if (!$line) {
                continue;
            }
            $cols = explode("\t", $line);
            if (count($cols) == 80) {
                $records[str_replace('"', '', $cols[1])] = 1; // keyed, so duplicates collapse
            }
        }
    }
    $func::save_to_json_file(array_keys($records), $destination);
}
/**
 * Reads the families spreadsheet and returns the distinct family names.
 *
 * As a side effect the higher-rank columns of every family row are collected
 * into a family table and written to $this->temp_family_table_file.
 * Only the first spreadsheet that downloads successfully is used.
 *
 * @return array list of family names
 */
private function get_families_xlsx()
{
    require_library('XLSParser');
    $parser = new XLSParser();
    $families = array();
    $family_table = array(); // for family table
    $fields = array("SpK", "K", "SbK", "IK", "SpP", "P", "SbP", "IP", "PvP", "SpC", "C", "SbC", "IC", "SpO", "O");

    $dropbox_xlsx = array();
    // $dropbox_xlsx[] = "http://tiny.cc/FALO"; // from Cyndy's Dropbox
    $dropbox_xlsx[] = "https://dl.dropboxusercontent.com/u/7597512/NCBI_GGI/ALF2015.xlsx"; // from Eli's Dropbox
    // $dropbox_xlsx[] = "http://localhost/cp/NCBIGGI/FALO.xlsx"; // local
    // $dropbox_xlsx[] = "http://localhost/cp/NCBIGGI/ALF2015.xlsx"; // local

    foreach ($dropbox_xlsx as $doc) {
        echo "\n processing [{$doc}]...\n";
        $path = Functions::save_remote_file_to_local($doc, array("timeout" => 3600, "file_extension" => "xlsx", 'download_attempts' => 2, 'delay_in_minutes' => 2, 'cache' => 1));
        if (!$path) {
            echo "\n [{$doc}] unavailable! \n";
            continue;
        }

        $arr = $parser->convert_sheet_to_array($path);
        $next_row = 0;
        foreach ($arr["FAMILY"] as $family) {
            // Advance the row index for EVERY row. It used to be skipped for
            // numeric cells, which shifted the rank columns of all later families.
            $row = $next_row++;
            $family = trim(str_ireplace(array("Family", '"'), "", $family));
            if (is_numeric($family) || !$family) {
                continue;
            }
            $families[$family] = '';
            foreach ($fields as $field) {
                $family_table[$family][$field] = $arr[$field][$row]; // for family table
            }
        }
        unlink($path);
        break; // first available spreadsheet wins
    }

    // save $family_table as json to text file, to be accessed later when generating the spreadsheet
    self::initialize_dump_file($this->temp_family_table_file);
    self::save_to_dump($family_table, $this->temp_family_table_file);
    echo "\n count family rows: " . count($family_table) . "\n";
    unset($family_table);
    return array_keys($families);
}
/**
 * Downloads the data dump as an XML file and returns what the downloader
 * returned.
 *
 * @param string|false $file location to download; defaults to
 *                           $this->data_dump_url (used when this function is
 *                           called elsewhere)
 * @return mixed result of Functions::save_remote_file_to_local()
 */
function download_and_extract_remote_file($file = false)
{
    if (!$file) {
        $file = $this->data_dump_url; // used when this function is called elsewhere
    }
    // The former positional arguments (wait time, timeout, attempts, extension)
    // are passed as a single options array.
    // NOTE(review): key names are assumed - confirm against Functions::save_remote_file_to_local().
    $download_options = array(
        'download_wait_time' => DOWNLOAD_WAIT_TIME,
        'timeout' => 999999,
        'download_attempts' => 5,
        'file_extension' => "xml",
    );
    $temp_path = Functions::save_remote_file_to_local($file, $download_options);
    echo "\n [{$temp_path}] \n";
    // shell_exec("gzip -d " . $temp_path);
    // return str_ireplace(".xml.gz", ".xml", $temp_path);
    return $temp_path;
}
/**
 * Downloads the species list and runs main_loop() for the scientific name in
 * the first tab-separated column of each non-empty line.
 *
 * Only lines whose 1-based position is below $window_size (10000) are
 * processed; the commented conditions show how to select later windows.
 */
private function process_DL_taxon_list()
{
    $temp_filepath = Functions::save_remote_file_to_local(self::DL_MAP_SPECIES_LIST, array('timeout' => 4800, 'download_attempts' => 5));
    if (!$temp_filepath) {
        echo "\n\nExternal file not available. Program will terminate.\n";
        return;
    }

    $position = 0;
    foreach (new FileIterator($temp_filepath, true) as $line_number => $line) {
        $position++; // counts every line, empty ones included
        // if($position >= 5) break; //debug
        if (!$line) {
            continue;
        }

        $window_size = 10000;
        $in_window = ($position >= 1 && $position < $window_size);
        // $in_window = ($position >= $window_size   && $position < $window_size*2);
        // $in_window = ($position >= $window_size*2 && $position < $window_size*3);
        // $in_window = ($position >= $window_size*3 && $position < $window_size*4);
        // $in_window = ($position >= $window_size*4 && $position < $window_size*5);
        if (!$in_window) {
            continue;
        }

        $columns = explode("\t", $line);
        $sciname = trim($columns[0]);
        echo "\n[{$sciname}]\n";
        self::main_loop($sciname);
    }
}
<?php namespace php_active_record; /* connector for ZooKeys estimated execution time: 1 minute Connector reads the XML provided by partner and moves the <agent>s with role = 'source' to bibliographicCitation */ include_once dirname(__FILE__) . "/../../config/environment.php"; require_library('ResourceDataObjectElementsSetting'); $timestart = time_elapsed(); $resource_id = 20; $resource_path = Functions::get_accesspoint_url_if_available($resource_id, "http://www.pensoft.net/J_FILES/EoLData/ZooKeys.xml"); echo "\n processing resource: {$resource_path} \n"; if ($local_path = Functions::save_remote_file_to_local($resource_path, array('cache' => 1, 'download_wait_time' => 1000000, 'timeout' => 86400, 'download_attempts' => 3, 'delay_in_minutes' => 2))) { $func = new ResourceDataObjectElementsSetting($resource_id, $local_path); $dataObjects = get_values($local_path); $xml = remove_elements($local_path); $func->save_resource_document($xml); $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml"; $xml = fill_up_values($resource_path, $dataObjects); $func->save_resource_document($xml); unlink($local_path); } //start creating the archive file using the generated EOL XML file above require_library('connectors/ConvertEOLtoDWCaAPI'); $resource_id = 20; $params["eol_xml_file"] = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml"; $params["filename"] = "no need to mention here.xml"; $params["dataset"] = "Pensoft XML files";