/**
 * Fetches the spreadsheet behind $this->data_dump_url and converts it to records.
 *
 * $this->data_dump_url is overwritten with whatever
 * Functions::save_remote_file_to_local() returns. When that value is falsy
 * nothing else happens and the method returns null.
 *
 * @return mixed The records after fill_in_missing_names() and fill_in_parent_id(), or null.
 */
private function parse_xls()
{
    $download_options = array(
        'download_wait_time' => 1000000,
        'timeout' => 600,
        'download_attempts' => 5,
        'file_extension' => 'xls'
    );
    $this->data_dump_url = Functions::save_remote_file_to_local($this->data_dump_url, $download_options);
    if (!$this->data_dump_url) {
        return;
    }

    require_library('XLSParser');
    $xls = new XLSParser();
    debug("\n reading: " . $this->data_dump_url . "\n");

    $sheet = $xls->convert_sheet_to_array($this->data_dump_url);
    // "single" mode, indexed on the "SCIENTIFIC NAME" column, followed by the columns to keep.
    $records = $xls->prepare_data(
        $sheet,
        "single",
        "SCIENTIFIC NAME",
        "SCIENTIFIC NAME",
        "CATEGORY",
        "ENGLISH NAME",
        "RANGE",
        "ORDER",
        "FAMILY",
        "EXTINCT",
        "EXTINCT_YEAR"
    );
    $records = self::fill_in_parent_id(self::fill_in_missing_names($records));

    debug("\n" . count($records));
    return $records;
}
/**
 * Fetches the .xlsx behind $this->data_dump_url (with caching enabled) and
 * converts its first worksheet (index 0) to records.
 *
 * $this->data_dump_url is overwritten with whatever
 * Functions::save_remote_file_to_local() returns. When that value is falsy
 * nothing else happens and the method returns null.
 *
 * @return mixed The records after add_uppercase_fields(), fill_in_missing_names()
 *               and fill_in_parent_id(), or null.
 */
private function parse_xls()
{
    $download_options = array(
        'cache' => 1,
        'download_wait_time' => 1000000,
        'timeout' => 600,
        'download_attempts' => 5,
        'file_extension' => 'xlsx'
    );
    $this->data_dump_url = Functions::save_remote_file_to_local($this->data_dump_url, $download_options);
    if (!$this->data_dump_url) {
        return;
    }

    require_library('XLSParser');
    $xls = new XLSParser();
    debug("\n reading: " . $this->data_dump_url . "\n");

    $sheet = $xls->convert_sheet_to_array($this->data_dump_url, 0);
    // "single" mode, indexed on the "Scientific name" column, followed by the columns to keep.
    $records = $xls->prepare_data(
        $sheet,
        "single",
        "Scientific name",
        "Scientific name",
        "Category",
        "English name",
        "Range",
        "Order",
        "Family",
        "Extinct",
        "Extinction Year"
    );

    // Post-processing runs in a fixed order: uppercase aliases, then names, then parent ids.
    $records = self::add_uppercase_fields($records);
    $records = self::fill_in_missing_names($records);
    $records = self::fill_in_parent_id($records);

    debug("\n" . count($records));
    return $records;
}
/**
 * Processes every spreadsheet listed in $this->spreadsheets row by row and
 * then finalizes the archive builder.
 *
 * Each spreadsheet is downloaded from $this->url_path, converted to a
 * column => values array and walked along its "Species" column. Every row is
 * passed to clean_taxon_name(), given an md5-based taxon_id and handed to
 * create_instances_from_taxon_object(), prepare_images() and prepare_data().
 * The downloaded copy is deleted once its rows are done.
 */
function get_all_taxa()
{
    require_library('XLSParser');

    $total_docs = count($this->spreadsheets);
    $doc_number = 0;
    foreach ($this->spreadsheets as $doc) {
        $doc_number++;
        echo "\n processing [{$doc}]...\n";

        $download_options = array(
            "cache" => 1,
            "timeout" => 3600,
            "file_extension" => "xls",
            'download_attempts' => 2,
            'delay_in_minutes' => 2
        );
        $path = Functions::save_remote_file_to_local($this->url_path . $doc, $download_options);
        if (!$path) {
            echo "\n [{$doc}] unavailable! \n";
            continue;
        }

        $xls = new XLSParser();
        $sheet = $xls->convert_sheet_to_array($path);
        $columns = array_keys($sheet);
        $total_rows = count($sheet["Species"]);
        echo "\n total {$path}: {$total_rows} \n";

        // The "Species" column drives the iteration; $row is the 0-based position
        // used to pick the matching cell out of every other column.
        // (An earlier revision could restrict processing to a window of $row values while caching.)
        $row = -1;
        foreach ($sheet["Species"] as $unused_species) {
            $row++;

            $rec = array();
            foreach ($columns as $column) {
                $rec[$column] = $sheet[$column][$row];
            }
            $rec = array_map('trim', $rec);

            print "\n [{$doc_number} of {$total_docs}][" . ($row + 1) . " of {$total_rows}] " . $rec["Species"] . "\n";

            $rec = self::clean_taxon_name($rec);

            // taxon_id = md5 of sciname with parenthesised parts removed and spaces turned into underscores.
            $id_source = preg_replace('/\\s*\\([^)]*\\)/', '', $rec["sciname"]);
            $id_source = str_replace(" ", "_", trim($id_source));
            $rec["taxon_id"] = md5($id_source);

            self::create_instances_from_taxon_object($rec);
            self::prepare_images($rec);
            self::prepare_data($rec);
        }

        unlink($path);
    }

    $this->archive_builder->finalize(TRUE);
}
/**
 * Reads the SPG hotlist spreadsheet and runs main_loop() for each binomial
 * (space-containing) name in its "Animals" column.
 *
 * Only rows whose 0-based position falls in [1, 10000) are processed; every
 * qualifying name is still echoed before that window is applied. The
 * downloaded copy is deleted afterwards.
 */
private function process_hotlist_spreadsheet()
{
    require_library('XLSParser');
    $xls = new XLSParser();

    // Alternative location on other setups:
    // http://localhost/eol_php_code/public/tmp/spreadsheets/SPG Hotlist Official Version.xlsx
    $doc = "http://localhost/~eolit/eli/eol_php_code/public/tmp/spreadsheets/SPG Hotlist Official Version.xlsx"; //for MacBook
    echo "\n processing [{$doc}]...\n";

    $download_options = array(
        "timeout" => 3600,
        "file_extension" => "xlsx",
        'download_attempts' => 2,
        'delay_in_minutes' => 2
    );
    $path = Functions::save_remote_file_to_local($doc, $download_options);
    if (!$path) {
        echo "\n [{$doc}] unavailable! \n";
        return;
    }

    $sheet = $xls->convert_sheet_to_array($path);

    // Rows are handled in windows of this size; only the first window is currently active.
    $window = 10000;
    $row = -1;
    foreach ($sheet['Animals'] as $name) {
        $row++;
        $name = trim(Functions::canonical_form($name));

        // Names without a space are skipped.
        if (strpos($name, " ") === false) {
            continue;
        }

        // Column '1' holds the taxon concept id for the same row.
        $concept_id = $sheet['1'][$row];
        echo "\n{$row}. [{$name}][{$concept_id}]";

        $in_window = ($row >= 1 && $row < $window);
        if (!$in_window) {
            continue;
        }

        self::main_loop($name, $concept_id);
    }

    unlink($path);
}
/**
 * Collects family names from the first available spreadsheet and records the
 * classification columns of every family row in a dump file.
 *
 * For each spreadsheet URL (tried in order, stopping after the first that
 * downloads) the "FAMILY" column is walked. Cells are stripped of the word
 * "Family" and of double quotes; numeric or empty results are ignored. The
 * per-family field values are written to $this->temp_family_table_file via
 * initialize_dump_file()/save_to_dump().
 *
 * Fix: the row index now advances on every iteration. Previously the
 * `continue` taken for numeric cells skipped the increment, so every family
 * after such a cell read its field values from the wrong row.
 *
 * @return array List of distinct family names.
 */
private function get_families_xlsx()
{
    require_library('XLSParser');
    $parser = new XLSParser();

    $families = array();
    $family_table = array(); // for family table
    $fields = array("SpK", "K", "SbK", "IK", "SpP", "P", "SbP", "IP", "PvP", "SpC", "C", "SbC", "IC", "SpO", "O");

    $dropbox_xlsx = array();
    // $dropbox_xlsx[] = "http://tiny.cc/FALO"; // from Cyndy's Dropbox
    $dropbox_xlsx[] = "https://dl.dropboxusercontent.com/u/7597512/NCBI_GGI/ALF2015.xlsx"; // from Eli's Dropbox
    // $dropbox_xlsx[] = "http://localhost/cp/NCBIGGI/FALO.xlsx"; // local
    // $dropbox_xlsx[] = "http://localhost/cp/NCBIGGI/ALF2015.xlsx"; // local

    $download_options = array(
        "timeout" => 3600,
        "file_extension" => "xlsx",
        'download_attempts' => 2,
        'delay_in_minutes' => 2,
        'cache' => 1
    );

    foreach ($dropbox_xlsx as $doc) {
        echo "\n processing [{$doc}]...\n";
        $path = Functions::save_remote_file_to_local($doc, $download_options);
        if (!$path) {
            echo "\n [{$doc}] unavailable! \n";
            continue;
        }

        $arr = $parser->convert_sheet_to_array($path);

        // $i is the 0-based row position; it must advance for every cell,
        // including the ones skipped below, to stay aligned with the other columns.
        $i = -1;
        foreach ($arr["FAMILY"] as $family) {
            $i++;
            $family = trim(str_ireplace(array("Family", '"'), "", $family));
            if (is_numeric($family)) {
                continue;
            }
            if ($family) {
                $families[$family] = '';
                foreach ($fields as $field) {
                    // A missing cell is stored as null, without raising a notice.
                    $family_table[$family][$field] = isset($arr[$field][$i]) ? $arr[$field][$i] : null;
                }
            }
        }

        unlink($path);
        break; // the first spreadsheet that downloads is the only one used
    }

    // Save $family_table to a dump file, to be accessed later when generating the spreadsheet.
    self::initialize_dump_file($this->temp_family_table_file);
    self::save_to_dump($family_table, $this->temp_family_table_file);
    echo "\n count family rows: " . count($family_table) . "\n";
    unset($family_table);

    return array_keys($families);
}
/**
 * Returns the 'Morphbank ID' column of the bundled MorphBank upload spreadsheet.
 *
 * Fix: when the parsed sheet has no 'Morphbank ID' column, an empty array is
 * returned instead of null plus an undefined-index notice. The unused
 * $excluded_ids local was removed.
 *
 * @return array Values of the 'Morphbank ID' column, or an empty array when the column is absent.
 */
function prepare_excluded_ids()
{
    require_library('XLSParser');
    $parser = new XLSParser();

    $filename = DOC_ROOT . "/update_resources/connectors/files/MorphBank/original-mb-upload-2010-11-22.xls";
    $arr = $parser->convert_sheet_to_array($filename);

    if (!isset($arr['Morphbank ID'])) {
        return array();
    }
    return $arr['Morphbank ID'];
}
/**
 * Loads the three spreadsheets found under DOC_ROOT . self::TEMP_FILE_PATH:
 * the taxa list, the name synonymy and the names to be added.
 *
 * Each sheet is parsed with its own XLSParser instance in "single" mode,
 * indexed on its first listed column. The three record counts are echoed.
 *
 * @return array array($taxa, $synonymy, $names_to_be_added)
 */
function prepare_taxa_list()
{
    require_library('XLSParser');
    $base_path = DOC_ROOT . self::TEMP_FILE_PATH;

    $xls = new XLSParser();
    $taxa_sheet = $xls->convert_sheet_to_array($base_path . self::TAXA_LIST_FILE);
    $taxa = $xls->prepare_data(
        $taxa_sheet,
        "single",
        "NAME",
        "NAME",
        "USFWS SPECIES PROFILE URL",
        "DISPLAYED TEXT",
        "SOURCE LIST"
    );

    $xls = new XLSParser();
    $synonymy_sheet = $xls->convert_sheet_to_array($base_path . self::NAME_SYNONYMY);
    $synonymy = $xls->prepare_data($synonymy_sheet, "single", "USFWS", "USFWS", "EOL NAME");

    $xls = new XLSParser();
    $additions_sheet = $xls->convert_sheet_to_array($base_path . self::NAMES_TO_BE_ADDED);
    $names_to_be_added = $xls->prepare_data(
        $additions_sheet,
        "single",
        "FWS NAMES TO ADD TO EOL",
        "FWS NAMES TO ADD TO EOL"
    );

    echo "\n taxa: " . count($taxa);
    echo "\n synonymy: " . count($synonymy);
    echo "\n names_to_be_added: " . count($names_to_be_added);

    return array($taxa, $synonymy, $names_to_be_added);
}
/**
 * Builds a rank_id => rank_name lookup from the spreadsheet at $this->OBIS_RANK_FILE.
 *
 * The 'rank_id' and 'rank_name' columns are paired by position. A rank id
 * with no matching name cell maps to null.
 *
 * @return array Map of rank id to rank name.
 */
private function prepare_rank_data()
{
    require_library('XLSParser');
    $xls = new XLSParser();
    $sheet = $xls->convert_sheet_to_array($this->OBIS_RANK_FILE);

    $ranks = array();
    $position = 0;
    foreach ($sheet['rank_id'] as $id) {
        // Suppression is kept so a missing name cell silently yields null.
        $ranks[$id] = @$sheet['rank_name'][$position];
        $position++;
    }

    return $ranks;
}
/**
 * Parses every spreadsheet in $urls and merges the resulting tables.
 *
 * Each sheet is passed to prepare_table() in "single" mode, indexed on "SId",
 * and the per-sheet results are combined with array_merge() in the order given.
 *
 * @param array $urls Spreadsheet locations accepted by XLSParser::convert_sheet_to_array().
 * @return array The merged tables (empty when $urls is empty).
 */
function compile_taxa($urls)
{
    require_library('XLSParser');
    $xls = new XLSParser();

    $merged = array();
    foreach ($urls as $url) {
        $sheet = $xls->convert_sheet_to_array($url);
        $table = self::prepare_table(
            $sheet,
            "single",
            "SId",
            "SId",
            "GenusSpecies",
            "AuthorSpecies",
            "Family",
            "DistributionT",
            "OrderName",
            "Notes",
            "Habitat",
            "HabitatNotes",
            "DepthRange",
            "DepthRangeShallow",
            "DepthRangeDeep",
            "LengthMax",
            "LengthMaxSuffix",
            "LengthMaxType",
            "Journal",
            "Citation",
            "TextPage"
        );
        $merged = array_merge($merged, $table);
    }

    return $merged;
}
/**
 * Downloads a spreadsheet and returns it as an array.
 *
 * The download uses $this->spreadsheet_options, and the local copy is deleted
 * right after parsing.
 *
 * @param string $spreadsheet Location handed to Functions::save_remote_file_to_local().
 * @param mixed  $worksheet   Passed through as the second argument of convert_sheet_to_array().
 * @return mixed The parsed sheet, or false when the download did not produce a file.
 */
public function convert_spreadsheet($spreadsheet, $worksheet = null)
{
    require_library('XLSParser');
    $xls = new XLSParser();

    $local_copy = Functions::save_remote_file_to_local($spreadsheet, $this->spreadsheet_options);
    if (!$local_copy) {
        echo "\n [{$spreadsheet}] unavailable! \n";
        return false;
    }

    $sheet = $xls->convert_sheet_to_array($local_copy, $worksheet);
    unlink($local_copy);
    return $sheet;
}
/**
 * Pulls pages 1..259 from SPIRE_SERVICE and assembles taxa, predator/prey
 * links and study references, then attaches ancestry from the spreadsheet at
 * SPIRE_PATH_ANCESTRY.
 *
 * Changes from the previous version:
 *  - $reference is initialised, so the return value is well defined even when
 *    no Study element was seen;
 *  - a page that cannot be parsed as XML is reported and skipped instead of
 *    being dereferenced;
 *  - URL fragments are read with isset() guards rather than raising notices;
 *  - the duplicated "append if absent" checks are merged, and the unused
 *    $parent_id and $ancestry pre-initialisation are removed.
 *
 * @return array|false array($arr_taxa, $arr_ref, $reference), or false as soon
 *                     as one page cannot be fetched from the service.
 */
function assemble_xml_files()
{
    $arr_taxa = array();
    $arr_predator = array();
    $arr_prey = array();
    $arr_ref = array();
    $reference = array();

    for ($i = 1; $i <= 259; $i++) {
        print "\n {$i} ---" . SPIRE_SERVICE . $i;
        if (!($str = Functions::get_remote_file(SPIRE_SERVICE . $i))) {
            echo "\n\nSPIRE service not available at the moment.\n\n";
            return false;
        }

        // The namespaced attribute is renamed so it is reachable through attributes() below.
        $str = str_replace('rdf:resource', 'rdf_resource', $str);
        // NOTE(review): utf8_encode() is deprecated as of PHP 8.2 - confirm the service encoding before replacing it.
        $str = utf8_encode($str);
        $xml = simplexml_load_string($str);
        if ($xml === false) {
            echo "\n\nSPIRE page {$i} could not be parsed as XML, skipping.\n\n";
            continue;
        }

        foreach ($xml->ConfirmedFoodWebLink as $rec) {
            // Each name is the URL fragment of the element's attribute, with underscores turned into spaces.
            // NOTE(review): if an element carries no attributes, the value from the previous record is reused.
            foreach ($rec->predator[0]->attributes() as $attribute => $value) {
                $arr = parse_url($value);
                $predator = isset($arr['fragment']) ? trim($arr['fragment']) : "";
                $predator = str_replace("_", " ", $predator);
            }
            $pred_desc = trim($rec->predator_description);

            foreach ($rec->prey[0]->attributes() as $attribute => $value) {
                $arr = parse_url($value);
                $prey = isset($arr['fragment']) ? trim($arr['fragment']) : "";
                $prey = str_replace("_", " ", $prey);
            }
            $prey_desc = trim($rec->prey_description);

            foreach ($rec->observedInStudy[0]->attributes() as $attribute => $value) {
                $arr = parse_url($value);
                $ref_num = isset($arr['fragment']) ? trim($arr['fragment']) : "";
            }

            $arr_taxa[$predator]['desc'] = $pred_desc;
            $arr_taxa[$prey]['desc'] = $prey_desc;

            // Append each relation once; in_array() stays non-strict as before.
            if (empty($arr_predator[$predator]) || !in_array($prey, $arr_predator[$predator])) {
                $arr_predator[$predator][] = $prey;
            }
            if (empty($arr_prey[$prey]) || !in_array($predator, $arr_prey[$prey])) {
                $arr_prey[$prey][] = $predator;
            }
            if (empty($arr_ref[$ref_num]['predator']) || !in_array($predator, $arr_ref[$ref_num]['predator'])) {
                $arr_ref[$ref_num]['predator'][] = $predator;
            }
            if (empty($arr_ref[$ref_num]['prey']) || !in_array($prey, $arr_ref[$ref_num]['prey'])) {
                $arr_ref[$ref_num]['prey'][] = $prey;
            }
        }

        foreach ($xml->Study as $rec) {
            $habitats = array();
            foreach ($rec->ofHabitat as $habitat) {
                foreach ($habitat->attributes() as $attribute => $value) {
                    $arr = parse_url($value);
                    $habitat_name = isset($arr['fragment']) ? trim($arr['fragment']) : "";
                    $habitats[] = str_replace("_", " ", $habitat_name);
                }
            }
            $habitats = implode(", ", $habitats);
            if ($habitats == "unknown") {
                $habitats = "";
            }

            $place = self::parse_locality(trim($rec->locality));
            $country = isset($place["country"]) ? $place["country"] : null;
            $state = isset($place["state"]) ? $place["state"] : null;
            $locality = isset($place["locality"]) ? $place["locality"] : null;

            $titleAndAuthors = trim($rec->titleAndAuthors);
            if ($titleAndAuthors == "Animal Diversity Web") {
                // The replacement citation contains a literal line break; it is kept exactly as it was.
                $titleAndAuthors = "Myers, P., R. Espinosa, C. S. Parr, T. Jones, G. S. Hammond, and T. A. Dewey. 2006. The Animal Diversity Web (online). Accessed February 16, 2011 at http://animaldiversity.org. 
http://www.animaldiversity.org";
            }

            // NOTE(review): $ref_num is whatever the last ConfirmedFoodWebLink above set - presumably one study per page; confirm.
            $reference[$ref_num] = array(
                "titleAndAuthors" => $titleAndAuthors,
                "publicationYear" => trim($rec->publicationYear),
                "place" => trim($rec->locality),
                "country" => $country,
                "state" => $state,
                "locality" => $locality,
                "habitat" => $habitats
            );
        }
    } // main loop 1-259

    // for ancestry
    require_library('XLSParser');
    $parser = new XLSParser();
    $names = $parser->convert_sheet_to_array(SPIRE_PATH_ANCESTRY);

    foreach ($arr_taxa as $taxon => $temp) {
        $arr_taxa[$taxon]['objects'] = array(
            "predator" => isset($arr_predator[$taxon]) ? $arr_predator[$taxon] : null,
            "prey" => isset($arr_prey[$taxon]) ? $arr_prey[$taxon] : null
        );

        // array_search() yields false when the name is unknown; strval(false) is "",
        // while a match at position 0 gives "0" and is still accepted.
        $key = array_search(trim($taxon), $names['tname']);
        if (strval($key) != "") {
            $arr_taxa[$taxon]['ancestry'] = self::get_ancestry($key, $names);
        }
    }

    return array($arr_taxa, $arr_ref, $reference);
}
/**
 * Converts the spreadsheet at $this->path_to_spreadsheet to EOL XML using
 * XLSParser::create_eol_xml() and writes it to the file named by output_file().
 *
 * @return mixed The output file path, or null when the file could not be opened
 *               (a debug message is logged in that case).
 */
public function convert_to_old_schema_xml()
{
    require_library('XLSParser');
    $xls = new XLSParser();
    $xml = $xls->create_eol_xml($this->path_to_spreadsheet);

    $output_file = $this->output_file();
    $handle = fopen($output_file, "w+");
    if (!$handle) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $output_file);
        return;
    }

    fwrite($handle, $xml);
    fclose($handle);
    return $output_file;
}
/**
 * Builds a name => list-of-people map from the bundled Acknowledgments.xls.
 *
 * The key is the "sciname" cell with any ".mp4" removed (case-insensitively)
 * and trimmed. The values are the non-empty "person1".."person3" cells of the
 * same row, in that order.
 *
 * @return array Map of name to list of acknowledged people.
 */
public static function prepare_acknowledgement()
{
    require_library('XLSParser');
    $xls = new XLSParser();
    $sheet = $xls->convert_sheet_to_array(DOC_ROOT . "update_resources/connectors/files/NaturalHistoryServices/Acknowledgments.xls");

    $credits = array();
    $row = 0;
    foreach ($sheet["sciname"] as $media_name) {
        $name = trim(str_ireplace(".mp4", "", $media_name));
        foreach (array(1, 2, 3) as $n) {
            // Suppression is kept so a missing column or cell is simply skipped.
            $person = @$sheet["person" . $n][$row];
            if ($person) {
                $credits[$name][] = $person;
            }
        }
        $row++;
    }

    return $credits;
}