Ejemplo n.º 1
0
 /**
  * Fetches the spreadsheet referenced by $this->data_dump_url and converts it into records.
  *
  * Side effect: $this->data_dump_url is overwritten with the value returned by
  * Functions::save_remote_file_to_local() (presumably a local file path — TODO confirm).
  *
  * @return mixed the output of XLSParser::prepare_data() after the missing-name and
  *               parent-id passes, or null when the download step returns a falsy value
  */
 private function parse_xls()
 {
     $download_options = array(
         'download_wait_time' => 1000000,
         'timeout' => 600,
         'download_attempts' => 5,
         'file_extension' => 'xls',
     );
     $this->data_dump_url = Functions::save_remote_file_to_local($this->data_dump_url, $download_options);
     if (!$this->data_dump_url) {
         // Nothing to parse; the caller receives null.
         return;
     }

     require_library('XLSParser');
     $parser = new XLSParser();
     debug("\n reading: " . $this->data_dump_url . "\n");

     $sheet = $parser->convert_sheet_to_array($this->data_dump_url);
     $records = $parser->prepare_data($sheet, "single", "SCIENTIFIC NAME", "SCIENTIFIC NAME", "CATEGORY", "ENGLISH NAME", "RANGE", "ORDER", "FAMILY", "EXTINCT", "EXTINCT_YEAR");

     // Post-processing passes run in this fixed order: names first, then parent ids.
     $records = self::fill_in_parent_id(self::fill_in_missing_names($records));
     debug("\n" . count($records));
     return $records;
 }
Ejemplo n.º 2
0
 /**
  * Fetches the .xlsx referenced by $this->data_dump_url and converts it into records.
  *
  * Side effect: $this->data_dump_url is overwritten with the value returned by
  * Functions::save_remote_file_to_local() (presumably a local file path — TODO confirm).
  *
  * @return mixed the output of XLSParser::prepare_data() after the uppercase-field,
  *               missing-name and parent-id passes, or null when the download step
  *               returns a falsy value
  */
 private function parse_xls()
 {
     $download_options = array(
         'cache' => 1,
         'download_wait_time' => 1000000,
         'timeout' => 600,
         'download_attempts' => 5,
         'file_extension' => 'xlsx',
     );
     $this->data_dump_url = Functions::save_remote_file_to_local($this->data_dump_url, $download_options);
     if (!$this->data_dump_url) {
         // Nothing to parse; the caller receives null.
         return;
     }

     require_library('XLSParser');
     $parser = new XLSParser();
     debug("\n reading: " . $this->data_dump_url . "\n");

     // Second argument 0 is passed straight to the parser (presumably the worksheet index — TODO confirm).
     $sheet = $parser->convert_sheet_to_array($this->data_dump_url, 0);
     $records = $parser->prepare_data($sheet, "single", "Scientific name", "Scientific name", "Category", "English name", "Range", "Order", "Family", "Extinct", "Extinction Year");

     // Post-processing passes, applied in this fixed order.
     $records = self::add_uppercase_fields($records);
     $records = self::fill_in_parent_id(self::fill_in_missing_names($records));
     debug("\n" . count($records));
     return $records;
 }
Ejemplo n.º 3
0
 /**
  * Processes every spreadsheet listed in $this->spreadsheets, row by row.
  *
  * For each document: download it, convert it to column arrays, rebuild each row as an
  * associative record keyed by column name, derive an md5 taxon_id from the cleaned
  * "sciname", and hand the record to the taxon / image / data helpers. The downloaded
  * file is deleted afterwards. Once all documents are done the archive builder is finalized.
  */
 function get_all_taxa()
 {
     require_library('XLSParser');
     $docs = count($this->spreadsheets);
     $doc_count = 0;
     $download_options = array("cache" => 1, "timeout" => 3600, "file_extension" => "xls", 'download_attempts' => 2, 'delay_in_minutes' => 2);

     foreach ($this->spreadsheets as $doc) {
         $doc_count++;
         echo "\n processing [{$doc}]...\n";

         $path = Functions::save_remote_file_to_local($this->url_path . $doc, $download_options);
         if (!$path) {
             echo "\n [{$doc}] unavailable! \n";
             continue;
         }

         $parser = new XLSParser();
         $sheet = $parser->convert_sheet_to_array($path);
         $columns = array_keys($sheet);
         $rows = count($sheet["Species"]);
         echo "\n total {$path}: {$rows} \n";

         // The "Species" column only drives the row count; cell values are read by position.
         $next_row = 0;
         foreach ($sheet["Species"] as $ignored) {
             $i = $next_row++;

             // Rebuild row $i as column-name => trimmed cell value.
             $rec = array();
             foreach ($columns as $column) {
                 $rec[$column] = $sheet[$column][$i];
             }
             $rec = array_map('trim', $rec);

             // (A row-range filter on $i can be inserted here to process the sheet in batches when caching.)
             print "\n [{$doc_count} of {$docs}][" . ($i + 1) . " of {$rows}] " . $rec["Species"] . "\n";

             $rec = self::clean_taxon_name($rec);

             // Strip parenthesised parts from the name, then hash the underscore-joined remainder.
             $without_parens = trim(preg_replace('/\\s*\\([^)]*\\)/', '', $rec["sciname"]));
             $rec["taxon_id"] = md5(str_replace(" ", "_", $without_parens));

             self::create_instances_from_taxon_object($rec);
             self::prepare_images($rec);
             self::prepare_data($rec);
         }
         unlink($path);
     }

     $this->archive_builder->finalize(TRUE);
 }
Ejemplo n.º 4
0
 /**
  * Reads the SPG hotlist workbook and runs main_loop() for each binomial-looking name.
  *
  * Names come from the 'Animals' column; the value at the same position in column '1'
  * is passed along as the taxon concept id. Only rows whose zero-based position falls
  * in [1, 10000) are forwarded to main_loop(); every name containing a space is echoed
  * regardless. The downloaded copy is removed when done.
  */
 private function process_hotlist_spreadsheet()
 {
     require_library('XLSParser');
     $parser = new XLSParser();

     // MacBook location. Alternate host path:
     // http://localhost/eol_php_code/public/tmp/spreadsheets/SPG Hotlist Official Version.xlsx
     $doc = "http://localhost/~eolit/eli/eol_php_code/public/tmp/spreadsheets/SPG Hotlist Official Version.xlsx";
     echo "\n processing [{$doc}]...\n";

     $path = Functions::save_remote_file_to_local($doc, array("timeout" => 3600, "file_extension" => "xlsx", 'download_attempts' => 2, 'delay_in_minutes' => 2));
     if (!$path) {
         echo "\n [{$doc}] unavailable! \n";
         return;
     }

     $arr = $parser->convert_sheet_to_array($path);

     // Batch window size; shift the bounds below by multiples of $m to process later batches.
     $m = 10000;

     $i = -1;
     foreach ($arr['Animals'] as $sciname) {
         $i++;
         $sciname = trim(Functions::canonical_form($sciname));

         // Only names with at least two words are of interest.
         if (stripos($sciname, " ") === false) {
             continue;
         }

         $taxon_concept_id = $arr['1'][$i];
         echo "\n{$i}. [{$sciname}][{$taxon_concept_id}]";

         // Current batch: positions 1 .. $m - 1.
         if ($i < 1 || $i >= $m) {
             continue;
         }
         self::main_loop($sciname, $taxon_concept_id);
     }
     unlink($path);
 }
Ejemplo n.º 5
0
 /**
  * Extracts family names and their higher-rank columns from the first reachable workbook.
  *
  * Each cell of the "FAMILY" column is cleaned (the word "Family" and double quotes are
  * removed, then trimmed). Numeric and empty cells are skipped. For every remaining family
  * the values of the rank columns listed in $fields are copied from the SAME row into the
  * family table, which is then written to $this->temp_family_table_file via save_to_dump().
  *
  * @return array the distinct family names, in first-seen order
  */
 private function get_families_xlsx()
 {
     require_library('XLSParser');
     $parser = new XLSParser();
     $families = array();
     // for family table
     $family_table = array();
     $fields = array("SpK", "K", "SbK", "IK", "SpP", "P", "SbP", "IP", "PvP", "SpC", "C", "SbC", "IC", "SpO", "O");

     $dropbox_xlsx = array();
     // $dropbox_xlsx[] = "http://tiny.cc/FALO"; // from Cyndy's Dropbox
     $dropbox_xlsx[] = "https://dl.dropboxusercontent.com/u/7597512/NCBI_GGI/ALF2015.xlsx"; // from Eli's Dropbox
     // $dropbox_xlsx[] = "http://localhost/cp/NCBIGGI/FALO.xlsx"; // local
     // $dropbox_xlsx[] = "http://localhost/cp/NCBIGGI/ALF2015.xlsx"; // local

     foreach ($dropbox_xlsx as $doc) {
         echo "\n processing [{$doc}]...\n";
         if ($path = Functions::save_remote_file_to_local($doc, array("timeout" => 3600, "file_extension" => "xlsx", 'download_attempts' => 2, 'delay_in_minutes' => 2, 'cache' => 1))) {
             $arr = $parser->convert_sheet_to_array($path);
             $i = -1;
             foreach ($arr["FAMILY"] as $family) {
                 // Advance the row index BEFORE any `continue`, so that skipped rows
                 // (numeric or empty cells) cannot shift the rank columns of later families.
                 $i++;
                 $family = trim(str_ireplace(array("Family", '"'), "", $family));
                 if (is_numeric($family)) {
                     continue;
                 }
                 if ($family) {
                     $families[$family] = '';
                     foreach ($fields as $field) {
                         // A missing column or a short column yields null rather than a notice.
                         $family_table[$family][$field] = isset($arr[$field][$i]) ? $arr[$field][$i] : null;
                     }
                 }
             }
             unlink($path);
             // The first workbook that downloads successfully is the only one used.
             break;
         } else {
             echo "\n [{$doc}] unavailable! \n";
         }
     }

     // Save $family_table to the dump file, to be accessed later when generating the spreadsheet.
     self::initialize_dump_file($this->temp_family_table_file);
     self::save_to_dump($family_table, $this->temp_family_table_file);
     echo "\n count family rows: " . count($family_table) . "\n";
     unset($family_table);
     return array_keys($families);
 }
Ejemplo n.º 6
0
/**
 * Loads the bundled MorphBank upload workbook and returns its 'Morphbank ID' column.
 *
 * @return mixed whatever XLSParser::convert_sheet_to_array() stores under 'Morphbank ID'
 */
function prepare_excluded_ids()
{
    require_library('XLSParser');
    $workbook = DOC_ROOT . "/update_resources/connectors/files/MorphBank/original-mb-upload-2010-11-22.xls";
    $reader = new XLSParser();
    $columns = $reader->convert_sheet_to_array($workbook);
    return $columns['Morphbank ID'];
}
Ejemplo n.º 7
0
 /**
  * Loads the three workbooks that drive this connector and reports their sizes.
  *
  * Each workbook is read with its own XLSParser instance from
  * DOC_ROOT . self::TEMP_FILE_PATH and passed through prepare_data().
  *
  * @return array three-element list: taxa, synonymy, names to be added (in that order)
  */
 function prepare_taxa_list()
 {
     require_library('XLSParser');
     $folder = DOC_ROOT . self::TEMP_FILE_PATH;

     // Main taxa list.
     $parser = new XLSParser();
     $sheet = $parser->convert_sheet_to_array($folder . self::TAXA_LIST_FILE);
     $taxa = $parser->prepare_data($sheet, "single", "NAME", "NAME", "USFWS SPECIES PROFILE URL", "DISPLAYED TEXT", "SOURCE LIST");

     // USFWS name => EOL name mapping.
     $parser = new XLSParser();
     $sheet = $parser->convert_sheet_to_array($folder . self::NAME_SYNONYMY);
     $synonymy = $parser->prepare_data($sheet, "single", "USFWS", "USFWS", "EOL NAME");

     // Extra names.
     $parser = new XLSParser();
     $sheet = $parser->convert_sheet_to_array($folder . self::NAMES_TO_BE_ADDED);
     $names_to_be_added = $parser->prepare_data($sheet, "single", "FWS NAMES TO ADD TO EOL", "FWS NAMES TO ADD TO EOL");

     echo "\n taxa: " . count($taxa);
     echo "\n synonymy: " . count($synonymy);
     echo "\n names_to_be_added: " . count($names_to_be_added);

     return array($taxa, $synonymy, $names_to_be_added);
 }
Ejemplo n.º 8
0
 /**
  * Builds a rank_id => rank_name lookup from the workbook at $this->OBIS_RANK_FILE.
  *
  * The 'rank_id' and 'rank_name' columns are paired by position; a missing name
  * yields null (the lookup is error-suppressed).
  *
  * @return array map of rank id to rank name
  */
 private function prepare_rank_data()
 {
     require_library('XLSParser');
     $reader = new XLSParser();
     $sheet = $reader->convert_sheet_to_array($this->OBIS_RANK_FILE);

     $lookup = array();
     $position = 0;
     foreach ($sheet['rank_id'] as $rank_id) {
         $lookup[$rank_id] = @$sheet['rank_name'][$position];
         $position++;
     }
     return $lookup;
 }
Ejemplo n.º 9
0
 /**
  * Reads every workbook in $urls and concatenates the prepared rows.
  *
  * @param array $urls locations handed one by one to XLSParser::convert_sheet_to_array()
  * @return array the array_merge() of all prepare_table() results, in input order
  */
 function compile_taxa($urls)
 {
     require_library('XLSParser');
     $reader = new XLSParser();

     $taxa_arr = array();
     foreach ($urls as $url) {
         $sheet = $reader->convert_sheet_to_array($url);
         $rows = self::prepare_table(
             $sheet,
             "single",
             "SId",
             "SId",
             "GenusSpecies",
             "AuthorSpecies",
             "Family",
             "DistributionT",
             "OrderName",
             "Notes",
             "Habitat",
             "HabitatNotes",
             "DepthRange",
             "DepthRangeShallow",
             "DepthRangeDeep",
             "LengthMax",
             "LengthMaxSuffix",
             "LengthMaxType",
             "Journal",
             "Citation",
             "TextPage"
         );
         $taxa_arr = array_merge($taxa_arr, $rows);
     }
     return $taxa_arr;
 }
 /**
  * Downloads a spreadsheet, converts one worksheet to an array and deletes the download.
  *
  * @param string $spreadsheet location passed to Functions::save_remote_file_to_local()
  * @param mixed  $worksheet   forwarded unchanged as the second argument of
  *                            XLSParser::convert_sheet_to_array(); null by default
  * @return mixed the converted sheet, or false when the download step returns a falsy value
  */
 public function convert_spreadsheet($spreadsheet, $worksheet = null)
 {
     require_library('XLSParser');
     $reader = new XLSParser();

     $path = Functions::save_remote_file_to_local($spreadsheet, $this->spreadsheet_options);
     if (!$path) {
         echo "\n [{$spreadsheet}] unavailable! \n";
         return false;
     }

     $sheet = $reader->convert_sheet_to_array($path, $worksheet);
     unlink($path);
     return $sheet;
 }
Ejemplo n.º 11
0
 /**
  * Pulls SPIRE service documents 1..259 and assembles taxa, interaction and reference tables.
  *
  * For every document:
  *  - each <ConfirmedFoodWebLink> contributes a predator/prey pair (names are taken from the
  *    URL fragment of the element's attributes, underscores turned into spaces) and the
  *    study reference number it was observed in;
  *  - each <Study> contributes one entry of reference metadata (title, year, place, habitat).
  * Afterwards the ancestry workbook (SPIRE_PATH_ANCESTRY) is consulted to attach an
  * ancestry to every taxon whose name appears in its 'tname' column.
  *
  * @return array|false array($arr_taxa, $arr_ref, $reference), or false as soon as one
  *                     service document cannot be fetched
  */
 function assemble_xml_files()
 {
     $arr_taxa = array();
     $arr_predator = array();
     $arr_prey = array();
     $arr_ref = array();
     // Initialised up front so the return value is always an array, even when no <Study> was seen.
     $reference = array();

     //main loop 1-259
     for ($i = 1; $i <= 259; $i++) {
         print "\n {$i} ---" . SPIRE_SERVICE . $i;
         if (!($str = Functions::get_remote_file(SPIRE_SERVICE . $i))) {
             echo "\n\nSPIRE service not available at the moment.\n\n";
             return false;
         }
         // Rename the namespaced attribute so it can be read without namespace handling.
         $str = str_replace('rdf:resource', 'rdf_resource', $str);
         // NOTE(review): utf8_encode() is deprecated as of PHP 8.2; kept to preserve current output.
         $str = utf8_encode($str);
         $xml = simplexml_load_string($str);
         if ($xml === false) {
             // Unparseable payload: skip this document instead of iterating over `false`.
             echo "\n\nSPIRE document {$i} could not be parsed as XML; skipped.\n\n";
             continue;
         }

         foreach ($xml->ConfirmedFoodWebLink as $rec) {
             // NOTE(review): $predator, $prey and $ref_num keep their previous value when an
             // element carries no attributes — confirm that this carry-over is acceptable.
             foreach ($rec->predator[0]->attributes() as $attribute => $value) {
                 $arr = parse_url($value);
                 $predator = trim(@$arr['fragment']);
                 $predator = str_replace("_", " ", $predator);
             }
             $pred_desc = trim($rec->predator_description);
             foreach ($rec->prey[0]->attributes() as $attribute => $value) {
                 $arr = parse_url($value);
                 $prey = trim(@$arr['fragment']);
                 $prey = str_replace("_", " ", $prey);
             }
             $prey_desc = trim($rec->prey_description);
             foreach ($rec->observedInStudy[0]->attributes() as $attribute => $value) {
                 $arr = parse_url($value);
                 $ref_num = trim($arr['fragment']);
             }

             $arr_taxa[$predator]['desc'] = $pred_desc;
             $arr_taxa[$prey]['desc'] = $prey_desc;

             // Seed each list on first sight, then append only values not yet present.
             if (!@$arr_predator[$predator]) {
                 $arr_predator[$predator][] = $prey;
             }
             if (!@$arr_prey[$prey]) {
                 $arr_prey[$prey][] = $predator;
             }
             if (!in_array($prey, $arr_predator[$predator])) {
                 $arr_predator[$predator][] = $prey;
             }
             if (!in_array($predator, $arr_prey[$prey])) {
                 $arr_prey[$prey][] = $predator;
             }
             if (!@$arr_ref[$ref_num]['predator']) {
                 $arr_ref[$ref_num]['predator'][] = $predator;
             }
             if (!@$arr_ref[$ref_num]['prey']) {
                 $arr_ref[$ref_num]['prey'][] = $prey;
             }
             if (!in_array($predator, $arr_ref[$ref_num]['predator'])) {
                 $arr_ref[$ref_num]['predator'][] = $predator;
             }
             if (!in_array($prey, $arr_ref[$ref_num]['prey'])) {
                 $arr_ref[$ref_num]['prey'][] = $prey;
             }
         }

         foreach ($xml->Study as $rec) {
             $habitats = array();
             foreach ($rec->ofHabitat as $habitat) {
                 foreach ($habitat->attributes() as $attribute => $value) {
                     $arr = parse_url($value);
                     $habitat = trim($arr['fragment']);
                     $habitats[] = str_replace("_", " ", $habitat);
                 }
             }
             $habitats = implode(", ", $habitats);
             if ($habitats == "unknown") {
                 $habitats = "";
             }

             $place = self::parse_locality(trim($rec->locality));
             $country = @$place["country"];
             $state = @$place["state"];
             $locality = @$place["locality"];

             $titleAndAuthors = trim($rec->titleAndAuthors);
             if ($titleAndAuthors == "Animal Diversity Web") {
                 $titleAndAuthors = "Myers, P., R. Espinosa, C. S. Parr, T. Jones, G. S. Hammond, and T. A. Dewey. 2006. The Animal Diversity Web (online). Accessed February 16, 2011 at http://animaldiversity.org. http://www.animaldiversity.org";
             }

             // The study is filed under the reference number of the last food-web link read above.
             // NOTE(review): presumably each document describes a single study — confirm.
             $reference[$ref_num] = array("titleAndAuthors" => $titleAndAuthors, "publicationYear" => trim($rec->publicationYear), "place" => trim($rec->locality), "country" => $country, "state" => $state, "locality" => $locality, "habitat" => $habitats);
         }
     }

     //for ancestry
     require_library('XLSParser');
     $parser = new XLSParser();
     $names = $parser->convert_sheet_to_array(SPIRE_PATH_ANCESTRY);
     foreach ($arr_taxa as $taxon => $temp) {
         $arr_taxa[$taxon]['objects'] = array("predator" => @$arr_predator[$taxon], "prey" => @$arr_prey[$taxon]);
         //start ancestry
         $key = array_search(trim($taxon), $names['tname']);
         // array_search() yields false when absent; strval(false) is "", while a hit at index 0 gives "0".
         if (strval($key) != "") {
             $arr_taxa[$taxon]['ancestry'] = self::get_ancestry($key, $names);
         }
     }

     return array($arr_taxa, $arr_ref, $reference);
 }
Ejemplo n.º 12
0
 /**
  * Converts the spreadsheet at $this->path_to_spreadsheet to EOL XML and writes it to disk.
  *
  * @return mixed the path returned by $this->output_file() once the XML has been written,
  *               or null when that file cannot be opened for writing
  */
 public function convert_to_old_schema_xml()
 {
     require_library('XLSParser');
     $converter = new XLSParser();
     $xml = $converter->create_eol_xml($this->path_to_spreadsheet);

     $output_file = $this->output_file();
     $handle = fopen($output_file, "w+");
     if (!$handle) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $output_file);
         return;
     }

     fwrite($handle, $xml);
     fclose($handle);
     return $output_file;
 }
 /**
  * Builds a name => list-of-people map from the bundled Acknowledgments workbook.
  *
  * The key is the 'sciname' cell with any ".mp4" (case-insensitive) removed and trimmed.
  * The people are the truthy cells of columns person1, person2 and person3 on the same row,
  * in that order.
  *
  * @return array map of cleaned name to a list of acknowledged people
  */
 public static function prepare_acknowledgement()
 {
     require_library('XLSParser');
     $reader = new XLSParser();
     $sheet = $reader->convert_sheet_to_array(DOC_ROOT . "update_resources/connectors/files/NaturalHistoryServices/Acknowledgments.xls");

     $acknowledgement = array();
     $row = 0;
     foreach ($sheet["sciname"] as $file_name) {
         $taxon = trim(str_ireplace(".mp4", "", $file_name));
         foreach (array("person1", "person2", "person3") as $column) {
             // Error-suppressed lookup: a missing column or cell counts as "no person".
             $person = @$sheet[$column][$row];
             if ($person) {
                 $acknowledgement[$taxon][] = $person;
             }
         }
         $row++;
     }
     return $acknowledgement;
 }