Ejemplo n.º 1
0
 /**
  * Harvests the distribution maps of every species linked from an index page.
  *
  * Downloads $html_path and collects the href of each `<i><A href="...">`
  * anchor. For every href the "a.htm" suffix is replaced with "m.htm" to get
  * the map page, and any extra URLs registered under the page's numeric id in
  * $this->additional_maps are added. Each URL for which self::get_map_data()
  * returns a record produces a taxon (create_instances_from_taxon_object) and
  * up to two self::get_images() calls: one for the manual dot map ("map") and
  * one for the computer-generated map ("computer_gen_map").
  *
  * Progress and "investigate ..." diagnostics are echoed.
  *
  * @param string $html_path location of the index page to download
  * @return void
  */
 private function process_html($html_path)
 {
     $download_options = array('timeout' => 999999, 'download_attempts' => 2, 'delay_in_minutes' => 2);
     $html = Functions::get_remote_file($html_path, $download_options);
     if (!$html) {
         echo "\n investigate 02 [{$html_path}]";
         return;
     }
     // sample anchor: <i><A href="362a.htm">Gryllotalpa cultriger</a></i>
     if (!preg_match_all("/<i><A href=\"(.*?)\"/ims", $html, $matches)) {
         echo "\n investigate 01 [{$html_path}]";
         return;
     }
     foreach ($matches[1] as $href) {
         $map_page = str_ireplace("a.htm", "m.htm", $href);
         $urls = array($this->sina_domain . $map_page);
         // intval() keeps only the leading digits, e.g. "362m.htm" => 362
         $id = intval($map_page);
         echo "\n id: [{$id}]";
         if (isset($this->additional_maps[$id])) {
             foreach ($this->additional_maps[$id] as $extra_url) {
                 $urls[] = $extra_url;
             }
         }
         foreach ($urls as $url) {
             $rec = self::get_map_data($url);
             if (!$rec) {
                 continue;
             }
             // the taxon id is the numeric part of the map image's file name
             $map_parts = pathinfo(@$rec["map"]);
             $rec["taxon_id"] = intval($map_parts["filename"]);
             if (!$rec["taxon_id"]) {
                 echo "\n investigate blank taxon_id [{$url}]\n";
                 continue;
             }
             $padded_id = Functions::format_number_with_leading_zeros($rec["taxon_id"], 3);
             $rec["source_url"] = $this->sina_domain . $padded_id . "a.htm";
             $this->create_instances_from_taxon_object($rec, array());

             // manually generated dot map
             $ref_ids = array();
             $agent_ids = array();
             $harvest_note = "Version of manually-generated dot map displayed above, showing U.S. and Canadian records, was harvested from SINA on " . date("M-d-Y") . ".<br><br>";
             $rec["caption"] = $harvest_note . @$rec["caption"];
             if (@$rec["map"]) {
                 self::get_images($rec["sciname"], $rec["caption"], $rec["taxon_id"], $map_parts["filename"], $rec["map"], $rec["source_url"], $ref_ids, $agent_ids);
             }

             // computer generated map, captioned with its "as of" text
             if (@$rec["computer_gen_map"]) {
                 $generated_parts = pathinfo($rec["computer_gen_map"]);
                 $ref_ids = array();
                 $agent_ids = array();
                 $caption = $rec["as_of"];
                 if ($rec["link_back"]) {
                     $caption .= "<br><br>" . 'See also this <a href="' . $rec["link_back"] . '">manually generated dot map</a> showing U.S. and Canadian records, with shaded area showing likely general distribution.';
                 }
                 self::get_images($rec["sciname"], $caption, $rec["taxon_id"], $generated_parts["filename"], $rec["computer_gen_map"], $rec["source_url"], $ref_ids, $agent_ids);
             }
         }
     }
 }
Ejemplo n.º 2
0
/**
 * Appends the supplementary MycoBank dump file(s) to the main dump file,
 * after first copying the main dump to a backup file.
 *
 * All files live under DOC_ROOT/public/tmp/mycobank/: the main dump is
 * mycobank_dump.txt, the backup is mycobank_dump_backup.txt and each addition
 * is mycobank_dump_addNN.txt (NN = two-digit sequence; the loop currently
 * covers only 01).
 *
 * Nothing is appended if the backup copy fails. Processing also stops at the
 * first addition file or target file that cannot be opened.
 *
 * @return void
 */
function utility_append_text_loop()
{
    $folder = DOC_ROOT . "/public/tmp/mycobank/";
    $dump_file = $folder . "mycobank_dump.txt";

    echo "\n backing up first...";
    if (!copy($dump_file, $folder . "mycobank_dump_backup.txt")) {
        // Never modify the dump when there is no backup to fall back on.
        echo "\n backup failed, nothing appended. \n";
        return;
    }
    echo "\n backup done. \n";

    for ($x = 1; $x <= 1; $x++) {
        $str = Functions::format_number_with_leading_zeros($x, "2");
        $addition_file = $folder . "mycobank_dump_add" . $str . ".txt";
        if (!($READ = Functions::file_open($addition_file, "r"))) {
            return;
        }
        // Read to EOF instead of fread($READ, filesize(...)): fread() rejects
        // a length of 0, so an empty addition file used to raise an error.
        $contents = (string) stream_get_contents($READ);
        fclose($READ);

        echo "\n copying... {$addition_file}";
        echo "\n to... {$dump_file}\n";
        if (!($WRITE = Functions::file_open($dump_file, "a"))) {
            return;
        }
        fwrite($WRITE, $contents);
        fclose($WRITE);
    }
}
Ejemplo n.º 3
0
 /**
  * Downloads the record list of every phylum and spools the records into
  * numbered JSON batch files, then writes the list of batch names.
  *
  * For each entry of $arr_phylum the service at PHYLUM_SERVICE_URL + name is
  * fetched through Functions::lookup_with_cache() and parsed as XML. Every
  * <record> element is queued; whenever 8000 records are queued they are
  * handed to self::save_to_json_file() as
  * $this->TEMP_FILE_PATH . "sl_batch_NNN.txt". A phylum whose response does
  * not yield a usable XML document is reported through
  * self::log_cannot_access_phylum(). The method sleeps 10 seconds after every
  * phylum, whether or not it succeeded.
  *
  * Finally the batch names ("sl_batch_NNN", one per line) are written to
  * $this->WORK_LIST.
  *
  * @param array $arr_phylum list of phyla; each item is read for 'name' and 'id'
  * @return void
  */
 private function count_taxa_per_phylum($arr_phylum)
 {
     $phylum_total = sizeof($arr_phylum);
     $phylum_no = 0;
     $batch_no = 0;
     $pending = array();
     foreach ($arr_phylum as $phylum) {
         $phylum_no++;
         $service_url = PHYLUM_SERVICE_URL . $phylum['name'];
         echo "\n\nphylum service: " . $service_url . "\n";
         $response = Functions::lookup_with_cache($service_url, $this->download_options);
         $xml = simplexml_load_string($response);
         if ($xml) {
             echo "\n [{$phylum_no} of {$phylum_total}] {$phylum['name']} {$phylum['id']} -- [" . sizeof($xml->record) . "]";
             foreach ($xml->record as $rec) {
                 $pending[] = $rec;
                 if (sizeof($pending) < 8000) {
                     continue;
                 }
                 // queue is full: flush it to the next batch file
                 $batch_no++;
                 $batch_file = $this->TEMP_FILE_PATH . "sl_batch_" . Functions::format_number_with_leading_zeros($batch_no, 3) . ".txt";
                 self::save_to_json_file($pending, $batch_file);
                 $pending = array();
             }
         } else {
             echo "\n\n Cannot access: " . $service_url;
             self::log_cannot_access_phylum($service_url);
         }
         sleep(10);
     }

     // flush whatever is left after the last phylum
     if ($pending) {
         $batch_no++;
         $batch_file = $this->TEMP_FILE_PATH . "sl_batch_" . Functions::format_number_with_leading_zeros($batch_no, 3) . ".txt";
         self::save_to_json_file($pending, $batch_file);
     }

     // work list: one batch name per line
     $work_list = "";
     for ($n = 1; $n <= $batch_no; $n++) {
         $work_list .= "sl_batch_" . Functions::format_number_with_leading_zeros($n, 3) . "\n";
     }
     $fp = fopen($this->WORK_LIST, "w");
     if ($fp) {
         fwrite($fp, $work_list);
         fclose($fp);
     }
 }
Ejemplo n.º 4
0
 /**
  * Downloads the species list and splits it into batch files of $divisor
  * lines each, then writes a work list naming every batch.
  *
  * The list at self::DL_MAP_SPECIES_LIST is saved locally and iterated line
  * by line; empty lines are skipped. Every $divisor non-empty lines are
  * written to $this->TEMP_FILE_PATH . "batch_NNN.txt", and a final partial
  * batch is written for any remainder. The batch names ("batch_NNN", one per
  * line) go to $this->TEMP_FILE_PATH . "work_list.txt".
  *
  * Returns early, without writing the work list, when the download fails or
  * a batch file cannot be opened.
  *
  * @param int $divisor number of lines per batch file
  * @return void
  */
 private function divide_text_file($divisor)
 {
     $download_options = array('timeout' => 4800, 'download_attempts' => 5);
     $temp_filepath = Functions::save_remote_file_to_local(self::DL_MAP_SPECIES_LIST, $download_options);
     if (!$temp_filepath) {
         echo "\n\nExternal file not available. Program will terminate.\n";
         return;
     }

     $lines_in_batch = 0;
     $batch_no = 0;
     $buffer = "";
     print "\n";
     foreach (new FileIterator($temp_filepath, true) as $line) {
         if (!$line) {
             continue;
         }
         // the iterator hands back lines without their line ending; restore it
         $line .= "\n";
         $lines_in_batch++;
         $buffer .= $line;
         print "{$lines_in_batch}. {$line}\n";
         if ($lines_in_batch != $divisor) {
             continue;
         }
         // batch is full: write it out and start a new one
         print "\n";
         $batch_no++;
         $batch_path = $this->TEMP_FILE_PATH . "batch_" . Functions::format_number_with_leading_zeros($batch_no, 3) . ".txt";
         $OUT = Functions::file_open($batch_path, "w");
         if (!$OUT) {
             return;
         }
         fwrite($OUT, $buffer);
         fclose($OUT);
         $buffer = "";
         $lines_in_batch = 0;
     }

     // remainder that did not fill a whole batch
     if ($buffer) {
         $batch_no++;
         $batch_path = $this->TEMP_FILE_PATH . "batch_" . Functions::format_number_with_leading_zeros($batch_no, 3) . ".txt";
         $OUT = Functions::file_open($batch_path, "w");
         if (!$OUT) {
             return;
         }
         fwrite($OUT, $buffer);
         fclose($OUT);
     }

     // work list: one batch name per line
     $work_list = "";
     for ($n = 1; $n <= $batch_no; $n++) {
         $work_list .= "batch_" . Functions::format_number_with_leading_zeros($n, 3) . "\n";
     }
     $fp = Functions::file_open($this->TEMP_FILE_PATH . "work_list.txt", "w");
     if ($fp) {
         fwrite($fp, $work_list);
         fclose($fp);
     }
 }
Ejemplo n.º 5
0
 /**
  * Builds a list of date ranges, each converted by self::get_timestamp_range().
  *
  * Without $month: one range per month from January of $start_year up to the
  * current month of the current year, each running from day "01" to day "31".
  *
  * With $month: one range per day of that month in $start_year, i.e. day N to
  * day N+1 for N = 1..30, plus a final range from day 31 to the first day of
  * the following month (rolling over to January of the next year after
  * December).
  *
  * NOTE(review): day 31 (and days 29/30 for February) is generated for every
  * month, so some of the date strings are not real calendar dates. How
  * get_timestamp_range() treats them is not visible here - confirm.
  *
  * @param int|string $start_year first (or only) year to cover
  * @param int|string|null $month when given, produce daily ranges for this month
  * @return array list of whatever self::get_timestamp_range() returns
  */
 private function get_date_ranges($start_year, $month = NULL)
 {
     $ranges = array();

     if ($month) {
         // daily ranges within a single month
         $month = Functions::format_number_with_leading_zeros($month, 2);
         $year_month = $start_year . "-" . $month . "-";
         for ($day = 1; $day <= 30; $day++) {
             $from = $year_month . Functions::format_number_with_leading_zeros($day, 2);
             $to = $year_month . Functions::format_number_with_leading_zeros($day + 1, 2);
             $ranges[] = self::get_timestamp_range($from, $to);
         }
         // last range bridges into the following month
         if ($month == "12") {
             $next_year = $start_year + 1;
             $next_month = "01";
         } else {
             $next_year = $start_year;
             $next_month = Functions::format_number_with_leading_zeros(intval($month) + 1, 2);
         }
         $ranges[] = self::get_timestamp_range($year_month . "31", $next_year . "-" . $next_month . "-01");
         return $ranges;
     }

     // monthly ranges from $start_year through the current month
     $current_year = date("Y");
     for ($year = $start_year; $year <= $current_year; $year++) {
         $last_month = ($year == $current_year) ? date("n") : 12;
         for ($m = 1; $m <= $last_month; $m++) {
             $year_month = $year . "-" . Functions::format_number_with_leading_zeros($m, 2);
             $ranges[] = self::get_timestamp_range($year_month . "-01", $year_month . "-31");
         }
     }
     return $ranges;
 }
Ejemplo n.º 6
0
 /**
  * Splits the name export file into batch files of $divisor lines each and
  * writes a work list naming every batch.
  *
  * TROPICOS_NAME_EXPORT_FILE is iterated line by line; empty lines are
  * skipped. Every $divisor non-empty lines are written to
  * $this->TEMP_FILE_PATH . "batch_NN.txt", with a final partial batch for any
  * remainder. The batch names ("batch_NN", one per line) are then written to
  * $this->WORK_LIST.
  *
  * Returns early, without writing the work list, when a batch file cannot be
  * opened; the failure is reported through debug().
  *
  * @param int $divisor number of lines per batch file
  * @return void
  */
 function divide_text_file($divisor)
 {
     $lines_in_batch = 0;
     $batch_no = 0;
     $buffer = "";
     foreach (new FileIterator(TROPICOS_NAME_EXPORT_FILE) as $line) {
         if (!$line) {
             continue;
         }
         // the iterator hands back lines without their line ending; restore it
         $line .= "\n";
         $lines_in_batch++;
         $buffer .= $line;
         echo "\n {$lines_in_batch}. {$line}";
         if ($lines_in_batch != $divisor) {
             continue;
         }
         // batch is full: write it out and start a new one
         $batch_no++;
         $batch_path = $this->TEMP_FILE_PATH . "batch_" . Functions::format_number_with_leading_zeros($batch_no, 2) . ".txt";
         $OUT = fopen($batch_path, "w");
         if (!$OUT) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $batch_path);
             return;
         }
         fwrite($OUT, $buffer);
         fclose($OUT);
         $buffer = "";
         $lines_in_batch = 0;
     }

     // remainder that did not fill a whole batch
     if ($buffer) {
         $batch_no++;
         $batch_path = $this->TEMP_FILE_PATH . "batch_" . Functions::format_number_with_leading_zeros($batch_no, 2) . ".txt";
         $OUT = fopen($batch_path, "w");
         if (!$OUT) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $batch_path);
             return;
         }
         fwrite($OUT, $buffer);
         fclose($OUT);
     }

     // work list: one batch name per line
     $work_list = "";
     for ($n = 1; $n <= $batch_no; $n++) {
         $work_list .= "batch_" . Functions::format_number_with_leading_zeros($n, 2) . "\n";
     }
     $OUT = fopen($this->WORK_LIST, "w+");
     if ($OUT) {
         fwrite($OUT, $work_list);
         fclose($OUT);
     }
 }
Ejemplo n.º 7
0
 /**
  * Concatenates the numbered temporary batch XML files into one resource file.
  *
  * Writes CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml", starting with
  * the XML declaration and the opening <response> element, followed by the
  * raw contents of $this->TEMP_FILE_PATH . "temp_worms_batch_NNN.xml" for
  * NNN = 001, 002, ... until the first number with no file, and finally the
  * closing </response> tag.
  *
  * If the resource file cannot be opened nothing is written. If a batch file
  * exists but cannot be opened, compilation stops there: the output handle is
  * closed and the resource file is left without its closing tag.
  *
  * @param int|string $resource_id identifier used as the output file name
  * @return void
  */
 function combine_all_xmls($resource_id)
 {
     debug("\n\n Start compiling all XML...");
     $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
     if (!($OUT = fopen($old_resource_path, "w"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $old_resource_path);
         return;
     }
     $str = "<?xml version='1.0' encoding='utf-8' ?>\n";
     $str .= "<response\n";
     $str .= "  xmlns='http://www.eol.org/transfer/content/0.3'\n";
     $str .= "  xmlns:xsd='http://www.w3.org/2001/XMLSchema'\n";
     $str .= "  xmlns:dc='http://purl.org/dc/elements/1.1/'\n";
     $str .= "  xmlns:dcterms='http://purl.org/dc/terms/'\n";
     $str .= "  xmlns:geo='http://www.w3.org/2003/01/geo/wgs84_pos#'\n";
     $str .= "  xmlns:dwc='http://rs.tdwg.org/dwc/dwcore/'\n";
     $str .= "  xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'\n";
     $str .= "  xsi:schemaLocation='http://www.eol.org/transfer/content/0.3 http://services.eol.org/schema/content_0_3.xsd'>\n";
     fwrite($OUT, $str);
     $i = 0;
     while (true) {
         $i++;
         $i_str = Functions::format_number_with_leading_zeros($i, 3);
         $filename = $this->TEMP_FILE_PATH . "temp_worms_" . "batch_" . $i_str . ".xml";
         if (!is_file($filename)) {
             // the first missing sequence number marks the end of the batches
             echo " -end compiling XML's- ";
             break;
         }
         echo " {$i} ";
         if (!($READ = fopen($filename, "r"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $filename);
             // release the output handle instead of leaking it on this exit path
             fclose($OUT);
             return;
         }
         // Read to EOF rather than fread($READ, filesize(...)): fread() rejects
         // a length of 0, so a zero-byte batch file used to raise an error
         // before the "no contents" branch below could report it.
         $contents = stream_get_contents($READ);
         fclose($READ);
         if ($contents) {
             fwrite($OUT, $contents);
         } else {
             echo "\n no contents {$i}";
         }
     }
     fwrite($OUT, "</response>");
     fclose($OUT);
     echo "\n All XML compiled\n\n";
 }
Ejemplo n.º 8
0
 /**
  * Splits a master text file into batch files of $divisor lines each and
  * writes a work list naming every batch.
  *
  * Lines are copied verbatim (including blank lines and their line endings).
  * Batch files are named $destination_folder . $filename_prefix . "NNN.txt";
  * the work list receives $filename_prefix . "NNN", one per line.
  *
  * @param string $master_file        path of the file to split
  * @param int    $divisor            number of lines per batch file
  * @param string $destination_folder folder (with trailing separator) for the batch files
  * @param string $filename_prefix    prefix of every batch file name
  * @param string $work_list          path of the work list file to write
  * @return int|false|null number of batch files written; false when the
  *         master file cannot be opened; null when a batch file cannot be
  *         opened (the work list is not written in that case)
  */
 public function create_work_list_from_master_file($master_file, $divisor, $destination_folder, $filename_prefix, $work_list)
 {
     if (!($FILE = Functions::file_open($master_file, "r"))) {
         echo "\n File not found: \n {$master_file} \n Program will terminate.\n\n";
         return false;
     }
     $i = 0;
     $file_ctr = 0;
     $str = "";
     // Compare against false explicitly: a bare truthiness test drops a final
     // line consisting of "0", and looping on feof() alone never terminates
     // if a read fails before end-of-file.
     while (($line = fgets($FILE)) !== false) {
         $i++;
         $str .= $line;
         print "\n{$i}. {$line}";
         if ($i == $divisor) {
             $file_ctr++;
             $file_ctr_str = Functions::format_number_with_leading_zeros($file_ctr, 3);
             if (!($OUT = Functions::file_open($destination_folder . $filename_prefix . $file_ctr_str . ".txt", "w"))) {
                 fclose($FILE);
                 return;
             }
             fwrite($OUT, $str);
             fclose($OUT);
             $str = "";
             $i = 0;
         }
     }
     // the master file is fully read; release its handle
     fclose($FILE);

     // remainder that did not fill a whole batch
     if ($str) {
         $file_ctr++;
         $file_ctr_str = Functions::format_number_with_leading_zeros($file_ctr, 3);
         if (!($OUT = Functions::file_open($destination_folder . $filename_prefix . $file_ctr_str . ".txt", "w"))) {
             return;
         }
         fwrite($OUT, $str);
         fclose($OUT);
     }

     // work list: one batch name per line
     $str = "";
     for ($i = 1; $i <= $file_ctr; $i++) {
         $str .= $filename_prefix . Functions::format_number_with_leading_zeros($i, 3) . "\n";
     }
     if ($fp = Functions::file_open($work_list, "w")) {
         fwrite($fp, $str);
         fclose($fp);
     }
     // total number of work tasks
     return $file_ctr;
 }
Ejemplo n.º 9
0
 /**
  * Deletes consecutively numbered temporary files.
  *
  * Removes $file_path . "001." . $file_extension, then "002", "003", ... and
  * stops at the first number for which no file exists. Each deletion is
  * announced on standard output.
  *
  * @param string $file_path      path plus file-name prefix preceding the number
  * @param string $file_extension extension without the leading dot
  * @return void
  */
 private function delete_temp_files($file_path, $file_extension)
 {
     for ($n = 1; ; $n++) {
         $filename = $file_path . Functions::format_number_with_leading_zeros($n, 3) . "." . $file_extension;
         if (!file_exists($filename)) {
             return;
         }
         print "\n unlink: {$filename}";
         unlink($filename);
     }
 }
Ejemplo n.º 10
0
 /**
  * Builds the resource XML from the species, image, common-name and synonym
  * spreadsheets.
  *
  * Taxa come from self::compile_taxa() over FWP_SPECIES_DOC_PATH; the image,
  * common-name and synonym sheets are loaded through XLSParser and
  * self::prepare_table(), and looked up per taxon by its 'SId'. The taxa
  * returned by self::get_fishwise_taxa() are accumulated and periodically
  * serialised with \SchemaDocument::get_taxon_xml() into numbered files
  * (001.xml, 002.xml, ...) under
  * DOC_ROOT/update_resources/connectors/files/FishWisePro/. At the end those
  * files are merged via Functions::combine_all_eol_resource_xmls() and then
  * removed via self::delete_files().
  *
  * Returns early, without merging or cleaning up, when a batch file cannot
  * be opened.
  *
  * @param int|string $resource_id identifier passed on to the XML combiner
  * @return void
  */
 public function get_all_taxa($resource_id)
 {
     $batch_folder = DOC_ROOT . "/update_resources/connectors/files/FishWisePro/";
     $all_taxa = array();
     $this->used_collection_ids = array();
     // several species.xls sources may be listed here
     $urls = array(FWP_SPECIES_DOC_PATH);
     $taxa_arr = self::compile_taxa($urls);

     require_library('XLSParser');
     $parser = new XLSParser();
     $images = self::prepare_table($parser->convert_sheet_to_array(FWP_IMAGES_DOC_PATH), "multiple", "SId", "SId", "PictureId", "dbo_Picture_PictureNote", "PictureType", "IsLegal", "Location", "PicComments", "IsAvailable", "LifeStage", "CollectionName", "CollectionAcronym", "PictureSource", "Surname", "Firstname", "DisplayName", "FileName");
     $comnames = self::prepare_table($parser->convert_sheet_to_array(FWP_COMNAMES_DOC_PATH), "multiple", "SId", "CommonName", "Language");
     $synonyms = self::prepare_table($parser->convert_sheet_to_array(FWP_SYNONYMS_DOC_PATH), "multiple", "SId", "SynGenusSpecies", "SynStatus");

     $total = sizeof($taxa_arr);
     $processed = 0;
     $batch_no = 0;
     foreach ($taxa_arr as $taxon_arr) {
         $processed++;
         $taxon_id = $taxon_arr['SId'];
         echo "\n {$processed} of {$total} -- " . $taxon_id;
         $page_taxa = self::get_fishwise_taxa($taxon_arr, @$images[$taxon_id], @$comnames[$taxon_id], @$synonyms[$taxon_id]);
         $all_taxa = array_merge($all_taxa, $page_taxa);
         // flush when the one-based index of the NEXT taxon is a multiple of 10000
         if (($processed + 1) % 10000 != 0) {
             continue;
         }
         $batch_no++;
         $xml = \SchemaDocument::get_taxon_xml($all_taxa);
         $resource_path = $batch_folder . Functions::format_number_with_leading_zeros($batch_no, 3) . ".xml";
         $OUT = fopen($resource_path, "w+");
         if (!$OUT) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
             return;
         }
         fwrite($OUT, $xml);
         fclose($OUT);
         $all_taxa = array();
     }

     // taxa left over after the last full batch
     if ($all_taxa) {
         $batch_no++;
         $xml = \SchemaDocument::get_taxon_xml($all_taxa);
         $resource_path = $batch_folder . Functions::format_number_with_leading_zeros($batch_no, 3) . ".xml";
         $OUT = fopen($resource_path, "w+");
         if (!$OUT) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
             return;
         }
         fwrite($OUT, $xml);
         fclose($OUT);
     }

     Functions::combine_all_eol_resource_xmls($resource_id, $batch_folder . "*.xml");
     self::delete_files($batch_folder . "*.xml");
     return;
 }
Ejemplo n.º 11
0
 /**
  * Runs one web-service search per parameter and appends the results to
  * rotating "partial" dump files.
  *
  * The partial dump path is $this->dump_file with "mycobank_dump.txt"
  * replaced by "partial", followed by "_NNN.txt" where NNN is
  * $this->dump_no. The counter is incremented on entry and again after every
  * $searches_per_dump parameters, so each partial file holds a slice of the
  * searches.
  *
  * Per parameter:
  *  - it is trimmed and its first letter upper-cased;
  *  - it is skipped when listed in $this->dont_search_more_than_5h or
  *    $this->dont_search_these_strings_as_well;
  *  - the URL is $search_service (or $this->service_search["startswith"] when
  *    no service is given) followed by the parameter in double quotes;
  *  - a failed download, or a response carrying <ErrorMessage>, records the
  *    parameter in $this->names_with_error_dump_file;
  *  - result counts of 500-899 are also logged to $this->more_than_5h and
  *    counts of 900 or more to $this->more_than_1k;
  *  - the extracted records are saved as array($param => $records).
  *
  * @param array|false  $params            search strings; when falsy they come from self::get_params_for_webservice()
  * @param string|false $search_service    URL prefix of the search service to use instead of the default
  * @param int          $searches_per_dump number of parameters after which a new partial dump file is started
  * @return void
  */
 private function save_data_to_text($params = false, $search_service = false, $searches_per_dump = 1000)
 {
     // every call starts a fresh partial dump file
     $this->dump_no++;
     $partial_dump = str_replace("mycobank_dump.txt", "partial", $this->dump_file);
     $partial_dump .= "_" . Functions::format_number_with_leading_zeros($this->dump_no, 3) . ".txt";
     if (!$params) {
         $params = self::get_params_for_webservice();
     }
     $total_params = count($params);
     $i = 0;
     foreach ($params as $param) {
         $param = trim(ucfirst($param));
         print "\n searching:[{$param}]";
         $i++;
         // rotate to the next partial dump file every $searches_per_dump parameters
         if ($i % $searches_per_dump == 0) {
             $this->dump_no++;
             $partial_dump = str_replace("mycobank_dump.txt", "partial", $this->dump_file);
             $partial_dump .= "_" . Functions::format_number_with_leading_zeros($this->dump_no, 3) . ".txt";
         }
         /*
         Disabled manual switch for processing only a slice of the parameters:
         $cont = false;
         // if($i >= 1 && $i < 7000) $cont = true;
         // if($i >= 7000 && $i < 14000) $cont = true;
         // if($i >= 14000 && $i < 21000) $cont = true;
         if(!$cont) continue;
         */
         // parameters on either exclusion list are never sent to the service
         if (in_array($param, $this->dont_search_more_than_5h)) {
             print "\n [{$param}] must not be searched... \n";
             continue;
         } elseif (in_array($param, $this->dont_search_these_strings_as_well)) {
             print "\n [{$param}] must not be searched... \n";
             continue;
         }
         $no_of_results = 0;
         // the search term is appended to the service URL wrapped in double quotes
         if ($val = $search_service) {
             $url = $val . '"' . $param . '"';
         } else {
             $url = $this->service_search["startswith"] . '"' . $param . '"';
         }
         echo "\n[{$param}] {$i} of {$total_params} \n";
         if ($contents = Functions::lookup_with_cache($url, $this->download_options)) {
             // NOTE(review): when this yields a falsy value (e.g. the response
             // is not parseable XML) nothing is echoed or recorded for the
             // parameter - confirm that this is intended.
             if ($response = simplexml_load_string($contents)) {
                 if (isset($response->ErrorMessage)) {
                     echo "\n investigate error [{$param}]: " . $response->ErrorMessage . "\n";
                     sleep(120);
                     // wait two minutes before moving on; the parameter is
                     // recorded in the error dump and the end-of-loop
                     // sleep_now() call is skipped by the continue below
                     echo "\n access failed [{$param}] ... \n";
                     self::save_to_dump($param, $this->names_with_error_dump_file);
                     continue;
                 }
                 // number of child elements of the response root
                 $no_of_results = count($response);
                 if ($no_of_results > 0) {
                     echo " - count: {$no_of_results}";
                     // large result sets are additionally logged, by size band
                     if ($no_of_results >= 500 && $no_of_results < 900) {
                         self::save_to_dump($param . "\t" . $no_of_results, $this->more_than_5h);
                     }
                     if ($no_of_results >= 900) {
                         self::save_to_dump($param . "\t" . $no_of_results, $this->more_than_1k);
                     }
                     $records = array();
                     foreach ($response as $rec) {
                         $hierarchy = "";
                         $source_url = "";
                         $parent = "";
                         // the Classification_ field is mined with three patterns:
                         // title='...' gives the hierarchy (and, via
                         // get_parent_from_hierarchy, the parent)
                         if (preg_match("/title\\='(.*?)'/ims", $rec->Classification_, $arr)) {
                             $hierarchy = $arr[1];
                             $parent = self::get_parent_from_hierarchy($hierarchy);
                         }
                         $rec_id = "";
                         // ;Rec=...& gives the record id
                         if (preg_match("/;Rec\\=(.*?)\\&/ims", $rec->Classification_, $arr)) {
                             $rec_id = $arr[1];
                         }
                         // href='...' gives the source URL, with &amp; decoded to &
                         if (preg_match("/href\\='(.*?)'/ims", $rec->Classification_, $arr)) {
                             $source_url = str_ireplace("&amp;", "&", $arr[1]);
                         }
                         // short keys are used for every field stored in the dump
                         $records[] = array("n" => (string) $rec->Name, "cn" => (string) $rec->CurrentName_Pt_, "r" => (string) $rec->Rank_Pt_, "nt" => (string) $rec->NameType_, "ns" => (string) $rec->NameStatus_, "a" => (string) $rec->Authors_, "p" => $parent, "h" => $hierarchy, "s" => $source_url, "t" => (string) $rec->MycoBankNr_, "d" => (string) $rec_id, "y" => (string) $rec->NameYear_, "e3" => (string) $rec->E3787, "e4" => (string) $rec->E4060, "so" => (string) $rec->ObligateSynonyms_Pt_, "sf" => (string) $rec->FacultativeSynonyms_Pt_);
                     }
                     // one dump entry per parameter: array(param => list of records)
                     $temp = array();
                     $temp[$param] = $records;
                     self::save_to_dump($temp, $partial_dump);
                 } else {
                     echo "\n no result for: [{$param}]\n";
                     /* decided not to save params with zero records anymore - 14Jul2014
                        // save even with no records, so it won't be searched again...
                        $temp = array();
                        $temp[$param] = array();
                        self::save_to_dump($temp, $partial_dump);
                        */
                 }
             }
         } else {
             // download failed: remember the parameter in the error dump
             echo "\n access failed [{$param}] ... \n";
             self::save_to_dump($param, $this->names_with_error_dump_file);
         }
         // pause between searches; the helper receives the result count
         self::sleep_now($no_of_results);
     }
 }