/**
 * Fetches the current revision of each wiki page named in $recs and archives
 * the pages whose parsed content lists taxa.
 *
 * For every record the page content is requested from $this->wikipedia_api,
 * parsed with parse_wiki_content(), and passed to create_archive() when the
 * parsed data has a 'Taxa Found in Page' => 'text' entry; otherwise a
 * "no taxa found" notice is echoed.
 *
 * NOTE(review): a hard-coded debug filter still limits processing to the page
 * titled "16059324" -- confirm whether it should stay before a full run.
 *
 * @param array|Traversable $recs records exposing a ->title property
 * @return void
 */
private function process_pages($recs)
 {
     foreach ($recs as $rec) {
         // debug only: other titles used while testing were 42194843, 42194845 (without licensor),
         // 33870179 (with copyrightstatus) and 13128418 / 30413122 (with licensor)
         if ($rec->title != "16059324") {
             continue;
         }
         echo "\n" . $rec->title;
         $url = $this->wikipedia_api . "?action=query&titles=" . urlencode($rec->title) . "&format=json&prop=revisions&rvprop=content";
         //this expire_seconds should always be true
         $json = Functions::lookup_with_cache($url, array('expire_seconds' => true));
         $arr = json_decode($json, true);
         // A failed download or malformed JSON leaves no page list; skip the record
         // instead of handing null to foreach.
         if (!isset($arr['query']['pages']) || !is_array($arr['query']['pages'])) {
             continue;
         }
         foreach ($arr['query']['pages'] as $page) {
             // '*' holds the raw wiki text of the first returned revision
             if (!isset($page['revisions'][0]['*'])) {
                 continue;
             }
             $val = $page['revisions'][0]['*'];
             if (!$val) {
                 continue;
             }
             $data = self::parse_wiki_content($val);
             if (!$data) {
                 continue;
             }
             if (isset($data['Taxa Found in Page']['text'])) {
                 self::create_archive($data);
             } else {
                 echo "\n[no taxa found for wiki: " . $data['Page Summary']['PageID'] . "]\n";
             }
         }
     }
 }
Example #2
0
 /**
  * Downloads an XML document and collects metadata about its <dataObject>.
  *
  * Reads the dcterms identifier of the root element (stored as 'source') and,
  * from the dataObject element, the bibliographic citation, identifier and
  * agents (name plus "role" attribute). Data type, MIME type and license are
  * fixed values. The raw XML is still printed, as before, for inspection.
  *
  * @param string $url location of the XML document
  * @return array|false the collected parameters, or false when the document
  *                     could not be downloaded or parsed
  */
 public static function get_metadata($url)
 {
     $dcterms_ns = "http://dublincore.org/documents/dcmi-terms/";
     $xml = Functions::lookup_with_cache($url, array('validation_regex' => 'xmlns:'));
     // simplexml_load_string() returns false on malformed input; bail out rather
     // than calling ->children() on a boolean.
     $simple_xml = $xml ? simplexml_load_string($xml) : false;
     if ($simple_xml === false) {
         return false;
     }
     $params = array();
     // the identifier on the root element is the source of the record
     $dcterms = $simple_xml->children($dcterms_ns);
     $params['source'] = (string) $dcterms->identifier;
     $data_object = $simple_xml->dataObject;
     $dcterms = $data_object->children($dcterms_ns);
     $params['citation'] = (string) $dcterms->bibliographicCitation;
     $params['identifier'] = (string) $dcterms->identifier;
     $params['data_type'] = "http://purl.org/dc/dcmitype/Text";
     $params['mime_type'] = "text/html";
     $params['license'] = "not applicable";
     $params['agents'] = array();
     foreach ($data_object->agent as $agent) {
         $attr = $agent->attributes();
         $agent_role = isset($attr['role']) ? (string) $attr['role'] : '';
         $params['agents'][] = array((string) $agent, $agent_role);
     }
     print_r($xml);
     // print_r($params);
     echo "\n\n\n";
     return $params;
 }
Example #3
0
 /**
  * Scrapes the image list page for the links of the allowed contributors.
  *
  * Only anchors inside the <ul id="scbar"> element are considered, and only
  * those whose markup contains one of the allowed names (case-insensitive).
  *
  * @return array|false map of contributor label => absolute link
  *                     ($this->domain + href), or false when the page or the
  *                     list of anchors cannot be found
  */
 private function get_contributors()
 {
     $allowed = array("Robertson Ross", "Robertson &", "Bryant Kevin", "Cox Carol & Bob", "Garin James");
     $html = Functions::lookup_with_cache($this->image_list_page, $this->download_options);
     if (!$html) {
         return false;
     }
     if (!preg_match("/<ul id=\"scbar\"(.*?)<\\/ul>/ims", $html, $list)) {
         return false;
     }
     if (!preg_match_all("/<a href=(.*?)<\\/a>/ims", $list[1], $anchors)) {
         return false;
     }
     // keep each matching anchor once; the array key de-duplicates
     $lines = array();
     foreach ($anchors[1] as $line) {
         foreach ($allowed as $str) {
             if (stripos($line, $str) !== false) {
                 $lines[$line] = '';
                 break;
             }
         }
     }
     $contributors = array();
     foreach (array_keys($lines) as $line) {
         // An anchor without a quoted href has no usable link; skip it so that the
         // path of the previous anchor is never reused for this contributor.
         if (!preg_match("/\"(.*?)\"/ims", $line, $href)) {
             continue;
         }
         $path = $this->domain . trim($href[1]);
         // the contributor label is whatever follows the closing </i>; "xxx" marks the end of the line
         if (preg_match("/<\\/i>(.*?)xxx/ims", $line . "xxx", $label)) {
             $contributors[trim($label[1])] = $path;
         }
     }
     print_r($contributors);
     return $contributors;
 }
 /**
  * Pages through the specimen service, $limit records at a time, and then
  * finalizes the archive.
  *
  * Paging stops at the first page that yields fewer than $limit records. A
  * failed download or a response without result->records counts as zero
  * records, so an unavailable service ends the loop instead of spinning on a
  * stale count from the previous page.
  *
  * NOTE(review): record processing is currently commented out and the start
  * offset is hard-coded (the original values were limit=500, offset=0).
  *
  * @return void
  */
 function get_all_taxa()
 {
     $this->uris = self::get_uris();
     // print_r($this->uris); exit;
     // offset = 927 k while caching...
     $limit = 500;
     $offset = 2552000;
     //orig limit=500 offset=0
     while (true) {
         // reset per page so a failure is never mistaken for a full page
         $returned = 0;
         $url = $this->service['specimen'] . "&limit={$limit}&offset={$offset}";
         if ($contents = Functions::lookup_with_cache($url, $this->download_options)) {
             $json = json_decode($contents);
             if (isset($json->result->records) && is_array($json->result->records)) {
                 $returned = count($json->result->records);
             }
             echo "\ncount: [{$returned}]\n";
             // self::process_specimen_records($json);
         }
         $offset += $limit;
         if ($returned < $limit) {
             break;
         }
     }
     $this->archive_builder->finalize(TRUE);
 }
Example #5
0
 /**
  * Collects the GGBN result pages for one kingdom and turns the parsed records
  * into instances via create_instances_from_taxon_object().
  *
  * The first page is fetched from $this->kingdom_service_ggbn . $kingdom; the
  * "N entries found" / "N entry found" text on it determines, through
  * get_number_of_pages(), how many pages are visited.
  *
  * @param string $kingdom kingdom identifier appended to the service URL
  * @return void
  */
 private function query_kingdom_GGBN_info($kingdom)
 {
     $records = array();
     $rec = array(
         "source" => $this->kingdom_service_ggbn . $kingdom,
         "taxon_id" => $kingdom,
     );
     $html = Functions::lookup_with_cache($rec["source"], $this->download_options);
     $has_total = $html
         && (preg_match("/<b>(.*?) entries found/ims", $html, $arr) || preg_match("/<b>(.*?) entry found/ims", $html, $arr));
     if ($has_total) {
         print "\n {$kingdom}: " . $arr[1] . "\n";
         $pages = self::get_number_of_pages($arr[1]);
         print "\n pages to access: [{$pages}]\n";
         $page_no = 1;
         while ($page_no <= $pages) {
             echo "\n {$page_no} of {$pages} ";
             // page 1 is already in $html; later pages are downloaded on demand
             if ($page_no > 1) {
                 $rec["source"] = $this->kingdom_service_ggbn . $kingdom . "&page={$page_no}";
                 $html = Functions::lookup_with_cache($rec["source"], $this->download_options);
             }
             $parsed = self::process_html($html, $rec["source"]);
             if ($parsed) {
                 $records = array_merge($records, $parsed);
             }
             $page_no++;
         }
     }
     self::create_instances_from_taxon_object($records);
 }
 /**
  * Downloads a page, normalises its markup and hands it to parse_page().
  *
  * @param string $url  page to download
  * @param mixed  $type passed through unchanged to parse_page()
  * @return mixed the result of parse_page(), or false when the download fails
  */
 private function process_html($url, $type)
 {
     $page = Functions::lookup_with_cache($url, $this->download_options);
     if (!$page) {
         return false;
     }
     // centred cells are rewritten to plain <td> before parsing
     $normalised = str_ireplace('<td align=center>', '<td>', self::clean_html($page));
     return self::parse_page($normalised, $type);
 }
 /**
  * Walks every page of a Flickr photoset and archives the photos that carry a
  * recognisable scientific name and pass the visibility/licence checks.
  *
  * A photo is skipped when no name can be derived from its title, when its
  * details cannot be fetched, when it is not public or not downloadable, or
  * when its licence is not listed in $GLOBALS["flickr_licenses"].
  *
  * @param array $params expects 'photoset_id' and 'flickr_user_id'
  * @return void
  */
 function convert_to_dwca($params)
 {
     require_library('FlickrAPI');
     $auth_token = NULL;
     // if(FlickrAPI::valid_auth_token(FLICKR_AUTH_TOKEN)) $auth_token = FLICKR_AUTH_TOKEN;
     $per_page = 500;
     $url = $this->service['photosets'] . '&photoset_id=' . $params['photoset_id'] . '&user_id=' . $params['flickr_user_id'] . '&per_page=' . $per_page;
     // the first page is fetched up front only to learn the total number of pages
     $first_page = Functions::lookup_with_cache($url . '&page=1', $this->download_options);
     if ($first_page) {
         $summary = json_decode(str_replace("\\'", "'", $first_page));
         $total_pages = ceil($summary->photoset->total / $per_page);
         echo "\ntotal_pages = {$total_pages}\n";
         for ($i = 1; $i <= $total_pages; $i++) {
             $json = Functions::lookup_with_cache($url . '&page=' . $i, $this->download_options);
             if (!$json) {
                 continue;
             }
             $obj = json_decode(str_replace("\\'", "'", $json));
             $total_photos = count($obj->photoset->photo);
             $k = 0;
             foreach ($obj->photoset->photo as $rec) {
                 $k++;
                 echo "\n{$i} of {$total_pages} - {$k} of {$total_photos}";
                 $sciname = self::get_sciname_from_title($rec->title);
                 if (!$sciname) {
                     continue;
                 }
                 $photo_response = FlickrAPI::photos_get_info($rec->id, $rec->secret, $auth_token, $this->download_options);
                 $photo = @$photo_response->photo;
                 // must exist, be public and be downloadable
                 if (!$photo || $photo->visibility->ispublic != 1 || $photo->usage->candownload != 1) {
                     continue;
                 }
                 if (@(!$GLOBALS["flickr_licenses"][$photo->license])) {
                     continue;
                 }
                 foreach (FlickrAPI::get_data_objects($photo, $params['flickr_user_id']) as $do) {
                     self::create_archive($sciname, $do);
                 }
             }
         }
     }
     $this->archive_builder->finalize(TRUE);
 }
Example #8
0
 /**
  * Downloads a page and extracts its source, life form, authorship citation
  * and the habitat terms listed in its fire regime appendix table.
  *
  * In the appendix table a row with a single cell starts a new group (its text
  * becomes the habitat index); each following multi-cell row contributes the
  * term derived from its first cell to that group, without duplicates.
  *
  * @param string $url page to download
  * @return array empty when the download fails; otherwise 'source',
  *               'life_form', 'authorship_citation' and, when found, 'habitat'
  */
 private function parse_html($url)
 {
     $final = array();
     $html = Functions::lookup_with_cache($url, $this->download_options);
     if (!$html) {
         return $final;
     }
     $html = str_ireplace("APPENDIX: FIRE REGEIME TABLE", "APPENDIX: FIRE REGIME TABLE", $html);
     $final['source'] = $url;
     $final['life_form'] = self::get_Raunkiaer_life_form($html, $url);
     $final['authorship_citation'] = self::get_authorship_citation($html);

     // the appendix anchors appear with varying names and quoting; try them in this order
     $appendix_patterns = array(
         "/<a name=\"AppendixFireRegimeTable\"(.*?)<a name=\"AppendixB\">/ims",
         "/<a name='AppendixFireRegimeTable'(.*?)<a name='AppendixB'>/ims",
         "/<a name='APPENDIX: FIRE REGIME TABLE'(.*?)<a name='REFERENCES'>/ims",
         "/<a name=\"APPENDIX: FIRE REGIME TABLE\"(.*?)<a name=\"REFERENCES\">/ims",
         "/<a name=\"APPENDIX: FIRE REGIME TABLE\"(.*?)<a name='REFERENCES'>/ims",
         "/<a name=\"AppendixFireRegimeTable\"(.*?)<a name='REFERENCES'>/ims",
         "/<a name='AppendixFireRegimeTable'(.*?)<a name='REFERENCES'>/ims",
     );
     $appendix = null;
     foreach ($appendix_patterns as $pattern) {
         if (preg_match($pattern, $html, $arr)) {
             $appendix = $arr[1];
             break;
         }
     }
     if ($appendix === null) {
         return $final;
     }
     if (!preg_match_all("/<tr>(.*?)<\\/tr>/ims", $appendix, $arr2)) {
         return $final;
     }

     $exclude = array(">Vegetation Community", ">Percent of fires", ">Surface or low", ">Mixed<", "vegetation communities");
     //exclude first <tr>
     foreach (array_slice($arr2[1], 1) as $tr) {
         if (!preg_match_all("/<td(.*?)<\\/td>/ims", $tr, $arr3)) {
             continue;
         }
         $cells = $arr3[1];
         if (self::needle_occurs_in_this_haystack($cells[0] . "<", $exclude)) {
             continue;
         }
         if (count($cells) == 1) {
             // single-cell row: heading for the rows that follow
             $index = self::clean_html(strip_tags("<td" . $cells[0]));
             continue;
         }
         if (!isset($index)) {
             continue;
         }
         $to_be_added = self::get_term_to_be_added($cells[0]);
         if (!$to_be_added) {
             continue;
         }
         $already_listed = isset($final['habitat'][$index]) && in_array($to_be_added, @$final['habitat'][$index]);
         if (!$already_listed) {
             @($final['habitat'][$index][] = $to_be_added);
         }
     }
     return $final;
 }
Example #9
0
 /**
  * Builds one HTML file per topic that lists the <h1> headings found on every
  * search result page for that topic.
  *
  * Link texts shorter than three words get " (topic)" appended; a link text
  * seen more than once within a topic additionally gets its running count, so
  * repeated titles stay distinguishable.
  *
  * Terminates the script when an output file cannot be opened.
  *
  * @return void
  */
 function start()
 {
     $topics = array("About the EoE", "Agricultural & Resource Economics", "Biodiversity", "Biology", "Climate Change", "Ecology", "Environmental & Earth Science", "Energy", "Environmental Law & Policy", "Environmental Humanities", "Food", "Forests", "Geography", "Hazards & Disasters", "Health", "Mining & Materials", "People", "Physics & Chemistry", "Pollution", "Society & Environment", "Water", "Weather & Climate", "Wildlife");
     // $topics = array("Biodiversity");
     foreach ($topics as $topic) {
         //it initializes every topic
         $this->count = array();
         $OUT = Functions::file_open($this->html_dir . str_replace(" ", "_", $topic) . ".html", "w");
         if (!$OUT) {
             exit("\nFile access problem.\n");
         }
         // Topics contain spaces and "&"; left unencoded, "&" would end the q parameter early.
         $url = $this->search_url . "&q=" . urlencode($topic);
         if ($html = Functions::lookup_with_cache($url, $this->download_options)) {
             if (preg_match("/page 1 of (.*?)<\\/title>/ims", $html, $arr)) {
                 // the page title carries the total number of result pages
                 $count = $arr[1];
                 for ($i = 1; $i <= $count; $i++) {
                     $html = Functions::lookup_with_cache($url . "&page={$i}", $this->download_options);
                     if (!$html) {
                         continue;
                     }
                     if (!preg_match_all("/<h1>(.*?)<\\/h1>/ims", $html, $arr)) {
                         continue;
                     }
                     print_r($arr[1]);
                     foreach ($arr[1] as $t) {
                         if (preg_match("/>(.*?)<\\/a>/ims", $t, $arr2)) {
                             $new_link_text = $arr2[1];
                             if (str_word_count($new_link_text) < 3) {
                                 $new_link_text .= " ({$topic})";
                             }
                             if (!isset($this->count[$new_link_text])) {
                                 $this->count[$new_link_text] = 0;
                             }
                             $this->count[$new_link_text]++;
                             // only repeated titles get a visible counter
                             $c = $this->count[$new_link_text] > 1 ? $this->count[$new_link_text] : '';
                             $t = str_replace($arr2[1], $new_link_text . " {$c}", $t);
                         }
                         fwrite($OUT, $t . "<br>");
                     }
                 }
             }
         }
         fclose($OUT);
     }
 }
 /**
  * For $type 'usercontrib', queries the wiki API for a user's contributions
  * and renders them into $this->body; any other $type leaves the object
  * untouched.
  *
  * @param string $type   only 'usercontrib' is handled here
  * @param array  $params reads 'server' and 'article_type' ('ForReview' or
  *                       'Published')
  */
 function __construct($type, $params)
 {
     // exit("\n[$value]\n");
     if ($type == 'usercontrib') {
         // maps the article type to the namespace number sent as ucnamespace
         $namespace['ForReview'] = 5000;
         $namespace['Published'] = 0;
         // NOTE(review): the ucuser value below appears to have been redacted ("******");
         // as written the expression is not valid PHP -- restore the original user expression.
         $url = $params['server'] . "/StudentContributions/api.php?action=query&list=usercontribs&ucuser="******"&uclimit=100&ucdir=older&format=json&ucnamespace=" . $namespace[$params['article_type']] . "&ucshow=top";
         // expire_seconds is passed as 0 -- presumably so the cache is bypassed; confirm against lookup_with_cache()
         $json = Functions::lookup_with_cache($url, array('expire_seconds' => 0));
         $arr = json_decode($json);
         $titles = array();
         foreach ($arr->query->usercontribs as $item) {
             $titles[] = array('page_title' => $item->title, 'server' => $params['server']);
         }
         // each title is rendered by render_page_row() and the rows are concatenated
         $this->body = implode(array_map('api_reader_controller::render_page_row', $titles));
     }
 }
Example #11
0
 /**
  * Downloads an archive (.tar.gz, .tgz or .zip) into a new temporary directory,
  * extracts it and reports where the expected file or folder ended up.
  *
  * @param string $dwca_file                 URL or path of the archive
  * @param string $check_file_or_folder_name entry that must exist after extraction
  * @param array  $download_options          options passed to Functions::lookup_with_cache()
  * @return array|null array('archive_path' => ..., 'temp_dir' => ...) on success;
  *                    null when the download, the temp file, the archive type
  *                    or the extraction check fails
  */
 function extract_archive_file($dwca_file, $check_file_or_folder_name, $download_options = array('timeout' => 172800, 'expire_seconds' => 0))
 {
     debug("Please wait, downloading resource document...");
     $path_parts = pathinfo($dwca_file);
     $filename = $path_parts['basename'];
     $temp_dir = create_temp_dir() . "/";
     debug($temp_dir);
     $file_contents = Functions::lookup_with_cache($dwca_file, $download_options);
     if (!$file_contents) {
         debug("Connector terminated. Remote files are not ready.");
         return;
     }
     $temp_file_path = $temp_dir . $filename;
     debug("temp_dir: {$temp_dir}");
     debug("Extracting... {$temp_file_path}");
     if (!($TMP = Functions::file_open($temp_file_path, "w"))) {
         return;
     }
     fwrite($TMP, $file_contents);
     fclose($TMP);
     sleep(5);
     // The file name comes from the caller-supplied location, so every path is
     // escaped before it reaches the shell (spaces, quotes, metacharacters).
     if (preg_match("/^(.*)\\.(tar\\.gz|tgz)\$/", $dwca_file)) {
         $cur_dir = getcwd();
         chdir($temp_dir);
         shell_exec("tar -zxvf " . escapeshellarg($temp_file_path));
         chdir($cur_dir);
         // strip either accepted suffix so .tgz archives resolve to their folder as well
         $archive_path = str_ireplace(array(".tar.gz", ".tgz"), "", $temp_file_path);
     } elseif (preg_match("/^(.*)\\.(zip)\$/", $dwca_file) || preg_match("/mcz_for_eol(.*?)/ims", $dwca_file)) {
         shell_exec("unzip -ad " . escapeshellarg($temp_dir) . " " . escapeshellarg($temp_file_path));
         $archive_path = str_ireplace(".zip", "", $temp_file_path);
     } else {
         debug("-- archive not gzip or zip. [{$dwca_file}]");
         return;
     }
     debug("archive path: [" . $archive_path . "]");
     // the archive may unpack directly into the temp dir or into a folder named after the archive
     if (file_exists($temp_dir . $check_file_or_folder_name)) {
         return array('archive_path' => $temp_dir, 'temp_dir' => $temp_dir);
     } elseif (file_exists($archive_path . "/" . $check_file_or_folder_name)) {
         return array('archive_path' => $archive_path, 'temp_dir' => $temp_dir);
     }
     debug("Can't extract archive file. Program will terminate.");
     return;
 }
 /**
  * Visits the 58 image pages of EOL collection 94950, tries to download every
  * thumbnail found there and prints the ones that could not be fetched, keyed
  * by data object id.
  *
  * @return void
  */
 function check_if_image_is_broken()
 {
     // 15mins timeout
     $options = array('download_wait_time' => 1000000, 'timeout' => 900, 'download_attempts' => 1);
     $broken = array();
     for ($i = 1; $i <= 58; $i++) {
         $url = "http://eol.org/collections/94950/images?page={$i}&sort_by=3&view_as=3";
         $html = Functions::lookup_with_cache($url, $options);
         echo "\n{$i}. [{$url}]";
         // <a href="/data_objects/26326917"><img alt="84925_88_88" height="68" src="http://media.eol.org/content/2013/09/13/13/84925_88_88.jpg" width="68" /></a>
         if (!preg_match_all("/<a href=\"\\/data_objects\\/(.*?)<\\/a>/ims", $html, $arr)) {
             continue;
         }
         $rows = $arr[1];
         $total_rows = count($rows);
         foreach ($rows as $position => $row) {
             $k = $position + 1;
             echo "\n{$i} of 58 - {$k} of {$total_rows}";
             $marked_row = "_xxx" . $row;
             // the row starts with the data object id, which runs up to the first double quote
             if (preg_match("/_xxx(.*?)\"/ims", $marked_row, $match)) {
                 $id = $match[1];
             }
             if (!preg_match("/src=\"(.*?)\"/ims", $marked_row, $match)) {
                 continue;
             }
             $image_url = $match[1];
             // images are looked up with a separate cache location
             $options['cache_path'] = "/Volumes/Eli blue/eol_cache_2/";
             if (Functions::lookup_with_cache($image_url, $options)) {
                 echo "\nexists:[{$image_url}]";
             } else {
                 echo "\nbroken: [{$image_url}]";
                 $broken[$id] = $image_url;
             }
             unset($options['cache_path']);
         }
     }
     print_r($broken);
 }
Example #13
0
 /**
  * Collects the ids of all members by paging through $this->EOL_members,
  * 50 records per request, until a page comes back empty.
  *
  * @return array list of unique user ids (empty when a request fails)
  */
 function get_list_of_user_ids()
 {
     $user_ids = array();
     debug("\n Getting all members... " . $this->EOL_members);
     $offset = 0;
     while (true) {
         $json = Functions::lookup_with_cache($this->EOL_members . "&offset={$offset}", $this->download_options);
         if (!$json) {
             debug("\n Connector terminated. Down: " . $this->EOL_members . "\n");
             return array();
         }
         $offset += 50;
         $users = json_decode($json);
         // json_decode() yields null for invalid JSON; only count a real list,
         // because count(null) raises a TypeError on PHP 8.
         $member_count = is_array($users) ? count($users) : 0;
         debug("\n members: " . $member_count);
         // an empty page, or anything that is not a list of users, ends the paging
         if (!is_array($users) || !$users) {
             break;
         }
         foreach ($users as $user) {
             // used as array keys so repeated ids collapse into one entry
             $user_ids[(string) $user->id] = 1;
         }
     }
     return array_keys($user_ids);
 }
 /**
  * Looks a name up with verbose=true and returns the smallest usageKey among
  * the returned alternatives whose canonicalName equals $sciname.
  *
  * @param string $sciname name appended to $this->gbif_taxon_info
  * @return mixed the smallest matching usageKey, or false when the lookup
  *               fails, no alternatives are returned, or none of them matches
  */
 private function get_usage_key_again($sciname)
 {
     $response = Functions::lookup_with_cache($this->gbif_taxon_info . $sciname . "&verbose=true", $this->download_options);
     if (!$response) {
         return false;
     }
     $decoded = json_decode($response);
     if (!isset($decoded->alternatives)) {
         return false;
     }
     $usagekeys = array();
     $keys_by_rank = array();
     foreach ($decoded->alternatives as $rec) {
         if ($rec->canonicalName != $sciname) {
             continue;
         }
         $keys_by_rank[$rec->rank][] = $rec->usageKey;
         $usagekeys[] = $rec->usageKey;
     }
     /* from NCBIGGIqueryAPI.php connector
        if(isset($options["FAMILY"])) return min($options["FAMILY"]);
        else return min($usagekeys);
        */
     return $keys_by_rank ? min($usagekeys) : false;
 }
 /**
  * Reads the activities page and returns its "code = description" entries.
  *
  * The page is reduced to text separated by <BR> tags; every segment between
  * two <BR>s that contains " =" is stored under the trimmed text before it.
  *
  * @return array map of activity code => full entry text (empty when the page
  *               cannot be downloaded)
  */
 private function get_activities()
 {
     $items = array();
     $page = Functions::lookup_with_cache($this->path['activities'], $this->download_options);
     if (!$page) {
         return $items;
     }
     $page = self::clean_str(functions::remove_whitespace($page));
     //manual adjustment
     $page = str_ireplace(
         array('insect visitors</FONT></P>', '<P ALIGN="LEFT"><FONT FACE="Times New Roman">prf'),
         array('insect visitors<BR><BR>', '<BR><BR>prf'),
         $page
     );
     $page = strip_tags($page, "<BR>");
     if (!preg_match_all("/<BR>(.*?)<BR>/ims", $page, $segments)) {
         return $items;
     }
     foreach ($segments[1] as $entry) {
         // "xxx" anchors the start so the lazy group captures everything before " ="
         if (preg_match("/xxx(.*?) =/ims", "xxx" . $entry, $code)) {
             $items[trim($code[1])] = $entry;
         }
     }
     return $items;
 }
 /**
  * Scrapes one alien-plant fact sheet and writes its taxon record(s) and text
  * sections.
  *
  * Two multi-species fact sheets (tama1.htm, loni1.htm) have their taxa written
  * from hard-coded lists; for every other page the common name, scientific
  * name and family are extracted from the markup. Afterwards each section that
  * can be located (native range, description, ecological threat, distribution,
  * habitat, background, biology and spread) is written with
  * write_alien_text(); a "****NAME" marker is echoed for every part that
  * cannot be found.
  *
  * @param string $url address of the fact sheet
  * @return false|null false when the scientific name contains a comma (several
  *                    names on one page); null otherwise, including when no
  *                    usable scientific name or taxon id is found
  */
 public function write_alien_taxon($url)
 {
     $taxa_page_html = utf8_encode(Functions::lookup_with_cache($url, array('validation_regex' => '<body')));
     // Simplify the markup: drop images, turn the green comment paragraphs into
     // <GREEN_COMMENT> and the bold section headings into <MARK>, which later
     // serves as the end delimiter of each section.
     $transformed_html = preg_replace("/<IMG( .*?)\\/?>/ims", "", $taxa_page_html);
     $transformed_html = preg_replace("/<P CLASS=\"style1\">(<B>)?(<I>)?<FONT (SIZE=\"-1\" )?COLOR=\"#(117711|007700|006600)\".*?>/", "<GREEN_COMMENT>", $transformed_html);
     $transformed_html = preg_replace("/<P (ALIGN=\"LEFT\" )?CLASS=\"style1( style1)*\">(<FONT COLOR=\"#000000\">){0,3}(<SPAN CLASS=\"style1\">)? ?<B>/", "<MARK>", $transformed_html);
     $references = $this->get_factsheet_references($transformed_html);
     // Taxon
     if ($url == self::ALIEN_TAXA_PREFIX . "fact/tama1.htm") {
         $taxon_ids = array();
         $taxon_ids[] = $this->write_taxon("tamarix", "Tamarix", "Tamarix", "Tamaricaceae", "", $references);
         $taxon_ids[] = $this->write_taxon("tamarix_aphylla", "Tamarix aphylla", "Tamarix aphylla", "Tamaricaceae", "", $references);
         $taxon_ids[] = $this->write_taxon("tamarix_chinensis", "Tamarix chinensis", "Tamarix chinensis", "Tamaricaceae", "", $references);
         $taxon_ids[] = $this->write_taxon("tamarix_gallica", "Tamarix gallica", "Tamarix gallica", "Tamaricaceae", "", $references);
         $taxon_ids[] = $this->write_taxon("tamarix_parviflora", "Tamarix parviflora", "Tamarix parviflora", "Tamaricaceae", "", $references);
         $taxon_ids[] = $this->write_taxon("tamarix_ramosissima", "Tamarix ramosissima", "Tamarix ramosissima", "Tamaricaceae", "", $references);
         $taxon_id = "tamarix";
     } elseif ($url == self::ALIEN_TAXA_PREFIX . "fact/loni1.htm") {
         $taxon_ids = array();
         // the genus has no common name: pass "" so $references stays in the references slot
         $taxon_ids[] = $this->write_taxon("lonicera", "Lonicera", "Lonicera", "Caprifoliaceae", "", $references);
         $taxon_ids[] = $this->write_taxon("lonicera_fragrantissima", "Lonicera fragrantissima", "Lonicera fragrantissima", "Caprifoliaceae", "fragrant honeysuckle", $references);
         $taxon_ids[] = $this->write_taxon("lonicera_maackii", "Lonicera maackii", "Lonicera maackii", "Caprifoliaceae", "Amur honeysuckle", $references);
         $taxon_ids[] = $this->write_taxon("lonicera_morrowii", "Lonicera morrowii", "Lonicera morrowii", "Caprifoliaceae", "Morrow's honeysuckle", $references);
         $taxon_ids[] = $this->write_taxon("lonicera_standishii", "Lonicera standishii", "Lonicera standishii", "Caprifoliaceae", "Standish's honeysuckle", $references);
         $taxon_ids[] = $this->write_taxon("lonicera_tatarica", "Lonicera tatarica", "Lonicera tatarica", "Caprifoliaceae", "Tartarian honeysuckle", $references);
         $taxon_ids[] = $this->write_taxon("lonicera_xylosteum", "Lonicera xylosteum", "Lonicera xylosteum", "Caprifoliaceae", "European fly honeysuckle", $references);
         $taxon_ids[] = $this->write_taxon("lonicera_x_bella", "Lonicera X bella", "Lonicera X bella", "Caprifoliaceae", "pretty honeysuckle", $references);
         $taxon_id = "lonicera";
     } else {
         $common_name = null;
         $scientific_name = null;
         $canonical_form = null;
         $family = null;
         // initialised so the check further down is well defined when no name is found
         $taxon_id = null;
         // the common name is taken from the ALT text of the first image after the body marker
         if (preg_match("/body\" -->.*?<IMG.*? ALT=\"(.*?)\"/ims", $taxa_page_html, $arr)) {
             $common_name = trim($arr[1]);
         } else {
             echo "****COMMON\n";
         }
         if (preg_match("/<FONT.*?SIZE=\"\\+1\">(.*?)<IMG/ims", $taxa_page_html, $arr)) {
             $scientific_name = trim(html_entity_decode($arr[1], ENT_QUOTES, 'UTF-8'));
             $scientific_name = trim(str_replace("\r", " ", $scientific_name));
             $scientific_name = trim(str_replace("\n", " ", $scientific_name));
             // "<name> ... xyz family (Familyname)" -> split into name and family
             if (preg_match("/^(.*)<.*?>[a-z- ]* family *\\((.*?)(\\)|, formerly)/i", $scientific_name, $arr)) {
                 $scientific_name = trim($arr[1]);
                 $family = trim($arr[2]);
             } else {
                 echo "****FAMILY\n";
             }
             $scientific_name = str_replace("</FONT></I>", " ", $scientific_name);
             $scientific_name = str_replace("<I><BR>", "<I>", $scientific_name);
             $scientific_name = preg_replace("/<FONT.*?\\+1\">/ims", "", $scientific_name);
             // cut the name at the first closing font tag, line break or "(previously" note
             if (preg_match("/^(.*?)<\\/FONT>/ims", $scientific_name, $arr)) {
                 $scientific_name = trim($arr[1]);
             }
             if (preg_match("/^(.*?)<BR>/ims", $scientific_name, $arr)) {
                 $scientific_name = trim($arr[1]);
             }
             if (preg_match("/^(.*?)\\(previously/ims", $scientific_name, $arr)) {
                 $scientific_name = trim($arr[1]);
             }
             // collapse runs of spaces
             while (preg_match("/  /", $scientific_name)) {
                 $scientific_name = str_replace("  ", " ", $scientific_name);
             }
             // tidy the spacing around the italic tags
             $scientific_name = str_replace(" </I>", "</I>", $scientific_name);
             $scientific_name = str_replace("<I> ", "<I>", $scientific_name);
             $scientific_name = preg_replace("/<\\/I>([^ ])/ims", "</I> \\1", $scientific_name);
             $scientific_name = str_replace(".<", ". <", $scientific_name);
             // <EM> nested inside <I> is redundant; a standalone <EM> becomes <I>
             if (preg_match("/<I><EM>/", $scientific_name)) {
                 $scientific_name = str_replace("<EM>", "", $scientific_name);
                 $scientific_name = str_replace("</EM>", "", $scientific_name);
             } else {
                 $scientific_name = str_replace("<EM>", "<I>", $scientific_name);
                 $scientific_name = str_replace("</EM>", "</I>", $scientific_name);
             }
             // too many names
             if (preg_match("/,/", $scientific_name)) {
                 return false;
             }
             list($scientific_name, $canonical_form, $taxon_id) = self::evaluate_scientific_name($scientific_name);
         } else {
             echo "****SCIENTIFIC\n";
         }
         if (!$scientific_name || !$taxon_id) {
             return;
         }
         $this->write_taxon($taxon_id, $scientific_name, $canonical_form, $family, $common_name, $references);
         $taxon_ids = array($taxon_id);
     }
     echo "\n{$url}<br/>\n";
     $authors = $this->get_factsheet_authors($transformed_html);
     $editors = $this->get_factsheet_editors($transformed_html);
     $write_options = array('taxon_ids' => $taxon_ids, 'url' => $url, 'authors' => $authors, 'editors' => $editors);
     // Each section below runs from its heading up to the next <MARK> (native
     // range: up to the next tag) and is written under "<taxon_id>/alien_<section>".
     if (preg_match("/NATIVE.*?RANGE(<BR>.*?<\\/B>|<\\/B>.*?<BR>)(<\\/FONT>)?(.*?)</ims", $transformed_html, $arr)) {
         $native_range = trim(html_entity_decode($arr[3], ENT_QUOTES, 'UTF-8'));
         $native_range = trim(str_replace("\r", " ", $native_range));
         $native_range = trim(str_replace("\n", " ", $native_range));
         while (preg_match("/  /", $native_range)) {
             $native_range = str_replace("  ", " ", $native_range);
         }
         $this->write_alien_text('Native Range', $native_range, 'http://rs.tdwg.org/ontology/voc/SPMInfoItems#Distribution', $taxon_id . "/alien_range", $write_options);
     } else {
         echo "****NATIVE\n";
     }
     if (preg_match("/> *DESCRIPTION(<BR>.*?<\\/B>|<\\/B>.*?<BR>)(.*?)<MARK>/ims", $transformed_html, $arr)) {
         $description = self::cleanse_alien_description($arr[2]);
         $this->write_alien_text('Description', $description, 'http://rs.tdwg.org/ontology/voc/SPMInfoItems#Morphology', $taxon_id . "/alien_description", $write_options);
     } else {
         echo "****DESCRIPTION\n";
     }
     if (preg_match("/> *ECOLOGICAL.*?THREAT(<BR>.*?<\\/B>|<\\/B>.*?<BR>)(.*?)<MARK>/ims", $transformed_html, $arr)) {
         $threat = self::cleanse_alien_description($arr[2]);
         $this->write_alien_text('Ecological Threat in the United States', $threat, 'http://rs.tdwg.org/ontology/voc/SPMInfoItems#RiskStatement', $taxon_id . "/alien_threat", $write_options);
     } else {
         echo "****ECOLOGICAL\n";
     }
     if (preg_match("/> *DISTRIBUTION.*?IN.*?THE.*?UNITED.*?STATES(<BR>.*?<\\/B>|<\\/B>.*?<BR>)(.*?)<MARK>/ims", $transformed_html, $arr)) {
         $distribution = self::cleanse_alien_description($arr[2]);
         $this->write_alien_text('Distribution in the United States', $distribution, 'http://rs.tdwg.org/ontology/voc/SPMInfoItems#Distribution', $taxon_id . "/alien_distribution", $write_options);
     } else {
         echo "****DISTRIBUTION\n";
     }
     if (preg_match("/> *HABITAT.*?IN.*?THE.*?UNITED.*?STATES(<BR>.*?<\\/B>|<\\/B>.*?<BR>)(.*?)<MARK>/ims", $transformed_html, $arr)) {
         $habitat = self::cleanse_alien_description($arr[2]);
         $this->write_alien_text('Habitat in the United States', $habitat, 'http://rs.tdwg.org/ontology/voc/SPMInfoItems#Habitat', $taxon_id . "/alien_habitat", $write_options);
     } else {
         echo "****HABITAT\n";
     }
     if (preg_match("/> *BACKGROUND(<BR>.*?<\\/B>|<\\/B>.*?<BR>)(.*?)<MARK>/ims", $transformed_html, $arr)) {
         $background = self::cleanse_alien_description($arr[2]);
         $this->write_alien_text('History in the United States', $background, 'http://rs.tdwg.org/ontology/voc/SPMInfoItems#TaxonBiology', $taxon_id . "/alien_background", $write_options);
     } else {
         echo "****BACKGROUND\n";
     }
     // this pattern has an extra group for "&amp;|and", so the section body is group 3
     if (preg_match("/> *BIOLOGY.*?(&amp;|and).*?SPREAD(<BR>.*?<\\/B>|<\\/B>.*?<BR>)(.*?)<MARK>/ims", $transformed_html, $arr)) {
         $biology = self::cleanse_alien_description($arr[3]);
         $this->write_alien_text('Biology and Spread', $biology, 'http://rs.tdwg.org/ontology/voc/SPMInfoItems#Reproduction', $taxon_id . "/alien_biology", $write_options);
     } else {
         echo "****BIOLOGY\n";
     }
 }
Example #17
0
     $taxa[] = new \SchemaTaxon($taxonParameters);
     //if($i >= 5) break; //debug
 }
 $new_resource_xml = \SchemaDocument::get_taxon_xml($taxa);
 $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
 if (!($OUT = Functions::file_open($old_resource_path, "w+"))) {
     return;
 }
 fwrite($OUT, $new_resource_xml);
 fclose($OUT);
 Functions::set_resource_status_to_force_harvest($resource_id);
 shell_exec("rm " . $new_resource_path);
 //--------
 // 0x73 0x20 0x68 0x61
 $xml_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
 if ($xml = Functions::lookup_with_cache($xml_path, array('timeout' => 1200, 'download_attempts' => 5, 'expire_seconds' => true))) {
     // $xml = str_replace(chr(0x73)." ".chr(0x20)." ".chr(0x73)." ".chr(0x6B), " ", $xml);
     // $xml = str_replace(array(chr(0x73), chr(0x20), chr(0x68), chr(0x61)), " ", $xml);
     $xml = str_replace(array(0x73, 0x20, 0x73, 0x6b), " ", $xml);
     $xml = str_replace(array(0x32, 0x35, 0x2e, 0x35), " ", $xml);
     $xml = str_replace(array(0x32, 0x33, 0x20, 0x6d), " ", $xml);
     $xml = str_replace(chr(0x32) . " " . chr(0x33) . " " . chr(0x20) . " " . chr(0x6d), " ", $xml);
     $xml = str_replace(array(0x20, 0x4e, 0x61, 0x74), " ", $xml);
     $xml = str_replace(array(0x73, 0x20, 0x68, 0x6f), " ", $xml);
     $xml = str_replace(chr(0x73) . " " . chr(0x20) . " " . chr(0x68) . " " . chr(0x6f), " ", $xml);
     $xml = str_replace(array(0x77, 0x65, 0x72, 0x65), " ", $xml);
     $xml = str_replace(array(0xe2, 0x80, 0xc2, 0xa6), " ", $xml);
     $xml = str_replace(array(0x6e, 0x20, 0x32, 0x30), " ", $xml);
     $xml = str_replace(array(0x67, 0x75, 0x65, 0x7a), " ", $xml);
     $xml = str_replace(array(0x73, 0x20, 0x61, 0x6e), " ", $xml);
     $xml = str_replace(array(0x74, 0x7a, 0x3c, 0x2f), " ", $xml);
Example #18
0
 /**
  * Queries the service at $this->family_service_ncbi for a family and records
  * whether the response reports any sequences.
  *
  * When the XML response has a Count greater than zero, two records are written
  * with save_to_dump() (the sequence count and a "yes" flag) and true is returned.
  * Otherwise, for non-subfamilies, the zero-count / "no" values are passed to
  * add_string_types() and has_diff_family_name_in_eol_api() is invoked; then
  * check_for_sub_family() is called and false is returned.
  *
  * @param string $family       family name, appended to the service URL
  * @param bool   $is_subfamily when true the "no sequences" bookkeeping is skipped
  * @param string $database     key into $this->ggi_text_file
  * @return bool true when sequences were reported, false otherwise
  */
 private function query_family_NCBI_info($family, $is_subfamily, $database)
 {
     $rec = array(
         "family" => $family,
         "source" => $this->family_service_ncbi . $family,
         "taxon_id" => $family,
     );
     $response = Functions::lookup_with_cache($rec["source"], $this->download_options);
     $parsed = simplexml_load_string($response);
     if ($parsed && $parsed->Count > 0) {
         // each row: object_id, count, label, measurement
         $rows = array(
             array("_no_of_seq_in_genbank", $parsed->Count, "Number Of Sequences In GenBank", "http://eol.org/schema/terms/NumberOfSequencesInGenBank"),
             array("SequenceInGenBank", "http://eol.org/schema/terms/yes", "SequenceInGenBank", "http://eol.org/schema/terms/SequenceInGenBank"),
         );
         foreach ($rows as $row) {
             $rec["object_id"] = $row[0];
             $rec["count"] = $row[1];
             $rec["label"] = $row[2];
             $rec["measurement"] = $row[3];
             self::save_to_dump($rec, $this->ggi_text_file[$database]["current"]);
         }
         return true;
     }
     // nothing reported: only top-level families get the explicit "no sequences" entries
     if (!$is_subfamily) {
         $rec["object_id"] = "_no_of_seq_in_genbank";
         self::add_string_types($rec, "Number Of Sequences In GenBank", 0, "http://eol.org/schema/terms/NumberOfSequencesInGenBank", $family);
         $rec["object_id"] = "SequenceInGenBank";
         self::add_string_types($rec, "SequenceInGenBank", "http://eol.org/schema/terms/no", "http://eol.org/schema/terms/SequenceInGenBank", $family);
         self::has_diff_family_name_in_eol_api($family, $database);
     }
     self::check_for_sub_family($family);
     return false;
 }
 /**
  * Builds the Tropicos API URL for the requested record type and fetches it
  * through Functions::lookup_with_cache().
  *
  * @param string $type one of: id_list, taxon_name, taxonomy, synonyms,
  *                     taxon_ref, distribution, images, chromosome
  * @param mixed  $id   start id (for id_list) or the name id of the record
  * @return mixed the downloaded contents, or false when the type is not
  *               recognised or the lookup yields nothing
  */
 private function create_cache($type, $id)
 {
     // switch compares loosely (==), exactly like the if/elseif chain it replaces
     switch ($type) {
         case "id_list":
             // pagesize is the no. of records returned from the Tropicos master list service; max is 1000
             $pagesize = 1000;
             $url = TROPICOS_API_SERVICE . "List?startid={$id}&PageSize={$pagesize}&apikey=" . TROPICOS_API_KEY . "&format=json";
             break;
         case "taxon_name":
             $url = TROPICOS_API_SERVICE . $id . "?format=json&apikey=" . TROPICOS_API_KEY;
             break;
         case "taxonomy":
             $url = TROPICOS_API_SERVICE . $id . "/HigherTaxa?format=xml&apikey=" . TROPICOS_API_KEY;
             break;
         case "synonyms":
             $url = TROPICOS_API_SERVICE . $id . "/Synonyms?format=xml&apikey=" . TROPICOS_API_KEY;
             break;
         case "taxon_ref":
             $url = TROPICOS_API_SERVICE . $id . "/References?format=xml&apikey=" . TROPICOS_API_KEY;
             break;
         case "distribution":
             $url = TROPICOS_API_SERVICE . $id . "/Distributions?format=xml&apikey=" . TROPICOS_API_KEY;
             break;
         case "images":
             $url = TROPICOS_API_SERVICE . $id . "/Images?format=xml&apikey=" . TROPICOS_API_KEY;
             break;
         case "chromosome":
             $url = TROPICOS_API_SERVICE . $id . "/ChromosomeCounts?format=xml&apikey=" . TROPICOS_API_KEY;
             break;
         default:
             // previously an unknown type fell through and looked up an undefined $url
             return false;
     }
     $contents = Functions::lookup_with_cache($url, $this->download_options);
     return $contents ? $contents : false;
 }
Example #20
0
 /**
  * Reads the contributor-galleries page, extracts the link of every contributor
  * listed in the <ol class="unstyled"> block, then fetches and processes the
  * media of each contributor via get_taxa_with_media() and get_media_data().
  */
 private function prepare_contributor_galleries()
 {
     // step 1: gather contributor links; stored as array keys so repeated links collapse
     $gallery_urls = array();
     $listing = Functions::lookup_with_cache($this->adw_page["contributor_galleries"], $this->download_options);
     if ($listing
         && preg_match("/<ol class=\"unstyled\">(.*?)<\\/ol>/ims", $listing, $list_match)
         && preg_match_all("/<li>(.*?)<\\/li>/ims", $list_match[1], $items)
     ) {
         foreach ($items[1] as $item) {
             if (preg_match("/<a href=\"(.*?)\"/ims", $item, $link)) {
                 $gallery_urls[$link[1]] = '';
             }
         }
     }
     // step 2: visit each contributor and harvest its media
     $i = 0; // running position, only used by the optional batching/debug snippets below
     foreach (array_keys($gallery_urls) as $url) {
         $i++;
         /* breakdown when caching
            $m = 100;
            $cont = false;
            // if($i >=  1    && $i < $m)    $cont = true;
            // if($i >=  $m   && $i < $m*2)  $cont = true;
            // if($i >=  $m*2 && $i < $m*3)  $cont = true; Done
            if(!$cont) continue;
            */
         // manual adjustment: this gallery holds habitat images, not images of taxa
         if (in_array($url, array("/collections/contributors/habitat_images/"))) {
             continue;
         }
         // every gallery is pictures except the one known sound collection
         $type = $url == "/collections/contributors/naturesongs/" ? "sounds" : "pictures";
         echo "\ncontributor: [{$url}]\n";
         $taxa_with_media = self::get_taxa_with_media($this->domain . $url);
         self::get_media_data($taxa_with_media, $type);
         // if($i > 5) break; //debug
     }
 }
Example #21
0
 /**
  * Downloads a gallery page and harvests what it finds into instance state.
  *
  * - every "ngg-albumtitle" block is appended to $this->site_links_info[$url];
  *   the first double-quoted string inside it is collected as a link
  * - "ngg-thumbnail" blocks are stored in $this->site_thumbnails[$url]
  * - when "ngg-gallery-thumbnail" blocks exist, pagination links are merged into
  *   $this->pagination_links and, if a category title is present, title and
  *   thumbnails are stored in $this->site_thumbnails_gallery[$url]
  *
  * @param string $url page to read; surrounding whitespace is trimmed
  * @return array the links found inside the album-title blocks (empty for a blank url)
  */
 private function get_links($url)
 {
     $url = trim($url);
     if (!$url) {
         // blank url: nothing to fetch
         return array();
     }
     $links = array();
     $html = Functions::lookup_with_cache($url, $this->download_options);
     if (!$html) {
         return $links;
     }
     // albums
     if (preg_match_all("/<div class=\"ngg-albumtitle\">(.*?)<\\/div>/ims", $html, $albums)) {
         foreach ($albums[1] as $album_html) {
             $this->site_links_info[$url][] = $album_html;
             if (preg_match("/\"(.*?)\"/ims", $album_html, $quoted)) {
                 $links[] = $quoted[1];
             }
         }
     }
     // plain thumbnails
     if (preg_match_all("/<div class=\"ngg-thumbnail\">(.*?)<p><\\/p>/ims", $html, $thumbs)) {
         $this->site_thumbnails[$url] = $thumbs[1];
     }
     // gallery thumbnails, e.g. under <h1 class="category-title">Blue Sponge; Haliclona species.</h1>
     if (preg_match_all("/<div class=\"ngg-gallery-thumbnail\" >(.*?)<\\/div>/ims", $html, $gallery)) {
         if (preg_match("/<div class='ngg-navigation'>(.*?)<\\/div>/ims", $html, $navigation)) {
             $more_pages = self::get_pagination_links($navigation[1]);
             $this->pagination_links = array_merge($this->pagination_links, $more_pages);
         }
         if (!preg_match("/<h1 class=\"category-title\">(.*?)<\\/h1>/ims", $html, $heading)) {
             echo "\n investigate [{$url}] no title \n";
         } else {
             $this->site_thumbnails_gallery[$url]["title"] = $heading[1];
             $this->site_thumbnails_gallery[$url]["rekords"] = $gallery[1];
         }
     }
     return $links;
 }
 /**
  * Extracts the taxa listed on the page at $this->species_list.
  *
  * Every <a class="fullname" ...> anchor yields one entry holding the
  * speciesCode value, the anchor text and the href.
  *
  * @return array list of array("taxon_id" => ..., "sciname" => ..., "source" => ...);
  *               a field is an empty string when it could not be found in its row
  */
 private function get_taxa_list()
 {
     $taxa = array();
     if ($html = Functions::lookup_with_cache($this->species_list, $this->download_options)) {
         if (preg_match_all("/<a class=\"fullname\"(.*?)<\\/a>/ims", $html, $arr)) {
             $rows = array_map('trim', $arr[1]);
             foreach ($rows as $row) {
                 // reset per row: previously a row whose pattern failed silently inherited
                 // the previous row's value (or hit an undefined variable on the first row)
                 $id = '';
                 $name = '';
                 $source = '';
                 if (preg_match("/speciesCode=(.*?)\"/ims", $row, $match)) {
                     $id = $match[1];
                 }
                 // "xxx" is appended as an end marker so the lazy group captures the rest of the row
                 if (preg_match("/\">(.*?)xxx/ims", $row . "xxx", $match)) {
                     $name = trim($match[1]);
                 }
                 if (preg_match("/href=\"(.*?)\"/ims", $row, $match)) {
                     $source = $match[1];
                 }
                 $taxa[] = array("taxon_id" => $id, "sciname" => $name, "source" => $source);
             }
         }
     }
     return $taxa;
 }
Example #23
0
 /**
  * Parses a taxon page for its "Other synonyms" block and its citation.
  *
  * Each <br>-separated line of the synonyms block becomes a record with an
  * optional 'lang' (text of the first <b> element, colons removed) and optional
  * 'comnames' (comma-separated names following the closing </b>).
  *
  * When no "Citation:" paragraph is found the cached copy is assumed to be wrong:
  * the page is fetched again with expire_seconds = 0 so the cache is refreshed,
  * but it is not re-parsed in this call; the connector has to be run again.
  *
  * @param string $url taxon page to download
  * @return array array('comnames' => list of records, 'authorship' => string|null);
  *               authorship is null when the page could not be read or has no citation
  */
 private function parse_taxon_page($url)
 {
     $final = array();
     // defined up front: previously undefined whenever the download failed or no citation matched
     $authorship = null;
     $options = $this->download_options;
     if ($html = Functions::lookup_with_cache($url, $options)) {
         //get comnames
         if (preg_match("/<b>Other synonyms<\\/b>(.*?)<\\/font>/ims", $html, $arr)) {
             foreach (explode("<br>", $arr[1]) as $line) {
                 $rec = array();
                 if (preg_match("/<b>(.*?)<\\/b>/ims", $line, $lang_match)) {
                     $rec['lang'] = trim(str_ireplace(":", "", $lang_match[1]));
                 }
                 // names are whatever follows the closing '</b>'; kept in its own variable
                 // instead of overwriting the list being iterated
                 $pieces = explode("</b>", $line);
                 if (!empty($pieces[1])) {
                     $rec['comnames'] = array_map('trim', explode(",", $pieces[1]));
                 }
                 if ($rec) {
                     $final[] = $rec;
                 }
             }
         }
         //get authorship
         if (preg_match("/Citation:(.*?)<\\/p>/ims", $html, $arr)) {
             $authorship = Functions::remove_whitespace(strip_tags($arr[1]));
             $authorship = str_ireplace('&nbsp;', '', $authorship);
         } else {
             // no author! this assumes that a wrong file is cached; this merits a 2nd run of the connector
             $options['expire_seconds'] = 0;
             Functions::lookup_with_cache($url, $options);
             echo "\nconnector has to run again\n";
         }
     }
     return array('comnames' => $final, 'authorship' => $authorship);
 }
Example #24
0
 /**
  * Collects the known URIs from the numbered "URIs for Data on EOL" HTML pages
  * (pages 1 to 15).
  *
  * For every table row whose id starts with "known_uri" (hidden or not), the
  * content of the 'uri' / 'excluded uri' cell becomes the key and the content of
  * the first attribute-less <td> becomes the value (empty string when absent).
  * Reading stops at the first page that cannot be downloaded.
  *
  * @param array|false $download_options options for Functions::lookup_with_cache();
  *                                      when false, defaults with a 24 hour expiry are used
  * @return array map of uri cell content => first plain cell content; empty when nothing was read
  */
 public static function get_eol_defined_uris($download_options = false)
 {
     if (!$download_options) {
         //expires in 24 hours
         $download_options = array('resource_id' => 'URIs', 'download_wait_time' => 1000000, 'timeout' => 900, 'expire_seconds' => 86400, 'download_attempts' => 1);
     }
     // initialised here: previously undefined when the first page failed or no row matched
     $rec = array();
     $row_starts = array("<tr class='hidden' id='known_uri", "<tr id='known_uri");
     for ($i = 1; $i <= 15; $i++) {
         // $url = "http://localhost/cp/TraitRequest/measurements/URIs for Data on EOL - Encyclopedia of Life" . $i . ".html";
         $url = "https://dl.dropboxusercontent.com/u/7597512/TraitRequest/measurements/URIs for Data on EOL - Encyclopedia of Life" . $i . ".html";
         $html = Functions::lookup_with_cache($url, $download_options);
         if (!$html) {
             // a page that cannot be fetched ends the scan; return what has been gathered so far
             return $rec;
         }
         $html = str_ireplace("<wbr/>", "", $html);
         foreach ($row_starts as $row_start) {
             if (!preg_match_all("/" . $row_start . "(.*?)<\\/tr>/ims", $html, $arr)) {
                 continue;
             }
             foreach ($arr[1] as $t) {
                 if (preg_match("/<td class='uri'>(.*?)<\\/td>/ims", $t, $arr2) || preg_match("/<td class='excluded uri'>(.*?)<\\/td>/ims", $t, $arr2)) {
                     $val = '';
                     if (preg_match("/<td>(.*?)<\\/td>/ims", $t, $arr3)) {
                         $val = $arr3[1];
                     }
                     $rec[$arr2[1]] = $val;
                 }
             }
         }
     }
     return $rec;
 }
Example #25
0
 /**
  * Tells whether the resource behind $src came back without an error marker.
  *
  * The response is treated as unavailable when it contains "ERROR:" (any case), e.g.
  *   ERROR: Only species level taxids are accepted
  *   ERROR: Unable to retrieve sequence
  *
  * @param string $src URL to look up
  * @return bool false when the response contains "ERROR:", true otherwise
  */
 private function barcode_image_available($src)
 {
     $response = Functions::lookup_with_cache($src, $this->download_options);
     // stripos() gives an integer offset on a hit and false on a miss
     $error_found = is_numeric(stripos($response, "ERROR:"));
     return !$error_found;
 }
Example #26
0
 /**
  * Gathers every src="..." value from all image pages of a species.
  *
  * Page 1 is fetched first; its ">1 of N</font" marker, when present, gives the
  * number of pages to walk. Pages that fail to download are skipped.
  *
  * @param array $rec record whose "Species" value is appended to the query string
  * @return array list of src attribute values, in page order
  */
 private function get_image_urls($rec)
 {
     $mediaURLs = array();
     $base_url = $this->images_path . "&species=" . $rec["Species"] . "&navi=";
     $html = Functions::lookup_with_cache($base_url . "1", $this->download_options);
     if (!$html) {
         return $mediaURLs;
     }
     $page_count = 1;
     if (preg_match("/>1 of (.*?)<\\/font/ims", $html, $total)) {
         $page_count = trim($total[1]);
     }
     for ($page = 1; $page <= $page_count; $page++) {
         if ($page > 1) {
             // the first page is already loaded; later ones are fetched on demand
             $html = Functions::lookup_with_cache($base_url . $page, $this->download_options);
             if (!$html) {
                 continue;
             }
         }
         if (preg_match_all("/src=\"(.*?)\"/ims", $html, $sources)) {
             $mediaURLs = array_merge($mediaURLs, $sources[1]);
         }
     }
     return $mediaURLs;
 }
/* 
    Template fragment: asks the mediawiki connector to generate an EOL DWC-A for the
    given archive and renders the outcome.

    Expects: $params
    Array
    (
        [search_type] => gen_archive_all
        [archive_id] => BHL_lit_2016_07_21_01_35_41
    )
*/
// echo "<pre>"; print_r($params); echo "</pre>";
// The archive id is placed in a query string, so it has to be URL-encoded (urldecode() was used here before).
$url = EOL_PHP_CODE . "update_resources/connectors/mediawiki.php?archive_id=" . urlencode($params['archive_id']);
?>
<div id="accordion_open2">
    <h3>Generate EOL DWC-A</h3>
    <div>
    <?php 
$val = Functions::lookup_with_cache($url, array('expire_seconds' => true));
// the connector signals success by printing the "[SUCCESS]" marker in its response
if ($val && strpos($val, "[SUCCESS]") !== false) {
    self::display_message(array('type' => "highlight", 'msg' => "EOL DWC-A successfully generated."));
    $url = EOL_PHP_CODE . "applications/content_server/resources/" . str_replace(array(":", " "), "_", $params['archive_id']) . ".tar.gz";
    // the archive id comes from the request; escape it before writing it into the page
    $safe_url = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
    echo "<br>You can copy the URL below and use it as a resource in an EOL Content Partner resource account (<a target='eol' href='http://eol.org'>eol.org</a>).";
    echo "<br><br><a href='" . $safe_url . "'>{$safe_url}</a>";
    $url = EOL_PHP_CODE . "applications/dwc_validator/index.php?file_url=" . $url;
    $safe_url = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
    echo "<br><br>You can also try to validate the archive file <a target='_blank' href='" . $safe_url . "'>here</a>.";
} else {
    // either the request returned nothing or the connector did not report success
    self::display_message(array('type' => "error", 'msg' => "Process un-successful."));
    // echo "<br>[$val]<br>"; //debug
}
Example #28
0
 /**
  * Builds a lookup array from the published Google spreadsheet.
  *
  * For every <tr style='height:1px;'> row, the text after the first '>' of the
  * third <td> becomes the key and the text after the first '>' of the second
  * <td> becomes the value. The sheet is cached for 24 hours.
  *
  * @return array map built from the third and second cells of each row
  */
 function generate_licensor_title_list()
 {
     $sheet_url = "https://docs.google.com/spreadsheets/u/1/d/1ExBu0Q9yLXsYVNzXdIrDYt2Go6blwftAEEb5kJk-dfk/pub?output=html";
     //expires every 24 hours
     $page = Functions::lookup_with_cache($sheet_url, array('expire_seconds' => 86400, 'download_wait_time' => 1000000));
     $lookup = array();
     if (!preg_match_all("/<tr style\\=\\'height\\:1px\\;\\'>(.*?)<\\/tr>/ims", $page, $rows)) {
         return $lookup;
     }
     foreach ($rows[1] as $row_html) {
         if (!preg_match_all("/<td (.*?)<\\/td>/ims", $row_html, $cells)) {
             continue;
         }
         // each captured cell looks like 'attributes>text'; index 1 of the split is the text
         $value_parts = explode(">", $cells[1][1]);
         $key_parts = explode(">", $cells[1][2]);
         $lookup[$key_parts[1]] = $value_parts[1];
     }
     return $lookup;
 }
Example #29
0
 /**
  * Downloads the zip at $this->zip_path, unpacks it into a fresh temp directory
  * and records the paths of the expected text files in $this->text_path[1..11].
  *
  * If the files are not found directly in the temp directory, a sub-directory
  * named after the zip file (without ".zip") is tried instead.
  *
  * @return bool true when the files were located; false when the download failed,
  *              the temp file could not be written, or the files are missing
  */
 function load_zip_contents()
 {
     $this->TEMP_FILE_PATH = create_temp_dir() . "/";
     $file_contents = Functions::lookup_with_cache($this->zip_path, array('timeout' => 172800, 'download_attempts' => 5));
     if (!$file_contents) {
         debug("\n\n Connector terminated. Remote files are not ready.\n\n");
         return false;
     }
     $parts = pathinfo($this->zip_path);
     $temp_file_path = $this->TEMP_FILE_PATH . "/" . $parts["basename"];
     if (!($TMP = fopen($temp_file_path, "w"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $temp_file_path);
         // was a bare `return;`; every failure path now reports false
         return false;
     }
     fwrite($TMP, $file_contents);
     fclose($TMP);
     // the file name derives from the download location, so quote both paths for the shell
     shell_exec("unzip " . escapeshellarg($temp_file_path) . " -d " . escapeshellarg($this->TEMP_FILE_PATH));
     if (!file_exists($this->TEMP_FILE_PATH . "/Hds1-Hymenoptera-Final.txt")) {
         // the archive may unpack into a folder named after the zip file
         $this->TEMP_FILE_PATH = str_ireplace(".zip", "", $temp_file_path);
         if (!file_exists($this->TEMP_FILE_PATH . "/Hds1-Hymenoptera-Final.txt")) {
             return false;
         }
     }
     for ($i = 1; $i <= 10; $i++) {
         $this->text_path[$i] = $this->TEMP_FILE_PATH . "/Hds" . $i . "-Hymenoptera-Final.txt";
     }
     $this->text_path[11] = $this->TEMP_FILE_PATH . "/HymEcoParDone.txt";
     return true;
 }
 /**
  * Downloads the zip at $this->fishbase_data (cached for 20 days), unpacks it into
  * a fresh temp directory and stores the paths of the extracted text files in
  * $this->text_path under their *_PATH keys.
  *
  * If taxon.txt is not found directly in the temp directory, a sub-directory
  * named "fishbase" (the zip name without ".zip") is tried instead. The method
  * returns nothing; on failure $this->text_path is simply left unset.
  */
 function load_zip_contents()
 {
     $this->TEMP_FILE_PATH = create_temp_dir() . "/";
     $options = $this->download_options;
     // expire_seconds = 20 days in normal operation 1728000
     $options['expire_seconds'] = 1728000;
     $zip_bytes = Functions::lookup_with_cache($this->fishbase_data, $options);
     if (!$zip_bytes) {
         echo "\n\n Connector terminated. Remote files are not ready.\n\n";
         return;
     }
     $zip_file = $this->TEMP_FILE_PATH . "/fishbase.zip";
     $handle = Functions::file_open($zip_file, "w");
     if (!$handle) {
         return;
     }
     fwrite($handle, $zip_bytes);
     fclose($handle);
     shell_exec("unzip {$zip_file} -d {$this->TEMP_FILE_PATH}");
     if (!file_exists($this->TEMP_FILE_PATH . "/taxon.txt")) {
         // fall back to the folder named after the zip file
         $this->TEMP_FILE_PATH = str_ireplace(".zip", "", $zip_file);
         if (!file_exists($this->TEMP_FILE_PATH . "/taxon.txt")) {
             return;
         }
     }
     $extracted = array(
         'TAXON_PATH' => "taxon.txt",
         'TAXON_COMNAMES_PATH' => "taxon_comnames.txt",
         'TAXON_DATAOBJECT_PATH' => "taxon_dataobject.txt",
         'TAXON_DATAOBJECT_AGENT_PATH' => "taxon_dataobject_agent.txt",
         'TAXON_DATAOBJECT_REFERENCE_PATH' => "taxon_dataobject_reference.txt",
         'TAXON_REFERENCES_PATH' => "taxon_references.txt",
         'TAXON_SYNONYMS_PATH' => "taxon_synonyms.txt",
     );
     foreach ($extracted as $key => $file_name) {
         $this->text_path[$key] = $this->TEMP_FILE_PATH . "/" . $file_name;
     }
 }