/**
 * Downloads the FishBase archive named by $this->fishbase_data into a new
 * temporary directory, extracts it with tar, and records the path of each
 * expected text file in $this->text_path.
 *
 * If taxon.txt is not at the top of the temp directory, the method looks in
 * the directory obtained by stripping ".zip" from the archive path and
 * repoints $this->TEMP_FILE_PATH there. When the download fails, the archive
 * cannot be written, or taxon.txt is found in neither place, the method
 * returns early and leaves $this->text_path untouched.
 *
 * @return void
 */
function load_zip_contents()
 {
     $this->TEMP_FILE_PATH = create_temp_dir() . "/";
     $file_contents = Functions::get_remote_file($this->fishbase_data, array('timeout' => 172800));
     if (!$file_contents) {
         echo "\n\n Connector terminated. Remote files are not ready.\n\n";
         return;
     }

     $temp_file_path = $this->TEMP_FILE_PATH . "/fishbase.zip";
     $TMP = fopen($temp_file_path, "w");
     if (!$TMP) {
         // Without the archive on disk there is nothing to extract.
         echo "\n\n Connector terminated. Couldn't open file: {$temp_file_path}\n\n";
         return;
     }
     fwrite($TMP, $file_contents);
     fclose($TMP);

     // Quote both paths so an unusual temp directory name cannot break the command.
     shell_exec("tar -xzf " . escapeshellarg($temp_file_path) . " -C " . escapeshellarg($this->TEMP_FILE_PATH));

     if (!file_exists($this->TEMP_FILE_PATH . "/taxon.txt")) {
         // The archive may unpack into a sub-directory named after itself.
         $this->TEMP_FILE_PATH = str_ireplace(".zip", "", $temp_file_path);
         if (!file_exists($this->TEMP_FILE_PATH . "/taxon.txt")) {
             return;
         }
     }

     $expected_files = array(
         'TAXON_PATH' => "taxon.txt",
         'TAXON_COMNAMES_PATH' => "taxon_comnames.txt",
         'TAXON_DATAOBJECT_PATH' => "taxon_dataobject.txt",
         'TAXON_DATAOBJECT_AGENT_PATH' => "taxon_dataobject_agent.txt",
         'TAXON_DATAOBJECT_REFERENCE_PATH' => "taxon_dataobject_reference.txt",
         'TAXON_REFERENCES_PATH' => "taxon_references.txt",
         'TAXON_SYNONYMS_PATH' => "taxon_synonyms.txt",
     );
     foreach ($expected_files as $key => $basename) {
         $this->text_path[$key] = $this->TEMP_FILE_PATH . "/" . $basename;
     }
 }
 /**
  * Fetches and decompresses the resource document when $this->xml_path ends
  * in ".gz" or ".gzip".
  *
  * The compressed file is downloaded into a new temp directory, expanded with
  * gunzip, read back, and the temp directory is removed again. As a side
  * effect $this->xml_path is left pointing at the (now removed) expanded file.
  *
  * NOTE(review): the expanded name is derived by removing ".gz", so a ".gzip"
  * source does not map to a clean file name — confirm whether ".gzip" inputs
  * occur in practice.
  *
  * @return string|false|mixed "" when xml_path is not a gzip path, false when
  *                            the download or temp-file write fails, otherwise
  *                            whatever Functions::get_remote_file() yields for
  *                            the expanded file.
  */
 function load_xml_string()
 {
     debug("Please wait, downloading resource document...");
     if (!preg_match("/^(.*)\\.(gz|gzip)\$/", $this->xml_path, $arr)) {
         return "";
     }

     $path_parts = pathinfo($this->xml_path);
     $filename = $path_parts['basename'];
     $temp_dir = create_temp_dir() . "/";
     debug("temp file path: " . $temp_dir);

     $file_contents = Functions::get_remote_file($this->xml_path, array('timeout' => 172800));
     if (!$file_contents) {
         // Do not leave the empty temp directory behind on failure.
         recursive_rmdir($temp_dir);
         debug("Connector terminated. Remote files are not ready.");
         return false;
     }

     $temp_file_path = $temp_dir . "/" . $filename;
     $TMP = fopen($temp_file_path, "w");
     if (!$TMP) {
         recursive_rmdir($temp_dir);
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $temp_file_path);
         return false;
     }
     fwrite($TMP, $file_contents);
     fclose($TMP);

     // The file name comes from a remote URL, so it must be quoted for the shell.
     shell_exec("gunzip -f " . escapeshellarg($temp_file_path));
     $this->xml_path = $temp_dir . str_ireplace(".gz", "", $filename);
     debug("xml path: " . $this->xml_path);

     echo "\n {$temp_dir} \n";
     $file_contents = Functions::get_remote_file($this->xml_path, array('timeout' => 172800));

     // remove temp dir
     recursive_rmdir($temp_dir);
     echo "\n temporary directory removed: [{$temp_dir}]\n";
     return $file_contents;
 }
예제 #3
0
/**
 * Removes from a resource XML file every <dataObject> whose dc:identifier is
 * listed in the remote "BHL images in EOL group" JSON file, then rewrites the
 * resource file in place.
 *
 * After the removals the XML is passed through
 * ResourceDataObjectElementsSetting::delete_taxon_if_no_dataObject() before
 * being written back to CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml".
 *
 * @param mixed $resource_id Identifier used to build the resource file name.
 * @return void
 */
function remove_bhl_images_already_existing_in_eol_group($resource_id)
{
    $file = "http://dl.dropbox.com/u/7597512/BHL_images/BHL_images_in_EOLGroup.txt";
    $contents = Functions::get_remote_file($file, array('timeout' => 600, 'download_attempts' => 5));
    $do_ids = json_decode($contents, true);
    if (!is_array($do_ids)) {
        // A failed download or malformed JSON means "nothing to remove".
        $do_ids = array();
    }
    print "\n\n from text file: " . count($do_ids);

    // Index the ids once so each membership test is a hash lookup.
    $known_ids = array();
    foreach ($do_ids as $known_id) {
        if (is_scalar($known_id)) {
            $known_ids[trim((string) $known_id)] = true;
        }
    }

    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    $xml_string = Functions::get_remote_file($resource_path, array('timeout' => 240, 'download_attempts' => 5));
    $xml = simplexml_load_string($xml_string);
    if ($xml === false) {
        print "\n\n Could not parse resource XML: {$resource_path}";
        return;
    }

    $i = 0;
    $deleted_ids = array();
    $deleted = 0;
    foreach ($xml->taxon as $taxon) {
        $taxon_index = $i;
        $i++;
        $dwc = $taxon->children("http://rs.tdwg.org/dwc/dwcore/");
        echo "\n[" . $dwc->ScientificName . "]";

        $deleted_do_keys = array();
        $position = 0;
        foreach ($taxon->dataObject as $do) {
            $dc2 = $do->children("http://purl.org/dc/elements/1.1/");
            $do_id = trim($dc2->identifier);
            if (isset($known_ids[$do_id])) {
                $deleted++;
                $deleted_ids[$do_id] = 1;
                print "\n --- deleting {$do_id}";
                $deleted_do_keys[] = $position;
            }
            $position++;
        }

        // Remove from the highest index down: unsetting a SimpleXML child
        // renumbers the siblings after it, so ascending order would hit the
        // wrong nodes once the first one is gone.
        foreach (array_reverse($deleted_do_keys) as $key) {
            unset($xml->taxon[$taxon_index]->dataObject[$key]);
        }
    }
    print "\n\n occurrence do_ids: {$i}";
    print "\n\n deleted <dataObject>s: {$deleted}";
    print "\n\n deleted unique do_ids: " . count($deleted_ids);

    $xml_string = $xml->asXML();
    require_library('ResourceDataObjectElementsSetting');
    $xml_string = ResourceDataObjectElementsSetting::delete_taxon_if_no_dataObject($xml_string);
    if (!($WRITE = Functions::file_open($resource_path, "w"))) {
        return;
    }
    fwrite($WRITE, $xml_string);
    fclose($WRITE);
}
 /**
  * Concatenates the <taxon> sections of every species XML document into one
  * EOL transfer-schema file at CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml".
  *
  * Each URL from self::get_species_urls() is downloaded (with a two-second
  * pause between requests), checked for well-formedness, and the text from
  * its first "<taxon>" up to "</response>" is appended to the output. The
  * Mexican CC BY-NC-SA 2.5 licence URL is rewritten to the generic one.
  * Documents that do not parse, or that lack the expected markers, are
  * reported and skipped.
  *
  * @param mixed $resource_id Identifier used to build the output file name.
  * @return void
  */
 function combine_all_xmls($resource_id)
 {
     if (!($species_urls = self::get_species_urls())) {
         return;
     }
     debug("\n\n Start compiling all XML...");
     $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
     if (!($OUT = fopen($old_resource_path, "w+"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $old_resource_path);
         return;
     }
     $str = "<?xml version='1.0' encoding='utf-8' ?>\n";
     $str .= "<response\n";
     $str .= "  xmlns='http://www.eol.org/transfer/content/0.3'\n";
     $str .= "  xmlns:xsd='http://www.w3.org/2001/XMLSchema'\n";
     $str .= "  xmlns:dc='http://purl.org/dc/elements/1.1/'\n";
     $str .= "  xmlns:dcterms='http://purl.org/dc/terms/'\n";
     $str .= "  xmlns:geo='http://www.w3.org/2003/01/geo/wgs84_pos#'\n";
     $str .= "  xmlns:dwc='http://rs.tdwg.org/dwc/dwcore/'\n";
     $str .= "  xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'\n";
     $str .= "  xsi:schemaLocation='http://www.eol.org/transfer/content/0.3 http://services.eol.org/schema/content_0_3.xsd'>\n";
     fwrite($OUT, $str);

     $i = 0;
     $total = count($species_urls);
     foreach ($species_urls as $filename) {
         $i++;
         print "\n {$i} of {$total}";
         sleep(2);
         $contents = Functions::get_remote_file($filename);
         if (!simplexml_load_string($contents)) {
             print "\n {$filename} - invalid XML";
             continue;
         }
         $contents = str_ireplace("http://creativecommons.org/licenses/by-nc-sa/2.5/mx/", "http://creativecommons.org/licenses/by-nc-sa/2.5/", $contents);

         $pos1 = stripos($contents, "<taxon>");
         $pos2 = stripos($contents, "</response>");
         // stripos() yields false when a marker is missing; using that as an
         // offset would copy the document prologue into the combined file.
         if ($pos1 === false || $pos2 === false || $pos2 <= $pos1) {
             print "\n {$filename} - no <taxon> section found";
             continue;
         }
         fwrite($OUT, substr($contents, $pos1, $pos2 - $pos1));
     }
     fwrite($OUT, "</response>");
     fclose($OUT);
     print "\n All XML compiled\n -end-of-process- \n";
 }
예제 #5
0
 /**
  * Repairs doubled quotes in the media.txt of a Darwin Core archive and
  * republishes the archive under CONTENT_RESOURCE_LOCAL_PATH.
  *
  * The archive is extracted via INBioAPI::extract_archive_file(); media.txt is
  * rewritten with the anchor-tag quoting fixed; the original archive copy is
  * deleted from the extraction directory; a new {resource_id}.tar.gz is built;
  * and the extracted files are copied into a fresh {resource_id}/ directory.
  * The extraction directory is removed before returning.
  *
  * @param mixed  $resource_id Identifier used for the output archive and directory names.
  * @param string $dwca_file   Location of the source archive (e.g. ".../plazi.zip").
  * @return void
  */
 function clean_media_extension($resource_id, $dwca_file)
 {
     require_library('connectors/INBioAPI');
     $func = new INBioAPI();
     $paths = $func->extract_archive_file($dwca_file, "meta.xml");
     if (!$paths) {
         return;
     }
     print_r($paths);

     $archive_path = $paths['archive_path'];
     $media_path = $archive_path . "media.txt";
     if ($contents = Functions::get_remote_file($media_path, array('timeout' => 172800))) {
         $contents = str_ireplace('<a title=""', '<a title="', $contents);
         $contents = str_ireplace('"" href=""', '" href="', $contents);
         $contents = str_ireplace('"">', '">', $contents);

         //saving new media.txt
         if (!($WRITE = fopen($media_path, "w"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $media_path);
             // Clean up here as well, otherwise the extraction directory is leaked.
             recursive_rmdir($archive_path);
             return;
         }
         fwrite($WRITE, $contents);
         fclose($WRITE);

         // remove the archive file e.g. plazi.zip
         $info = pathinfo($dwca_file);
         $archive_copy = $archive_path . $info["basename"];
         if (file_exists($archive_copy)) {
             unlink($archive_copy);
         }

         // creating the archive file; both paths are quoted for the shell
         $resource_base = CONTENT_RESOURCE_LOCAL_PATH . $resource_id;
         $command_line = "tar -czf " . escapeshellarg($resource_base . ".tar.gz") . " --directory=" . escapeshellarg($archive_path) . " .";
         shell_exec($command_line);

         // moving files to /resources/
         recursive_rmdir($resource_base);
         if (!file_exists($resource_base)) {
             mkdir($resource_base);
         }
         $dst = $resource_base . "/";
         $files = glob($archive_path . "*.*");
         if ($files) {
             // glob() returns false on error, which must not reach foreach.
             foreach ($files as $file) {
                 copy($file, $dst . basename($file));
             }
         }
     }

     // remove temp dir
     recursive_rmdir($archive_path);
     echo "\n temporary directory removed: " . $archive_path;
 }
예제 #6
0
$url_list_of_group_ids = "http://www.morphbank.net/eolids.xml";
*/
include_once dirname(__FILE__) . "/../../config/environment.php";
$timestart = time_elapsed();
$mysqli =& $GLOBALS['mysqli_connection'];
$resource_id = 83;
$details_method_prefix = "http://services.morphbank.net/mb3/request?method=id&format=svc&limit=2&id=";
$image_ids = array();
$schema_taxa = array();
$used_taxa = array();
$url_list_of_image_ids = "http://services.morphbank.net/mb3/request?method=eol&format=id&limit=-1";
/* Excludes MorphBank IDs as suggested by BioImages Vanderbuilt */
$excluded_MorphBank_IDs = prepare_excluded_ids();
if ($url_list_of_image_ids) {
    print "\n [url_list_of_image_ids: {$url_list_of_image_ids}] \n";
    $response = Functions::get_remote_file($url_list_of_image_ids, array('download_wait_time' => 1000000, 'timeout' => 600, 'download_attempts' => 5));
    $image_id_xml = simplexml_load_string($response);
    if ($image_id_xml) {
        foreach ($image_id_xml->id as $id) {
            $image_ids[] = $id;
        }
    }
}
$total_image_ids = count($image_ids);
print "\n count of image ID's = {$total_image_ids}";
if ($total_image_ids == 0) {
    exit("\n Program will terminate. MorphBank service not ready.");
}
/* loop through image ids */
$k = 0;
foreach ($image_ids as $image_id) {
예제 #7
0
 /**
  * Returns the body of $url, served from an on-disk cache when a fresh, valid
  * copy exists and downloaded (then cached) otherwise.
  *
  * Cache files live at {cache_path}[{resource_id}/]{aa}/{bb}/{md5}.cache where
  * aa/bb are the first two byte pairs of md5($url).
  *
  * Recognised $options (all optional; the whole array is also forwarded to
  * Functions::get_remote_file()):
  *  - expire_seconds   maximum cache age; defaults to 2592000 (30 days);
  *                     false means the cache never expires.
  *  - timeout          defaults to 120.
  *  - cache_path       cache root directory.
  *  - resource_id      adds a per-resource sub-directory under cache_path.
  *  - validation_regex pattern body (no delimiters) the cached text must match.
  *
  * A cached copy that is empty, fails validation or has expired is deleted and
  * replaced by a new download.
  *
  * @param string $url
  * @param array  $options
  * @return mixed Cached contents, or whatever Functions::get_remote_file() returns.
  */
 public static function lookup_with_cache($url, $options = array())
 {
     // default expire time is 30 days
     if (!isset($options['expire_seconds'])) {
         $options['expire_seconds'] = 2592000;
     }
     if (!isset($options['timeout'])) {
         $options['timeout'] = 120;
     }
     // if(!isset($options['cache_path'])) $options['cache_path'] = DOC_ROOT . "tmp/cache/";
     if (!isset($options['cache_path'])) {
         $options['cache_path'] = "/Volumes/Eli black/eol_cache/";
     }

     $md5 = md5($url);
     $cache1 = substr($md5, 0, 2);
     $cache2 = substr($md5, 2, 2);

     if (!empty($options['resource_id'])) {
         $options['cache_path'] .= $options['resource_id'] . "/";
         if (!file_exists($options['cache_path'])) {
             mkdir($options['cache_path']);
         }
     }
     $bucket_dirs = array(
         $options['cache_path'] . $cache1,
         $options['cache_path'] . "{$cache1}/{$cache2}",
     );
     foreach ($bucket_dirs as $dir) {
         if (!file_exists($dir)) {
             mkdir($dir);
         }
     }

     $cache_path = $options['cache_path'] . "{$cache1}/{$cache2}/{$md5}.cache";
     if (file_exists($cache_path)) {
         $file_contents = file_get_contents($cache_path);
         $cache_is_valid = true;
         if (!empty($options['validation_regex']) && !preg_match("/" . $options['validation_regex'] . "/ims", $file_contents)) {
             $cache_is_valid = false;
         }
         // A body of "0" is falsy in PHP but is still real content.
         $has_contents = ($file_contents !== false && $file_contents !== "");
         if ($cache_is_valid && $has_contents) {
             if ($options['expire_seconds'] === false) {
                 return $file_contents;
             }
             $file_age_in_seconds = time() - filemtime($cache_path);
             if ($file_age_in_seconds < $options['expire_seconds']) {
                 return $file_contents;
             }
         }
         // Best effort: a stale file that cannot be removed is overwritten below.
         @unlink($cache_path);
     }

     $file_contents = Functions::get_remote_file($url, $options);
     if ($FILE = Functions::file_open($cache_path, 'w+')) {
         fwrite($FILE, $file_contents);
         fclose($FILE);
     } elseif ($h = Functions::file_open(DOC_ROOT . "/public/tmp/cant_delete.txt", 'a')) {
         // Record the cache file that could not be written.
         fwrite($h, $cache_path . "\n");
         fclose($h);
     }
     // Always hand back the download, even when nothing could be written to disk.
     return $file_contents;
 }
예제 #8
0
 /**
  * Requests one page of photos from Flickr and returns the decoded JSON reply.
  *
  * The default request is flickr.groups.pools.getPhotos for $group_id. When
  * $user_id is FLICKR_BHL_ID or FLICKR_SMITHSONIAN_ID the request is replaced
  * by flickr.photos.search over that user's photostream, restricted by
  * licence, privacy filter and the optional taken-date range.
  *
  * @return mixed Result of json_decode() on the service response.
  */
 public static function pools_get_photos($group_id, $machine_tag, $per_page, $page, $auth_token = "", $user_id = NULL, $start_date = NULL, $end_date = NULL)
 {
     $extras = "last_update,media,url_o";

     $pool_params = array(
         "group_id" => $group_id,
         "machine_tags" => $machine_tag,
         "extras" => $extras,
         "per_page" => $per_page,
         "page" => $page,
         "auth_token" => $auth_token,
         "user_id" => $user_id,
         "format" => "json",
         "nojsoncallback" => 1,
     );
     $request_url = self::generate_rest_url("flickr.groups.pools.getPhotos", $pool_params, 1);

     $photostream_users = array(FLICKR_BHL_ID, FLICKR_SMITHSONIAN_ID);
     if (in_array($user_id, $photostream_users)) {
         /* remove group_id param to get images from photostream, and not only those in the EOL Flickr group */
         $search_params = array(
             "machine_tags" => $machine_tag,
             "extras" => $extras,
             "per_page" => $per_page,
             "page" => $page,
             "auth_token" => $auth_token,
             "user_id" => $user_id,
             "license" => "1,2,4,5,7",
             "privacy_filter" => "1",
             "sort" => "date-taken-asc",
             "min_taken_date" => $start_date,
             "max_taken_date" => $end_date,
             "format" => "json",
             "nojsoncallback" => 1,
         );
         $request_url = self::generate_rest_url("flickr.photos.search", $search_params, 1);
     }

     $response = Functions::get_remote_file($request_url, array('timeout' => 30));
     return json_decode($response);
 }
예제 #9
0
 /**
  * Downloads SPIRE service documents 1..259 and assembles three lookup tables
  * from them.
  *
  * From each <ConfirmedFoodWebLink> it reads the predator, prey and study
  * reference (the URL fragment of each element's attribute, underscores turned
  * into spaces for taxa) and from each <Study> the habitat, locality and
  * citation details. Taxon ancestry is then attached from the spreadsheet at
  * SPIRE_PATH_ANCESTRY.
  *
  * NOTE(review): a <Study> is stored under the reference number of the last
  * food-web link read so far, i.e. the code presumes one study per document —
  * confirm against the service output.
  *
  * @return array|false array($arr_taxa, $arr_ref, $reference), or false as
  *                     soon as one service document cannot be downloaded.
  *   - $arr_taxa[name]  = array('desc', 'objects' => array('predator', 'prey'), ['ancestry'])
  *   - $arr_ref[ref]    = array('predator' => names, 'prey' => names)
  *   - $reference[ref]  = study details (title, year, place, habitat, ...)
  */
 function assemble_xml_files()
 {
     $arr_taxa = array();
     $arr_predator = array();
     $arr_prey = array();
     $arr_ref = array();
     // Defined up front so the return value is valid even when no <Study> is seen.
     $reference = array();
     // These carry over between records and documents when an element has no
     // attributes; start them empty instead of undefined.
     $predator = "";
     $prey = "";
     $ref_num = "";

     for ($i = 1; $i <= 259; $i++) {
         print "\n {$i} ---" . SPIRE_SERVICE . $i;
         if (!($str = Functions::get_remote_file(SPIRE_SERVICE . $i))) {
             echo "\n\nSPIRE service not available at the moment.\n\n";
             return false;
         }
         // Rename the namespaced attribute so attributes() exposes it without a namespace argument.
         $str = str_replace('rdf:resource', 'rdf_resource', $str);
         $str = utf8_encode($str);
         $xml = simplexml_load_string($str);
         if ($xml === false) {
             print "\n {$i} - invalid XML, skipped";
             continue;
         }

         foreach ($xml->ConfirmedFoodWebLink as $rec) {
             foreach ($rec->predator[0]->attributes() as $attribute => $value) {
                 $arr = parse_url($value);
                 $predator = isset($arr['fragment']) ? trim($arr['fragment']) : "";
                 $predator = str_replace("_", " ", $predator);
             }
             $pred_desc = trim($rec->predator_description);
             foreach ($rec->prey[0]->attributes() as $attribute => $value) {
                 $arr = parse_url($value);
                 $prey = isset($arr['fragment']) ? trim($arr['fragment']) : "";
                 $prey = str_replace("_", " ", $prey);
             }
             $prey_desc = trim($rec->prey_description);
             foreach ($rec->observedInStudy[0]->attributes() as $attribute => $value) {
                 $arr = parse_url($value);
                 $ref_num = isset($arr['fragment']) ? trim($arr['fragment']) : "";
             }

             $arr_taxa[$predator]['desc'] = $pred_desc;
             $arr_taxa[$prey]['desc'] = $prey_desc;

             // Each list holds unique names in first-seen order.
             if (!isset($arr_predator[$predator])) {
                 $arr_predator[$predator] = array();
             }
             if (!in_array($prey, $arr_predator[$predator])) {
                 $arr_predator[$predator][] = $prey;
             }
             if (!isset($arr_prey[$prey])) {
                 $arr_prey[$prey] = array();
             }
             if (!in_array($predator, $arr_prey[$prey])) {
                 $arr_prey[$prey][] = $predator;
             }
             if (!isset($arr_ref[$ref_num]['predator'])) {
                 $arr_ref[$ref_num]['predator'] = array();
             }
             if (!in_array($predator, $arr_ref[$ref_num]['predator'])) {
                 $arr_ref[$ref_num]['predator'][] = $predator;
             }
             if (!isset($arr_ref[$ref_num]['prey'])) {
                 $arr_ref[$ref_num]['prey'] = array();
             }
             if (!in_array($prey, $arr_ref[$ref_num]['prey'])) {
                 $arr_ref[$ref_num]['prey'][] = $prey;
             }
         }

         foreach ($xml->Study as $rec) {
             $habitats = array();
             foreach ($rec->ofHabitat as $habitat_node) {
                 foreach ($habitat_node->attributes() as $attribute => $value) {
                     $arr = parse_url($value);
                     $habitat = isset($arr['fragment']) ? trim($arr['fragment']) : "";
                     $habitats[] = str_replace("_", " ", $habitat);
                 }
             }
             $habitats = implode(", ", $habitats);
             if ($habitats == "unknown") {
                 $habitats = "";
             }

             $place = self::parse_locality(trim($rec->locality));
             $country = isset($place["country"]) ? $place["country"] : null;
             $state = isset($place["state"]) ? $place["state"] : null;
             $locality = isset($place["locality"]) ? $place["locality"] : null;

             $titleAndAuthors = trim($rec->titleAndAuthors);
             if ($titleAndAuthors == "Animal Diversity Web") {
                 $titleAndAuthors = "Myers, P., R. Espinosa, C. S. Parr, T. Jones, G. S. Hammond, and T. A. Dewey. 2006. The Animal Diversity Web (online). Accessed February 16, 2011 at http://animaldiversity.org. http://www.animaldiversity.org";
             }
             $reference[$ref_num] = array("titleAndAuthors" => $titleAndAuthors, "publicationYear" => trim($rec->publicationYear), "place" => trim($rec->locality), "country" => $country, "state" => $state, "locality" => $locality, "habitat" => $habitats);
         }
     }
     //main loop 1-259

     //for ancestry
     require_library('XLSParser');
     $parser = new XLSParser();
     $names = $parser->convert_sheet_to_array(SPIRE_PATH_ANCESTRY);
     foreach ($arr_taxa as $taxon => $temp) {
         $arr_taxa[$taxon]['objects'] = array(
             "predator" => isset($arr_predator[$taxon]) ? $arr_predator[$taxon] : null,
             "prey" => isset($arr_prey[$taxon]) ? $arr_prey[$taxon] : null,
         );
         //start ancestry
         $key = array_search(trim($taxon), $names['tname']);
         // Index 0 is a valid hit, so test for "not found" explicitly.
         if ($key !== false && $key !== null) {
             $arr_taxa[$taxon]['ancestry'] = self::get_ancestry($key, $names);
         }
     }
     return array($arr_taxa, $arr_ref, $reference);
 }
예제 #10
0
 /**
  * Scrapes the landing page at $this->domain for links to vm_search.php and
  * collects the value of each link's "database" query parameter.
  *
  * The collected list is dumped with print_r() before being returned. When the
  * page cannot be downloaded a notice is echoed and the list stays empty.
  *
  * @return array Database names in page order (duplicates are kept).
  */
 private function get_main_groups()
 {
     $download_options = array('timeout' => 9600, 'download_attempts' => 2, 'delay_in_minutes' => 5);
     $page = Functions::get_remote_file($this->domain, $download_options);

     $databases = array();
     if (!$page) {
         echo "\n investigate: main site is down\n";
     } elseif (preg_match_all("/href=\"vm_search\\.php(.*?)\"/ims", $page, $links)) {
         foreach ($links[1] as $query_string) {
             // Only links that carry a database parameter contribute a group.
             if (preg_match("/database\\=(.*?)\\&/ims", $query_string, $found)) {
                 $databases[] = $found[1];
             }
         }
     }

     print_r($databases);
     return $databases;
 }
예제 #11
0
 /**
  * Downloads the zip archive at $this->zip_path into a new temp directory
  * (stored in $this->TEMP_FILE_PATH) and unzips it there.
  *
  * @return bool TRUE when all.xml exists in the temp directory after
  *              extraction; FALSE when the download fails, the archive cannot
  *              be written, or all.xml is missing.
  */
 function load_zip_contents()
 {
     $this->TEMP_FILE_PATH = create_temp_dir() . "/";
     $file_contents = Functions::get_remote_file($this->zip_path, array('timeout' => 999999, 'download_attempts' => 5));
     if (!$file_contents) {
         debug("\n\n Connector terminated. Remote files are not ready.\n\n");
         return FALSE;
     }

     $parts = pathinfo($this->zip_path);
     $temp_file_path = $this->TEMP_FILE_PATH . "/" . $parts["basename"];
     if (!($TMP = fopen($temp_file_path, "w"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $temp_file_path);
         // Report failure the same way as every other failing path.
         return FALSE;
     }
     fwrite($TMP, $file_contents);
     fclose($TMP);

     // The archive name comes from a URL, so quote it before it reaches the shell.
     shell_exec("unzip " . escapeshellarg($temp_file_path) . " -d " . escapeshellarg($this->TEMP_FILE_PATH));
     return file_exists($this->TEMP_FILE_PATH . "/all.xml");
 }
예제 #12
0
/**
 * Downloads an HTML listing, splits it into records on "<br><br>", extracts a
 * scientific name and a description from each record and passes them to
 * prepare_agent_rights().
 *
 * The split is done by rewriting each "<br><br>" to "&arr[]=" and letting
 * parse_str() build the list, so record text is URL-decoded in the process.
 * The trailing seven characters are dropped on the assumption that the text
 * ends with a separator.
 *
 * @param string $file   Location of the HTML listing.
 * @param mixed  $doc_id Passed through to prepare_agent_rights().
 * @return void
 */
function process_file1($file, $doc_id)
{
    global $wrap;
    global $used_taxa;
    print "{$wrap}";
    $str = Functions::get_remote_file($file);
    $str = clean_str($str);
    $str = str_ireplace('<br><br>', "&arr[]=", $str);
    $str = trim($str);
    $str = substr($str, 0, strlen($str) - 7);
    //to remove last part of string "&arr[]="

    // Parse into a result array: the one-argument form is gone in PHP 8 and
    // let remote content overwrite arbitrary local variables.
    $parsed = array();
    parse_str($str, $parsed);
    $arr = (isset($parsed['arr']) && is_array($parsed['arr'])) ? $parsed['arr'] : array();
    print "after parse_str recs = " . count($arr) . "{$wrap} {$wrap}";

    $i = 0;
    foreach ($arr as $record) {
        $record = clean_str($record);
        $record = str_ireplace("< /i>", "</i>", $record);
        $i++;

        //<b><i>Abrus precatorius</i></b>
        //get sciname
        $beg = '<b>';
        $end1 = '</i></b>';
        $end2 = '</i>';
        $end3 = '</b>';
        $sciname = strip_tags(trim(parse_html($record, $beg, $end1, $end2, $end3, $end1, "")));
        $sciname = str_ireplace(chr(13), "", $sciname);
        $sciname = str_ireplace(chr(10), "", $sciname);
        $sciname = trim($sciname);

        //get desc: everything after the name, using "xxx" as an end marker
        $record .= "xxx";
        $beg = '</i></b>';
        $end1 = 'xxx';
        $desc = strip_tags(trim(parse_html($record, $beg, $end1, $end1, $end1, $end1, "")));
        // Replace a single trailing comma with the closing full stop.
        if (substr($desc, -1) === ",") {
            $desc = substr($desc, 0, strlen($desc) - 1);
        }
        $desc .= ".";

        if ($sciname == "") {
            print "jjj";
        }
        print "{$i}. {$sciname} {$wrap}";
        prepare_agent_rights($doc_id, $sciname, $desc);
    }
    //main loop
}
예제 #13
0
 /**
  * Pages through the Tropicos name list service and writes every NameId, one
  * per line, to $this->TEMP_FILE_PATH . "tropicos_ids.txt".
  *
  * Each request asks for $pagesize records starting at $startid; the next
  * request starts one past the last NameId received. The loop ends when a
  * request fails or after 1300 requests.
  *
  * @return void
  */
 function build_id_list()
 {
     $ids_file = $this->TEMP_FILE_PATH . "tropicos_ids.txt";
     if (!($OUT = fopen($ids_file, "w"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $ids_file);
         return;
     }
     $startid = 0;
     // debug orig value 0; 1600267 with mediaURL and <location>; 1201245 with thumbnail size images
     //pagesize is the no. of records returned from Tropicos master list service
     $pagesize = 1000;
     // debug orig value 1000
     $count = 0;
     // NOTE(review): the wait time was passed positionally ahead of the options
     // array; it now travels inside the options like the other
     // get_remote_file() calls — confirm against the helper's signature.
     $download_options = array('download_wait_time' => DOWNLOAD_WAIT_TIME, 'timeout' => 4800, 'download_attempts' => 5);
     while (true) {
         $count++;
         $url = TROPICOS_API_SERVICE . "List?startid={$startid}&PageSize={$pagesize}&apikey=" . TROPICOS_API_KEY . "&format=json";
         echo "\n[{$count}] {$url}";
         $json_ids = Functions::get_remote_file($url, $download_options);
         if (!$json_ids) {
             echo "\n --server not accessible-- \n";
             break;
         }

         $ids = json_decode($json_ids, true);
         $str = "";
         // json_decode() gives null on malformed input; treat that as an empty page.
         if (is_array($ids)) {
             foreach ($ids as $id) {
                 if (!empty($id["NameId"])) {
                     $str .= $id["NameId"] . "\n";
                     $startid = $id["NameId"];
                 } else {
                     echo "\n nameid undefined";
                 }
             }
         }
         $startid++;
         // to avoid duplicate ids, set next id to get
         if ($str != "") {
             fwrite($OUT, $str);
         }

         if ($count == 1300) {
             break;
         }
         // normal operation
     }
     fclose($OUT);
 }
예제 #14
0
 /**
  * Downloads a zip archive into a new temp directory, unzips it and returns
  * the expected location of each requested file.
  *
  * @param string $zip_path         Location of the archive.
  * @param array  $download_options Passed to Functions::get_remote_file().
  * @param array  $files            Base names (without extension) expected in the archive.
  * @param string $extension        Extension appended to each base name, e.g. ".txt".
  * @return array|null Map of base name => extracted path; an empty array when
  *                    the download fails; null when the archive cannot be
  *                    written or the first expected file is missing after
  *                    extraction.
  */
 private function load_zip_contents($zip_path, $download_options, $files, $extension)
 {
     $text_path = array();
     $temp_path = create_temp_dir();
     $file_contents = Functions::get_remote_file($zip_path, $download_options);
     if (!$file_contents) {
         debug("\n\n Connector terminated. Remote files are not ready.\n\n");
         return $text_path;
     }

     $parts = pathinfo($zip_path);
     $temp_file_path = $temp_path . "/" . $parts["basename"];
     if (!($TMP = Functions::file_open($temp_file_path, "w"))) {
         return;
     }
     fwrite($TMP, $file_contents);
     fclose($TMP);

     // The archive name comes from the caller's URL; quote it for the shell.
     shell_exec("unzip " . escapeshellarg($temp_file_path) . " -d " . escapeshellarg($temp_path));

     // Only the first expected file is probed to decide whether extraction worked.
     if (!isset($files[0]) || !file_exists($temp_path . "/" . $files[0] . $extension)) {
         return;
     }
     foreach ($files as $file) {
         $text_path[$file] = $temp_path . "/" . $file . $extension;
     }
     return $text_path;
 }
예제 #15
0
<?php

namespace php_active_record;

include_once dirname(__FILE__) . "/../../config/environment.php";
// Refreshes resource 22 (Animal Diversity Web): download the gzipped XML,
// expand it, unwrap bare <a>text</a> elements, make sure an XML declaration
// is present, and publish the result under CONTENT_RESOURCE_LOCAL_PATH.
$new_resource_path = DOC_ROOT . "temp/22.xml.gz";
$new_resource = Functions::get_remote_file("http://animaldiversity.ummz.umich.edu/XML/adw_eol.xml.gz");
if (!$new_resource) {
    // Stop here so a failed download cannot overwrite the published resource.
    debug(__FILE__ . ":" . __LINE__ . ": Couldn't download the ADW resource");
    return;
}
if (!($OUT = fopen($new_resource_path, "w+"))) {
    debug(__FILE__ . ":" . __LINE__ . ": Couldn't open file: " . $new_resource_path);
    return;
}
fwrite($OUT, $new_resource);
fclose($OUT);

shell_exec("gunzip -f " . escapeshellarg($new_resource_path));
$new_resource_path = DOC_ROOT . "temp/22.xml";
$xml = file_exists($new_resource_path) ? file_get_contents($new_resource_path) : false;
if ($xml === false || $xml === "") {
    // gunzip produced nothing usable; keep the existing resource file.
    debug(__FILE__ . ":" . __LINE__ . ": Couldn't read expanded file: " . $new_resource_path);
    return;
}

// Replace <a>text</a> (an anchor with no attributes) by its text.
$xml = preg_replace("/<a>([^<]+)<\\/a>/", "\\1", $xml);
if (substr_count($xml, "<?xml") == 0) {
    $xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" . $xml;
}

$old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . "22.xml";
if (!($OUT = fopen($old_resource_path, "w+"))) {
    debug(__FILE__ . ":" . __LINE__ . ": Couldn't open file: " . $old_resource_path);
    return;
}
fwrite($OUT, $xml);
fclose($OUT);

// Remove the expanded temp copy without going through a shell.
unlink($new_resource_path);
예제 #16
0
 /**
  * Parses one checklist page and records every species row in
  * $this->names_in_families, keyed by taxon name.
  *
  * The page is cut into blocks at each "<tr valign=bottom>"; every block after
  * the first describes one family (taken from its first <b> element) and its
  * species rows. A family written as "ORDER: Family" is split, and the order
  * is stored in $this->family_orders. A species already recorded under a
  * different family or a different avibaseid is logged and left unchanged.
  * When $this->for_testing is set, only the first family block that yields a
  * family name is processed.
  *
  * @param string $url Location of the checklist page.
  * @return void
  */
 function get_taxa_from_html($url)
 {
     //20mins download timeout, 5 retry attempts
     $download_options = array('timeout' => 1200, 'download_attempts' => 5);
     $page = Functions::get_remote_file($url, $download_options);

     $blocks = explode("<tr valign=bottom>", $page);
     // the leading block holds no name information
     array_shift($blocks);

     foreach ($blocks as $block) {
         // the final block drags the page footer along; cut it at the table end
         if (preg_match("/^(.*?)<\\/table>/ims", $block, $found)) {
             $block = $found[1];
         }

         // a block without a family heading is of no use
         if (!preg_match("/<b>(.*?)<\\/b>/ims", $block, $found)) {
             continue;
         }
         $family = trim($found[1]);

         // headings may read "ORDER: Family"; families may also be some form of Incertae Sedis
         if (preg_match("/^([a-z]+): (.+)\$/ims", $family, $found)) {
             $family = ucfirst(strtolower($found[2]));
             $this->family_orders[$family] = ucfirst(strtolower($found[1]));
         }

         if (preg_match_all("/<tr><td>(.*?)<\\/td><td><a href=\"species.jsp\\?avibaseid=(.*?)\">(.*?)<\\/a><\\/td><td>(.*?)<\\/td><\\/tr>/ims", $block, $rows, PREG_SET_ORDER)) {
             foreach ($rows as $row) {
                 $common_name = trim($row[1]);
                 $avibaseid = trim($row[2]);
                 $taxon_name = trim($row[3]);
                 $conservation_status = trim($row[4]);
                 if (preg_match("/<i>(.*?)<\\/i>/ims", $taxon_name, $found)) {
                     $taxon_name = trim($found[1]);
                 }

                 $known = isset($this->names_in_families[$taxon_name]) ? $this->names_in_families[$taxon_name] : null;
                 if ($known) {
                     // another regional checklist files this taxon under a different family
                     if ($known['family'] != $family) {
                         debug("Family Conflict with {$taxon_name}\n");
                         continue;
                     }
                     // another regional checklist links this taxon through a different URL
                     if ($known['avibaseid'] != $avibaseid) {
                         debug("ID Conflict with {$taxon_name}\n");
                         continue;
                     }
                 }

                 $this->names_in_families[$taxon_name] = array(
                     'taxon_name' => $taxon_name,
                     'family' => $family,
                     'common_name' => $common_name,
                     'avibaseid' => $avibaseid,
                     'conservation_status' => $conservation_status,
                 );
             }
         }

         if ($this->for_testing) {
             break;
         }
     }
 }
예제 #17
0
 /**
  * Downloads the IRMNG zip archive at $this->zip_path into a new temp
  * directory, unzips it and records the two CSV paths in $this->text_path.
  *
  * $this->download_options is used for the download with the timeout raised
  * to 999999. If the main CSV is not at the top of the temp directory, the
  * directory obtained by stripping ".zip" from the archive path is tried and
  * $this->TEMP_FILE_PATH is repointed there.
  *
  * @return bool true when IRMNG_DWC_20140131.csv was found; false when the
  *              download fails, the archive cannot be written, or the CSV is
  *              missing.
  */
 private function load_zip_contents()
 {
     $this->TEMP_FILE_PATH = create_temp_dir() . "/";
     $options = $this->download_options;
     $options['timeout'] = 999999;
     $file_contents = Functions::get_remote_file($this->zip_path, $options);
     if (!$file_contents) {
         debug("\n\n Connector terminated. Remote files are not ready.\n\n");
         return false;
     }

     $parts = pathinfo($this->zip_path);
     $temp_file_path = $this->TEMP_FILE_PATH . "/" . $parts["basename"];
     if (!($TMP = fopen($temp_file_path, "w"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $temp_file_path);
         // Report failure the same way as every other failing path.
         return false;
     }
     fwrite($TMP, $file_contents);
     fclose($TMP);

     // The archive name comes from a URL, so quote it before it reaches the shell.
     shell_exec("unzip " . escapeshellarg($temp_file_path) . " -d " . escapeshellarg($this->TEMP_FILE_PATH));

     $main_csv = "/IRMNG_DWC_20140131.csv";
     if (!file_exists($this->TEMP_FILE_PATH . $main_csv)) {
         // The archive may unpack into a sub-directory named after itself.
         $this->TEMP_FILE_PATH = str_ireplace(".zip", "", $temp_file_path);
         if (!file_exists($this->TEMP_FILE_PATH . $main_csv)) {
             return false;
         }
     }
     $this->text_path["IRMNG_DWC"] = $this->TEMP_FILE_PATH . $main_csv;
     $this->text_path["IRMNG_DWC_SP_PROFILE"] = $this->TEMP_FILE_PATH . "/IRMNG_DWC_SP_PROFILE_20140131.csv";
     return true;
 }
예제 #18
0
<?php

namespace php_active_record;

/* Tropical Lichens connector
estimated execution time: 8 seconds
Partner provides a service that resembles an EOL XML.

The script downloads the partner's XML from $url and writes it unchanged to
CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml", then prints the elapsed time.
*/
include_once dirname(__FILE__) . "/../../config/environment.php";
$timestart = time_elapsed();
$resource_id = 69;
$url = 'http://www.tropicallichens.net/eolclient.aspx';
if ($xml_content = Functions::get_remote_file($url)) {
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    if (!($OUT = fopen($resource_path, "w"))) {
        // __CLASS__ is empty outside a class, so identify the script by file name instead.
        debug(basename(__FILE__) . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($OUT, $xml_content);
    fclose($OUT);
} else {
    // The original interpolated an undefined $i here; report the URL that returned nothing.
    print "\n no contents {$url}";
}
$elapsed_time_sec = time_elapsed() - $timestart;
echo "\n";
echo "elapsed time = " . $elapsed_time_sec . " seconds   \n";
echo "elapsed time = " . $elapsed_time_sec / 60 . " minutes   \n";
echo "elapsed time = " . $elapsed_time_sec / 60 / 60 . " hours  \n";
echo "\n\n Done processing.";
예제 #19
0
 /**
  * Downloads the WoRMS record for one id and returns its <taxon> element as text.
  *
  * The id is appended to WORMS_TAXON_API to form the request URL. On success
  * the time spent is added to $this->exec_time_in_seconds. On any failure the
  * id is appended (comma-terminated) to $GLOBALS['WORMS_bad_id'].
  *
  * @param mixed $id value appended to WORMS_TAXON_API
  * @return string|false the text from "<taxon>" through "</taxon>" inclusive, or false
  *                      when the download failed, the payload was rejected by
  *                      simplexml_load_string(), or either tag is missing
  */
 function process($id)
 {
     $timestart = time_elapsed();
     echo "\n start timer";
     $file = WORMS_TAXON_API . $id;
     echo "{$file}\n";
     if ($contents = Functions::get_remote_file($file, array('timeout' => 600, 'download_attempts' => 5))) {
         if (simplexml_load_string($contents)) {
             $pos1 = stripos($contents, "<taxon>");
             $pos2 = stripos($contents, "</taxon>");
             // stripos() returns false when nothing is found and 0 for a match at the
             // very start; the former loose comparison with "" confused the two on PHP < 8.
             if ($pos1 !== false && $pos2 !== false) {
                 // + 8 keeps the closing "</taxon>" tag (8 characters) in the slice
                 $contents = trim(substr($contents, $pos1, $pos2 - $pos1 + 8));
                 $elapsed_time_sec = time_elapsed() - $timestart;
                 echo "\n";
                 echo "elapsed time = " . $elapsed_time_sec . " seconds \n";
                 $this->exec_time_in_seconds += $elapsed_time_sec;
                 return $contents;
             }
         }
     }
     // Create the list on first use instead of silencing the notice with @.
     if (!isset($GLOBALS['WORMS_bad_id'])) {
         $GLOBALS['WORMS_bad_id'] = "";
     }
     $GLOBALS['WORMS_bad_id'] .= $id . ",";
     return false;
 }
 /**
  * Scrapes every fruit-facts page and returns one record per taxon.
  *
  * Each entry from get_fruit_paths() is expected to contain a double-quoted
  * relative path; that path is appended to $this->fruit_links["domain_fruit_facts"]
  * and downloaded. A record holds source_url, taxon, taxon_id, family, image,
  * rightsHolder plus one entry per $this->search_terms value found on the
  * page; values are newline-flattened, trimmed, and empty ones dropped.
  *
  * @return array|null records keyed by taxon name; null when there are no paths
  */
 private function process_fruit_facts()
 {
     $fruit_paths = self::get_fruit_paths();
     if (!$fruit_paths) {
         return;
     }
     $this->search_terms = self::get_search_terms();
     $records = array();
     $i = 0;
     foreach ($fruit_paths as $path) {
         $i++;
         // if($i >= 2) break; // debug
         // if($i != 42) continue; // debug
         $record = array();
         if (preg_match("/\"(.*?)\"/ims", $path, $arr2)) {
             // multiple taxa in a page
             if ($arr2[1] == "ff/edible-palms.html") {
                 continue;
             }
             // different HTML structure
             if ($arr2[1] == "ff/apricot_low_chill.html") {
                 continue;
             }
             $url = $this->fruit_links["domain_fruit_facts"] . $arr2[1];
             if ($html = Functions::get_remote_file($url, array('download_wait_time' => 3000000, 'timeout' => 240, 'download_attempts' => 2, 'delay_in_minutes' => 1))) {
                 $record["source_url"] = $url;
                 // manual adjustments: give these headings the trailing colon the others have
                 $html = str_replace("<b>Soils</b>", "<b>Soils:</b>", $html);
                 $html = str_replace("<b>Related species</b>", "<b>Related species:</b>", $html);
                 $html = str_replace("<b>Adaptation</b>", "<b>Adaptation:</b>", $html);
                 $html = str_replace("<b>Pruning</b>", "<b>Pruning:</b>", $html);
                 if (preg_match_all("/<h2><i>(.*?)<\\/i>/ims", $html, $arr3)) {
                     // first <h2><i> is taken as the taxon, the second (when present) as the family
                     $record["taxon"] = trim(strip_tags($arr3[1][0]));
                     $record["taxon_id"] = str_replace(" ", "_", $record["taxon"]);
                     if (isset($arr3[1][1])) {
                         $record["family"] = strip_tags($arr3[1][1]);
                     }
                 }
                 if (preg_match("/src=\"(.*?)\"/ims", $html, $arr3)) {
                     $record["image"] = $arr3[1];
                 }
                 if (preg_match("/&\\#169;(.*?)Questions or comments/ims", $html, $arr3)) {
                     $holder = "&#169; " . trim($arr3[1]);
                     $holder = strip_tags($holder, "<a>");
                     $holder = str_ireplace('href="/index.html"', 'href="' . $this->site_domain . 'index.html"', $holder);
                     $record["rightsHolder"] = strip_tags($holder);
                 }
                 // actual text descriptions
                 foreach ($this->search_terms as $key => $value) {
                     // possible end strings; the first one that yields a match wins
                     // NOTE(review): $key goes into the pattern unescaped - confirm search terms never contain regex metacharacters or "/"
                     $end_strings = array("<p>", "<h");
                     foreach ($end_strings as $end_string) {
                         if (isset($record[$value])) {
                             break;
                         }
                         if (preg_match("/{$key}<\\/b>(.*?){$end_string}/ims", $html, $arr3)) {
                             $record[$value] = $arr3[1];
                         }
                     }
                 }
                 foreach ($record as $key => $value) {
                     $record[$key] = str_ireplace(array("\n"), " ", $value);
                 }
                 $record = array_filter(array_map('trim', $record));
             }
         }
         print "\n count: " . count($record) . "\n";
         if (isset($record["taxon"])) {
             $records[$record["taxon"]] = $record;
         } elseif ($record) {
             // A page without a usable taxon heading used to be stored under an
             // undefined-index key; skip it and say so instead.
             print "\n no taxon found, record skipped: " . (isset($record["source_url"]) ? $record["source_url"] : "") . "\n";
         }
     }
     // end foreach
     return $records;
 }
예제 #21
0
 /**
  * Fetches a page, first through Functions::get_remote_file() and, when that
  * yields nothing, through self::curl_get_file_contents().
  *
  * @param string $url address of the page
  * @return mixed the downloaded content, or false when both attempts come back empty
  */
 private function get_html($url)
 {
     $download_options = array('download_wait_time' => 1000000, 'timeout' => 9600, 'download_attempts' => 2, 'delay_in_minutes' => 2);
     $html = Functions::get_remote_file($url, $download_options);
     if ($html) {
         return $html;
     }
     // primary downloader came back empty - second chance via curl
     $html = self::curl_get_file_contents($url);
     if (!$html) {
         return false;
     }
     echo "\n Got it using 'curl_get_file_contents()' \n";
     return $html;
 }
예제 #22
0
<?php

// The script stops here unconditionally: nothing below this line is executed.
exit;
/* AntWeb is now giving us a complete resource XML with <mediaURL> element for their image objects */
include_once dirname(__FILE__) . "/../../config/environment.php";
$mysqli =& $GLOBALS['mysqli_connection'];
// Download the AntWeb resource document and parse it as XML.
$file = trim(Functions::get_remote_file("http://antweb.org/getEOL.do"));
//$file = trim(Functions::get_remote_file("../../temp/ants.xml"));
//echo "$file";
$xml = simplexml_load_string($file);
$taxon_index = 0;
foreach ($xml->taxon as $taxon) {
    // $i is the 1-based position of the current dataObject within this taxon;
    // $label_index remembers the position of the last "_l_<n>_high.jpg" object seen (0 = none).
    $i = 0;
    $label_index = 0;
    foreach ($taxon->dataObject as $dataObject) {
        $i++;
        // children in the Dublin Core namespace
        $dataObject_dc = $dataObject->children("http://purl.org/dc/elements/1.1/");
        if ($identifier = @$dataObject_dc->identifier) {
            // only identifiers that begin with /images/ are handled
            if (preg_match("/^\\/images\\//", $identifier, $arr)) {
                // "_l_<digits>_high.jpg" objects get no mediaURL; only their position is recorded
                if (preg_match("/_l_[0-9]{1,}_high\\.jpg/", $identifier)) {
                    $label_index = $i;
                    continue;
                }
                //echo "$identifier\n";
                // build an absolute URL on www.antweb.org, encoding spaces as %20
                $dataObject->addChild("mediaURL", "http://www.antweb.org" . str_replace(" ", "%20", $identifier));
            }
        }
    }
    if ($label_index) {
        // convert the 1-based position to a 0-based index
        $label_index -= 1;
        //echo "Unsetting taxon[$taxon_index]->dataObject[$label_index]\n";
예제 #23
0
 /**
  * Builds $this->dataset_metadata from the XML files in the resource
  * archive's "dataset" directory and adds citations scraped from the
  * Catalogue of Life "cite" page.
  *
  * Each entry is keyed by the dataset's abbreviated name and holds title,
  * editors, abbreviatedName and datasetID. Files named <digits>.xml are also
  * reachable under that numeric id (same entry, bound by reference).
  * Nothing is collected unless dataset/col.xml exists.
  *
  * Terminates the script (exit) when no citation could be attached to the
  * "Catalogue of Life" or "FishBase" entries.
  */
 private function collect_dataset_attribution()
 {
     $this->dataset_metadata = array();
     $dataset_dir = $this->harvest_event->resource->archive_path() . "dataset";
     if (!is_dir($dataset_dir) || !file_exists($dataset_dir . "/col.xml")) {
         return;
     }
     // glob() returns false on error; treat that as "no files"
     $filenames = glob($dataset_dir . "/*") ?: array();
     foreach ($filenames as $filename) {
         // Reset per file: names such as col.xml do not carry a numeric id and
         // used to inherit the id of whichever file was handled just before.
         $dataset_id = null;
         if (preg_match("/\\/([0-9]+)\\.xml\$/", $filename, $arr)) {
             $dataset_id = $arr[1];
         }
         $xml = simplexml_load_file($filename);
         if ($xml === false) {
             echo "Could not parse dataset file {$filename}\n";
             continue;
         }
         $title = trim($xml->dataset->title);
         if (preg_match("/^(.*) in the Catalogue of Life/", $title, $arr)) {
             $title = trim($arr[1]);
         }
         $title = str_replace("  ", " ", $title);
         $editors = trim($xml->additionalMetadata->metadata->sourceDatabase->authorsAndEditors);
         // drop boilerplate tails from the editors string
         if (preg_match("/^(.*)\\. For a full list/", $editors, $arr)) {
             $editors = trim($arr[1]);
         }
         if (preg_match("/^(.*); for detailed information/", $editors, $arr)) {
             $editors = trim($arr[1]);
         }
         $editors = str_replace("  ", " ", $editors);
         $abbreviatedName = trim($xml->additionalMetadata->metadata->sourceDatabase->abbreviatedName);
         $this->dataset_metadata[$abbreviatedName]['title'] = $title;
         $this->dataset_metadata[$abbreviatedName]['editors'] = $editors;
         $this->dataset_metadata[$abbreviatedName]['abbreviatedName'] = $abbreviatedName;
         $this->dataset_metadata[$abbreviatedName]['datasetID'] = $dataset_id;
         if ($dataset_id !== null) {
             // alias, not a copy: the citation added below is visible through both keys
             $this->dataset_metadata[$dataset_id] =& $this->dataset_metadata[$abbreviatedName];
         }
     }
     // now go grab the citation information from the COL website
     $url = "http://www.catalogueoflife.org/col/info/cite";
     $options_for_log_harvest = array('resource_id' => $this->harvest_event->resource->id);
     $html = Functions::get_remote_file($url, $options_for_log_harvest);
     preg_match_all("/<p><strong>(.*?)<\\/strong><br\\/>(.*?)<\\/p>/ims", $html, $matches, PREG_SET_ORDER);
     foreach ($matches as $match) {
         $dataset_name = $match[1];
         if (preg_match("/^(.*) via ITIS/", $dataset_name, $arr)) {
             $dataset_name = trim($arr[1]);
         }
         $citation = $match[2];
         if (isset($this->dataset_metadata[$dataset_name])) {
             $this->dataset_metadata[$dataset_name]['citation'] = $citation;
         } elseif ($dataset_name == "Species 2000 Common Names" && isset($this->dataset_metadata["Catalogue of Life"])) {
             // this citation is filed under the Catalogue of Life entry
             $this->dataset_metadata["Catalogue of Life"]['citation'] = $citation;
         }
     }
     if (!isset($this->dataset_metadata["Catalogue of Life"]['citation']) || !isset($this->dataset_metadata["FishBase"]['citation'])) {
         echo "Tried getting attribution for Catalogue of Life datasets, but there was a problem\n";
         write_to_resource_harvesting_log("Tried getting attribution for Catalogue of Life datasets, but there was a problem");
         exit;
     }
 }
 /**
  * Scrapes the page of every taxon in $GLOBALS['taxon'] that has neither a
  * 'sciname' nor 'texts' yet, and fills in that taxon's sciname, comnames,
  * agents, ancestry/family, texts and taxon_ref entries.
  *
  * The page is first marked up with "<xxx>" sentinels in front of every
  * H1-H4 heading so that each section can be captured with a lazy regex up
  * to the next sentinel.
  *
  * @param string|null $type       "hardwoods" or "conifers" enable page-specific clean-up rules;
  *                                "hardwoods" additionally splits H2 and H4 sections
  * @param mixed       $taxon_name unused: the loop key overwrites it. It now has a default
  *                                because a required parameter after an optional one is
  *                                deprecated since PHP 8.0; existing two-argument calls are unaffected.
  * @return false|null false as soon as a captured section lacks its closing heading tag
  *                    (remaining taxa are then not processed); nothing otherwise
  */
 function get_title_description($type = null, $taxon_name = null)
 {
     foreach ($GLOBALS['taxon'] as $taxon_name => $value) {
         // already filled in
         if (@$GLOBALS['taxon'][$taxon_name]['sciname'] || @$GLOBALS['taxon'][$taxon_name]['texts']) {
             continue;
         }
         sleep(5);
         //debug
         $url = $value['html'];
         if ($this->debug_info) {
             print "\n\n {$url} -- {$taxon_name}";
         }
         // up to four download attempts, 30 seconds apart
         $trials = 1;
         $success = 0;
         while ($success == 0 && $trials < 5) {
             if ($html = Functions::get_remote_file($url)) {
                 $success = 1;
             } else {
                 $trials++;
                 print "\n Down: {$url}";
                 print "\n Will wait for 30 seconds and will try again. Trial #" . $trials;
                 sleep(30);
             }
         }
         if ($trials >= 5) {
             print "\n Will skip to the next species after {$trials} unsuccessful trials";
             continue;
         }
         // scientific name: FONT SIZE +3 heading, else +2
         if (preg_match("/<FONT SIZE=\"\\+3\">(.*?)<\\/FONT>/ims", $html, $arr)) {
             $GLOBALS['taxon'][$taxon_name]['sciname'] = self::clean_str(strip_tags($arr[1]));
         } elseif (preg_match("/<FONT SIZE=\"\\+2\">(.*?)<\\/FONT>/ims", $html, $arr)) {
             $GLOBALS['taxon'][$taxon_name]['sciname'] = self::clean_str(strip_tags($arr[1]));
         }
         // Neither heading matched: make the entry exist so the reads below do not hit an undefined index.
         if (!isset($GLOBALS['taxon'][$taxon_name]['sciname'])) {
             $GLOBALS['taxon'][$taxon_name]['sciname'] = "";
         }
         $GLOBALS['taxon'][$taxon_name]['sciname'] = str_ireplace('&amp;', '&', $GLOBALS['taxon'][$taxon_name]['sciname']);
         // common name: FONT SIZE +4 heading
         if (preg_match("/<FONT SIZE=\"\\+4\">(.*?)<\\/FONT>/ims", $html, $arr)) {
             $GLOBALS['taxon'][$taxon_name]['comnames'][] = self::clean_str($arr[1]);
         }
         // manual adjustment: two names are replaced and their common names cleared
         if ($GLOBALS['taxon'][$taxon_name]['sciname'] == "Didymopanax morototoni (Aubl.) Decne. & Planch.") {
             $GLOBALS['taxon'][$taxon_name]['sciname'] = "Schefflera morototoni";
             $GLOBALS['taxon'][$taxon_name]['comnames'] = array();
         } elseif ($GLOBALS['taxon'][$taxon_name]['sciname'] == "Chamaecyparis nootkatensis (D. Don)  Spach") {
             $GLOBALS['taxon'][$taxon_name]['sciname'] = "Cupressus nootkatensis";
             $GLOBALS['taxon'][$taxon_name]['comnames'] = array();
         }
         // Drop empty H2 headings, then put an "<xxx>" sentinel before every heading.
         // The original notes mark the empty-H2, H1 and H4 rules as needed only for
         // hardwoods pages, but all of them are applied to every page.
         $html = str_ireplace('<H2></H2>', '', $html);
         $html = str_ireplace('<H3>', '<xxx><H3>', $html);
         $html = str_ireplace('<H2>', '<xxx><H2>', $html);
         $html = str_ireplace('<H1>', '<xxx><H1>', $html);
         $html = str_ireplace('<H4>', '<xxx><H4>', $html);
         if ($type == "hardwoods") {
             // only for hardwoods
             $html = str_ireplace('<P><FONT><B></B></FONT></P>', '', $html);
             $html = str_ireplace('<P><FONT><B>', '<P><B>', $html);
             $html = str_ireplace('</B></FONT></P>', '</B></P>', $html);
             $html = str_ireplace('Damaging Agents-Robusta </B>', 'Damaging Agents-</B> Robusta ', $html);
             $html = str_ireplace('Growth and Yield-Black </B>', 'Growth and Yield-</B> Black ', $html);
             $html = str_ireplace('Rooting Habit-Aigeiros-</B>', 'Rooting Habit-</B> Aigeiros- ', $html);
             $html = str_ireplace('Growth and Yield-Bitternut </B>', 'Growth and Yield-</B> Bitternut ', $html);
         } elseif ($type == "conifers") {
             // only for conifers
             $html = str_ireplace('<P><B>Vegetative Reproduction-White-cedar </B>', '<P><B>Vegetative Reproduction-</B> White-cedar ', $html);
             $html = str_ireplace('<P><B>Seedling Development-Germination </B>', '<P><B>Seedling Development-</B> Germination ', $html);
         }
         $html = str_ireplace('<P><B><FONT SIZE="+1">Native Range</FONT></B></P>', '<H3>Native Range</H3>', $html);
         // manual adjustment - hardwoods
         $html = str_ireplace('<H2>Native Range</H2>', '<H3>Native Range</H3>', $html);
         // to properly get 'brief summary'
         $html = str_ireplace(array("<H1></H1>", "<H2></H2>"), "", trim($html));
         $texts = array();
         // brief summary - start ---------------------
         $brief_summary = "";
         if ($type == "hardwoods") {
             if (preg_match("/<\\/FONT><\\/H1>(.*?)<H/ims", $html, $match)) {
                 $brief_summary = $match[1];
             }
             if ($brief_summary == "") {
                 if (preg_match("/<\\/B><\\/P>(.*?)<H/ims", $html, $match)) {
                     $brief_summary = trim($match[1]);
                 }
             }
         } else {
             if (preg_match("/<\\/B><\\/P>(.*?)<H/ims", $html, $match)) {
                 $brief_summary = trim($match[1]);
             }
         }
         $brief_summary_with_all_tags = str_ireplace('<xxx>', '', $brief_summary);
         $brief_summary = strip_tags($brief_summary_with_all_tags, "<p><i>");
         $brief_summary = str_ireplace('ALIGN="CENTER"', '', $brief_summary);
         if ($brief_summary) {
             $texts[] = array("title" => "brief summary", "description" => $brief_summary);
         }
         // agents and family are read from the summary with its original tags intact
         $agents_and_family_info = self::get_agents_and_family($brief_summary_with_all_tags, trim($url));
         $GLOBALS['taxon'][$taxon_name]['agents'] = $agents_and_family_info['agents'];
         $GLOBALS['taxon'][$taxon_name]['ancestry']['family'] = $agents_and_family_info['family'];
         // brief summary - end ---------------------
         // get "<H2>Special Uses</H2>" and "<H2>Genetics</H2>" independently
         $special_uses = "";
         $genetics = "";
         if (preg_match("/<H2>Special Uses<\\/H2>(.*?)<xxx>/ims", $html, $match) || preg_match("/<H1>Special Uses<\\/H1>(.*?)<xxx>/ims", $html, $match)) {
             $special_uses = $match[1];
         }
         if (preg_match("/<H2>Genetics<\\/H2>(.*?)<xxx>/ims", $html, $match) || preg_match("/<H1>Genetics<\\/H1>(.*?)<xxx>/ims", $html, $match)) {
             $genetics = trim($match[1]);
         }
         if ($genetics == "") {
             //http://www.na.fs.fed.us/spfo/pubs/silvics_manual/Volume_1/larix/occidentalis.htm
             if (preg_match("/<H2>Genetics<\\/H2>(.*?)<H2>Literature Cited/ims", $html, $match)) {
                 $genetics = strip_tags(trim($match[1]), "<P><I>");
             }
         }
         if (preg_match("/<H3>Native Range<\\/H3>(.*?)<xxx>/ims", $html, $match) || preg_match("/Native Range<\\/FONT><\\/H4>(.*?)<xxx>/ims", $html, $match) || preg_match("/Range<\\/FONT><\\/H4>(.*?)<xxx>/ims", $html, $match)) {
             $native_range = $match[1];
             if (preg_match("/<IMG SRC\\=\"(.*?)\"/ims", $match[1], $map)) {
                 // the image path is resolved against the directory of the page URL
                 $path_parts = pathinfo($url);
                 $map_url = $path_parts['dirname'] . "/" . $map[1];
                 $texts[] = array("title" => "maps tab", "description" => $map_url);
                 $native_range = str_ireplace($map[1], $map_url, $native_range);
             }
             $texts[] = array("title" => "Native Range", "description" => $native_range);
         }
         if (preg_match("/<H3>Climate<\\/H3>(.*?)<xxx>/ims", $html, $match) || preg_match("/<H2>Climate<\\/H2>(.*?)<xxx>/ims", $html, $match) || preg_match("/Climate<\\/FONT><\\/H4>(.*?)<xxx>/ims", $html, $match)) {
             $texts[] = array("title" => "Climate", "description" => $match[1]);
         }
         if (preg_match("/<H3>Soils and Topography<\\/H3>(.*?)<xxx>/ims", $html, $match) || preg_match("/<H2>Soils and Topography<\\/H2>(.*?)<xxx>/ims", $html, $match) || preg_match("/Soils and Topography<\\/FONT><\\/H4>(.*?)<xxx>/ims", $html, $match)) {
             $texts[] = array("title" => "Soils and Topography", "description" => $match[1]);
         }
         if (preg_match("/<H3>Associated Forest Cover<\\/H3>(.*?)<xxx>/ims", $html, $match) || preg_match("/<H2>Associated Forest Cover<\\/H2>(.*?)<xxx>/ims", $html, $match) || preg_match("/Associated Forest Cover<\\/FONT><\\/H4>(.*?)<xxx>/ims", $html, $match)) {
             $texts[] = array("title" => "Associated Forest Cover", "description" => $match[1]);
         }
         // Split every heading section into individual texts. H3 sections are handled
         // for all pages; hardwoods pages also use H2 (e.g. Acer macrophyllum) and
         // H4 (e.g. Acer nigrum). The value is the marker printed when a section is malformed.
         $heading_passes = array("H3" => "111");
         if ($type == "hardwoods") {
             $heading_passes["H2"] = "222";
             $heading_passes["H4"] = "333";
         }
         foreach ($heading_passes as $tag => $marker) {
             if (!preg_match_all("/<{$tag}>(.*?)<xxx>/ims", $html, $matches)) {
                 continue;
             }
             foreach ($matches[1] as $match) {
                 // everything after the closing heading tag is the section body
                 if (preg_match("/<\\/{$tag}>(.*?)eee/ims", $match . "eee", $arr)) {
                     $description = trim($arr[1]);
                     $texts = self::divide_whole_text_to_texts($description, $texts);
                 } else {
                     // NOTE(review): this aborts the whole run, not just the current taxon - confirm that is intended
                     echo "\n {$marker} walang text within texts...\n";
                     return false;
                 }
             }
         }
         if ($genetics) {
             $texts[] = array("title" => "Genetics", "description" => $genetics);
         }
         if ($special_uses) {
             $texts[] = array("title" => "Special Uses", "description" => $special_uses);
         }
         $GLOBALS['taxon'][$taxon_name]['texts'] = $texts;
         // references: everything between the "Literature Cited" heading and </BODY>;
         // the heading markup varies between pages, so try each known form in turn
         $html = str_ireplace("Literature Cited </H2>", "Literature Cited</H2>", $html);
         $reference_patterns = array(
             "/Literature Cited<\\/H2>(.*?)<\\/BODY>/ims",
             "/Literature Cited<\\/H1>(.*?)<\\/BODY>/ims",
             "/Literature Cited<\\/H3>(.*?)<\\/BODY>/ims",
             "/Literature Cited<\\/H4>(.*?)<\\/BODY>/ims",
             "/Literature Cited<\\/FONT><\\/H4>(.*?)<\\/BODY>/ims",
             "/<\\/B>Literature Cited(.*?)<\\/BODY>/ims",
         );
         foreach ($reference_patterns as $pattern) {
             if (preg_match($pattern, $html, $match)) {
                 $GLOBALS['taxon'][$taxon_name]['taxon_ref'] = $match[1];
                 break;
             }
         }
     }
 }
 /**
  * Downloads the page of every taxon in $GLOBALS['taxon'] that has neither an
  * 'association' nor a 'gendesc' value and stores the text found between
  * "<big>" and "</td>" as that taxon's association.
  *
  * The taxon's 'html' entry is overwritten with the full page URL. When the
  * download fails, the association is set to 'no object'. A bracketed string
  * inside the association text, if any, is stored as the taxon's family.
  *
  * @param string|null $type when 'insects', "/insects/" is replaced by "/" in the page URL
  */
 function get_title_description($type = null)
 {
     $download_options = array('download_wait_time' => 1000000, 'timeout' => 600, 'download_attempts' => 5);
     foreach ($GLOBALS['taxon'] as $taxon_name => $value) {
         // if($taxon_name != "Hylaeus affinis") continue; //debug
         $already_described = @$value['association'] != "" || @$value['gendesc'] != "";
         if ($already_described) {
             continue;
         }

         $page_url = $this->path . '/insects/' . $value['html'];
         $page_url = $type == 'insects' ? str_ireplace("/insects/", "/", $page_url) : $page_url;
         $GLOBALS['taxon'][$taxon_name]['html'] = $page_url;
         if ($this->debug_info) {
             echo "\n {$page_url} -- {$taxon_name}";
         }

         $html = Functions::get_remote_file($page_url, $download_options);
         if (!$html) {
             echo "\n\n Content partner's server is down4, {$page_url}\n";
             $GLOBALS['taxon'][$taxon_name]['association'] = 'no object';
             continue;
         }

         if (!preg_match("/<big>(.*?)<\\/td>/ims", $html, $match)) {
             continue;
         }
         // clean, keep only <BR> and <I>, clean again, then convert to UTF-8
         $association = self::clean_str($match[1]);
         $association = strip_tags($association, "<BR><I>");
         $association = utf8_encode(self::clean_str($association));
         $GLOBALS['taxon'][$taxon_name]['association'] = $association;
         $GLOBALS['taxon'][$taxon_name]['association_title'] = "Plant-Feeding Insects of <i>{$taxon_name}</i> in Illinois";
         // first [bracketed] string in the text is recorded as the family
         if (preg_match("/\\[(.*?)\\]/ims", $association, $string_match)) {
             $GLOBALS['taxon'][$taxon_name]['ancestry']['family'] = $string_match[1];
         }
     }
 }
예제 #26
0
 /**
  * Scrapes one map page and returns what could be extracted from it.
  *
  * Possible keys of the result: vernacular (first <b>), sciname (first <i>),
  * map (image URL prefixed with $this->sina_domain), computer_gen_map,
  * caption, as_of and link_back (always the requested URL).
  *
  * @param string $url address of the map page
  * @return array the record; only link_back when the download failed; an empty
  *               array when the page was downloaded but no map image was found
  */
 private function get_map_data($url)
 {
     $rec = array();
     if ($html = Functions::get_remote_file($url, array('timeout' => 999999, 'download_attempts' => 2, 'delay_in_minutes' => 2))) {
         // manual adjustment
         if ($url == "http://entnemdept.ufl.edu/walker/buzz/334m.htm") {
             $html = str_ireplace('<div align="center">', '</div><div align="center">', $html);
         }
         if (preg_match("/<b>(.*?)<\\/b>/ims", $html, $arr)) {
             $rec["vernacular"] = strip_tags($arr[1]);
         } else {
             echo "\n investigate no vernacular [{$url}]";
         }
         if (preg_match("/<i>(.*?)<\\/i>/ims", $html, $arr)) {
             $rec["sciname"] = strip_tags($arr[1]);
         } else {
             echo "\n investigate no sciname [{$url}]";
         }
         if (preg_match_all("/<div align=\"center\">(.*?)<\\/div>/ims", $html, $arr)) {
             $temp = $arr[1];
             // Prefer the second centered <div>, else the first. Start from an empty
             // caption: both captures can be empty, which used to leave $caption undefined.
             $caption = "";
             if (!empty($temp[1])) {
                 $caption = $temp[1];
             } elseif (!empty($temp[0])) {
                 $caption = $temp[0];
             }
             //http://entnemdept.ufl.edu/walker/buzz/302m.htm
             if (preg_match("/<img src=\"(.*?)\"/ims", $caption, $arr)) {
                 $map_image = $arr[1];
                 $rec["map"] = $this->sina_domain . $map_image;
             } else {
                 if ($map_image = self::get_map_image_retry($html)) {
                     $rec["map"] = $this->sina_domain . $map_image;
                     echo "\n retry successfull\n";
                 } else {
                     echo "\n investigate no map image [{$url}]\n";
                     echo "\n investigate retry still no map 1 [{$url}]\n";
                     return array();
                 }
             }
             // normalise the caption: keep only <br>/<a>, drop line breaks and tabs, drop a leading <br>
             $caption = trim(strip_tags($caption, "<br><a>"));
             $caption = str_ireplace(array("\n", chr(13), chr(10), "\t"), "", $caption);
             if (substr($caption, 0, 4) == "<br>") {
                 $caption = trim(substr($caption, 4, strlen($caption)));
             }
             $caption = str_ireplace(array("<br>  "), "<br>", $caption);
             $caption = str_ireplace('"> Computer-generated', '">Computer-generated', $caption);
             if (preg_match("/<a href=\"(.*?)\">Computer-generated/ims", $caption, $arr)) {
                 $rec["computer_gen_map"] = $this->sina_domain . $arr[1];
             } elseif (preg_match("/<a href=\"(.*?)\">  Computer-generated/ims", $caption, $arr)) {
                 $rec["computer_gen_map"] = $this->sina_domain . $arr[1];
             } elseif (preg_match("/<a href=\"(.*?)\">County-level distribution map/ims", $caption, $arr)) {
                 $rec["computer_gen_map"] = $this->sina_domain . $arr[1];
             }
             // else echo "\n investigate no computer gen map [$url]\n"; acceptable case
             //further check for 'computer_gen_map' e.g. http://entnemdept.ufl.edu/walker/buzz/123m.htm or 318m.htm
             // The lazy capture above can swallow earlier links; while the value still
             // contains an <a href=", keep only what follows it (at most 11 rounds).
             if (isset($rec["computer_gen_map"]) && stripos($rec["computer_gen_map"], "href=") !== false) {
                 for ($x = 0; $x <= 10; $x++) {
                     if (preg_match("/<a href=\"(.*?)xxx/ims", $rec["computer_gen_map"] . "xxx", $arr)) {
                         $rec["computer_gen_map"] = $this->sina_domain . $arr[1];
                     } else {
                         break;
                     }
                 }
             }
             // make the caption's links absolute
             $caption = str_ireplace('href="', 'href="' . $this->sina_domain, $caption);
             $caption = str_ireplace('Computer-generated distribution map', '<br>See also this computer-generated U.S. distribution map', $caption);
             $rec["caption"] = $caption;
             echo "\n caption: [{$caption}]\n";
             $rec["as_of"] = self::get_as_of_date($caption);
         } else {
             // e.g. http://entnemdept.ufl.edu/walker/buzz/401m.htm
             echo "\n investigate no <div> [{$url}]\n";
             if ($map_image = self::get_map_image_retry($html)) {
                 $rec["map"] = $this->sina_domain . $map_image;
                 // caption = text of the first <p> up to its first period
                 if (preg_match("/<p>(.*?)\\./ims", $html, $arr)) {
                     $caption = strip_tags($arr[1]) . ".";
                     $rec["caption"] = $caption;
                     $rec["as_of"] = self::get_as_of_date($caption);
                 }
                 echo "\n retry successfull\n";
             } else {
                 echo "\n investigate retry still no map 2 [{$url}]\n";
                 return array();
             }
         }
     } else {
         echo "\n investigate 03 [{$url}]";
     }
     $rec["link_back"] = $url;
     return $rec;
 }
예제 #27
0
/**
 * Scrapes everything this connector collects for one species entry.
 *
 * Starting from an HTML fragment that contains the species anchor, the function
 * rebuilds the absolute URL of the species "main menu" page, downloads that page,
 * and then follows the links found on it (images, classification, strict synonymy,
 * references, common names, skeletons, biological associations, nematocysts).
 *
 * Reads the globals $wrap (prefix printed in front of every progress message) and
 * $site_url (prepended to every relative link found on the site).
 *
 * NOTE(review): parse_html() is defined elsewhere. It is used here as if it returns
 * the text found after the start marker and before the end marker, or an empty
 * string when the marker is absent -- confirm against its definition.
 *
 * @param string $str HTML fragment containing an anchor to speciesdetail.cfm or
 *                    speciesdetail_for_nosyn.cfm
 * @return array 12 positional elements, in this order:
 *               taxa, url_for_main_menu, arr_classification, arr_images,
 *               html_skeletons, url_for_skeletons, html_biological_associations,
 *               url_for_biological_associations, arr_common_names, arr_references,
 *               html_nematocysts, url_for_nematocysts
 */
function parse_contents($str)
{
    global $wrap;
    global $site_url;
    /* $str can be:
       <a href="speciesdetail.cfm?genus=Abyssopathes&subgenus=&species=lyra&subspecies=&synseniorid=9266&validspecies=Abyssopathes%20lyra&authorship=%28Brook%2C%201889%29">Abyssopathes lyra (Brook, 1889)</a>
       or
       <a href="speciesdetail_for_nosyn.cfm?species=dentata&genus=Sandalolitha&subgenus=&subspecies=">Sandalolitha dentata Quelch, 1884</a>
       */
    $beg = 'speciesdetail.cfm?';
    $end1 = '</a>';
    $temp = trim(parse_html($str, $beg, $end1, $end1, $end1, $end1, ""));
    if ($temp == '') {
        // fall back to the "no synonym" variant of the species page
        $beg = 'speciesdetail_for_nosyn.cfm?';
        $end1 = '</a>';
        $temp = trim(parse_html($str, $beg, $end1, $end1, $end1, $end1, ""));
    }
    // rebuild a full anchor with an absolute href so both the URL and the name can be cut out of it
    $temp = '<a href="' . $site_url . '' . $beg . $temp . "</a>";

    // url of the species main menu page: text between '="' and '">'
    $beg = '="';
    $end1 = '">';
    $url_for_main_menu = trim(parse_html($temp, $beg, $end1, $end1, $end1, $end1, ""));

    // scientific name: text between '">' and '</a>'
    $beg = '">';
    $end1 = '</a>';
    $taxa = trim(parse_html($temp, $beg, $end1, $end1, $end1, $end1, ""));
    print "{$wrap} taxa[{$taxa}]";

    $main_menu = Functions::get_remote_file($url_for_main_menu);

    // ---- images ----
    //"images.cfm?&genus=Abyssopathes&subgenus=&species=lyra&subspecies=&seniorid=9266&validspecies=Abyssopathes%20lyra&authorship=%28Brook%2C%201889%29">Images</a>
    $beg = 'images.cfm';
    $end1 = '">';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    $arr_images = array();
    if ($temp != "") {
        $url_for_images_page = $site_url . $beg . $temp;
        $arr_images = get_images($url_for_images_page);
    } else {
        print "{$wrap} no images";
    }

    // ---- classification ----
    // Initialised up front so the return statement never reads an undefined
    // variable when the page has no classification link.
    $arr_classification = array();
    //"showclassification2.cfm?synseniorid=2914&genus=Aiptasiogeton&subgenus=&species=eruptaurantia&subspecies=&origgenus=Actinothoe&origspecies=eruptaurantia&origsubspecies=&origsubgenus=&&validspecies=Aiptasiogeton%20eruptaurantia&authorship=%28Field%2C%201949%29">Classification</a>
    $beg = 'showclassification2.cfm';
    $end1 = '">';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    if ($temp == "") {
        //http://hercules.kgs.ku.edu/hexacoral/anemone2/classification_path_no_syn.cfm?genus=Astr%C3%A6a&subgenus=&species=abdita&subspecies=
        $beg = 'classification_path_no_syn.cfm';
        $end1 = '">';
        $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    }
    if ($temp != "") {
        $url_for_classification = $site_url . $beg . $temp;
        $arr_classification = get_tabular_data($url_for_classification, "classification");
        if ($arr_classification) {
            $arr_classification = parse_classification($arr_classification);
        }
    } else {
        print "{$wrap} no classification";
    }

    // ---- strict synonymy ----
    //"synonymy_strict.cfm?seniorid=2914&validspecies=Aiptasiogeton%20eruptaurantia&authorship=%28Field%2C%201949%29">Strict synonymy</a>
    $beg = 'synonymy_strict.cfm';
    $end1 = '">';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    if ($temp != "") {
        $url_for_strict_synonymy = $site_url . $beg . $temp;
        // The result is not part of the returned array; the call is kept so that
        // whatever get_tabular_data() does for this page still happens.
        $arr_synonyms = get_tabular_data($url_for_strict_synonymy, "synonyms");
    } else {
        print "{$wrap} no strict_synonymy";
    }

    // ---- references ----
    //"all_mentions_of_names2.cfm?species...
    $beg = 'all_mentions_of_names.cfm';
    $end1 = '">';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    if ($temp == "") {
        $beg = 'all_mentions_of_names2.cfm';
        $end1 = '">';
        $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    }
    $arr_references = array();
    if ($temp != "") {
        $url_for_references = $site_url . $beg . $temp;
        $arr_references = get_tabular_data($url_for_references, "references");
        // Join the cells of every row with "." and use the joined text as an
        // array key, so duplicate references collapse into one entry.
        $arr = array();
        foreach ($arr_references as $value) {
            $temp = "";
            foreach ($value as $item) {
                $temp .= "." . $item;
            }
            // drop the leading '.' added by the loop above
            $temp = trim(substr($temp, 1, strlen($temp)));
            // make reference detail links absolute
            //<a href="reference_detail.cfm?ref_number=58&type=Article">
            $temp = str_ireplace("reference_detail.cfm", $site_url . "reference_detail.cfm", $temp);
            $arr["{$temp}"] = 1;
        }
        $arr_references = array_keys($arr);
    } else {
        print "{$wrap} no references";
    }

    // ---- common names ----
    //"common.cfm?seniorid=2914&validspecies=Aiptasiogeton%20eruptaurantia&authorship=%28Field%2C%201949%29">Strict synonymy</a>
    $beg = 'common.cfm';
    $end1 = '">';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    $arr_common_names = array();
    if ($temp != "") {
        $url_for_common_names = $site_url . $beg . $temp;
        $arr_common_names = get_tabular_data($url_for_common_names, "common_names");
        // De-duplicate on the first cell of each row; case is deliberately left
        // untouched (lower-casing was rejected because of special characters).
        $arr = array();
        foreach ($arr_common_names as $value) {
            $temp = $value[0];
            $temp = trim(get_str_from_anchor_tag($temp));
            $arr["{$temp}"] = 1;
        }
        $arr_common_names = array_keys($arr);
    } else {
        print "{$wrap} no common_names";
    }

    // ---- skeletons ----
    // e.g. for species (Favites abdita) with skeleton
    //http://hercules.kgs.ku.edu/hexacoral/anemone2/skeleton.cfm?genus=Favites&subgenus=&species=abdita&subspecies=
    $url_for_skeletons = "";
    $beg = 'skeleton.cfm';
    $end1 = '">';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    $html_skeletons = "";
    if ($temp != "") {
        $url_for_skeletons = $site_url . $beg . $temp;
        $arr_skeletons = get_tabular_data($url_for_skeletons, "skeletons");
        if ($arr_skeletons) {
            $arr_fields = array("Author", "Skeleton?", "Mineral or Organic?", "Mineral", "Percent Magnesium");
            $html_skeletons = arr2html($arr_skeletons, $arr_fields, $url_for_main_menu);
            $html_skeletons = "<div style='font-size : small;'>{$html_skeletons}</div>";
        }
    } else {
        print "{$wrap} no skeletons";
    }

    // ---- biological associations ----
    $url_for_biological_associations = "";
    $beg = 'symbiont_info.cfm';
    $end1 = '">';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    $html_biological_associations = "";
    if ($temp != "") {
        $url_for_biological_associations = $site_url . $beg . $temp;
        $arr_biological_associations = get_tabular_data($url_for_biological_associations, "biological_associations");
        $arr_fields = array("Algal symbionts");
        $html_biological_associations = arr2html($arr_biological_associations, $arr_fields, $url_for_main_menu);
        $html_biological_associations = "<div style='font-size : small;'>{$html_biological_associations}</div>";
    } else {
        print "{$wrap} no biological_associations";
    }

    // ---- nematocysts ----
    $url_for_nematocysts = "";
    $beg = 'cnidae_information.cfm';
    $end1 = '">';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    $html_nematocysts = "";
    if ($temp != "") {
        $url_for_nematocysts = $site_url . $beg . $temp;
        $arr_nematocysts = get_tabular_data($url_for_nematocysts, "nematocysts");
        $arr_fields = array("Location", "Image", "Cnidae Type", "Range of <br> Lengths (m)", " ", "Range of <br >Widths (m)", "n", "N", "State");
        $html_nematocysts = arr2html($arr_nematocysts, $arr_fields, $url_for_main_menu);
        $html_nematocysts = "<div style='font-size : small;'>{$html_nematocysts}</div>";
        // make the first data cell after the header row span all 9 columns
        $html_nematocysts = str_ireplace("</th></tr><tr><td>", "</th></tr><tr><td colspan='9'>", $html_nematocysts);
    } else {
        print "{$wrap} no nematocysts";
    }

    // Specimens (all_specimens_xml.cfm) are intentionally not scraped here.

    return array($taxa, $url_for_main_menu, $arr_classification, $arr_images, $html_skeletons, $url_for_skeletons, $html_biological_associations, $url_for_biological_associations, $arr_common_names, $arr_references, $html_nematocysts, $url_for_nematocysts);
}
예제 #28
0
        foreach ($resource->attributes() as $a => $b) {
            $attributes[$a] = $b;
        }
        $file_names[trim($attributes["name"])] = 1;
    }
}
$start = false;
$i = 0;
krsort($file_names);
foreach ($file_names as $file_name => $v) {
    $i++;
    //if($file_name!="5959_tx.xml") continue;
    //if($file_name!="2006_Huveneers_gg1_tx.xml") continue;
    echo "{$file_name}<br>\n";
    $url = $prefix . $file_name;
    $file_contents = Functions::get_remote_file($url);
    if (!$file_contents) {
        echo "downloading failed\n";
    }
    $file_contents = str_replace("<xhtml:p xmlns:xhtml=\"http://www.w3.org/1999/xhtml\">", htmlspecialchars("<p>"), $file_contents);
    $file_contents = str_replace("</xhtml:p>", htmlspecialchars("</p>"), $file_contents);
    if (!($OUT = fopen($download_cache_path, "w+"))) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $download_cache_path);
        return;
    }
    fwrite($OUT, $file_contents);
    fclose($OUT);
    if (filesize($download_cache_path)) {
        clearstatcache();
        echo "{$file_name} - " . filesize($download_cache_path) . "<br>\n";
        echo "<hr>Parsing Document {$file_name}<hr>\n";
예제 #29
0
 /**
  * Builds an HTML summary of the "Current Listing Status Summary" table found on
  * a species profile page.
  *
  * The page is fetched from self::SPECIES_PROFILE_PAGE with $taxon_id appended.
  * Every table row whose class is displaytagOddRow/displaytagEvenRow contributes
  * four labelled lines (status, date listed, lead region, where listed).
  *
  * @param mixed $taxon_id value appended to self::SPECIES_PROFILE_PAGE to form the URL
  * @return string|null the HTML summary, or null when the page cannot be
  *                     downloaded, the summary table is absent, or no row
  *                     produced any text
  */
 private function get_species_info_from_site($taxon_id)
 {
     if (!($html = Functions::get_remote_file(self::SPECIES_PROFILE_PAGE . $taxon_id, array('download_wait_time' => 5000000, 'timeout' => 20000, 'download_attempts' => 2)))) {
         echo "\n investigate taxon page down: [{$taxon_id}]\n";
         return;
     }
     if (preg_match("/Current Listing Status Summary<\\/caption>(.*?)<\\/table>/ims", $html, $matches)) {
         $html = trim($matches[1]);
         // collapse the odd/even row classes into one so a single pattern matches every row
         $html = str_ireplace("displaytagOddRow", "displaytagRow", $html);
         $html = str_ireplace("displaytagEvenRow", "displaytagRow", $html);
         if (preg_match_all("/<tr class\\=\"displaytagRow\">(.*?)<\\/tr>/ims", $html, $matches)) {
             $rows = $matches[1];
             $desc = "";
             foreach ($rows as $row) {
                 echo "\n ============";
                 if (preg_match_all("/<td>(.*?)<\\/td>/ims", $row, $cells)) {
                     // A row may hold fewer than four cells; pad with empty strings so
                     // the reads below never hit an undefined offset.
                     $column = array_pad($cells[1], 4, "");
                     $status = $column[0];
                     $date_listed = $column[1];
                     $lead_region = strip_tags($column[2]);
                     $where_listed = $column[3];
                     // the status cell can wrap its text in a displayListingStatus("...") call; keep only the quoted text
                     if (preg_match("/displayListingStatus\\(\"(.*?)\"/ims", $status, $status_match)) {
                         $status = $status_match[1];
                     }
                     $desc .= "Status: " . $status . "<br>";
                     $desc .= "Date Listed: " . $date_listed . "<br>";
                     $desc .= "Lead Region: " . $lead_region . "<br>";
                     $desc .= "Where Listed: " . $where_listed . "<br><br>";
                 }
             }
             if ($desc) {
                 return "<b>Current Listing Status Summary</b><br><br>" . $desc . "<br>";
             }
         }
     } else {
         echo "\n No Listing Status Summary - {$taxon_id} \n";
     }
 }
예제 #30
0
 /**
  * Downloads the page at $dc_source and extracts a reference string together
  * with the link that accompanies it.
  *
  * NOTE(review): get_string_between() and get_href_from_anchor_tag() are defined
  * elsewhere; the delimiters passed below are written with regex escaping, so
  * get_string_between() presumably treats them as pattern fragments -- confirm.
  *
  * @param string $dc_source URL of the page to download
  * @return array two positional elements: the reference text, then the href
  *               taken from the anchor that follows it
  */
 public static function get_ref_from_site($dc_source)
 {
     $page = Functions::get_remote_file($dc_source);

     // reference text: from the getActiveText() marker up to </nonexplicit>
     $reference = self::get_string_between('\\"getActiveText\\(\\)\\"><nonexplicit>', '<\\/nonexplicit>', $page);

     // same starting marker, but running on to the closing </a>; the "xxx"
     // sentinel gives the next extraction a known end delimiter
     $through_anchor = self::get_string_between('\\"getActiveText\\(\\)\\"><nonexplicit>', '<\\/a>', $page);
     $through_anchor .= "xxx";

     // cut from '<a' to the sentinel, then put back the '<a' that the extraction consumed
     $anchor_tag = "<a" . self::get_string_between('<a', 'xxx', $through_anchor);

     $link = self::get_href_from_anchor_tag($anchor_tag);
     return array($reference, $link);
 }