Пример #1
0
/**
 * Collects everything this scraper knows about one species, starting from the
 * anchor tag that links to the species' detail page.
 *
 * The anchor is rebuilt as an absolute link (prefixed with the global $site_url),
 * the detail ("main menu") page is downloaded, and each sub-page linked from it
 * (images, classification, strict synonymy, references, common names, skeletons,
 * biological associations, nematocysts) is located and harvested in turn.
 * Progress and "no <section>" messages are printed, prefixed with the global $wrap.
 *
 * NOTE(review): parse_html(), get_tabular_data(), get_images(), arr2html(),
 * parse_classification() and get_str_from_anchor_tag() are defined elsewhere.
 * parse_html() is used here as if it returns the text found between a start
 * marker and an end marker, or an empty string when nothing matches - confirm
 * against its definition.
 *
 * @param string $str HTML fragment containing a link to "speciesdetail.cfm?..."
 *                    or "speciesdetail_for_nosyn.cfm?...".
 *
 * @return array Positional list of 12 items:
 *               0 taxon name, 1 main-menu URL, 2 classification,
 *               3 images, 4 skeletons HTML, 5 skeletons URL,
 *               6 biological-associations HTML, 7 biological-associations URL,
 *               8 common names, 9 references, 10 nematocysts HTML, 11 nematocysts URL.
 *               Sections missing from the main menu yield an empty array or an
 *               empty string.
 */
function parse_contents($str)
{
    global $wrap;
    global $site_url;
    /* $str can be:
       <a href="speciesdetail.cfm?genus=Abyssopathes&subgenus=&species=lyra&subspecies=&synseniorid=9266&validspecies=Abyssopathes%20lyra&authorship=%28Brook%2C%201889%29">Abyssopathes lyra (Brook, 1889)</a>
       or
       <a href="speciesdetail_for_nosyn.cfm?species=dentata&genus=Sandalolitha&subgenus=&subspecies=">Sandalolitha dentata Quelch, 1884</a>
       */
    $beg = 'speciesdetail.cfm?';
    $end1 = '</a>';
    $temp = trim(parse_html($str, $beg, $end1, $end1, $end1, $end1, ""));
    if ($temp == '') {
        // Species without synonyms are linked through a different page.
        $beg = 'speciesdetail_for_nosyn.cfm?';
        $temp = trim(parse_html($str, $beg, $end1, $end1, $end1, $end1, ""));
    }
    // Rebuild the anchor with an absolute href; $beg holds whichever page name was tried last.
    $temp = '<a href="' . $site_url . $beg . $temp . "</a>";

    // URL of the species' main menu: the href value of the rebuilt anchor.
    $beg = '="';
    $end1 = '">';
    $url_for_main_menu = trim(parse_html($temp, $beg, $end1, $end1, $end1, $end1, ""));

    // Scientific name: the anchor text.
    $beg = '">';
    $end1 = '</a>';
    $taxa = trim(parse_html($temp, $beg, $end1, $end1, $end1, $end1, ""));
    print "{$wrap} taxa[{$taxa}]";

    $main_menu = Functions::get_remote_file($url_for_main_menu);

    // Every link looked up in the main menu below ends at the closing quote of its href.
    $end1 = '">';

    // --- images ---
    //"images.cfm?&genus=Abyssopathes&subgenus=&species=lyra&subspecies=&seniorid=9266&validspecies=Abyssopathes%20lyra&authorship=%28Brook%2C%201889%29">Images</a>
    $url_for_images_page = "";
    $beg = 'images.cfm';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    $arr_images = array();
    if ($temp != "") {
        $url_for_images_page = $site_url . $beg . $temp;
        $arr_images = get_images($url_for_images_page);
    } else {
        print "{$wrap} no images";
    }

    // --- classification ---
    //"showclassification2.cfm?synseniorid=2914&genus=Aiptasiogeton&subgenus=&species=eruptaurantia&subspecies=&origgenus=Actinothoe&origspecies=eruptaurantia&origsubspecies=&origsubgenus=&&validspecies=Aiptasiogeton%20eruptaurantia&authorship=%28Field%2C%201949%29">Classification</a>
    $url_for_classification = "";
    // Initialized up front so the return statement never reads an undefined
    // variable when the main menu carries no classification link.
    $arr_classification = array();
    $beg = 'showclassification2.cfm';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    if ($temp == "") {
        //http://hercules.kgs.ku.edu/hexacoral/anemone2/classification_path_no_syn.cfm?genus=Astr%C3%A6a&subgenus=&species=abdita&subspecies=
        $beg = 'classification_path_no_syn.cfm';
        $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    }
    if ($temp != "") {
        $url_for_classification = $site_url . $beg . $temp;
        $arr_classification = get_tabular_data($url_for_classification, "classification");
        if ($arr_classification) {
            $arr_classification = parse_classification($arr_classification);
        }
    } else {
        print "{$wrap} no classification";
    }

    // --- strict synonymy ---
    //"synonymy_strict.cfm?seniorid=2914&validspecies=Aiptasiogeton%20eruptaurantia&authorship=%28Field%2C%201949%29">Strict synonymy</a>
    $url_for_strict_synonymy = "";
    $beg = 'synonymy_strict.cfm';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    if ($temp != "") {
        $url_for_strict_synonymy = $site_url . $beg . $temp;
        // NOTE(review): the synonyms are fetched but never returned. The call is kept
        // in case get_tabular_data() has side effects; confirm before removing it.
        $arr_synonyms = get_tabular_data($url_for_strict_synonymy, "synonyms");
    } else {
        print "{$wrap} no strict_synonymy";
    }

    // --- references ---
    //"all_mentions_of_names2.cfm?species...
    $url_for_references = "";
    $beg = 'all_mentions_of_names.cfm';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    if ($temp == "") {
        $beg = 'all_mentions_of_names2.cfm';
        $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    }
    $arr_references = array();
    if ($temp != "") {
        $url_for_references = $site_url . $beg . $temp;
        $arr_references = get_tabular_data($url_for_references, "references");
        // Flatten each row into one "."-separated string; using the string as an
        // array key removes duplicate references.
        $arr = array();
        foreach ($arr_references as $value) {
            $temp = trim(implode(".", $value));
            // Make relative reference links absolute:
            //<a href="reference_detail.cfm?ref_number=58&type=Article">
            $temp = str_ireplace("reference_detail.cfm", $site_url . "reference_detail.cfm", $temp);
            //if we want to remove the anchor
            //$temp = get_str_from_anchor_tag($temp);
            $arr[$temp] = 1;
        }
        $arr_references = array_keys($arr);
    } else {
        print "{$wrap} no references";
    }

    // --- common names ---
    //"common.cfm?seniorid=2914&validspecies=Aiptasiogeton%20eruptaurantia&authorship=%28Field%2C%201949%29">Strict synonymy</a>
    $url_for_common_names = "";
    $beg = 'common.cfm';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    $arr_common_names = array();
    if ($temp != "") {
        $url_for_common_names = $site_url . $beg . $temp;
        $arr_common_names = get_tabular_data($url_for_common_names, "common_names");
        // Keep the first column of every row, stripped of its anchor tag, de-duplicated.
        // The name is deliberately not lower-cased: that was judged unsafe for
        // names containing special characters.
        $arr = array();
        foreach ($arr_common_names as $value) {
            $temp = trim(get_str_from_anchor_tag($value[0]));
            $arr[$temp] = 1;
        }
        $arr_common_names = array_keys($arr);
    } else {
        print "{$wrap} no common_names";
    }

    // --- skeletons ---
    //e.g. for species (Favites abdita) with skeleton
    //http://hercules.kgs.ku.edu/hexacoral/anemone2/skeleton.cfm?genus=Favites&subgenus=&species=abdita&subspecies=
    $url_for_skeletons = "";
    $beg = 'skeleton.cfm';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    $html_skeletons = "";
    if ($temp != "") {
        $url_for_skeletons = $site_url . $beg . $temp;
        $arr_skeletons = get_tabular_data($url_for_skeletons, "skeletons");
        if ($arr_skeletons) {
            $arr_fields = array("Author", "Skeleton?", "Mineral or Organic?", "Mineral", "Percent Magnesium");
            $html_skeletons = arr2html($arr_skeletons, $arr_fields, $url_for_main_menu);
            $html_skeletons = "<div style='font-size : small;'>{$html_skeletons}</div>";
        }
    } else {
        print "{$wrap} no skeletons";
    }

    // --- biological associations ---
    $url_for_biological_associations = "";
    $beg = 'symbiont_info.cfm';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    $html_biological_associations = "";
    if ($temp != "") {
        $url_for_biological_associations = $site_url . $beg . $temp;
        $arr_biological_associations = get_tabular_data($url_for_biological_associations, "biological_associations");
        $arr_fields = array("Algal symbionts");
        $html_biological_associations = arr2html($arr_biological_associations, $arr_fields, $url_for_main_menu);
        $html_biological_associations = "<div style='font-size : small;'>{$html_biological_associations}</div>";
    } else {
        print "{$wrap} no biological_associations";
    }

    // --- nematocysts ---
    $url_for_nematocysts = "";
    $beg = 'cnidae_information.cfm';
    $temp = trim(parse_html($main_menu, $beg, $end1, $end1, $end1, $end1, ""));
    $html_nematocysts = "";
    if ($temp != "") {
        $url_for_nematocysts = $site_url . $beg . $temp;
        $arr_nematocysts = get_tabular_data($url_for_nematocysts, "nematocysts");
        $arr_fields = array("Location", "Image", "Cnidae Type", "Range of <br> Lengths (m)", " ", "Range of <br >Widths (m)", "n", "N", "State");
        $html_nematocysts = arr2html($arr_nematocysts, $arr_fields, $url_for_main_menu);
        $html_nematocysts = "<div style='font-size : small;'>{$html_nematocysts}</div>";
        // Let the first data row span all 9 columns.
        $html_nematocysts = str_ireplace("</th></tr><tr><td>", "</th></tr><tr><td colspan='9'>", $html_nematocysts);
    } else {
        print "{$wrap} no nematocysts";
    }

    // --- specimens (disabled) ---
    /*
        $url_for_specimens="";
        //all_specimens_xml.cfm?
        $beg='all_specimens_xml.cfm'; $end1='">';
        $temp = trim(parse_html($main_menu,$beg,$end1,$end1,$end1,$end1,""));
        $arr_specimens=array();
        if($temp != "")
        {
            $url_for_specimens = $site_url . $beg . $temp;
            print"$wrap [<a href='$url_for_specimens'>specimens</a>]";
            $arr_specimens = get_tabular_data($url_for_specimens,"specimens");
            //start process
            $arr=array();
            foreach ($arr_specimens as $value)
            {
                $temp = @$value[5];
                $arr["$temp"]=1;
            }
            $arr_specimens = array_keys($arr);
        }else print"$wrap no specimens";
    */

    return array($taxa, $url_for_main_menu, $arr_classification, $arr_images, $html_skeletons, $url_for_skeletons, $html_biological_associations, $url_for_biological_associations, $arr_common_names, $arr_references, $html_nematocysts, $url_for_nematocysts);
}
Пример #2
0
/**
 * Downloads an image page and turns the table that follows its "Image Only"
 * label into tabular data.
 *
 * NOTE(review): parse_html() and get_tabular_data() are defined elsewhere; the
 * shape of the returned data depends on get_tabular_data() - confirm there.
 *
 * @param string $file Location of the page, passed to Functions::get_remote_file().
 *
 * @return mixed Whatever get_tabular_data() produces for the extracted table,
 *               or null when the page could not be fetched (or came back empty).
 */
function parse_image_page($file)
{
    $page = Functions::get_remote_file($file);
    if (!$page) {
        // Nothing usable was downloaded; callers receive null.
        return;
    }

    // Discard everything in front of the "Image Only" label. When the label is
    // missing stripos() yields false, which substr() treats as offset 0 in
    // non-strict mode, so the whole page is kept.
    $label_at = stripos($page, "Image Only");
    $tail = trim(substr($page, $label_at, strlen($page)));

    // parse_html() is handed the table markers as delimiters; its result is
    // re-wrapped in them so that get_tabular_data() receives a complete table.
    $open_tag = '<table';
    $close_tag = '</table>';
    $inner = trim(parse_html($tail, $open_tag, $close_tag, $close_tag, $close_tag, $close_tag, ""));
    $table_html = $open_tag . ' ' . $inner . ' ' . $close_tag;

    return get_tabular_data($table_html);
}