/**
 * Downloads the on-domain images referenced by the web page at $target.
 *
 * The page is fetched with http_get(), its <img> tags are collected with
 * parse_array(), and each src attribute is resolved against the page base
 * with resolve_address(). An image is downloaded only when its base domain
 * equals the page's base domain. Downloaded data is written to disk only when
 * the image URL contains ".jpg", ".gif" or ".png"; files go under
 * "saved_images_<page base without protocol>/", mirroring the directory part
 * of the src attribute. Progress is echoed as the function runs.
 *
 * @param string $target Address of the page to scan for images.
 * @return void
 */
function download_images_for_page($target)
{
    echo "target = {$target}\n";
    # Download the web page
    $web_page = http_get($target, $referer = "");
    # Update the target in case there was a redirection
    $target = $web_page['STATUS']['url'];
    # Strip file name off target for use as page base
    $page_base = get_base_page_address($target);
    # Identify the directory where images are to be saved. Both protocol
    # prefixes are removed so https pages do not produce a directory name
    # containing "https://".
    $save_image_directory = "saved_images_" . str_replace(array("http://", "https://"), "", $page_base);
    # Parse the image tags
    $img_tag_array = parse_array($web_page['FILE'], "<img", ">");
    if (count($img_tag_array) == 0) {
        echo "No images found at {$target}\n";
        # Return (rather than exit) so a caller working through several pages
        # is not terminated by one page without images
        return;
    }
    # Echo the image source attribute from each image tag
    foreach ($img_tag_array as $img_tag) {
        $image_path = get_attribute($img_tag, $attribute = "src");
        echo " image: " . $image_path;
        $image_url = resolve_address($image_path, $page_base);
        if (get_base_domain_address($page_base) != get_base_domain_address($image_url)) {
            echo "\nSkipping off-domain image.\n";
            continue;
        }
        # Make image storage directory for image, if one doesn't exist.
        # A src without any "/" has no directory part.
        $last_slash = strrpos($image_path, "/");
        $directory = ($last_slash === false) ? "" : substr($image_path, 0, $last_slash);
        # ":" is replaced because it is not usable in file names everywhere
        $directory = str_replace(":", "-", $directory);
        $image_path = str_replace(":", "-", $image_path);
        # NOTE(review): $image_path comes from the remote page; a src that
        # contains "../" is written relative to, and possibly outside of,
        # $save_image_directory - confirm this is acceptable.
        // clear cache to get accurate directory status
        clearstatcache();
        if (!is_dir($save_image_directory . "/" . $directory)) {
            mkpath($save_image_directory . "/" . $directory);
        }
        # Download the image, report image size
        $this_image_file = download_binary_file($image_url, $ref = "");
        echo " size: " . strlen($this_image_file);
        # Save the image
        if (stristr($image_url, ".jpg") || stristr($image_url, ".gif") || stristr($image_url, ".png")) {
            $save_path = $save_image_directory . "/" . $image_path;
            $fp = fopen($save_path, "wb");
            if ($fp === false) {
                # Report the failure and move on instead of writing to an
                # invalid handle
                echo "\nCould not open {$save_path} for writing.\n";
                continue;
            }
            fputs($fp, $this_image_file);
            fclose($fp);
            echo "\n";
        }
    }
}
/**
 * Builds a fully resolved address for a link found on a web page.
 *
 * The link is classified by its prefix and handled as one of:
 *   1. already absolute ("http://" or "https://", any letter case),
 *   2. protocol-relative ("//host/path") - takes the protocol of $page_base,
 *   3. relative to the domain root ("/path"),
 *   4. climbing to higher directories ("../path"),
 *   5. relative to the page's own directory (everything else).
 *
 * @param string $link      Link as found in the page (e.g. a src or href value).
 * @param string $page_base Address of the directory that contains the page.
 * @return string Resolved address; "http://" is prepended when the result
 *                does not already start with http:// or https://.
 */
function resolve_address($link, $page_base)
{
    $protocol_pattern = '#^https?://#i';

    #----------------------------------------------------------
    # CONDITION INCOMING LINK ADDRESS
    #
    $link = trim($link);
    $page_base = trim($page_base);
    # if there isn't one, put a "/" at the end of the $page_base
    if (substr($page_base, -1) != "/") {
        $page_base = $page_base . "/";
    }
    # remove unwanted characters (; " ') from $link
    $link = str_replace(array(";", "\"", "'"), "", $link);

    if (preg_match($protocol_pattern, $link)) {
        #------------------------------------------------------
        # ADDRESSES THAT ARE ALREADY FULLY RESOLVED
        # The full "http(s)://" prefix is required so that relative names
        # which merely begin with "http" (e.g. "http_logo.png") are not
        # mistaken for absolute addresses.
        $abs_address = $link;
    } elseif (substr($link, 0, 2) == "//") {
        #------------------------------------------------------
        # PROTOCOL-RELATIVE ADDRESSES
        # "//host/path" names another host and reuses the page's protocol.
        $protocol = (stripos($page_base, "https://") === 0) ? "https:" : "http:";
        $abs_address = $protocol . $link;
    } elseif (substr($link, 0, 1) == "/") {
        #------------------------------------------------------
        # REFERENCES TO THE BASE DOMAIN ADDRESS
        $abs_address = get_base_domain_address($page_base) . $link;
    } elseif (substr($link, 0, 3) == "../") {
        #------------------------------------------------------
        # REFERENCES TO HIGHER DIRECTORIES
        # $page_base always ends with "/" at this point; drop it
        $page_base = substr($page_base, 0, strlen($page_base) - 1);
        # A right-most slash before offset 8 (or no slash at all) leaves
        # nothing but the protocol prefix's slashes, so the base is kept
        # as it is instead of being moved further back.
        if (strrpos($page_base, "/") < 8) {
            $unadjusted_base_address = $page_base;
        }
        # Bring the page base back one level per leading "../".
        # move_address_back_one_level() is defined elsewhere; presumably it
        # removes one trailing directory from $page_base and one leading
        # "../" from $link - confirm against its definition.
        do {
            list($page_base, $link) = move_address_back_one_level($page_base, $link);
        } while (substr($link, 0, 3) == "../");
        if (isset($unadjusted_base_address)) {
            $abs_address = $unadjusted_base_address . "/" . $link;
        } else {
            $abs_address = $page_base . "/" . $link;
        }
    } else {
        #------------------------------------------------------
        # REFERENCES RELATIVE TO THE PAGE'S OWN DIRECTORY
        # "/./" refers to the current directory and is collapsed to "/"
        $abs_address = str_replace("/./", "/", $page_base . $link);
    }

    #----------------------------------------------------------
    # ADD PROTOCOL IDENTIFIER IF NEEDED
    #
    if (!preg_match($protocol_pattern, $abs_address)) {
        $abs_address = "http://" . $abs_address;
    }
    return $abs_address;
}
# Remaining sample addresses: a bare protocol, a full page address, and the
# same page address without a protocol.
# ($url_1 .. $url_3 are not assigned in this part of the file - presumably
# they are set earlier; verify.)
$url_4 = "http://";
$url_5 = "http://www.burceyoo.com/index.html";
$url_6 = "www.burceyoo.com/index.html";

# Show the base page address derived from every sample URL
foreach (array($url_1, $url_2, $url_3, $url_4, $url_5, $url_6) as $probe_url) {
    echo "{$probe_url} base_page: " . get_base_page_address($probe_url) . "\n";
}
echo "--------------------------------------------------------\n";

/*
	get_base_domain_address() should really be given a base page address;
	this test passes the original URL directly instead.
*/
# $url_4 is left out on purpose: it raises an error (execution can still
# continue, and the returned value is empty).
foreach (array($url_1, $url_2, $url_3, $url_5, $url_6) as $probe_url) {
    echo "{$probe_url} base_domain: " . get_base_domain_address($probe_url) . "\n";
}
echo "--------------------------------------------------------\n";

# Sample links: parent-directory, current-directory, domain-root and plain
# relative forms, all resolved against the same base URL
$link_1 = "../image/book1.jpg";
$link_2 = "./image/book2.jpg";
$link_3 = "/image/book3.jpg";
$link_4 = "image/book4.jpg";
$base_url = "http://bruce.com.cn/book/sale/";
echo "base_url:" . $base_url . "\n";
foreach (array($link_1, $link_2, $link_3, $link_4) as $probe_link) {
    echo "{$probe_link} resolve_address:" . resolve_address($probe_link, $base_url) . "\n";
}
echo "--------------------------------------------------------\n";