Example #1
0
# Fragment: walks every <div> of an already-fetched page ($web_page) and fills
# $books[$bookCount] with the cover image URL, title and detail-page URL.
# NOTE(review): this fragment is truncated - the closing braces of the foreach
# and of the title branch, and any increment of $bookCount, are not visible here.
$bookCount = 0;
# Parse all the <div> blocks on the web page into an array
$divs = parse_array($web_page['FILE'], "<div", "</div>");
foreach ($divs as $div) {
    # --- Cover image: divs whose markup mentions the image-field class ---
    $divClass = 'views-field-field-image-cache-fid';
    if (stristr($div, $divClass)) {
        # Only self-closing <img ... /> tags are matched; the first one is used
        $imgTag = parse_array($div, '<img', '/>');
        if ($cloudflare == 1) {
            # Every "www" in the src value is rewritten to "ftp" before resolving
            # NOTE(review): presumably targets a host not fronted by Cloudflare - confirm
            $books[$bookCount]['imageUrl'] = resolve_address(str_replace("www", "ftp", get_attribute($imgTag[0], $attribute = "src")), $page_base);
        } else {
            $books[$bookCount]['imageUrl'] = resolve_address(get_attribute($imgTag[0], $attribute = "src"), $page_base);
        }
        # Create the image directory on first use
        if (!is_dir($save_image_directory)) {
            mkpath($save_image_directory);
        }
        # The download happens for every image URL; only the write below is filtered
        $this_image_file = download_binary_file($books[$bookCount]['imageUrl'], $ref = "");
        # Save only when the URL contains .jpg, .gif or .png (case-insensitive, anywhere in the URL)
        if (stristr($books[$bookCount]['imageUrl'], ".jpg") || stristr($books[$bookCount]['imageUrl'], ".gif") || stristr($books[$bookCount]['imageUrl'], ".png")) {
            # No separator is inserted here
            # NOTE(review): assumes $save_image_directory already ends with a slash - confirm
            file_put_contents($save_image_directory . basename($books[$bookCount]['imageUrl']), $this_image_file);
        }
    }
    # --- Title and link: divs whose markup mentions the title class ---
    $divClass = 'views-field-title';
    if (stristr($div, $divClass)) {
        # Title is the div's text with all markup stripped
        $books[$bookCount]['title'] = trim(strip_tags($div));
        # The first <a> inside the div supplies the book's detail-page link
        $aTag = parse_array($div, '<a', '</a>');
        if ($cloudflare == 1) {
            # Same "www" -> "ftp" rewrite as for the image URL above
            $books[$bookCount]['bookUrl'] = resolve_address(str_replace("www", "ftp", get_attribute($aTag[0], $attribute = "href")), $page_base);
        } else {
            $books[$bookCount]['bookUrl'] = resolve_address(get_attribute($aTag[0], $attribute = "href"), $page_base);
        }
        # Fetch the book's detail page
        # NOTE(review): $target is presumably passed as the referer - confirm against http_get()
        $bookPage[$bookCount] = http_get($books[$bookCount]['bookUrl'], $target);
        # Collect the product-body <div> blocks of the detail page
        $bookDivs[$bookCount] = parse_array($bookPage[$bookCount]['FILE'], "<div class=\"product-body\"", "</div>");
/**
 * Downloads the same-domain images referenced by the <img> tags of a web page.
 *
 * Fetches $target, resolves every image "src" against the page base and, for
 * images served from the page's own base domain, mirrors them below a
 * "saved_images_<page base>" directory, recreating the path found in the
 * "src" attribute. Progress is echoed while the page is processed.
 *
 * Every same-domain image is downloaded and its size reported, but only URLs
 * containing ".jpg", ".gif" or ".png" (case-insensitive) are written to disk.
 *
 * When the page has no <img> tags the function reports it and returns, so a
 * caller iterating over several pages can carry on with the next one.
 *
 * @param string $target URL of the page to scan for images.
 * @return void
 */
function download_images_for_page($target)
{
    echo "target = {$target}\n";

    # Download the web page
    $referer = "";
    $web_page = http_get($target, $referer);

    # Update the target in case there was a redirection
    $target = $web_page['STATUS']['url'];

    # Strip file name off target for use as page base
    $page_base = get_base_page_address($target);

    # Identify the directory where images are to be saved
    # NOTE(review): only "http://" is stripped; for an https page the scheme
    # (including its ":") stays in the directory name - confirm this is intended
    $save_image_directory = "saved_images_" . str_replace("http://", "", $page_base);

    # Parse the image tags
    $img_tag_array = parse_array($web_page['FILE'], "<img", ">");
    if (count($img_tag_array) == 0) {
        echo "No images found at {$target}\n";
        # Return rather than exit: a page without images must not end the whole script
        return;
    }

    foreach ($img_tag_array as $img_tag) {
        # Echo the image source attribute from the image tag
        $attribute = "src";
        $image_path = get_attribute($img_tag, $attribute);
        echo " image: " . $image_path;
        $image_url = resolve_address($image_path, $page_base);

        if (get_base_domain_address($page_base) != get_base_domain_address($image_url)) {
            echo "\nSkipping off-domain image.\n";
            continue;
        }

        # Directory part of the src value; empty when the src has no slash
        $last_slash = strrpos($image_path, "/");
        $directory = $last_slash === false ? "" : substr($image_path, 0, $last_slash);

        # Replace ":" in both the directory and the file path used on disk
        $directory = str_replace(":", "-", $directory);
        $image_path = str_replace(":", "-", $image_path);

        # Clear the stat cache to get an accurate directory status, then make
        # the image storage directory if one doesn't exist
        clearstatcache();
        if (!is_dir($save_image_directory . "/" . $directory)) {
            mkpath($save_image_directory . "/" . $directory);
        }

        # Download the image, report image size
        $ref = "";
        $this_image_file = download_binary_file($image_url, $ref);
        echo " size: " . strlen($this_image_file);

        # Save the image only when the URL looks like a jpg, gif or png
        $has_image_extension = stristr($image_url, ".jpg")
            || stristr($image_url, ".gif")
            || stristr($image_url, ".png");
        if ($has_image_extension) {
            $destination = $save_image_directory . "/" . $image_path;
            # Report a failed write instead of passing a bad handle on to a write call
            if (file_put_contents($destination, $this_image_file) === false) {
                echo " (could not write {$destination})";
            }
        }

        # Always terminate the report line, whether or not the file was saved
        echo "\n";
    }
}
/**
 * Downloads the file at $url and stores its contents in "temp.jpg".
 *
 * The target file name is fixed and relative, so each call overwrites the
 * previous download in the current working directory.
 *
 * @param string $url Address of the binary file to fetch.
 * @return void
 */
function save_binary_file($url)
{
    # The download is requested with an empty referer
    $empty_referer = "";
    $payload = download_binary_file($url, $empty_referer);

    file_put_contents("temp.jpg", $payload);
}