/**
 * Collect image entries for a page, trying several download strategies in turn.
 *
 * The page is first fetched with file_get_contents() and handed to
 * processhtml(). While fewer than three entries have been collected, the page
 * is fetched again through get_web_page(), a direct cURL request,
 * url_get_contents() and finally urlload(); every download is passed to
 * processhtml() together with the entries gathered so far.
 *
 * Terminates the script with die() if a fallback is needed and the cURL
 * extension is not available.
 *
 * @param string $url Page address; "http://" is prepended when it does not
 *                    already start with "http://" or "https://".
 * @return mixed The value last returned by processhtml(). It is measured with
 *               count() here, so presumably an array - confirm against
 *               processhtml().
 */
function getimages($url)
{
    $url = trim($url);

    // Only a real "http://" / "https://" prefix counts as a scheme. A plain
    // "starts with http" test would leave hosts such as "httpbin.org" without one.
    if (!preg_match('#^https?://#i', $url)) {
        $url = "http://" . $url;
    }

    // Strategy 1: PHP stream wrapper.
    $html3 = file_get_contents($url);
    $ret = $this->processhtml($html3, $url, array());

    // Strategy 2: get_web_page() helper.
    if (count($ret) < 3) {
        if (!function_exists('curl_init')) {
            die('CURL is not installed!');
        }
        $thepage = get_web_page($url);
        $html1 = $thepage['content'];
        // NOTE(review): the fallbacks originally called a bare processhtml();
        // they now use the same method as the first pass - confirm no separate
        // global function was intended.
        $ret = $this->processhtml($html1, $url, $ret);
    }

    // Strategy 3: direct cURL request.
    if (count($ret) < 3) {
        $timeout = 15;
        // The handle is created only when it is actually used, so it is always closed.
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_ENCODING, "");
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
        // NOTE(review): certificate verification is switched off, which allows
        // man-in-the-middle tampering - confirm this is intentional.
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        $html2 = curl_exec($ch);
        curl_close($ch);
        $ret = $this->processhtml($html2, $url, $ret);
    }

    // Strategy 4: url_get_contents() helper.
    if (count($ret) < 3) {
        $html4 = url_get_contents($url);
        $ret = $this->processhtml($html4, $url, $ret);
    }

    // Strategy 5: urlload() helper.
    if (count($ret) < 3) {
        $html5 = urlload($url);
        $ret = $this->processhtml($html5, $url, $ret);
    }

    // The entries are returned exactly as processhtml() left them (no re-sorting).
    return $ret;
}
示例#2
0
/**
 * Download one thumbnail for every unapproved gallery and record the outcome.
 *
 * For each row of `gallery` with approved = 0 (at most 10000 per run) the
 * gallery page is loaded, its <a> elements that wrap an <img> are collected,
 * up to five of them whose href has an accepted picture/video extension are
 * kept, and one of those is chosen at random. For a jpg/jpeg href the linked
 * picture is downloaded, otherwise the <img> src is downloaded. A JPEG
 * response is stored as "allthumbs/<ceil(n/1000)>/<n>.jpg", where n is the
 * incremented counter from `thumb_num`.
 *
 * Values written to gallery.approved:
 *   1 - thumbnail saved,
 *   2 - download returned a non-200 status or a non-JPEG content type,
 *   3 - gallery URL contains ".flv",
 *   4 - gallery page could not be loaded,
 *   5 - no usable link/thumbnail pair found.
 * If the thumbnail file cannot be written, the failure is logged and the row
 * is left at approved = 0 so that a later run retries it.
 *
 * Progress is printed; failures go to errors.log and successes to
 * downloaded.log. Terminates with die() when there is nothing to process.
 *
 * @param mixed $params Not used by this function.
 * @return void
 */
function grab_pics($params)
{
    $cookie_path = "./cookies.txt";
    $path = "allthumbs/";
    $allowed_ext = array("jpeg", "jpg", "wmv", "mpg", "mpeg", "avi", "mp4", "mov", "flv");

    $num = one(q("select count(*) from gallery\n\t\twhere approved = 0"));
    $num > 0 or die("Data is empty.");
    $limit = min($num, 10000);
    print "Found {$num} galleries.\nStart grab pics for next {$limit} galleries...\n";

    $galleries = q("select * from gallery\n\t\twhere approved = 0 limit {$limit}");
    while ($row = fetch($galleries)) {
        print "\n{$row['url']}\n";

        if (strpos($row['url'], ".flv") !== false) {
            q("update gallery set approved=3\n\t\t\t\twhere id = %s", array($row["id"]));
            continue;
        }

        // Every gallery starts with an empty cookie jar.
        if (file_exists($cookie_path)) {
            unlink($cookie_path);
        }

        $res = urlload($row["url"], array("cookies" => $cookie_path));
        if ($res == "") {
            q("update gallery set approved=4\n\t\t\t\twhere id = %s", array($row["id"]));
            file_put_contents("errors.log", "{$row['id']}\tError load\t{$row['url']}\n", FILE_APPEND);
            continue;
        }

        // Real-world markup is rarely valid; collect libxml complaints
        // internally instead of silencing the call with "@".
        $libxml_previous = libxml_use_internal_errors(true);
        $dom = new DOMDocument();
        $dom->loadHTML($res);
        libxml_clear_errors();
        libxml_use_internal_errors($libxml_previous);

        // Each candidate is array(null, href of the <a>, src of its first <img>).
        $m = array();
        foreach ($dom->getElementsByTagName("a") as $node) {
            $img = $node->getElementsByTagName("img")->item(0);
            if ($img) {
                $m[] = array(null, $node->getAttribute("href"), $img->getAttribute("src"));
            }
        }

        // Collect up to five usable links, starting from the end of the page.
        $urls = array();
        while (sizeof($m) > 0 && sizeof($urls) < 5) {
            $tmp = array_pop($m);
            $tmp[1] = trim($tmp[1]);
            $tmp[2] = trim($tmp[2]);
            if ($tmp[1] == "" || $tmp[2] == "") {
                continue;
            }
            $href_ext = strtolower(p(pathinfo(p(parse_url($tmp[1]), "path")), "extension"));
            if (!in_array($href_ext, $allowed_ext, true)) {
                continue;
            }
            $urls[] = $tmp;
        }

        if (sizeof($urls) == 0) {
            q("update gallery set approved=5\n\t\t\t\twhere id = %s", array($row["id"]));
            file_put_contents("errors.log", "{$row['id']}\tNot found valid thumbs\t{$row['url']}\n", FILE_APPEND);
            continue;
        }

        $pick = array_rand($urls);
        $img_url = trim(unquotes($urls[$pick][1]));
        $thumb_url = trim(unquotes($urls[$pick][2]));

        // Same guarded lookup as in the filter above: unquotes() may have
        // altered the link, so "path"/"extension" are not guaranteed to exist.
        $ext = strtolower(p(pathinfo(p(parse_url($img_url), "path")), "extension"));
        $is_pic_galler = $ext == "jpg" || $ext == "jpeg";

        // Picture links are downloaded directly; for video links the page's
        // own thumbnail image is taken instead.
        $link_url = $is_pic_galler ? $img_url : $thumb_url;
        $img_url = realurl(baseurl($row['url']), $link_url);
        print "  {$img_url}\n";

        list($info, $data) = urlload($img_url, array("getinfo" => true, "referer" => $row["url"], "cookies" => $cookie_path));

        if ($info["http_code"] != "200") {
            q("update gallery set approved=2\n\t\t\t\twhere id = %s", array($row["id"]));
            file_put_contents("errors.log", "{$row['id']}\tHTTP Error - {$info['http_code']}\t{$row['url']}\n\t{$link_url}\n\t{$img_url}\n", FILE_APPEND);
            continue;
        }

        if ($info["content_type"] != "image/jpeg") {
            q("update gallery set approved=2\n\t\t\t\twhere id = %s", array($row["id"]));
            file_put_contents("errors.log", "{$row['id']}\tError image/video type\t{$row['url']}\n\t{$link_url}\n\t{$img_url}\n", FILE_APPEND);
            continue;
        }

        // Reserve the next thumbnail number; files are grouped 1000 per folder.
        $thumb = (int) one(q("select num from thumb_num")) + 1;
        $folder = $path . ceil($thumb / 1000);
        q("update thumb_num set num={$thumb}");

        if (!is_dir($folder)) {
            // Recursive, so the base "allthumbs/" directory is created as well.
            mkdir($folder, 0777, true);
        }
        $name = "{$folder}/{$thumb}.jpg";
        if (file_exists($name)) {
            unlink($name);
        }

        if (file_put_contents($name, $data) === false) {
            // Do not mark the gallery approved when nothing was stored.
            file_put_contents("errors.log", "{$row['id']}\tError save thumb\t{$row['url']}\n\t{$link_url}\n\t{$img_url}\n\t{$name}\n", FILE_APPEND);
            continue;
        }

        q("update gallery set thumb=%s, approved=1\n\t\t\t\twhere id = %s", array($thumb, $row["id"]));
        file_put_contents("downloaded.log", "{$row['id']}\t{$row['url']}\n\t{$link_url}\n\t{$img_url}\n\t{$thumb}\n", FILE_APPEND);
    }
}