/**
 * Collect image entries for a page, trying several fetch strategies in turn.
 *
 * The page is first downloaded with file_get_contents() and handed to
 * processhtml(). While fewer than three entries have been collected, the same
 * URL is fetched again via get_web_page(), a raw cURL request,
 * url_get_contents() and finally urlload(); every response is passed to
 * processhtml() together with the entries collected so far.
 *
 * Terminates the script with die() if a fallback is needed and the cURL
 * extension is not available.
 *
 * @param string $url Page address; "http://" is prepended when the value does
 *                    not start with an http:// or https:// scheme.
 * @return mixed The value returned by the last processhtml() call (it is
 *               passed to count(), so presumably an array - confirm against
 *               processhtml()).
 */
function getimages($url)
{
    $url = trim($url);

    // Only a real "http://" / "https://" prefix counts as a scheme. A bare
    // host that merely begins with "http" (e.g. "httpbin.org") still needs
    // the default scheme prepended.
    if (!preg_match('#^https?://#i', $url)) {
        $url = "http://" . $url;
    }

    // Attempt 1: plain stream wrapper download.
    $html3 = file_get_contents($url);
    $ret = $this->processhtml($html3, $url, array());

    // Attempt 2: get_web_page() helper.
    if (count($ret) < 3) {
        if (!function_exists('curl_init')) {
            die('CURL is not installed!');
        }
        $thepage = get_web_page($url);
        $html1 = $thepage['content'];
        // NOTE(review): the first call goes through $this->processhtml() while
        // the fallbacks call a free function processhtml(); left as found -
        // confirm that both exist and are meant to be equivalent.
        $ret = processhtml($html1, $url, $ret);
    }

    // Attempt 3: raw cURL request. The handle is created here, where it is
    // used and closed, so it is not leaked when attempt 2 already succeeded.
    if (count($ret) < 3) {
        $timeout = 15;
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_ENCODING, "");
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // Bound the connection phase; the timeout value was previously
        // assigned but never applied to the handle.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
        // NOTE(review): TLS host/peer verification is disabled, so responses
        // from https URLs are not authenticated.
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        $html2 = curl_exec($ch);
        curl_close($ch);
        $ret = processhtml($html2, $url, $ret);
    }

    // Attempt 4: url_get_contents() helper.
    if (count($ret) < 3) {
        $html4 = url_get_contents($url);
        $ret = processhtml($html4, $url, $ret);
    }

    // Attempt 5: urlload() helper.
    if (count($ret) < 3) {
        $html5 = urlload($url);
        $ret = processhtml($html5, $url, $ret);
    }

    // Disabled post-processing, preserved as found:
    //foreach ($ret as $key=>$value) {
    //    $img = explode("\n", $value);
    //    if (($img[1] == 99) && ($img[2] == 99)) {
    //        $thumbnail = imagecreatefromstring(file_get_contents($img[3]));
    //        $width = imagesx($thumbnail);
    //        $height = imagesy($thumbnail);
    //        ImageDestroy($thumbnail);
    //        if (trim($width) != "") {
    //            $value = sprintf('%07d', $height + $width) . "\n" . sprintf('%06d', $height) . "\n" . sprintf('%06d', $width) . "\n" . $img[3]
    //        }
    //    }
    //}
    //rsort($ret);

    return $ret;
}
/**
 * Download one thumbnail for each gallery row that has approved = 0.
 *
 * For every such row (at most 10000 per call) the gallery page is loaded,
 * anchors that wrap an <img> are collected, and up to five of them whose link
 * target has an allowed image/video extension are kept. One candidate is
 * picked at random; its link target (for jpg/jpeg links) or its thumbnail
 * source (otherwise) is downloaded and, when served as image/jpeg, stored
 * under allthumbs/<ceil(n / 1000)>/<n>.jpg, where n is the incremented
 * thumb_num counter.
 *
 * The row's `approved` column records the outcome:
 *   1 - thumbnail saved
 *   2 - image request did not return HTTP 200, or was not image/jpeg
 *   3 - gallery URL contains ".flv"
 *   4 - gallery page could not be loaded
 *   5 - no usable anchor/image pair found
 * Failures are appended to errors.log, successes to downloaded.log. If the
 * thumbnail file cannot be written the row is left at approved = 0 and the
 * failure is logged, so the gallery is not marked as done without a file.
 *
 * Terminates the script with die() when there is nothing to process.
 *
 * @param mixed $params Not used by this function.
 * @return void
 */
function grab_pics($params)
{
    $cookie_path = "./cookies.txt";
    $path = "allthumbs/";
    // Link-target extensions accepted as gallery content.
    $allowed = array("jpeg", "jpg", "wmv", "mpg", "mpeg", "avi", "mp4", "mov", "flv");

    $num = one(q("select count(*) from gallery\n\t\twhere approved = 0"));
    if (!($num > 0)) {
        die("Data is empty.");
    }

    // Kept separate from the per-row random index used further down.
    $batch = min($num, 10000);
    print "Found {$num} galleries.\nStart grab pics for next {$batch} galleries...\n";

    // Alternative that was disabled in the original (random order):
    //$galleries=q("select * from gallery
    //    where approved = 0 order by random() limit $i");
    $galleries = q("select * from gallery\n\t\twhere approved = 0 limit {$batch}");

    while ($row = fetch($galleries)) {
        print "\n{$row['url']}\n";

        if (strpos($row['url'], ".flv") !== false) {
            q("update gallery set approved=3\n\t\t\t\twhere id = %s", array($row["id"]));
            continue;
        }

        // Start every gallery with a fresh cookie file.
        if (file_exists($cookie_path)) {
            unlink($cookie_path);
        }

        $res = urlload($row["url"], array("cookies" => $cookie_path));
        if ($res == "") {
            q("update gallery set approved=4\n\t\t\t\twhere id = %s", array($row["id"]));
            file_put_contents("errors.log", "{$row['id']}\tError load\t{$row['url']}\n", FILE_APPEND);
            continue;
        }

        /* Regex-based matcher that was disabled in the original:
        if(!preg_match_all(
            '{<a[^<>]+href=([^<>\s]+)\s*[^<>]*>\s*<img[^<>]src=([^<>\s]+(jpeg|jpg)[^<>\s]*)\s*[^<>]*>\s*</a>}si',
            $res, $m, PREG_SET_ORDER)){
            file_put_contents("errors.log", "{$row['id']}\tNot found thumbs\t{$row['url']}\n", FILE_APPEND);
            continue;}
        */

        // Collect [unused, href, src] for every anchor containing an image.
        // Parser warnings are suppressed with @, as in the original.
        $dom = new DOMDocument();
        @$dom->loadHTML($res);
        $m = array();
        foreach ($dom->getElementsByTagName("a") as $node) {
            $img = $node->getElementsByTagName("img")->item(0);
            if ($img) {
                $m[] = array(null, $node->getAttribute("href"), $img->getAttribute("src"));
            }
        }

        // Collect image links: walk from the end of the document and keep at
        // most five candidates whose link target has an allowed extension.
        $urls = array();
        while (sizeof($m) > 0 && sizeof($urls) < 5) {
            $tmp = array_pop($m);
            $tmp[1] = trim($tmp[1]);
            $tmp[2] = trim($tmp[2]);
            if ($tmp[1] == "" || $tmp[2] == "") {
                continue;
            }
            $ext = strtolower(p(pathinfo(p(parse_url($tmp[1]), "path")), "extension"));
            if (!in_array($ext, $allowed, true)) {
                continue;
            }
            $urls[] = $tmp;
        }

        if (sizeof($urls) == 0) {
            q("update gallery set \napproved=5\n\t\t\t\twhere id = %s", array($row["id"]));
            file_put_contents("errors.log", "{$row['id']}\tNot found valid thumbs\t{$row['url']}\n", FILE_APPEND);
            continue;
        }

        $pick = array_rand($urls);
        $img_url = trim(unquotes($urls[$pick][1]));
        $thumb_url = trim(unquotes($urls[$pick][2]));

        // A missing path or extension simply means "not a picture link".
        $u = parse_url($img_url);
        $p = pathinfo(isset($u["path"]) ? $u["path"] : "");
        $link_ext = isset($p["extension"]) ? strtolower($p["extension"]) : "";
        $is_pic_galler = $link_ext == "jpg" || $link_ext == "jpeg";

        // Picture links are downloaded directly; for other (video) links the
        // thumbnail image inside the anchor is used instead.
        $link_url = $is_pic_galler ? $img_url : $thumb_url;
        $img_url = realurl(baseurl($row['url']), $link_url);
        print " {$img_url}\n";

        list($res, $data) = urlload($img_url, array("getinfo" => true, "referer" => $row["url"], "cookies" => $cookie_path));

        if ($res["http_code"] != "200") {
            q("update gallery set approved=2\n\t\t\t\twhere id = %s", array($row["id"]));
            file_put_contents("errors.log", "{$row['id']}\tHTTP Error - {$res['http_code']}\t{$row['url']}\n\t{$link_url}\n\t{$img_url}\n", FILE_APPEND);
        } else {
            if ($res["content_type"] == "image/jpeg") {
                $thumb = (int) one(q("select num from thumb_num")) + 1;
                $folder = $path . ceil($thumb / 1000);
                q("update thumb_num set num={$thumb}");

                // Recursive so that a missing "allthumbs/" parent is created too.
                if (!is_dir($folder)) {
                    mkdir($folder, 0777, true);
                }

                $name = "{$folder}/{$thumb}.jpg";
                if (file_exists($name)) {
                    unlink($name);
                }

                // Do not mark the gallery approved when the file was not
                // written; leave it at approved = 0 for a later run.
                if (file_put_contents($name, $data) === false) {
                    file_put_contents("errors.log", "{$row['id']}\tError save thumb\t{$row['url']}\n\t{$link_url}\n\t{$img_url}\n", FILE_APPEND);
                    continue;
                }

                q("update gallery set thumb=%s, approved=1\n\t\t\t\twhere id = %s", array($thumb, $row["id"]));
                file_put_contents("downloaded.log", "{$row['id']}\t{$row['url']}\n\t{$link_url}\n\t{$img_url}\n\t{$thumb}\n", FILE_APPEND);
            } else {
                q("update gallery set approved=2\n\t\t\t\twhere id = %s", array($row["id"]));
                file_put_contents("errors.log", "{$row['id']}\tError image/video type\t{$row['url']}\n\t{$link_url}\n\t{$img_url}\n", FILE_APPEND);
            }
        }
    }
}