/**
 * Web entry point: look up the images on the page named by the `url` query parameter.
 *
 * The value of $_GET['url'] is passed through encode_cjk_url() and get_page(), and the
 * resulting HTML is handed to parse_img_urls(). What happens next depends on how many
 * image URLs come back:
 *   - none:     exit_with_msg() is called with "Images not found";
 *   - one:      redirect_location() is called with that single URL;
 *   - several:  the URLs go through fetch_and_store_images(), make_zip() and output_zip().
 *
 * @return void
 */
function main()
{
    $page_html = get_page(encode_cjk_url($_GET['url']));
    $found_urls = parse_img_urls($page_html);
    $total = count($found_urls);

    if ($total === 0) {
        exit_with_msg('Images not found');
    } elseif ($total === 1) {
        // Exactly one image: hand its URL straight to the redirect helper.
        redirect_location(array_pop($found_urls));
    } else {
        // Several images: store them, bundle them, and send the bundle.
        $stored_images = fetch_and_store_images($found_urls);
        $archive = make_zip($stored_images);
        output_zip($archive);
    }
}
/**
 * Turn ift.tt short URLs into a list of resource URLs.
 *
 * The text to scan for short URLs is $direct_url when given, otherwise the contents of
 * tumblr_likes.txt. For every short URL found:
 *   1. get_redirect_target() is asked for the long URL; when it yields nothing the short
 *      URL is appended to invalid_urls.txt and processing moves on to the next one;
 *   2. get_page_src() fetches the HTML of the long URL;
 *   3. resource URLs are extracted according to get_post_type(): photo/photoset use
 *      parse_img_urls(), audio uses parse_audio_url(), video uses parse_video_url(),
 *      and any other type falls back to parse_img_urls();
 *   4. when the result is an array, URLs whose file name occurs anywhere in
 *      unwanted_files.txt are dropped (a missing list means nothing is dropped);
 *   5. whatever remains is echoed and appended to resource_urls.txt, one URL per line.
 *      Nothing is appended when no resource URL remains.
 *
 * Progress is echoed along the way. A failure on one short URL is reported on the output
 * and does not stop the remaining URLs from being processed.
 *
 * @param string|null $direct_url Text to scan for short URLs instead of tumblr_likes.txt;
 *                                any falsy value means "read the file".
 * @return void
 */
function main($direct_url = null)
{
    $txt = $direct_url ? $direct_url : file_get_contents('tumblr_likes.txt');

    // The dot in the host is escaped so it only matches a literal ".", and the path stops at
    // the first whitespace character so a trailing "\r" (CRLF files) or trailing blanks are
    // not glued onto the URL handed to get_redirect_target().
    if (!preg_match_all('#http://ift\.tt/\S+#', (string) $txt, $matches)) {
        return;
    }

    // A missing or unreadable list simply means there is nothing to filter out.
    $unwanted = is_readable('unwanted_files.txt') ? file_get_contents('unwanted_files.txt') : '';
    if ($unwanted === false) {
        $unwanted = '';
    }

    // Deal with each short URL in turn.
    foreach ($matches[0] as $ori_url) {
        try {
            echo str_repeat('-', 30), "\n";
            echo "Start: {$ori_url}\n";

            // Resolve the long URL.
            $redirect_url = get_redirect_target($ori_url);
            if (!$redirect_url) {
                file_put_contents('invalid_urls.txt', "{$ori_url}\n", FILE_APPEND);
                throw new Exception("invalid original URL {$ori_url}");
            }
            echo "Location fetched: {$redirect_url}\n";

            // Fetch the HTML page content.
            $page_src = get_page_src($redirect_url);
            if (!$page_src) {
                throw new Exception("zero length page_src");
            }
            printf("Page fetched: length(%d)\n", strlen($page_src));

            // Extract the resource URLs according to the post type.
            $resource_urls = array();
            switch (get_post_type($page_src)) {
                case 'photo':
                case 'photoset':
                    $resource_urls = parse_img_urls($page_src);
                    break;
                case 'audio':
                    echo "fetching audio\n";
                    $resource_urls = parse_audio_url($page_src);
                    break;
                case 'video':
                    echo "fetching video\n";
                    $resource_urls = parse_video_url($page_src);
                    break;
                default:
                    echo 'unknown resource, trying images', "\n";
                    $resource_urls = parse_img_urls($page_src);
            }

            if (is_array($resource_urls)) {
                foreach ($resource_urls as $index => $url) {
                    $file_name = basename($url);
                    // An empty needle matches every haystack on PHP 8 (and warns on older
                    // versions), so a URL without a file name is never treated as unwanted.
                    if ($file_name !== '' && strpos($unwanted, $file_name) !== false) {
                        unset($resource_urls[$index]);
                    }
                }
                $resource_urls = implode("\n", $resource_urls);
            }

            $resource_urls = (string) $resource_urls;
            if ($resource_urls === '') {
                // Nothing left for this post: keep the console output as before, but do not
                // append a bare blank line to the URL list.
                echo "\n";
            } else {
                $resource_urls .= "\n";
                echo $resource_urls;
                file_put_contents('resource_urls.txt', $resource_urls, FILE_APPEND);
            }
        } catch (Exception $e) {
            echo 'Exception: ', $e->getMessage(), "\n";
        }
    }
}