/**
 * Verifies realurl() against a table of fixed inputs.
 *
 * Covered groups, in order:
 *  - absolute URLs passed on their own (dot-segment and trailing-slash handling);
 *  - root-relative paths combined with a second URL argument;
 *  - relative file / query-only / parent-directory references combined with a
 *    second URL argument;
 *  - scheme-less "//host" references, whose expected scheme is taken from the
 *    second argument ("http" when that argument has no scheme either).
 *
 * assertSame() performs the same strict (===) comparison the assertions were
 * written with, but reports the expected and actual strings on failure.
 */
public function testRealURL()
{
    // Absolute URLs, single-argument form.
    $this->assertSame("http://site.com/", realurl("http://site.com"));
    $this->assertSame("http://site.com/", realurl("http://site.com/"));
    $this->assertSame("http://site.com/", realurl("http://site.com/./"));
    $this->assertSame("http://site.com/asd", realurl("http://site.com/asd"));
    $this->assertSame("http://site.com/asd/", realurl("http://site.com/asd/"));
    $this->assertSame("http://site.com/", realurl("http://site.com/../"));
    $this->assertSame("http://site.com/asd/", realurl("http://site.com/../../../asd/"));
    $this->assertSame("http://site.com/123/asd/", realurl("http://site.com/123/456/../asd/"));

    // Root-relative paths with a second URL argument.
    $this->assertSame("http://site.com/", realurl("/", "http://site.com/"));
    $this->assertSame("http://site.com/", realurl("/", "http://site.com/asd"));
    $this->assertSame("http://site.com/", realurl("/./", "http://site.com/asd"));
    $this->assertSame("http://site.com/", realurl("/./../", "http://site.com/asd"));

    // Relative references with a second URL argument.
    $this->assertSame(
        "http://site.com/asd/index.html",
        realurl("index.html", "http://site.com/asd/contacts.html")
    );
    $this->assertSame(
        "http://site.com/contacts.html?q=1",
        realurl("?q=1", "http://site.com/asd/../contacts.html")
    );
    $this->assertSame(
        "http://site.com/asd/page?q=1",
        realurl("../page?q=1", "http://site.com/asd/path/")
    );

    // Scheme-less references.
    $this->assertSame("https://site.com/", realurl("//site.com", 'https://site2.com'));
    $this->assertSame("http://site.com/", realurl("//site.com", '//site2.com'));
}
/**
 * Extract href links which have a common top url
 *
 * Scans $contents for `<a ... href="...">` openings, converts every href with
 * unhtmlspecialchars() and realurl(), and keeps only the entries whose
 * resulting URL starts with $topurl. Rejected entries are removed from all
 * sub-arrays; the surviving entries keep their original numeric keys (the
 * arrays are not re-indexed).
 *
 * @static
 * @param string &$contents Text to scan (only read here)
 * @param string $topurl    The top url
 * @return array
 *   [0] matched text, from `<a` up to the closing quote of the href value
 *   [1] prefix words to hrefs (everything from `<a` through `href="`)
 *   [2] hrefs (raw attribute values)
 *   [3] absolute urls (as returned by realurl())
 */
function &get_pkwk_href_links(&$contents, $topurl)
{
    // Must be <a....href="....." ....>
    $pattern = '#' . '(<a[^>]+href=")([^> "]*)"' . '#';

    $matches = array();
    preg_match_all($pattern, $contents, $matches);

    $matches[3] = array();
    foreach ($matches[2] as $i => $href) {
        $url = unhtmlspecialchars($href);
        // presumably resolves the href relative to $topurl — realurl() is defined elsewhere
        $url = realurl($topurl, $url);

        if (strpos($url, $topurl) === 0) {
            $matches[3][$i] = $url;
        } else {
            // Outside the top url: drop the entry from every parallel array.
            unset($matches[0][$i], $matches[1][$i], $matches[2][$i]);
        }
    }

    return $matches;
}
/**
 * Download one thumbnail for each gallery that is still unprocessed.
 *
 * Reads up to 10000 rows of `gallery` with approved = 0. For every row the
 * gallery page is loaded, all `<a>` elements that contain an `<img>` are
 * collected, and — walking from the end of the document — up to five links
 * whose target path has a known picture/video extension are kept. One of
 * those is chosen at random: if its target is a jpg/jpeg the target itself is
 * downloaded, otherwise the `<img>` source is. A response of type image/jpeg
 * is stored as allthumbs/<ceil(n / 1000)>/<n>.jpg, where n is the incremented
 * `thumb_num` counter.
 *
 * Values written to gallery.approved:
 *   1 - thumbnail saved
 *   2 - download returned a non-200 status or a non-JPEG content type
 *   3 - gallery url contains ".flv"
 *   4 - gallery page load returned an empty result
 *   5 - no usable link/thumbnail pair found on the page
 *
 * Progress is printed to the output; failures go to errors.log and successes
 * to downloaded.log (both relative to the working directory). Terminates the
 * script via die() when there is nothing to process.
 *
 * @param mixed $params Not used by this function.
 * @return void
 */
function grab_pics($params)
{
    $cookie_path = "./cookies.txt";
    $path = "allthumbs/";
    // Extensions a link target must have for the link to be considered.
    $media_ext = array("jpeg", "jpg", "wmv", "mpg", "mpeg", "avi", "mp4", "mov", "flv");

    $num = one(q("select count(*) from gallery\n\t\twhere approved = 0"));
    if (!($num > 0)) {
        die("Data is empty.");
    }

    $limit = min($num, 10000);
    print "Found {$num} galleries.\nStart grab pics for next {$limit} galleries...\n";

    $galleries = q("select * from gallery\n\t\twhere approved = 0 limit {$limit}");
    while ($row = fetch($galleries)) {
        print "\n{$row['url']}\n";

        // Direct video urls are not scraped at all.
        if (strpos($row['url'], ".flv") !== false) {
            q("update gallery set approved=3\n\t\t\t\twhere id = %s", array($row["id"]));
            continue;
        }

        // Start every gallery with a fresh cookie jar.
        if (file_exists($cookie_path)) {
            unlink($cookie_path);
        }

        $res = urlload($row["url"], array("cookies" => $cookie_path));
        if ($res == "") {
            q("update gallery set approved=4\n\t\t\t\twhere id = %s", array($row["id"]));
            file_put_contents("errors.log", "{$row['id']}\tError load\t{$row['url']}\n", FILE_APPEND);
            continue;
        }

        // Collect (href, img src) pairs for every <a> that wraps an <img>.
        // Index 0 is an unused placeholder; 1 = href, 2 = src.
        $dom = new DOMDocument();
        @$dom->loadHTML($res); // parser warnings on sloppy markup are deliberately silenced
        $links = array();
        foreach ($dom->getElementsByTagName("a") as $node) {
            $img = $node->getElementsByTagName("img")->item(0);
            if ($img) {
                $links[] = array(null, $node->getAttribute("href"), $img->getAttribute("src"));
            }
        }

        // Collect image links: take candidates from the end of the page, at most five.
        $urls = array();
        while (sizeof($links) > 0 && sizeof($urls) < 5) {
            $tmp = array_pop($links);
            $tmp[1] = trim($tmp[1]);
            $tmp[2] = trim($tmp[2]);
            if ($tmp[1] == "" || $tmp[2] == "") {
                continue;
            }
            $ext = strtolower(p(pathinfo(p(parse_url($tmp[1]), "path")), "extension"));
            if (!in_array($ext, $media_ext)) {
                continue;
            }
            $urls[] = $tmp;
        }

        if (sizeof($urls) == 0) {
            q("update gallery set approved=5\n\t\t\t\twhere id = %s", array($row["id"]));
            file_put_contents("errors.log", "{$row['id']}\tNot found valid thumbs\t{$row['url']}\n", FILE_APPEND);
            continue;
        }

        $pick = array_rand($urls);
        $img_url = trim(unquotes($urls[$pick][1]));
        $thumb_url = trim(unquotes($urls[$pick][2]));

        // Same p() lookup as the filter above, so a URL without a path or an
        // extension yields a non-match instead of an undefined-index notice.
        $ext = strtolower(p(pathinfo(p(parse_url($img_url), "path")), "extension"));
        $is_pic_gallery = $ext == "jpg" || $ext == "jpeg";

        // Picture galleries: fetch the linked picture. Video links: fetch the thumbnail.
        $link_url = $is_pic_gallery ? $img_url : $thumb_url;
        $img_url = realurl(baseurl($row['url']), $link_url);
        print " {$img_url}\n";

        list($res, $data) = urlload($img_url, array(
            "getinfo" => true,
            "referer" => $row["url"],
            "cookies" => $cookie_path,
        ));

        if ($res["http_code"] != "200") {
            q("update gallery set approved=2\n\t\t\t\twhere id = %s", array($row["id"]));
            file_put_contents("errors.log", "{$row['id']}\tHTTP Error - {$res['http_code']}\t{$row['url']}\n\t{$link_url}\n\t{$img_url}\n", FILE_APPEND);
            continue;
        }

        if ($res["content_type"] != "image/jpeg") {
            q("update gallery set approved=2\n\t\t\t\twhere id = %s", array($row["id"]));
            file_put_contents("errors.log", "{$row['id']}\tError image/video type\t{$row['url']}\n\t{$link_url}\n\t{$img_url}\n", FILE_APPEND);
            continue;
        }

        // Allocate the next thumbnail number; files are bucketed 1000 per folder.
        $thumb = (int) one(q("select num from thumb_num")) + 1;
        $folder = $path . ceil($thumb / 1000);
        q("update thumb_num set num={$thumb}");

        if (!is_dir($folder)) {
            // Recursive, so the base "allthumbs/" directory is created on first use too.
            mkdir($folder, 0777, true);
        }

        $name = "{$folder}/{$thumb}.jpg";
        if (file_exists($name)) {
            unlink($name);
        }
        file_put_contents($name, $data);

        q("update gallery set thumb=%s, approved=1\n\t\t\t\twhere id = %s", array($thumb, $row["id"]));
        file_put_contents("downloaded.log", "{$row['id']}\t{$row['url']}\n\t{$link_url}\n\t{$img_url}\n\t{$thumb}\n", FILE_APPEND);
    }
}
/**
 * Return decoded JSON for a service request, backed by a cache file.
 *
 * Per-service settings are read from the globals "<service>_api_url",
 * "<service>_id" and "<service>_cache_file"; the cache lives under the global
 * $cache_dir. When cache_expired() reports the cache file as expired the API
 * url is fetched, otherwise the cache file is read. If has_children() accepts
 * the decoded result it is returned (and, when it was fetched, written back
 * to the cache file); otherwise whatever the cache file currently decodes to
 * is returned.
 *
 * @param string      $service One of 'youtube', 'soundcloud', 'facebook',
 *                             'google', 'github'; anything else is handled
 *                             like 'github'.
 * @param string|null $object  Resource name placed in the url and the cache name.
 * @param string      $params  Text appended verbatim to the url.
 * @param string|null $repo    Replaces the service's configured id when given.
 * @return mixed Result of json_decode() (null if nothing could be decoded).
 */
function get_json_data($service, $object = NULL, $params = '', $repo = NULL)
{
    global $cache_dir;

    $api_base = $GLOBALS[$service . '_api_url'];
    $default_id = $GLOBALS[$service . '_id'];
    $cache_prefix = $GLOBALS[$service . '_cache_file'];

    // Make sure the cache directory is there before anything touches it.
    if (!file_exists($cache_dir)) {
        mkdir($cache_dir);
    }

    // Every url and cache name below uses the explicit $repo when supplied,
    // and the service's own id otherwise.
    $owner = $repo ? $repo : $default_id;

    // Cache file name, e.g. htdocs/tmp/.json_github_lmms_releases. Double
    // underscores are collapsed first, then dots and slashes are removed.
    $suffix = $owner . ($object ? '_' . $object : '');
    $suffix = str_replace(array('__', '.', '/'), array('_', '', ''), $suffix);

    // SoundCloud "resolve" requests carry a track url in $params; hash it so
    // each track gets its own cache file.
    $is_resolve = $service == 'soundcloud' && $params && strpos($params, '://') !== false;
    if ($is_resolve) {
        $suffix = md5($params) . $suffix;
    }
    $cache_path = $cache_dir . $cache_prefix . $suffix;

    // Build the part of the request url that follows the service's base url.
    // Examples of complete urls:
    //   https://api.github.com/repos/lmms/lmms/releases?param=value
    //   https://www.googleapis.com/plus/v1/people/113001340835122723950/activities/public?maxResults=25
    switch ($service) {
        case 'youtube':
            $request = ($object ? $object : 'playlists') . '?channelId=' . $owner . $params;
            break;
        case 'soundcloud':
            $request = $owner . '/' . ($object ? $object : 'tracks') . '.json' . $params;
            break;
        case 'facebook':
            $request = '?id=' . $owner . '&format=json' . $params;
            break;
        case 'google':
            $request = $owner . '/' . $object . '/public/' . $params;
            break;
        case 'github':
        default:
            // Without $repo this is "<id>/<id>/...", i.e. the repository is
            // assumed to share the project's name.
            // NOTE(review): with $repo set this yields "<repo>/<id>/..." —
            // confirm that is the intended owner/repository order.
            $request = $owner . '/' . $default_id . '/' . $object . $params;
    }
    $full_api = $api_base . $request;

    $fetched = cache_expired($cache_path);
    if ($fetched) {
        $json = file_get_contents_curl(realurl($full_api), $service);
    } else {
        $json = file_get_contents($cache_path);
    }
    $obj = json_decode($json);

    // Unusable data: fall back to whatever the cache file holds right now.
    if (!has_children($obj, $service)) {
        return json_decode(@file_get_contents($cache_path));
    }

    // Good data that came from the web is written to the cache for next time.
    if ($fetched) {
        @file_put_contents($cache_path, $json, LOCK_EX);
    }

    return $obj;
}