示例#1
0
文件: CoreTest.php 项目: ejz/core
 /**
  * Checks the URL normalisation and relative-reference resolution that
  * realurl() is expected to perform.
  *
  * The cases are grouped as: absolute URLs (missing root slash, "." and ".."
  * segments), root-relative references resolved against a base, document-
  * relative references (file name, bare query string, "../"), and
  * scheme-relative ("//host") references.
  *
  * assertSame() performs the same strict (===) comparison as the previous
  * assertTrue(a === b) form, but reports expected and actual on failure.
  */
 public function testRealURL()
 {
     // Absolute URLs: a root slash is added and dot segments are collapsed;
     // ".." never climbs above the site root.
     $this->assertSame("http://site.com/", realurl("http://site.com"));
     $this->assertSame("http://site.com/", realurl("http://site.com/"));
     $this->assertSame("http://site.com/", realurl("http://site.com/./"));
     $this->assertSame("http://site.com/asd", realurl("http://site.com/asd"));
     $this->assertSame("http://site.com/asd/", realurl("http://site.com/asd/"));
     $this->assertSame("http://site.com/", realurl("http://site.com/../"));
     $this->assertSame("http://site.com/asd/", realurl("http://site.com/../../../asd/"));
     $this->assertSame("http://site.com/123/asd/", realurl("http://site.com/123/456/../asd/"));
     // Root-relative references replace the whole path of the base URL.
     $this->assertSame("http://site.com/", realurl("/", "http://site.com/"));
     $this->assertSame("http://site.com/", realurl("/", "http://site.com/asd"));
     $this->assertSame("http://site.com/", realurl("/./", "http://site.com/asd"));
     $this->assertSame("http://site.com/", realurl("/./../", "http://site.com/asd"));
     // Document-relative references resolve against the directory of the base.
     $this->assertSame("http://site.com/asd/index.html", realurl("index.html", "http://site.com/asd/contacts.html"));
     $this->assertSame("http://site.com/contacts.html?q=1", realurl("?q=1", "http://site.com/asd/../contacts.html"));
     $this->assertSame("http://site.com/asd/page?q=1", realurl("../page?q=1", "http://site.com/asd/path/"));
     // Scheme-relative references take the scheme of the base, or "http"
     // when the base has no scheme either.
     $this->assertSame("https://site.com/", realurl("//site.com", 'https://site2.com'));
     $this->assertSame("http://site.com/", realurl("//site.com", '//site2.com'));
 }
 /**
  * Extract href links which have a common top url
  *
  * Scans $contents for `<a ... href="...">` occurrences, passes each href
  * through unhtmlspecialchars() and realurl(), and keeps only the entries
  * whose resulting URL begins with $topurl. Entries that do not qualify are
  * removed from every sub-array, so the surviving entries keep their original
  * numeric keys (the arrays may have gaps).
  *
  * @static
  * @param string &$contents HTML text to scan; it is only read here
  * @param string $topurl The top url
  * @return array
  * [0] matched text, from `<a` up to and including the closing quote of href
  * [1] prefix of the match, from `<a` up to and including `href="`
  * [2] hrefs exactly as written in the markup
  * [3] URLs returned by realurl() (presumably absolute; verify against realurl)
  */
 function &get_pkwk_href_links(&$contents, $topurl)
 {
     // Must be <a....href="....." ....>
     $pattern = '#' . '(<a[^>]+href=")([^> "]*)"' . '#';
     // Initialise the result before preg_match_all() fills it by reference
     // (this was previously misspelled, which left a dead variable).
     $matches = array();
     preg_match_all($pattern, $contents, $matches);
     $matches[3] = array();
     foreach ($matches[2] as $i => $href) {
         $url = unhtmlspecialchars($href);
         // NOTE(review): argument order ($topurl first) is kept as found;
         // confirm it matches the realurl() signature used by this project.
         $url = realurl($topurl, $url);
         if (strpos($url, $topurl) === 0) {
             $matches[3][$i] = $url;
         } else {
             // Not under $topurl: drop the entry from all parallel arrays.
             unset($matches[0][$i]);
             unset($matches[1][$i]);
             unset($matches[2][$i]);
         }
     }
     return $matches;
 }
示例#3
0
/**
 * Downloads one thumbnail for each unapproved gallery and records the result.
 *
 * For every gallery row with approved = 0 (at most 10000 per run) the gallery
 * page is loaded and parsed, links that wrap an image are collected, one of
 * them is picked at random and downloaded, and the gallery row is updated.
 *
 * Values written to gallery.approved by this function:
 *   1 - thumbnail saved (gallery.thumb is set to the new thumbnail number)
 *   2 - download returned a non-200 status or a non "image/jpeg" content type
 *   3 - gallery URL contains ".flv"; skipped without loading
 *   4 - gallery page came back empty
 *   5 - no link with an accepted image/video extension was found
 *
 * Failures are appended to errors.log and successes to downloaded.log in the
 * current working directory. The script dies if there is nothing to process.
 *
 * @param mixed $params Not used by this function.
 * @return void
 */
function grab_pics($params)
{
    $cookie_path = "./cookies.txt";
    $path = "allthumbs/";
    $num = one(q("select count(*) from gallery\n\t\twhere approved = 0"));
    $num > 0 or die("Data is empty.");
    $limit = min($num, 10000);
    print "Found {$num} galleries.\nStart grab pics for next {$limit} galleries...\n";
    $galleries = q("select * from gallery\n\t\twhere approved = 0 limit {$limit}");
    while ($row = fetch($galleries)) {
        print "\n{$row['url']}\n";
        if (strpos($row['url'], ".flv") !== false) {
            q("update gallery set approved=3\n\t\t\t\twhere id = %s", array($row["id"]));
            continue;
        }
        // Start every gallery with a fresh cookie jar.
        !file_exists($cookie_path) or unlink($cookie_path);
        $res = urlload($row["url"], array("cookies" => $cookie_path));
        if ($res == "") {
            q("update gallery set approved=4\n\t\t\t\twhere id = %s", array($row["id"]));
            file_put_contents("errors.log", "{$row['id']}\tError load\t{$row['url']}\n", FILE_APPEND);
            continue;
        }
        // Collect (href, src) for every <a> that contains at least one <img>.
        // Index 0 is an unused placeholder; later code reads indexes 1 and 2.
        $dom = new DOMDocument();
        @$dom->loadHTML($res);
        $m = array();
        foreach ($dom->getElementsByTagName("a") as $node) {
            $img = $node->getElementsByTagName("img")->item(0);
            $img and $m[] = array(null, $node->getAttribute("href"), $img->getAttribute("src"));
        }
        // Collect image links: walk the candidates from the last one on the
        // page backwards and keep up to five whose href has an accepted
        // image/video extension.
        $urls = array();
        while (sizeof($m) > 0 && sizeof($urls) < 5) {
            $tmp = array_pop($m);
            $tmp[1] = trim($tmp[1]);
            $tmp[2] = trim($tmp[2]);
            if ($tmp[1] == "" || $tmp[2] == "") {
                continue;
            }
            if (!in_array(strtolower(p(pathinfo(p(parse_url($tmp[1]), "path")), "extension")), array("jpeg", "jpg", "wmv", "mpg", "mpeg", "avi", "mp4", "mov", "flv"))) {
                continue;
            }
            $urls[] = $tmp;
        }
        if (sizeof($urls) == 0) {
            q("update gallery set approved=5\n\t\t\t\twhere id = %s", array($row["id"]));
            file_put_contents("errors.log", "{$row['id']}\tNot found valid thumbs\t{$row['url']}\n", FILE_APPEND);
            continue;
        }
        $pick = array_rand($urls);
        $img_url = trim(unquotes($urls[$pick][1]));
        $thumb_url = trim(unquotes($urls[$pick][2]));
        // parse_url() may return false or omit "path", and pathinfo() omits
        // "extension" when there is none; read both defensively so a link
        // without an extension is simply treated as "not a picture".
        $u = parse_url($img_url);
        $p = pathinfo(is_array($u) && isset($u["path"]) ? $u["path"] : "");
        $ext = isset($p["extension"]) ? strtolower($p["extension"]) : "";
        $is_pic_galler = $ext == "jpg" || $ext == "jpeg";
        // Picture links are downloaded directly; for video links the <img>
        // source inside the link is downloaded instead.
        $link_url = $is_pic_galler ? $img_url : $thumb_url;
        $img_url = realurl(baseurl($row['url']), $link_url);
        print "  {$img_url}\n";
        list($res, $data) = urlload($img_url, array("getinfo" => true, "referer" => $row["url"], "cookies" => $cookie_path));
        if ($res["http_code"] != "200") {
            q("update gallery set approved=2\n\t\t\t\twhere id = %s", array($row["id"]));
            file_put_contents("errors.log", "{$row['id']}\tHTTP Error - {$res['http_code']}\t{$row['url']}\n\t{$link_url}\n\t{$img_url}\n", FILE_APPEND);
        } else {
            if ($res["content_type"] == "image/jpeg") {
                // Thumbnails are numbered from the thumb_num counter and
                // stored 1000 per sub-folder: allthumbs/<ceil(n/1000)>/<n>.jpg
                $thumb = (int) one(q("select num from thumb_num")) + 1;
                $folder = $path . ceil($thumb / 1000);
                q("update thumb_num set num={$thumb}");
                if (!is_dir($folder)) {
                    // Recursive, so a missing "allthumbs/" parent is created too.
                    mkdir($folder, 0777, true);
                }
                if (file_exists($name = "{$folder}/{$thumb}.jpg")) {
                    unlink($name);
                }
                // NOTE(review): the write result is not checked, so a failed
                // write still marks the gallery as approved=1 below.
                file_put_contents($name, $data);
                q("update gallery set thumb=%s, approved=1\n\t\t\t\twhere id = %s", array($thumb, $row["id"]));
                file_put_contents("downloaded.log", "{$row['id']}\t{$row['url']}\n\t{$link_url}\n\t{$img_url}\n\t{$thumb}\n", FILE_APPEND);
            } else {
                q("update gallery set approved=2\n\t\t\t\twhere id = %s", array($row["id"]));
                file_put_contents("errors.log", "{$row['id']}\tError image/video type\t{$row['url']}\n\t{$link_url}\n\t{$img_url}\n", FILE_APPEND);
            }
        }
    }
}
示例#4
0
/**
 * Fetches decoded JSON for a remote service, backed by a local file cache.
 *
 * Configuration is read from globals named after the service:
 * `<service>_api_url`, `<service>_id` and `<service>_cache_file`, plus the
 * global `$cache_dir` (created if it does not exist).
 *
 * The cache file name is `$cache_dir . <cache_file> . <suffix>`, where the
 * suffix is built from the repo (or the service id) and the object, with
 * "__" collapsed to "_" and "." and "/" removed. For soundcloud requests
 * whose $params contain "://", the md5 of $params is prepended.
 *
 * When cache_expired() reports the cache as stale the API URL is fetched;
 * otherwise the cache file is read. If has_children() accepts the decoded
 * result it is returned (and written to the cache when it came from the URL);
 * if not, whatever the cache file currently holds is decoded and returned.
 *
 * @param string      $service One of 'youtube', 'soundcloud', 'facebook',
 *                             'google'; anything else uses the github layout.
 * @param string|null $object  API object/resource name.
 * @param string      $params  Raw string appended to the request URL.
 * @param string|null $repo    Overrides the service id as owner/channel/user.
 * @return mixed Result of json_decode() (null when nothing decodable exists).
 */
function get_json_data($service, $object = NULL, $params = '', $repo = NULL)
{
    global $cache_dir;
    $api_base = $GLOBALS[$service . '_api_url'];
    $default_id = $GLOBALS[$service . '_id'];
    $cache_prefix = $GLOBALS[$service . '_cache_file'];

    // Attempt to make the cache directory if it doesn't already exist
    file_exists($cache_dir) or mkdir($cache_dir);

    // The explicit repo wins; otherwise fall back to the configured id.
    $owner = $repo ? $repo : $default_id;

    // Local cache file on the webserver, preferably out of public reach,
    // e.g. htdocs/tmp/.json_github_lmms_releases.
    $suffix = $owner;
    if ($object) {
        $suffix .= '_' . $object;
    }
    $suffix = str_replace('__', '_', $suffix);
    $suffix = str_replace('.', '', $suffix);
    $suffix = str_replace('/', '', $suffix);

    // For "resolve" requests, hash the track URL for the cache filename
    $is_resolve = $service == 'soundcloud' && $params && strpos($params, '://') !== false;
    if ($is_resolve) {
        $suffix = md5($params) . $suffix;
    }
    $cache_path = $cache_dir . $cache_prefix . $suffix;

    // Build the request URL for the service, e.g.
    // "https://api.github.com/repos/lmms/lmms/releases?param=value"
    // "https://www.googleapis.com/plus/v1/people/113001340835122723950/activities/public?maxResults=25"
    if ($service == 'youtube') {
        $request_url = $api_base . ($object ? $object : 'playlists') . '?channelId=' . $owner . $params;
    } elseif ($service == 'soundcloud') {
        $request_url = $api_base . $owner . '/' . ($object ? $object : 'tracks') . '.json' . $params;
    } elseif ($service == 'facebook') {
        $request_url = $api_base . '?id=' . $owner . '&format=json' . $params;
    } elseif ($service == 'google') {
        $request_url = $api_base . $owner . '/' . $object . '/public/' . $params;
    } else {
        // github and any unrecognised service share this layout.
        $request_url = $api_base . $owner . '/' . $default_id . '/' . $object . $params;
    }

    $from_web = cache_expired($cache_path);
    $raw = $from_web
        ? file_get_contents_curl(realurl($request_url), $service)
        : file_get_contents($cache_path);
    $decoded = json_decode($raw);

    // Unusable data: fall back to whatever the previous cache holds.
    if (!has_children($decoded, $service)) {
        return json_decode(@file_get_contents($cache_path));
    }

    // Valid data that came from the web refreshes the cache.
    if ($from_web) {
        @file_put_contents($cache_path, $raw, LOCK_EX);
    }
    return $decoded;
}