Example #1
0
 /**
  * Fetch a URL over a plain socket connection, optionally following redirects.
  *
  * On success the response body is stored in $this->body, the final URL in
  * $this->end_url and transfer statistics in $this->request_info; response
  * header lines are handed to ReadResponseHeader(). On failure $this->errstr
  * is filled from the global $HTTP_ERROR table.
  *
  * @param string $url         URL to request
  * @param bool   $redirection Whether a "location" response header should be followed
  * @return bool TRUE on success, FALSE on failure (details in $this->errstr)
  */
 function GetStandard($url, $redirection)
 {
     global $HTTP_ERROR;

     if ($this->username) {
         $this->request_headers['Authorization'] = "Basic " . base64_encode($this->username . ":" . $this->password);
     }

     $parsed_url = parse_url($url);
     if ($parsed_url === FALSE) {
         $this->errstr = $HTTP_ERROR[CURLE_URL_MALFORMAT];
         return FALSE;
     }

     $this->end_url = $url;
     $this->body = '';

     // parse_url() also succeeds for host-less input such as "/path"; there is
     // nothing to connect to in that case, so report it as a malformed URL
     if (!isset($parsed_url['host']) || $parsed_url['host'] === '') {
         $this->errstr = $HTTP_ERROR[CURLE_URL_MALFORMAT];
         return FALSE;
     }

     $host = $parsed_url['host'];
     $this->request_headers['Host'] = $host;

     // gethostbyname() returns its argument unchanged both on failure and when
     // given an IPv4 literal, so literals must bypass the "unchanged" check
     if (filter_var($host, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) !== FALSE) {
         $ipaddr = $host;
     } else {
         $ipaddr = gethostbyname($host);
         if ($ipaddr === $host) {
             $this->errstr = $HTTP_ERROR[CURLE_COULDNT_RESOLVE_HOST];
             return FALSE;
         }
     }

     if (empty($parsed_url['port'])) {
         $parsed_url['port'] = 80;
     }

     $socket = @fsockopen($ipaddr, $parsed_url['port'], $errno, $errstr, $this->connect_timeout);
     if ($socket === FALSE) {
         $this->errstr = sprintf($HTTP_ERROR[CURLE_COULDNT_CONNECT], $errstr);
         return FALSE;
     }

     stream_set_timeout($socket, $this->read_timeout);
     fputs($socket, $this->PrepareHeaders($parsed_url));

     $headers_done = FALSE;
     $start_time = $this->microtime_float();

     // Compare against FALSE explicitly: a final line consisting of "0" is
     // falsy and would otherwise end the loop early and be dropped
     while (($line = fgets($socket, 8192)) !== FALSE) {
         // The first whitespace-only line separates the headers from the body
         if (!$headers_done && preg_match('|^\\s+$|', $line)) {
             $headers_done = TRUE;
             continue;
         }

         if ($headers_done) {
             $this->body .= $line;
         } else {
             $this->ReadResponseHeader(null, $line);
         }
     }

     $end_time = $this->microtime_float();
     $meta = stream_get_meta_data($socket);
     fclose($socket);

     // fgets() returns FALSE on a normal end of stream as well as on a read
     // timeout; only the timeout is a receive error
     if (!empty($meta['timed_out'])) {
         $this->errstr = $HTTP_ERROR[CURLE_RECV_ERROR];
         return FALSE;
     }

     $elapsed = $end_time - $start_time;
     $this->request_info['size_download'] = strlen($this->body);
     // Guard against a zero elapsed time on very fast or empty responses
     $this->request_info['speed_download'] = sprintf('%.2f', $elapsed > 0 ? $this->request_info['size_download'] / $elapsed / 1024 : 0);

     if ($redirection && !empty($this->response_headers['location'])) {
         // Get the new URL to access
         $new_url = RelativeToAbsolute($url, $this->response_headers['location']);

         // Clear the previous response headers
         unset($this->response_headers);
         unset($this->raw_response_headers);

         $this->redirects++;
         $this->end_url = $new_url;

         if ($this->redirects > $this->max_redirects) {
             $this->errstr = $HTTP_ERROR[CURLE_TOO_MANY_REDIRECTS];
             return FALSE;
         }

         return $this->GetStandard($new_url, $redirection);
     }

     if (isset($this->response_headers['status_code']) && $this->response_headers['status_code'] >= 300) {
         $this->errstr = sprintf($HTTP_ERROR[CURLE_HTTP_RETURNED_ERROR], $this->response_headers['status']);
         return FALSE;
     }

     return TRUE;
 }
Example #2
0
 /**
  * Opening-tag handler that collects content links, thumbnails, the document
  * base URL and external script URLs from <a>, <img>, <base> and <script> tags.
  *
  * Links whose path ends in a picture or movie extension are queued in
  * $this->in_links; a following <img> whose source matches $this->link_exts is
  * paired with the most recently queued link and recorded in $this->thumbs.
  *
  * @param mixed  $parser Parser reference supplied by the caller (not used here)
  * @param string $name   Tag name; matched against lowercase names
  * @param array  $attrs  Attribute name => value map; lowercase keys are read
  * @return void
  */
 function tagOpen(&$parser, $name, $attrs)
 {
     global $C;

     foreach ($attrs as $key => $val) {
         $attrs[$key] = trim($val);
     }

     // Tags may legitimately omit href/src (e.g. <a name="top">); fall back to
     // an empty string instead of reading an undefined index
     $href = isset($attrs['href']) ? $attrs['href'] : '';
     $src = isset($attrs['src']) ? $attrs['src'] : '';

     switch ($name) {
         case 'a':
             // Extension tests ignore any query string
             $href_no_query = preg_replace('~\\?.*$~', '', $href);
             $is_picture_link = preg_match("~\\.({$this->picture_exts})\$~i", $href_no_query);
             $is_movie_link = preg_match("~\\.({$this->movie_exts})\$~i", $href_no_query);

             if ($is_picture_link || $is_movie_link) {
                 $this->in_links[] = RelativeToAbsolute($this->base_url, $href);
             } else {
                 $this->num_links++;
             }
             break;

         case 'img':
             // Images with a small width or height generally aren't the thumbnail
             if (isset($attrs['height']) && isset($attrs['width']) && ($attrs['height'] < $C['min_thumb_height'] || $attrs['width'] < $C['min_thumb_width'] || $attrs['height'] > $C['max_thumb_height'] || $attrs['width'] > $C['max_thumb_width'])) {
                 break;
             }

             $src_no_query = preg_replace('~\\?.*$~', '', $src);
             $is_thumbnail = preg_match("~\\.({$this->link_exts})\$~i", $src_no_query);
             $imgsrc = null;

             if ($is_thumbnail) {
                 // Resolved once and reused for both the image list and the thumb entry
                 $imgsrc = RelativeToAbsolute($this->base_url, $src);
                 $this->images[] = array('preview' => $imgsrc, 'full' => $imgsrc, 'content' => $imgsrc);
             }

             if (count($this->in_links)) {
                 if ($is_thumbnail) {
                     // Pair this thumbnail with the most recently seen content link
                     $link = array_pop($this->in_links);
                     $link_no_query = preg_replace('~\\?.*$~', '', $link);
                     $is_picture_link = preg_match("~\\.({$this->picture_exts})\$~i", $link_no_query);
                     $is_movie_link = preg_match("~\\.({$this->movie_exts})\$~i", $link_no_query);
                     $format = $is_picture_link ? FMT_PICTURES : FMT_MOVIES;

                     // Count each distinct content link only once
                     if (!isset($this->thumbs[$format][$link_no_query])) {
                         if ($is_picture_link) {
                             $this->num_picture_links++;
                         } else {
                             $this->num_movie_links++;
                         }
                         $this->num_content_links++;
                     }

                     $this->thumbs[$format][$link_no_query] = array('preview' => $imgsrc, 'full' => $is_movie_link ? $imgsrc : $link, 'content' => $link);
                 } else {
                     $this->num_links++;
                 }
             }
             break;

         case 'base':
             // Only an absolute http(s) base replaces the current base URL
             if (isset($attrs['href']) && preg_match('~^https?://~i', $attrs['href'])) {
                 $this->base_url = $attrs['href'];
             }
             break;

         case 'script':
             if (isset($attrs['src'])) {
                 $this->scripts[] = RelativeToAbsolute($this->base_url, $attrs['src']);
             }
             break;
     }
 }
Example #3
0
 /**
  * Extract video URLs and their candidate thumbnail URLs from an HTML document.
  *
  * Videos are taken from <a href> and image-map <area href> values whose path
  * matches VIDEO_EXTENSIONS. Thumbnails are <img> elements matching
  * JPG_EXTENSION that are either nested in a matching <a>, or that reference
  * the map containing a matching <area>; the latter are prefixed with
  * "[coords]" taken from that area.
  *
  * @param string $base_url URL used to resolve relative links; replaced by the
  *                         first absolute http(s) <base href> found in the document
  * @param string $html     HTML source to scan
  * @return array Two-element array: array(thumbnail URLs, video URLs), each
  *               passed through array_unique() (original keys are preserved)
  */
 public static function ExtractUrls($base_url, $html)
 {
     $video_urls = array();
     $thumb_urls = array();

     // DOMDocument::loadHTML() rejects empty input (a ValueError on PHP 8 that
     // the @ operator cannot suppress), and there is nothing to extract anyway
     if ((string) $html === '') {
         return array($thumb_urls, $video_urls);
     }

     $video_pattern = '~\\.(' . VIDEO_EXTENSIONS . ')(\\?.*)?$~U';
     $thumb_pattern = '~\\.' . JPG_EXTENSION . '(\\?.*)?$~U';

     $dom = new DOMDocument();
     // Real-world markup is rarely valid; parser warnings are deliberately silenced
     @$dom->loadHTML($html);

     // See if a <base> tag is defined, and if so set $base_url
     foreach ($dom->getElementsByTagName('base') as $base) {
         $href = $base->getAttribute('href');
         if (!empty($href) && preg_match('~^https?://~i', $href)) {
             $base_url = $href;
             break;
         }
     }

     // Check <a> tags
     foreach ($dom->getElementsByTagName('a') as $a) {
         $href = $a->getAttribute('href');
         if (empty($href) || !preg_match($video_pattern, $href)) {
             continue;
         }

         $video_urls[] = RelativeToAbsolute($base_url, $href);

         // Images nested inside the video link are its thumbnail candidates
         foreach ($a->getElementsByTagName('img') as $img) {
             $src = $img->getAttribute('src');
             if (preg_match($thumb_pattern, $src)) {
                 $thumb_urls[] = RelativeToAbsolute($base_url, $src);
             }
         }
     }

     // Check <map> tags
     $all_imgs = $dom->getElementsByTagName('img');
     foreach ($dom->getElementsByTagName('map') as $map) {
         $map_name = strtolower('#' . $map->getAttribute('name'));

         // Thumbnails of the images using this map; resolved on the first
         // matching area and reused for the rest instead of rescanning every
         // <img> in the document for each area
         $map_thumbs = NULL;

         foreach ($map->getElementsByTagName('area') as $area) {
             $href = $area->getAttribute('href');
             $coords = $area->getAttribute('coords');
             if (empty($href) || !preg_match($video_pattern, $href)) {
                 continue;
             }

             $video_urls[] = RelativeToAbsolute($base_url, $href);

             if ($map_thumbs === NULL) {
                 $map_thumbs = array();
                 foreach ($all_imgs as $img) {
                     $usemap = strtolower($img->getAttribute('usemap'));
                     if (empty($usemap) || $usemap != $map_name) {
                         continue;
                     }

                     $src = $img->getAttribute('src');
                     if (preg_match($thumb_pattern, $src)) {
                         $map_thumbs[] = RelativeToAbsolute($base_url, $src);
                     }
                 }
             }

             // Tag each thumbnail with the area's coordinates
             foreach ($map_thumbs as $thumb) {
                 $thumb_urls[] = "[{$coords}]" . $thumb;
             }
         }
     }

     return array(array_unique($thumb_urls), array_unique($video_urls));
 }