Exemple #1
0
 /**
  * Fetch the data via SimplePie_File
  *
  * If a cache handler is supplied, the cached data is loaded first and reused
  * when it is still fresh, or re-validated against the server (conditional
  * request headers, then an MD5 comparison of the body) when it is not.
  * Otherwise the feed URL is fetched and, unless force_feed is set, checked
  * with the Locator, which may replace it with a discovered feed.
  *
  * @param SimplePie_Cache|false $cache Cache handler, or false to not load from the cache.
  *        Passed by reference: it is set to false when the cached entry belongs to another URL,
  *        and replaced by a handler for the discovered feed URL after autodiscovery.
  * @return array|bool True if the cached data can be reused; an array of (HTTP headers, sniffed type)
  *         after a fetch; false if no feed could be located; `!empty($this->data)` when the request
  *         failed; or the result of init() when the autodiscovery cache points to another URL.
  */
 protected function fetch_data(&$cache)
 {
     // Single definition of the Accept header used by both the re-validation request and the plain fetch
     $accept = 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1';
     // If it's enabled, use the cache
     if ($cache) {
         // Load the Cache
         $this->data = $cache->load();
         if ($cache->mtime() + $this->cache_duration > time()) {
             // If the cache is still valid, just return true
             $this->raw_data = false;
             return true;
         } elseif (!empty($this->data)) {
             // If the cache is for an outdated build of SimplePie
             if (!isset($this->data['build']) || $this->data['build'] !== SIMPLEPIE_BUILD) {
                 $cache->unlink();
                 $this->data = array();
             } elseif (isset($this->data['url']) && $this->data['url'] !== $this->feed_url) {
                 // The cached entry was stored for a different URL: stop using the cache
                 // (the assignment goes through the reference, so the caller's variable changes too)
                 $cache = false;
                 $this->data = array();
             } elseif (isset($this->data['feed_url'])) {
                 // If the autodiscovery cache is still valid use it.
                 if ($cache->mtime() + $this->autodiscovery_cache_duration > time()) {
                     // Do not need to do feed autodiscovery yet.
                     if ($this->data['feed_url'] !== $this->data['url']) {
                         $this->set_feed_url($this->data['feed_url']);
                         return $this->init();
                     }
                     $cache->unlink();
                     $this->data = array();
                 }
             } else {
                 // Stale cache: re-validate with a conditional request built from the cached response headers
                 $headers = array('Accept' => $accept);
                 if (isset($this->data['headers']['last-modified'])) {
                     $headers['if-modified-since'] = $this->data['headers']['last-modified'];
                 }
                 if (isset($this->data['headers']['etag'])) {
                     $headers['if-none-match'] = $this->data['headers']['etag'];
                 }
                 $file = $this->registry->create('File', array($this->feed_url, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen, $this->curl_options));
                 if ($file->success) {
                     if ($file->status_code === 304) {
                         // Not modified: refresh the cache timestamp and keep the cached data
                         $cache->touch();
                         return true;
                     }
                 } else {
                     // Request failed: remember the attempt time and fall back to whatever the cache holds
                     $cache->touch();
                     $this->error = $file->error;
                     return !empty($this->data);
                 }
                 $md5 = $this->cleanMd5($file->body);
                 // isset() guard: a cache entry without an 'md5' key is simply treated as "changed"
                 if (isset($this->data['md5']) && $this->data['md5'] === $md5) {
                     //Content unchanged even though server did not send a 304
                     if ($this->syslog_enabled) {
                         syslog(LOG_DEBUG, 'SimplePie MD5 cache match for ' . SimplePie_Misc::url_remove_credentials($this->feed_url));
                     }
                     $cache->touch();
                     return true;
                 } else {
                     if ($this->syslog_enabled) {
                         syslog(LOG_DEBUG, 'SimplePie MD5 cache no match for ' . SimplePie_Misc::url_remove_credentials($this->feed_url));
                     }
                     $this->data['md5'] = $md5;
                 }
             }
         } else {
             //To keep the date/time of the last tentative update
             $cache->touch();
             $this->data = array();
         }
     }
     // If we don't already have the file (it'll only exist if we've opened it to check if the cache has been modified), open it.
     if (!isset($file)) {
         if ($this->file instanceof SimplePie_File && $this->file->url === $this->feed_url) {
             $file =& $this->file;
         } else {
             $headers = array('Accept' => $accept);
             $file = $this->registry->create('File', array($this->feed_url, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen, $this->curl_options));
         }
     }
     // If the file connection has an error, set SimplePie::error to that and quit
     // NOTE(review): `===` binds tighter than `&`, so the first operand below evaluates as
     // `$file->method & (SIMPLEPIE_FILE_SOURCE_REMOTE === 0)`, which is always 0; the apparent
     // "local source" exemption never applies. Left as-is because parenthesising it would change
     // which failures are reported - confirm the intended behaviour before touching it.
     if (!$file->success && !($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300))) {
         $this->error = $file->error;
         return !empty($this->data);
     }
     if (!$this->force_feed) {
         // Check if the supplied URL is a feed, if it isn't, look for it.
         $locate = $this->registry->create('Locator', array(&$file, $this->timeout, $this->useragent, $this->max_checked_feeds));
         if (!$locate->is_feed($file)) {
             // Keep what is needed for the error message before $file is released
             $copyStatusCode = $file->status_code;
             // The response may carry no Content-Type header at all
             $copyContentType = isset($file->headers['content-type']) ? $file->headers['content-type'] : '';
             // We need to unset this so that if SimplePie::set_file() has been called that object is untouched
             unset($file);
             try {
                 if (!($file = $locate->find($this->autodiscovery, $this->all_discovered_feeds))) {
                     $this->error = "A feed could not be found at `{$this->feed_url}`; the status code is `{$copyStatusCode}` and content-type is `{$copyContentType}`";
                     $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__));
                     return false;
                 }
             } catch (SimplePie_Exception $e) {
                 // This is usually because DOMDocument doesn't exist
                 $this->error = $e->getMessage();
                 $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, $e->getFile(), $e->getLine()));
                 return false;
             }
             if ($cache) {
                 // Store the original URL -> discovered feed URL mapping under the original cache entry,
                 // then switch to a cache handler keyed on the discovered URL
                 $this->data = array('url' => $this->feed_url, 'feed_url' => $file->url, 'build' => SIMPLEPIE_BUILD);
                 $this->data['mtime'] = time();
                 $this->data['md5'] = empty($md5) ? $this->cleanMd5($file->body) : $md5;
                 if (!$cache->save($this)) {
                     trigger_error("{$this->cache_location} is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
                 }
                 $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, call_user_func($this->cache_name_function, $file->url), 'spc'));
             }
             $this->feed_url = $file->url;
         }
         $locate = null;
     }
     $this->raw_data = $file->body;
     $this->permanent_url = $file->permanent_url;
     $headers = $file->headers;
     $sniffer = $this->registry->create('Content_Type_Sniffer', array(&$file));
     $sniffed = $sniffer->get_type();
     return array($headers, $sniffed);
 }
Exemple #2
0
/**
 * Download a page and return the sanitized HTML of the elements matching a selector.
 *
 * The page is fetched with file_get_contents() and parsed with phpQuery. Before
 * serialising the selection, every <img> in the document that has a `data-src`
 * attribute longer than 4 characters gets that value copied into `src`, and the
 * `data-src` attribute is removed.
 *
 * @param string $url  Address of the page to download
 * @param string $path phpQuery selector passed to find()
 * @return mixed Result of sanitizeHTML() applied to the serialised selection
 * @throws Exception When the page could not be downloaded or is empty
 */
function get_content_by_parsing($url, $path)
{
    require_once LIB_PATH . '/lib_phpQuery.php';
    // Never expose credentials embedded in the URL, neither in the log nor in the exception
    $safeUrl = SimplePie_Misc::url_remove_credentials($url);
    Minz_Log::notice('FreshRSS GET ' . $safeUrl);
    $html = file_get_contents($url);
    if (!$html) {
        // A message-less exception cannot be diagnosed from the logs; say what failed
        throw new Exception('Unable to retrieve content from ' . $safeUrl);
    }
    $doc = phpQuery::newDocument($html);
    $content = $doc->find($path);
    // Copy the data-src value into src before serialising the selection
    foreach (pq('img[data-src]') as $img) {
        $imgP = pq($img);
        $dataSrc = $imgP->attr('data-src');
        if (strlen($dataSrc) > 4) {
            $imgP->attr('src', $dataSrc);
            $imgP->removeAttr('data-src');
        }
    }
    return sanitizeHTML($content->__toString(), $url);
}
Exemple #3
0
 /**
  * Fetch this feed with SimplePie and update the object from the result.
  *
  * Does nothing when no URL is set. Otherwise the URL is decoded, optional HTTP
  * credentials are inserted after the scheme, a trailing `#force_feed` marker is
  * turned into SimplePie's force_feed option, and the feed is initialised.
  * The `self` and `hub` links are stored, the subscription URL is updated when it
  * differs from the requested one, and entries are loaded when the value returned
  * by init() is true or greater than $this->lastUpdate.
  *
  * @param bool $loadDetails When true, autodiscovery stays enabled and the name, website and
  *                          description are refreshed from the feed; when false, autodiscovery is
  *                          disabled and only the permanent subscription URL is looked up.
  * @throws Minz_FileNotExistException When CACHE_PATH is false
  * @throws FreshRSS_Feed_Exception When init() returns a falsy value or the feed reports an error
  */
 public function load($loadDetails = false)
 {
     if ($this->url === null) {
         return;
     }
     if (CACHE_PATH === false) {
         throw new Minz_FileNotExistException('CACHE_PATH', Minz_Exception::ERROR);
     }
     $url = htmlspecialchars_decode($this->url, ENT_QUOTES);
     if ($this->httpAuth != '') {
         // Insert the credentials right after the first "://" with plain string operations:
         // building a preg_replace() replacement from them would interpret "$1" or "\1" inside a
         // password as back-references, and a greedy match would pick the last "://" in the URL
         // (e.g. one inside the query string) instead of the scheme separator.
         $schemeEnd = strpos($url, '://');
         if ($schemeEnd !== false && $schemeEnd > 0 && isset($url[$schemeEnd + 3])) {
             $url = substr($url, 0, $schemeEnd + 3) . $this->httpAuth . '@' . substr($url, $schemeEnd + 3);
         }
     }
     $feed = customSimplePie();
     if (substr($url, -11) === '#force_feed') {
         // The marker is an instruction for SimplePie, not part of the address
         $feed->force_feed(true);
         $url = substr($url, 0, -11);
     }
     $feed->set_feed_url($url);
     if (!$loadDetails) {
         //Only activates auto-discovery when adding a new feed
         $feed->set_autodiscovery_level(SIMPLEPIE_LOCATOR_NONE);
     }
     $mtime = $feed->init();
     if (!$mtime || $feed->error()) {
         $errorMessage = $feed->error();
         throw new FreshRSS_Feed_Exception(($errorMessage == '' ? 'Feed error' : $errorMessage) . ' [' . $url . ']');
     }
     $links = $feed->get_links('self');
     $this->selfUrl = isset($links[0]) ? $links[0] : null;
     $links = $feed->get_links('hub');
     $this->hubUrl = isset($links[0]) ? $links[0] : null;
     if ($loadDetails) {
         // If auto-discovery was used, our URL will have changed
         $subscribe_url = $feed->subscribe_url(false);
         //HTML to HTML-PRE	//ENT_COMPAT except &
         $title = strtr(html_only_entity_decode($feed->get_title()), array('<' => '&lt;', '>' => '&gt;', '"' => '&quot;'));
         $this->_name($title == '' ? $url : $title);
         $this->_website(html_only_entity_decode($feed->get_link()));
         $this->_description(html_only_entity_decode($feed->get_description()));
     } else {
         //The case of HTTP 301 Moved Permanently
         $subscribe_url = $feed->subscribe_url(true);
     }
     if ($subscribe_url !== null && $subscribe_url !== $url) {
         // Only strip credentials once we know there is a URL to store
         $this->_url(SimplePie_Misc::url_remove_credentials($subscribe_url));
     }
     if ($mtime === true || $mtime > $this->lastUpdate) {
         // Load the entries of the feed
         $this->loadEntries($feed);
     } else {
         // Nothing newer than the last update: expose an empty entry list
         $this->entries = array();
     }
     //http://simplepie.org/wiki/faq/i_m_getting_memory_leaks
     $feed->__destruct();
     unset($feed);
 }
Exemple #4
0
 /**
  * Retrieve a resource and fill this object with the outcome.
  *
  * URLs starting with http:// or https:// are requested with cURL when curl_exec()
  * exists and $force_fsockopen is false, otherwise through a raw fsockopen() socket.
  * Any other value is read with file_get_contents(). On failure $this->success is set
  * to false and $this->error holds the reason. A redirect response with a Location
  * header is followed by running the constructor again on the target, as long as fewer
  * than $redirects redirects have been followed.
  *
  * @param string      $url             Address or local path to read
  * @param int         $timeout         Timeout in seconds, used for both the connection and the transfer
  * @param int         $redirects       Maximum number of redirects to follow
  * @param array|null  $headers         Extra request headers as name => value; non-arrays are treated as none
  * @param string|null $useragent       User-Agent; for HTTP(S) requests null falls back to ini_get('user_agent')
  * @param bool        $force_fsockopen Use fsockopen() even when cURL is available
  * @param array       $curl_options    CURLOPT_* => value pairs applied after the built-in options
  * @param bool        $syslog_enabled  Whether to log the request to syslog
  */
 public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false, $curl_options = array(), $syslog_enabled = SIMPLEPIE_SYSLOG)
 {
     if (class_exists('idna_convert')) {
         // Rebuild the URL with its authority passed through idna_convert::encode()
         $idn = new idna_convert();
         $parsed = SimplePie_Misc::parse_url($url);
         $url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
     }
     $this->url = $url;
     $this->permanent_url = $url;
     $this->useragent = $useragent;
     if (preg_match('/^http(s)?:\\/\\//i', $url)) {
         if ($syslog_enabled) {
             //FreshRSS
             syslog(LOG_INFO, 'SimplePie GET ' . SimplePie_Misc::url_remove_credentials($url));
         }
         if ($useragent === null) {
             $useragent = ini_get('user_agent');
             $this->useragent = $useragent;
         }
         if (!is_array($headers)) {
             $headers = array();
         }
         if (!$force_fsockopen && function_exists('curl_exec')) {
             $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;
             $fp = curl_init();
             // cURL expects a list of "Name: value" strings
             $headers2 = array();
             foreach ($headers as $key => $value) {
                 $headers2[] = "{$key}: {$value}";
             }
             if (version_compare(SimplePie_Misc::get_curl_version(), '7.10.5', '>=')) {
                 curl_setopt($fp, CURLOPT_ENCODING, '');
             }
             curl_setopt($fp, CURLOPT_URL, $url);
             curl_setopt($fp, CURLOPT_HEADER, 1);
             curl_setopt($fp, CURLOPT_RETURNTRANSFER, 1);
             curl_setopt($fp, CURLOPT_TIMEOUT, $timeout);
             curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout);
             curl_setopt($fp, CURLOPT_REFERER, $url);
             curl_setopt($fp, CURLOPT_USERAGENT, $useragent);
             curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2);
             if (!ini_get('open_basedir') && !ini_get('safe_mode') && version_compare(SimplePie_Misc::get_curl_version(), '7.15.2', '>=')) {
                 curl_setopt($fp, CURLOPT_FOLLOWLOCATION, 1);
                 curl_setopt($fp, CURLOPT_MAXREDIRS, $redirects);
             }
             // Caller-supplied options come last so that they can override the defaults above
             foreach ($curl_options as $curl_param => $curl_value) {
                 curl_setopt($fp, $curl_param, $curl_value);
             }
             $this->headers = curl_exec($fp);
             if (curl_errno($fp) === 23 || curl_errno($fp) === 61) {
                 // Retry once with 'none' as the encoding when the first attempt ends with error 23 or 61
                 curl_setopt($fp, CURLOPT_ENCODING, 'none');
                 $this->headers = curl_exec($fp);
             }
             if (curl_errno($fp)) {
                 $this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp);
                 $this->success = false;
                 // Release the handle on the failure path as well
                 curl_close($fp);
             } else {
                 $info = curl_getinfo($fp);
                 curl_close($fp);
                 // With redirects followed by cURL the output holds one header block per hop; keep the last one
                 $this->headers = explode("\r\n\r\n", $this->headers, $info['redirect_count'] + 1);
                 $this->headers = array_pop($this->headers);
                 $parser = new SimplePie_HTTP_Parser($this->headers);
                 if ($parser->parse()) {
                     $this->headers = $parser->headers;
                     $this->body = $parser->body;
                     $this->status_code = $parser->status_code;
                     if ((in_array($this->status_code, array(300, 301, 302, 303, 307)) || $this->status_code > 307 && $this->status_code < 400) && isset($this->headers['location']) && $this->redirects < $redirects) {
                         $this->redirects++;
                         $location = SimplePie_Misc::absolutize_url($this->headers['location'], $url);
                         $previousStatusCode = $this->status_code;
                         // Forward every option, including $curl_options and $syslog_enabled,
                         // so the redirected request is made under the same settings
                         $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen, $curl_options, $syslog_enabled);
                         // Only a 301 makes the new location the permanent URL
                         $this->permanent_url = $previousStatusCode == 301 ? $location : $url;
                         return;
                     }
                 }
             }
         } else {
             $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_FSOCKOPEN;
             $url_parts = parse_url($url);
             $socket_host = $url_parts['host'];
             $default_port = 80;
             if (isset($url_parts['scheme']) && strtolower($url_parts['scheme']) === 'https') {
                 $socket_host = "ssl://{$url_parts['host']}";
                 $default_port = 443;
             }
             // Honour an explicit port in the URL (also for https); otherwise use the scheme default
             if (!isset($url_parts['port'])) {
                 $url_parts['port'] = $default_port;
             }
             $fp = @fsockopen($socket_host, $url_parts['port'], $errno, $errstr, $timeout);
             if (!$fp) {
                 $this->error = 'fsockopen error: ' . $errstr;
                 $this->success = false;
             } else {
                 stream_set_timeout($fp, $timeout);
                 if (isset($url_parts['path'])) {
                     if (isset($url_parts['query'])) {
                         $get = "{$url_parts['path']}?{$url_parts['query']}";
                     } else {
                         $get = $url_parts['path'];
                     }
                 } else {
                     $get = '/';
                 }
                 $out = "GET {$get} HTTP/1.1\r\n";
                 $out .= "Host: {$url_parts['host']}\r\n";
                 $out .= "User-Agent: {$useragent}\r\n";
                 if (extension_loaded('zlib')) {
                     $out .= "Accept-Encoding: x-gzip,gzip,deflate\r\n";
                 }
                 if (isset($url_parts['user']) && isset($url_parts['pass'])) {
                     $out .= "Authorization: Basic " . base64_encode("{$url_parts['user']}:{$url_parts['pass']}") . "\r\n";
                 }
                 foreach ($headers as $key => $value) {
                     $out .= "{$key}: {$value}\r\n";
                 }
                 $out .= "Connection: Close\r\n\r\n";
                 fwrite($fp, $out);
                 // Read the whole response until end of stream or timeout
                 $info = stream_get_meta_data($fp);
                 $this->headers = '';
                 while (!$info['eof'] && !$info['timed_out']) {
                     $this->headers .= fread($fp, 1160);
                     $info = stream_get_meta_data($fp);
                 }
                 // Nothing below touches the socket: close it now so that the early return
                 // taken when following a redirect cannot leak it
                 fclose($fp);
                 if (!$info['timed_out']) {
                     $parser = new SimplePie_HTTP_Parser($this->headers);
                     if ($parser->parse()) {
                         $this->headers = $parser->headers;
                         $this->body = $parser->body;
                         $this->status_code = $parser->status_code;
                         if ((in_array($this->status_code, array(300, 301, 302, 303, 307)) || $this->status_code > 307 && $this->status_code < 400) && isset($this->headers['location']) && $this->redirects < $redirects) {
                             $this->redirects++;
                             $location = SimplePie_Misc::absolutize_url($this->headers['location'], $url);
                             $previousStatusCode = $this->status_code;
                             // Forward every option, including $curl_options and $syslog_enabled
                             $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen, $curl_options, $syslog_enabled);
                             //FreshRSS
                             $this->permanent_url = $previousStatusCode == 301 ? $location : $url;
                             return;
                         }
                         if (isset($this->headers['content-encoding'])) {
                             // Hey, we act dumb elsewhere, so let's do that here too
                             switch (strtolower(trim($this->headers['content-encoding'], "\t\n\r "))) {
                                 case 'gzip':
                                 case 'x-gzip':
                                     $decoder = new SimplePie_gzdecode($this->body);
                                     if (!$decoder->parse()) {
                                         $this->error = 'Unable to decode HTTP "gzip" stream';
                                         $this->success = false;
                                     } else {
                                         $this->body = $decoder->data;
                                     }
                                     break;
                                 case 'deflate':
                                     // Try gzinflate(), then gzuncompress(), then gzdecode() when it exists
                                     if (($decompressed = gzinflate($this->body)) !== false) {
                                         $this->body = $decompressed;
                                     } else {
                                         if (($decompressed = gzuncompress($this->body)) !== false) {
                                             $this->body = $decompressed;
                                         } else {
                                             if (function_exists('gzdecode') && ($decompressed = gzdecode($this->body)) !== false) {
                                                 $this->body = $decompressed;
                                             } else {
                                                 $this->error = 'Unable to decode HTTP "deflate" stream';
                                                 $this->success = false;
                                             }
                                         }
                                     }
                                     break;
                                 default:
                                     $this->error = 'Unknown content coding';
                                     $this->success = false;
                             }
                         }
                     }
                 } else {
                     $this->error = 'fsocket timed out';
                     $this->success = false;
                 }
             }
         }
     } else {
         // Not an HTTP(S) URL: read it with file_get_contents(); an empty result counts as a failure
         $this->method = SIMPLEPIE_FILE_SOURCE_LOCAL | SIMPLEPIE_FILE_SOURCE_FILE_GET_CONTENTS;
         if (empty($url) || !($this->body = file_get_contents($url))) {
             $this->error = 'file_get_contents could not read the file';
             $this->success = false;
         }
     }
 }