/** * Fetch the data via SimplePie_File * * If the data is already cached, attempt to fetch it from there instead * @param SimplePie_Cache|false $cache Cache handler, or false to not load from the cache * @return array|true Returns true if the data was loaded from the cache, or an array of HTTP headers and sniffed type */ protected function fetch_data(&$cache) { // If it's enabled, use the cache if ($cache) { // Load the Cache $this->data = $cache->load(); if ($cache->mtime() + $this->cache_duration > time()) { $this->raw_data = false; return true; // If the cache is still valid, just return true } elseif (!empty($this->data)) { // If the cache is for an outdated build of SimplePie if (!isset($this->data['build']) || $this->data['build'] !== SIMPLEPIE_BUILD) { $cache->unlink(); $this->data = array(); } elseif (isset($this->data['url']) && $this->data['url'] !== $this->feed_url) { $cache = false; $this->data = array(); } elseif (isset($this->data['feed_url'])) { // If the autodiscovery cache is still valid use it. if ($cache->mtime() + $this->autodiscovery_cache_duration > time()) { // Do not need to do feed autodiscovery yet. if ($this->data['feed_url'] !== $this->data['url']) { $this->set_feed_url($this->data['feed_url']); return $this->init(); } $cache->unlink(); $this->data = array(); } } else { $headers = array('Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1'); if (isset($this->data['headers']['last-modified'])) { $headers['if-modified-since'] = $this->data['headers']['last-modified']; } if (isset($this->data['headers']['etag'])) { $headers['if-none-match'] = $this->data['headers']['etag']; } $file = $this->registry->create('File', array($this->feed_url, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen, $this->curl_options)); if ($file->success) { if ($file->status_code === 304) { $cache->touch(); return true; } } else { $cache->touch(); $this->error = $file->error; return !empty($this->data); } $md5 = $this->cleanMd5($file->body); if ($this->data['md5'] === $md5) { if ($this->syslog_enabled) { syslog(LOG_DEBUG, 'SimplePie MD5 cache match for ' . SimplePie_Misc::url_remove_credentials($this->feed_url)); } $cache->touch(); return true; //Content unchanged even though server did not send a 304 } else { if ($this->syslog_enabled) { syslog(LOG_DEBUG, 'SimplePie MD5 cache no match for ' . SimplePie_Misc::url_remove_credentials($this->feed_url)); } $this->data['md5'] = $md5; } } } else { $cache->touch(); //To keep the date/time of the last tentative update $this->data = array(); } } // If we don't already have the file (it'll only exist if we've opened it to check if the cache has been modified), open it. if (!isset($file)) { if ($this->file instanceof SimplePie_File && $this->file->url === $this->feed_url) { $file =& $this->file; } else { $headers = array('Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1'); $file = $this->registry->create('File', array($this->feed_url, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen, $this->curl_options)); } } // If the file connection has an error, set SimplePie::error to that and quit if (!$file->success && !($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300))) { $this->error = $file->error; return !empty($this->data); } if (!$this->force_feed) { // Check if the supplied URL is a feed, if it isn't, look for it. $locate = $this->registry->create('Locator', array(&$file, $this->timeout, $this->useragent, $this->max_checked_feeds)); if (!$locate->is_feed($file)) { $copyStatusCode = $file->status_code; $copyContentType = $file->headers['content-type']; // We need to unset this so that if SimplePie::set_file() has been called that object is untouched unset($file); try { if (!($file = $locate->find($this->autodiscovery, $this->all_discovered_feeds))) { $this->error = "A feed could not be found at `{$this->feed_url}`; the status code is `{$copyStatusCode}` and content-type is `{$copyContentType}`"; $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__)); return false; } } catch (SimplePie_Exception $e) { // This is usually because DOMDocument doesn't exist $this->error = $e->getMessage(); $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, $e->getFile(), $e->getLine())); return false; } if ($cache) { $this->data = array('url' => $this->feed_url, 'feed_url' => $file->url, 'build' => SIMPLEPIE_BUILD); $this->data['mtime'] = time(); $this->data['md5'] = empty($md5) ? $this->cleanMd5($file->body) : $md5; if (!$cache->save($this)) { trigger_error("{$this->cache_location} is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); } $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, call_user_func($this->cache_name_function, $file->url), 'spc')); } $this->feed_url = $file->url; } $locate = null; } $this->raw_data = $file->body; $this->permanent_url = $file->permanent_url; $headers = $file->headers; $sniffer = $this->registry->create('Content_Type_Sniffer', array(&$file)); $sniffed = $sniffer->get_type(); return array($headers, $sniffed); }
function get_content_by_parsing($url, $path) { require_once LIB_PATH . '/lib_phpQuery.php'; Minz_Log::notice('FreshRSS GET ' . SimplePie_Misc::url_remove_credentials($url)); $html = file_get_contents($url); if ($html) { $doc = phpQuery::newDocument($html); $content = $doc->find($path); foreach (pq('img[data-src]') as $img) { $imgP = pq($img); $dataSrc = $imgP->attr('data-src'); if (strlen($dataSrc) > 4) { $imgP->attr('src', $dataSrc); $imgP->removeAttr('data-src'); } } return sanitizeHTML($content->__toString(), $url); } else { throw new Exception(); } }
public function load($loadDetails = false) { if ($this->url !== null) { if (CACHE_PATH === false) { throw new Minz_FileNotExistException('CACHE_PATH', Minz_Exception::ERROR); } else { $url = htmlspecialchars_decode($this->url, ENT_QUOTES); if ($this->httpAuth != '') { $url = preg_replace('#((.+)://)(.+)#', '${1}' . $this->httpAuth . '@${3}', $url); } $feed = customSimplePie(); if (substr($url, -11) === '#force_feed') { $feed->force_feed(true); $url = substr($url, 0, -11); } $feed->set_feed_url($url); if (!$loadDetails) { //Only activates auto-discovery when adding a new feed $feed->set_autodiscovery_level(SIMPLEPIE_LOCATOR_NONE); } $mtime = $feed->init(); if (!$mtime || $feed->error()) { $errorMessage = $feed->error(); throw new FreshRSS_Feed_Exception(($errorMessage == '' ? 'Feed error' : $errorMessage) . ' [' . $url . ']'); } $links = $feed->get_links('self'); $this->selfUrl = isset($links[0]) ? $links[0] : null; $links = $feed->get_links('hub'); $this->hubUrl = isset($links[0]) ? $links[0] : null; if ($loadDetails) { // si on a utilisé l'auto-discover, notre url va avoir changé $subscribe_url = $feed->subscribe_url(false); $title = strtr(html_only_entity_decode($feed->get_title()), array('<' => '<', '>' => '>', '"' => '"')); //HTML to HTML-PRE //ENT_COMPAT except & $this->_name($title == '' ? $url : $title); $this->_website(html_only_entity_decode($feed->get_link())); $this->_description(html_only_entity_decode($feed->get_description())); } else { //The case of HTTP 301 Moved Permanently $subscribe_url = $feed->subscribe_url(true); } $clean_url = SimplePie_Misc::url_remove_credentials($subscribe_url); if ($subscribe_url !== null && $subscribe_url !== $url) { $this->_url($clean_url); } if ($mtime === true || $mtime > $this->lastUpdate) { //Minz_Log::debug('FreshRSS no cache ' . $mtime . ' > ' . $this->lastUpdate . ' for ' . $clean_url); $this->loadEntries($feed); // et on charge les articles du flux } else { //Minz_Log::debug('FreshRSS use cache for ' . $clean_url); $this->entries = array(); } $feed->__destruct(); //http://simplepie.org/wiki/faq/i_m_getting_memory_leaks unset($feed); } } }
public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false, $curl_options = array(), $syslog_enabled = SIMPLEPIE_SYSLOG) { if (class_exists('idna_convert')) { $idn = new idna_convert(); $parsed = SimplePie_Misc::parse_url($url); $url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']); } $this->url = $url; $this->permanent_url = $url; $this->useragent = $useragent; if (preg_match('/^http(s)?:\\/\\//i', $url)) { if ($syslog_enabled) { syslog(LOG_INFO, 'SimplePie GET ' . SimplePie_Misc::url_remove_credentials($url)); //FreshRSS } if ($useragent === null) { $useragent = ini_get('user_agent'); $this->useragent = $useragent; } if (!is_array($headers)) { $headers = array(); } if (!$force_fsockopen && function_exists('curl_exec')) { $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL; $fp = curl_init(); $headers2 = array(); foreach ($headers as $key => $value) { $headers2[] = "{$key}: {$value}"; } if (version_compare(SimplePie_Misc::get_curl_version(), '7.10.5', '>=')) { curl_setopt($fp, CURLOPT_ENCODING, ''); } curl_setopt($fp, CURLOPT_URL, $url); curl_setopt($fp, CURLOPT_HEADER, 1); curl_setopt($fp, CURLOPT_RETURNTRANSFER, 1); curl_setopt($fp, CURLOPT_TIMEOUT, $timeout); curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout); curl_setopt($fp, CURLOPT_REFERER, $url); curl_setopt($fp, CURLOPT_USERAGENT, $useragent); curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2); if (!ini_get('open_basedir') && !ini_get('safe_mode') && version_compare(SimplePie_Misc::get_curl_version(), '7.15.2', '>=')) { curl_setopt($fp, CURLOPT_FOLLOWLOCATION, 1); curl_setopt($fp, CURLOPT_MAXREDIRS, $redirects); } foreach ($curl_options as $curl_param => $curl_value) { curl_setopt($fp, $curl_param, $curl_value); } $this->headers = curl_exec($fp); if (curl_errno($fp) === 23 || curl_errno($fp) === 61) { curl_setopt($fp, CURLOPT_ENCODING, 'none'); $this->headers = curl_exec($fp); } if (curl_errno($fp)) { $this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp); $this->success = false; } else { $info = curl_getinfo($fp); curl_close($fp); $this->headers = explode("\r\n\r\n", $this->headers, $info['redirect_count'] + 1); $this->headers = array_pop($this->headers); $parser = new SimplePie_HTTP_Parser($this->headers); if ($parser->parse()) { $this->headers = $parser->headers; $this->body = $parser->body; $this->status_code = $parser->status_code; if ((in_array($this->status_code, array(300, 301, 302, 303, 307)) || $this->status_code > 307 && $this->status_code < 400) && isset($this->headers['location']) && $this->redirects < $redirects) { $this->redirects++; $location = SimplePie_Misc::absolutize_url($this->headers['location'], $url); $previousStatusCode = $this->status_code; $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen); $this->permanent_url = $previousStatusCode == 301 ? $location : $url; return; } } } } else { $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_FSOCKOPEN; $url_parts = parse_url($url); $socket_host = $url_parts['host']; if (isset($url_parts['scheme']) && strtolower($url_parts['scheme']) === 'https') { $socket_host = "ssl://{$url_parts['host']}"; $url_parts['port'] = 443; } if (!isset($url_parts['port'])) { $url_parts['port'] = 80; } $fp = @fsockopen($socket_host, $url_parts['port'], $errno, $errstr, $timeout); if (!$fp) { $this->error = 'fsockopen error: ' . $errstr; $this->success = false; } else { stream_set_timeout($fp, $timeout); if (isset($url_parts['path'])) { if (isset($url_parts['query'])) { $get = "{$url_parts['path']}?{$url_parts['query']}"; } else { $get = $url_parts['path']; } } else { $get = '/'; } $out = "GET {$get} HTTP/1.1\r\n"; $out .= "Host: {$url_parts['host']}\r\n"; $out .= "User-Agent: {$useragent}\r\n"; if (extension_loaded('zlib')) { $out .= "Accept-Encoding: x-gzip,gzip,deflate\r\n"; } if (isset($url_parts['user']) && isset($url_parts['pass'])) { $out .= "Authorization: Basic " . base64_encode("{$url_parts['user']}:{$url_parts['pass']}") . "\r\n"; } foreach ($headers as $key => $value) { $out .= "{$key}: {$value}\r\n"; } $out .= "Connection: Close\r\n\r\n"; fwrite($fp, $out); $info = stream_get_meta_data($fp); $this->headers = ''; while (!$info['eof'] && !$info['timed_out']) { $this->headers .= fread($fp, 1160); $info = stream_get_meta_data($fp); } if (!$info['timed_out']) { $parser = new SimplePie_HTTP_Parser($this->headers); if ($parser->parse()) { $this->headers = $parser->headers; $this->body = $parser->body; $this->status_code = $parser->status_code; if ((in_array($this->status_code, array(300, 301, 302, 303, 307)) || $this->status_code > 307 && $this->status_code < 400) && isset($this->headers['location']) && $this->redirects < $redirects) { $this->redirects++; $location = SimplePie_Misc::absolutize_url($this->headers['location'], $url); $previousStatusCode = $this->status_code; $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen); $this->permanent_url = $previousStatusCode == 301 ? $location : $url; //FreshRSS return; } if (isset($this->headers['content-encoding'])) { // Hey, we act dumb elsewhere, so let's do that here too switch (strtolower(trim($this->headers['content-encoding'], "\t\n\r "))) { case 'gzip': case 'x-gzip': $decoder = new SimplePie_gzdecode($this->body); if (!$decoder->parse()) { $this->error = 'Unable to decode HTTP "gzip" stream'; $this->success = false; } else { $this->body = $decoder->data; } break; case 'deflate': if (($decompressed = gzinflate($this->body)) !== false) { $this->body = $decompressed; } else { if (($decompressed = gzuncompress($this->body)) !== false) { $this->body = $decompressed; } else { if (function_exists('gzdecode') && ($decompressed = gzdecode($this->body)) !== false) { $this->body = $decompressed; } else { $this->error = 'Unable to decode HTTP "deflate" stream'; $this->success = false; } } } break; default: $this->error = 'Unknown content coding'; $this->success = false; } } } } else { $this->error = 'fsocket timed out'; $this->success = false; } fclose($fp); } } } else { $this->method = SIMPLEPIE_FILE_SOURCE_LOCAL | SIMPLEPIE_FILE_SOURCE_FILE_GET_CONTENTS; if (empty($url) || !($this->body = file_get_contents($url))) { $this->error = 'file_get_contents could not read the file'; $this->success = false; } } }