////////////////////////////////// // Set up HTTP agent ////////////////////////////////// $http = new HumbleHttpAgent(); $http->debug = $debug_mode; $http->userAgentMap = $options->user_agents; $http->headerOnlyTypes = array_keys($options->content_type_exc); $http->rewriteUrls = $options->rewrite_url; //$http->initCache($options->cache_dir, $options->cache_directory_level, $options->cache_cleanup, isset($options->http_cache_ttl) ? $options->http_cache_ttl : 12*60*60); ////////////////////////////////// // Set up Content Extractor ////////////////////////////////// $extractor = new ContentExtractor(dirname(__FILE__) . '/site_config/custom', dirname(__FILE__) . '/site_config/standard'); $extractor->debug = $debug_mode; SiteConfig::$debug = $debug_mode; SiteConfig::use_apc($options->apc); $extractor->fingerprints = $options->fingerprints; $extractor->allowedParsers = $options->allowed_parsers; //////////////////////////////// // Get RSS/Atom feed //////////////////////////////// if (!$html_only) { debug('--------'); debug("Attempting to process URL as feed"); // Send user agent header showing PHP (prevents a HTML response from feedburner) $http->userAgentDefault = HumbleHttpAgent::UA_PHP; // configure SimplePie HTTP extension class to use our HumbleHttpAgent instance SimplePie_HumbleHttpAgent::set_agent($http); $feed = new SimplePie(); // some feeds use the text/html content type - force_feed tells SimplePie to process anyway $feed->force_feed(true);
/**
 * Fetch the raw HTML of a web page.
 *
 * Also (re)creates $this->extractor and configures it from this object's
 * fingerprints / allowed_parsers settings before the download is attempted.
 *
 * @author Jean Baptiste Favre
 * @param string $url address of the page to download
 * @return string|false page content, or false when the download failed
 */
private function fetchFromWebSite($url) {
    $this->extractor = new \ContentExtractor(
        \F3::get('FTRSS_DATA_DIR') . '/custom',
        \F3::get('FTRSS_DATA_DIR') . '/standard'
    );

    if (\F3::get('logger_level') === "DEBUG") {
        // NOTE(review): the buffer opened here is not closed in this method;
        // presumably the caller collects and ends it - confirm.
        ob_start();
        $this->extractor->debug = true;
        \SiteConfig::$debug = true;
    }

    \SiteConfig::use_apc(false);
    $this->extractor->fingerprints = $this->fingerprints;
    $this->extractor->allowedParsers = $this->allowed_parsers;

    // Each request header must sit on its own CRLF-terminated line. Joining
    // the list with "\r\n" keeps "DNT: 1" from being glued onto the end of
    // the User-Agent value, which is what the plain concatenation used to do.
    $headers = array(
        "Accept-language: en-us,en-gb;q=0.8,en;q=0.6,fr;q=0.4,fr-fr;q=0.2",
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "User-Agent: SimplePie/1.3.1 (Feed Parser; http://simplepie.org; Allow like Gecko) Build/20121030175911",
        "DNT: 1",
    );

    $stream_opts = array(
        'http' => array(
            'timeout' => 5,
            'method'  => "GET",
            'header'  => implode("\r\n", $headers) . "\r\n",
        ),
    );
    $context = stream_context_create($stream_opts);

    $url = $this->removeTrackersFromUrl($url);

    // Load web page. Warnings are deliberately suppressed: a failed download
    // is reported to the caller through the false return value instead.
    return @file_get_contents($url, false, $context);
}