コード例 #1
0
//////////////////////////////////
// Set up HTTP agent
//////////////////////////////////
// Create the HTTP agent and configure it from the debug flag and the $options object.
$http = new HumbleHttpAgent();
$http->debug = $debug_mode;
// The user-agent map is passed through unchanged from the options.
$http->userAgentMap = $options->user_agents;
// Only the keys of content_type_exc are handed to the agent as its header-only types;
// the values of that array are not used here.
$http->headerOnlyTypes = array_keys($options->content_type_exc);
$http->rewriteUrls = $options->rewrite_url;
// NOTE(review): the cache initialisation below is commented out, so initCache() is never
// called on this agent in this section - confirm this is intentional.
//$http->initCache($options->cache_dir, $options->cache_directory_level, $options->cache_cleanup, isset($options->http_cache_ttl) ? $options->http_cache_ttl : 12*60*60);
//////////////////////////////////
// Set up Content Extractor
//////////////////////////////////
// The extractor receives two site_config directories located next to this file:
// 'custom' first, then 'standard'.
$extractor = new ContentExtractor(dirname(__FILE__) . '/site_config/custom', dirname(__FILE__) . '/site_config/standard');
$extractor->debug = $debug_mode;
// SiteConfig is configured through static members, so these two settings are not tied
// to the $extractor instance created above.
SiteConfig::$debug = $debug_mode;
SiteConfig::use_apc($options->apc);
// Fingerprints and the list of allowed parsers are taken unchanged from the options.
$extractor->fingerprints = $options->fingerprints;
$extractor->allowedParsers = $options->allowed_parsers;
////////////////////////////////
// Get RSS/Atom feed
////////////////////////////////
if (!$html_only) {
    debug('--------');
    debug("Attempting to process URL as feed");
    // Send user agent header showing PHP (prevents a HTML response from feedburner)
    $http->userAgentDefault = HumbleHttpAgent::UA_PHP;
    // configure SimplePie HTTP extension class to use our HumbleHttpAgent instance
    SimplePie_HumbleHttpAgent::set_agent($http);
    $feed = new SimplePie();
    // some feeds use the text/html content type - force_feed tells SimplePie to process anyway
    $feed->force_feed(true);
コード例 #2
0
ファイル: fulltextrss.php プロジェクト: ZMOM1031/selfoss
 /**
  * Prepare the content extractor and download the raw HTML of a web page.
  *
  * A fresh ContentExtractor is stored in $this->extractor and configured from
  * this object's fingerprints and allowed parsers; the page itself is fetched
  * with a plain HTTP GET using a 5 second timeout.
  *
  * @author Jean Baptiste Favre
  * @param string $url address of the page to download
  * @return string|false page content, or false when the download failed
  */
 private function fetchFromWebSite($url)
 {
     $this->extractor = new \ContentExtractor(\F3::get('FTRSS_DATA_DIR') . '/custom', \F3::get('FTRSS_DATA_DIR') . '/standard');
     if (\F3::get('logger_level') === "DEBUG") {
         // NOTE(review): the output buffer opened here is not closed in this method;
         // presumably the caller collects the extractor's debug output later - confirm.
         ob_start();
         $this->extractor->debug = true;
         \SiteConfig::$debug = true;
     }
     \SiteConfig::use_apc(false);
     $this->extractor->fingerprints = $this->fingerprints;
     $this->extractor->allowedParsers = $this->allowed_parsers;

     // One entry per request header. They are joined with CRLF below so that every
     // header ends up on its own line; previously "DNT: 1" was glued onto the end of
     // the User-Agent value because the separator between the two was missing.
     $headers = array(
         "Accept-language: en-us,en-gb;q=0.8,en;q=0.6,fr;q=0.4,fr-fr;q=0.2",
         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
         "User-Agent: SimplePie/1.3.1 (Feed Parser; http://simplepie.org; Allow like Gecko) Build/20121030175911",
         "DNT: 1",
     );
     $stream_opts = array('http' => array('timeout' => 5, 'method' => "GET", 'header' => implode("\r\n", $headers)));
     $context = stream_context_create($stream_opts);

     // NOTE(review): removeTrackersFromUrl() is defined elsewhere in the class; by its
     // name it strips tracking parameters from the URL - confirm.
     $url = $this->removeTrackersFromUrl($url);

     // Load web page. Warnings are suppressed on purpose: a failed download is
     // reported to the caller through the false return value.
     return @file_get_contents($url, false, $context);
 }