Esempio n. 1
0
    die('Invalid URL: ' . htmlspecialchars($url));
}
//TODO: cleanup URL
////////////////////////////////
// Base URL
////////////////////////////////
// Host name from the request and the directory containing this script
// (trailing slashes/backslashes stripped).
$_host = $_SERVER['HTTP_HOST'];
$_path = rtrim(dirname($_SERVER['SCRIPT_NAME']), '/\\');
// Base URL of this installation, HTML-escaped for safe output.
$base = 'http://' . htmlspecialchars($_host . $_path);
//TODO: use HumbleHttpAgent
require_once 'lib/simplepie/autoloader.php';
require_once 'lib/humble-http-agent/HumbleHttpAgent.php';
require_once 'lib/humble-http-agent/CookieJar.php';
// Fetch $url; the script stops unless a response with a status code
// below 300 comes back.
$html = '';
$_req_options = null;
$http = new HumbleHttpAgent($_req_options);
//$http->debug = true;
$response = $http->get($url, true);
if (!$response || !($response['status_code'] < 300)) {
    die('Failed to fetch URL');
}
$html = $response['body'];
//$html = convert_to_utf8($html, $response['headers']);
//$html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
// A body consisting only of whitespace counts as empty.
if (trim($html) === '') {
    die('Empty response :(');
}
// use Tidy?
if (isset($_GET['tidy']) && $_GET['tidy'] === '1') {
    if (!function_exists('tidy_parse_string')) {
        die('Tidy requested but not available on server.');
Esempio n. 2
0
    echo '<h3>Help</h3>';
    echo '<p>If you have any trouble, please contact us via our <a href="http://help.fivefilters.org">support site</a>.</p>';
    exit;
}
//////////////////////////////////
// Check update key valid
//////////////////////////////////
// Reject the request unless the supplied key matches the admin hash.
// - The key must be a string: a request such as key[]=x yields an array.
// - Where hash_equals() exists the comparison is constant-time, so the
//   response time does not leak how much of the key matched; otherwise
//   fall back to the strict comparison used before.
$_key_is_valid = isset($_REQUEST['key'])
    && is_string($_REQUEST['key'])
    && is_string($admin_hash)
    && (function_exists('hash_equals')
        ? hash_equals($admin_hash, $_REQUEST['key'])
        : $_REQUEST['key'] === $admin_hash);
if (!$_key_is_valid) {
    println("Sorry, invalid key supplied.");
    exit;
}
//////////////////////////////////
// Check for updates
//////////////////////////////////
//$ff_version = @file_get_contents('http://fivefilters.org/content-only/site_config/standard/version.txt');
// Ask the GitHub API for the metadata of the site config repository.
$http = new HumbleHttpAgent();
$latest_info_json = $http->get('https://api.github.com/repos/fivefilters/ftr-site-config');
//$_context = stream_context_create(array('http' => array('user_agent' => 'PHP/5.5'), 'ssl'=>array('verify_peer'=>false)));
//$latest_info_json = file_get_contents('https://api.github.com/repos/fivefilters/ftr-site-config', false, $_context);
if (!$latest_info_json) {
    println("Sorry, couldn't get info on latest site config files. Please try again later or contact us.");
    exit;
}
// From here on $latest_info_json holds the response body, then the decoded object.
$latest_info_json = $latest_info_json['body'];
$latest_info_json = @json_decode($latest_info_json);
// The body must decode to an object that actually carries pushed_at.
// A valid JSON object without that field (presumably an API error payload,
// e.g. when the request is refused) would otherwise produce a null version.
if (!is_object($latest_info_json) || !isset($latest_info_json->pushed_at)) {
    println("Sorry, couldn't parse JSON from GitHub. Please try again later or contact us.");
    exit;
}
// The repository's last push timestamp serves as the remote version identifier.
$ff_version = $latest_info_json->pushed_at;
if ($version == $ff_version) {
        echo $data;
        exit;
    }
}
//////////////////////////////////
// Set Expires header
//////////////////////////////////
// Requests made with a valid key expire after 10 minutes, all others after 20.
header('Expires: ' . gmdate('D, d M Y H:i:s', time() + ($valid_key ? 60 * 10 : 60 * 20)) . ' GMT');
//////////////////////////////////
// Set up HTTP agent
//////////////////////////////////
$http = new HumbleHttpAgent();
/*
if ($options->caching) {
	$frontendOptions = array(
	   'lifetime' => 30*60, // cache lifetime of 30 minutes
	   'automatic_serialization' => true,
	   'write_control' => false,
	   'automatic_cleaning_factor' => $options->cache_cleanup,
	   'ignore_user_abort' => false
	); 
	$backendOptions = array(
		'cache_dir' => $options->cache_dir.'/http-responses/', // directory where to put the cache files
		'file_locking' => false,
		'read_control' => true,
		'read_control_type' => 'strlen',
		'hashed_directory_level' => $options->cache_directory_level,
Esempio n. 4
0
 /**
  * Request the force-refresh endpoint of every subscription's source.
  *
  * All URLs are handed to the HTTP agent in one fetchAll() call, then each
  * response is read back individually.
  *
  * @param mixed $subscriptions Iterable of objects exposing ->source->id.
  */
 protected function updateAllParallel($subscriptions)
 {
     zf_debugRuntime("before feeds parallel update");
     // One force-refresh URL per subscription.
     $refreshUrls = array();
     foreach ($subscriptions as $subscription) {
         $refreshUrls[] = ZF_URL . '/pub/index.php?q=force-refresh&id=' . $subscription->source->id;
     }
     // Request all feed items in parallel (if supported)
     $agent = new HumbleHttpAgent();
     $agent->userAgentDefault = HumbleHttpAgent::UA_PHP;
     zf_debug('fetching all ' . count($refreshUrls) . ' feeds', DBG_FEED);
     $agent->fetchAll($refreshUrls);
     foreach ($refreshUrls as $refreshUrl) {
         zf_debug('going after ' . $refreshUrl, DBG_FEED);
         if (!$refreshUrl) {
             continue;
         }
         $response = $agent->get($refreshUrl, true);
         if (!$response) {
             continue;
         }
         $status = $response['status_code'];
         if ($status < 300 || $status > 400) {
             // Read for parity with the response handling; not used further in this method.
             $effective_url = $response['effective_url'];
         }
     }
     zf_debugRuntime("End of parallel update");
 }
 /**
  * Fetch $permalink, run content extraction on the response and return the
  * extracted block imported into this object's readability DOM.
  *
  * Returns FALSE without fetching when $permalink is already listed in
  * next_pages or when the incremented next_page_deep_count exceeds 3.
  * Also returns FALSE when the fetch fails, when extraction yields no
  * content, or when the extracted HTML is empty or cannot be parsed.
  *
  * @param string $permalink URL of the page to fetch and extract.
  * @return DOMNode|false The imported content node, or FALSE on any failure.
  */
 public function extractContentBlock($permalink)
 {
     $extractor = new ContentExtractor(dirname(__FILE__) . '/site_config/custom', dirname(__FILE__) . '/site_config/standard');
     // Hand the page-following bookkeeping over to the new extractor.
     $extractor->next_page_deep_count = $this->next_page_deep_count + 1;
     $extractor->next_pages = $this->next_pages;
     if (in_array($permalink, $extractor->next_pages)) {
         return FALSE;
     }
     $extractor->next_pages[] = $permalink;
     if ($extractor->next_page_deep_count > 3) {
         return FALSE;
     }
     // NOTE(review): $elem is never used in this method. The construction is
     // kept only in case the ContentExtractor constructor has side effects
     // this method relies on - confirm and remove if it does not.
     $elem = new ContentExtractor($this->path, $this->fallback);
     $extractor->fingerprints = $this->fingerprints;
     if (!$permalink) {
         return FALSE;
     }
     // Fetch the page once (the URL was previously requested twice in a row).
     $http = new HumbleHttpAgent();
     $response = $http->get($permalink, true);
     // NOTE(review): status codes above 400 are accepted here as well as
     // those below 300 - presumably deliberate; confirm.
     if (!$response || !($response['status_code'] < 300 || $response['status_code'] > 400)) {
         return FALSE;
     }
     $html = $response['body'];
     // remove strange things
     $html = str_replace('</[>', '', $html);
     $html = convert_to_utf8($html, $response['headers']);
     if (function_exists('mb_convert_encoding')) {
         $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     }
     $extract_result = $extractor->process($html, $permalink);
     $content_block = $extract_result ? $extractor->getContent() : null;
     if (!$content_block) {
         // Extraction produced nothing; there is no node to import.
         return FALSE;
     }
     $inner_html = (string) $content_block->innerHTML;
     if ($inner_html === '') {
         // DOMDocument::loadHTML() rejects empty input, so stop here.
         return FALSE;
     }
     // Only hand the block back if its markup can be parsed as HTML.
     // Parser warnings about malformed markup are suppressed on purpose.
     $doc = new DOMDocument();
     if (!@$doc->loadHTML($inner_html)) {
         return FALSE;
     }
     return $this->readability->dom->importNode($content_block, true);
 }