die('Invalid URL: ' . htmlspecialchars($url)); } //TODO: cleanup URL //////////////////////////////// // Base URL //////////////////////////////// $_host = $_SERVER['HTTP_HOST']; $_path = rtrim(dirname($_SERVER['SCRIPT_NAME']), '/\\'); $base = 'http://' . htmlspecialchars($_host . $_path); //TODO: use HubmleHTTPAgent require_once 'lib/simplepie/autoloader.php'; require_once 'lib/humble-http-agent/HumbleHttpAgent.php'; require_once 'lib/humble-http-agent/CookieJar.php'; $html = ''; $_req_options = null; $http = new HumbleHttpAgent($_req_options); //$http->debug = true; if (($response = $http->get($url, true)) && $response['status_code'] < 300) { $html = $response['body']; //$html = convert_to_utf8($html, $response['headers']); //$html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"); } else { die('Failed to fetch URL'); } if (trim($html) == '') { die('Empty response :('); } // use Tidy? if (isset($_GET['tidy']) && $_GET['tidy'] === '1') { if (!function_exists('tidy_parse_string')) { die('Tidy requested but not available on server.');
echo '<h3>Help</h3>'; echo '<p>If you have any trouble, please contact us via our <a href="http://help.fivefilters.org">support site</a>.</p>'; exit; } ////////////////////////////////// // Check update key valid ////////////////////////////////// if ($_REQUEST['key'] !== $admin_hash) { println("Sorry, invalid key supplied."); exit; } ////////////////////////////////// // Check for updates ////////////////////////////////// //$ff_version = @file_get_contents('http://fivefilters.org/content-only/site_config/standard/version.txt'); $http = new HumbleHttpAgent(); $latest_info_json = $http->get('https://api.github.com/repos/fivefilters/ftr-site-config'); //$_context = stream_context_create(array('http' => array('user_agent' => 'PHP/5.5'), 'ssl'=>array('verify_peer'=>false))); //$latest_info_json = file_get_contents('https://api.github.com/repos/fivefilters/ftr-site-config', false, $_context); if (!$latest_info_json) { println("Sorry, couldn't get info on latest site config files. Please try again later or contact us."); exit; } $latest_info_json = $latest_info_json['body']; $latest_info_json = @json_decode($latest_info_json); if (!is_object($latest_info_json)) { println("Sorry, couldn't parse JSON from GitHub. Please try again later or contact us."); exit; } $ff_version = $latest_info_json->pushed_at; if ($version == $ff_version) {
echo $data; exit; } } ////////////////////////////////// // Set Expires header ////////////////////////////////// if ($valid_key) { header('Expires: ' . gmdate('D, d M Y H:i:s', time() + 60 * 10) . ' GMT'); } else { header('Expires: ' . gmdate('D, d M Y H:i:s', time() + 60 * 20) . ' GMT'); } ////////////////////////////////// // Set up HTTP agent ////////////////////////////////// $http = new HumbleHttpAgent(); /* if ($options->caching) { $frontendOptions = array( 'lifetime' => 30*60, // cache lifetime of 30 minutes 'automatic_serialization' => true, 'write_control' => false, 'automatic_cleaning_factor' => $options->cache_cleanup, 'ignore_user_abort' => false ); $backendOptions = array( 'cache_dir' => $options->cache_dir.'/http-responses/', // directory where to put the cache files 'file_locking' => false, 'read_control' => true, 'read_control_type' => 'strlen', 'hashed_directory_level' => $options->cache_directory_level,
/**
 * Ask the local force-refresh endpoint to update every given subscription,
 * issuing the requests through HumbleHttpAgent::fetchAll().
 *
 * One URL of the form ZF_URL/pub/index.php?q=force-refresh&id=<source id>
 * is built per subscription. The response bodies are not used by this
 * method; the requests are made for the refresh they trigger.
 *
 * @param iterable $subscriptions Subscriptions to refresh; each element must
 *                                expose ->source->id.
 * @return void
 */
protected function updateAllParallel($subscriptions)
{
    zf_debugRuntime("before feeds parallel update");

    $urls = array();
    foreach ($subscriptions as $sub) {
        $urls[] = ZF_URL . '/pub/index.php?q=force-refresh&id=' . $sub->source->id;
    }

    // Request all feed items in parallel (if supported)
    $http = new HumbleHttpAgent();
    $http->userAgentDefault = HumbleHttpAgent::UA_PHP;
    zf_debug('fetching all ' . count($urls) . ' feeds', DBG_FEED);
    $http->fetchAll($urls);

    foreach ($urls as $url) {
        zf_debug('going after ' . $url, DBG_FEED);
        // Collect each response so none is left pending in the agent.
        // NOTE(review): the second argument is presumably HumbleHttpAgent's
        // "remove after read" flag - confirm against the library.
        $response = $http->get($url, true);
        if (!$response) {
            // Previously ignored silently; surface it on the feed debug channel.
            zf_debug('no response for ' . $url, DBG_FEED);
        }
    }

    zf_debugRuntime("End of parallel update");
}
/**
 * Fetch a follow-up page and return its extracted content block as a node
 * belonging to this extractor's Readability document.
 *
 * A separate ContentExtractor is used for the page. It receives this
 * extractor's list of already visited pages (plus $permalink), its
 * recursion depth incremented by one, and its fingerprints.
 *
 * FALSE is returned when:
 *  - $permalink was already visited (present in next_pages),
 *  - the incremented depth exceeds 3,
 *  - the page could not be fetched or its status code is in 300..400,
 *  - extraction produced no content block,
 *  - the block's HTML is empty or cannot be parsed by DOMDocument.
 *
 * @param string $permalink URL of the page to fetch and extract.
 * @return DOMNode|false Content block imported into $this->readability->dom,
 *                       or FALSE on any of the failures listed above.
 */
public function extractContentBlock($permalink)
{
    // Loop protection: never process the same page twice.
    if (in_array($permalink, $this->next_pages)) {
        return FALSE;
    }

    // Depth protection: follow at most three levels of "next page" links.
    $depth = $this->next_page_deep_count + 1;
    if ($depth > 3) {
        return FALSE;
    }

    $extractor = new ContentExtractor(
        dirname(__FILE__) . '/site_config/custom',
        dirname(__FILE__) . '/site_config/standard'
    );
    $extractor->next_page_deep_count = $depth;
    $extractor->next_pages = $this->next_pages;
    $extractor->next_pages[] = $permalink;
    $extractor->fingerprints = $this->fingerprints;

    // Fetch the page exactly once (the request used to be issued twice).
    $http = new HumbleHttpAgent();
    $response = $permalink ? $http->get($permalink, true) : null;

    // NOTE(review): status codes above 400 are deliberately let through, as
    // in the original condition - presumably so error pages that still carry
    // content get processed; confirm this is intended.
    if (!$response || ($response['status_code'] >= 300 && $response['status_code'] <= 400)) {
        return FALSE;
    }

    $html = $response['body'];
    // remove strange things
    $html = str_replace('</[>', '', $html);
    $html = convert_to_utf8($html, $response['headers']);
    if (function_exists('mb_convert_encoding')) {
        // NOTE(review): the 'HTML-ENTITIES' target encoding is deprecated
        // as of PHP 8.2; kept here to preserve the existing output.
        $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    }

    if (!$extractor->process($html, $permalink)) {
        return FALSE;
    }
    $content_block = $extractor->getContent();
    if (!$content_block) {
        return FALSE;
    }

    // DOMDocument::loadHTML() rejects an empty string (ValueError on PHP 8,
    // which "@" does not suppress), so treat an empty block as a failure.
    $inner_html = (string) $content_block->innerHTML;
    if ($inner_html === '') {
        return FALSE;
    }

    // Parse once purely as a sanity check that the block is loadable HTML.
    $doc = new DOMDocument();
    if (!@$doc->loadHTML($inner_html)) {
        return FALSE;
    }

    // Import so the node can be inserted into this extractor's document.
    return $this->readability->dom->importNode($content_block, true);
}