/** * process one single HTTP request * * This function removes any PHPSESSID data in the query string, if any * * @return void * * @see agents/referrals_hook.php */ public static function check_request() { global $context; // don't bother with HEAD requests if (isset($_SERVER['REQUEST_METHOD']) && $_SERVER['REQUEST_METHOD'] == 'HEAD') { return; } // the target url if (!isset($_SERVER['REQUEST_URI']) || !($url = $_SERVER['REQUEST_URI'])) { return; } // only remember viewed pages and index pages if (!preg_match('/\\/(index|view).php/', $url)) { return; } // continue only if we have a referer if (!isset($_SERVER['HTTP_REFERER']) || !($referer = $_SERVER['HTTP_REFERER'])) { return; } // do not memorize cache referrals if (preg_match('/cache:/i', $referer)) { return; } // block pernicious attacks $referer = strip_tags($referer); // only remember external referrals if (preg_match('/\\b' . preg_quote(str_replace('www.', '', $context['host_name']), '/') . '\\b/i', $referer)) { return; } // stop crawlers if (Surfer::is_crawler()) { return; } // avoid banned sources include_once $context['path_to_root'] . 'servers/servers.php'; if (preg_match(Servers::get_banned_pattern(), $referer)) { return; } // normalize the referral, extract keywords, and domain list($referer, $domain, $keywords) = Referrals::normalize($referer); // if a record exists for this url $query = "SELECT id FROM " . SQL::table_name('referrals') . " AS referrals" . " WHERE referrals.url LIKE '" . SQL::escape($url) . "' AND referrals.referer LIKE '" . SQL::escape($referer) . "'"; if (!($item = SQL::query_first($query))) { return; } // update figures if (isset($item['id'])) { $query = "UPDATE " . SQL::table_name('referrals') . " SET" . " hits=hits+1," . " stamp='" . gmstrftime('%Y-%m-%d %H:%M:%S') . "'" . " WHERE id = " . $item['id']; // create a new record } else { // ensure the referer is accessible if (($content = http::proceed($referer)) === FALSE) { return; } // we have to find a reference to ourself in this page if (strpos($content, $context['url_to_home']) === FALSE) { return; } $query = "INSERT INTO " . SQL::table_name('referrals') . " SET" . " url='" . SQL::escape($url) . "'," . " referer='" . SQL::escape($referer) . "'," . " domain='" . SQL::escape($domain) . "'," . " keywords='" . SQL::escape($keywords) . "'," . " hits=1," . " stamp='" . gmstrftime('%Y-%m-%d %H:%M:%S') . "'"; } // actual database update if (SQL::query($query) === FALSE) { return; } // prune with a probability of 1/100 if (rand(1, 100) != 50) { return; } // purge oldest records -- 100 days = 8640000 seconds $query = "DELETE FROM " . SQL::table_name('referrals') . " WHERE stamp < '" . gmstrftime('%Y-%m-%d %H:%M:%S', time() - 8640000) . "'"; SQL::query($query); }
} elseif ($links_offset) { $context['text'] .= '<p>' . i18n::s('No broken link has been found.') . '</p>'; } // display the execution time $time = round(get_micro_time() - $context['start_time'], 2); $context['text'] .= '<p>' . sprintf(i18n::s('Script terminated in %.2f seconds.'), $time) . '</p>'; // forward to the index page $menu = array('links/' => i18n::s('Links')); $context['text'] .= Skin::build_list($menu, 'menu_bar'); // check referrals through the Internet } elseif (isset($_REQUEST['action']) && $_REQUEST['action'] == 'referrals') { // scan links $context['text'] .= '<p>' . sprintf(i18n::s('Analyzing table %s...'), SQL::table_name('links')) . "</p>\n"; // avoid banned sources include_once $context['path_to_root'] . 'servers/servers.php'; $banned_pattern = Servers::get_banned_pattern(); // process all links $links_offset = 0; while ($links_offset < MAXIMUM_SIZE) { // seek the database and check newest referrals include_once '../agents/referrals.php'; if (($rows = Referrals::list_by_dates($links_offset, CHUNK_SIZE)) && count($rows)) { // analyze each link foreach ($rows as $item) { $url = $item['referer']; // avoid banned sources if (preg_match($banned_pattern, $url)) { $context['text'] .= 'x'; $banned[] = $url; // delete the referral from the database Referrals::delete($url);
/** * get news from remote servers * * This function queries remote sources and populate the table of links based on fetched news. * * On tick, the including hook calls [code]Feeds::tick_hook()[/code]. * See [script]control/scan.php[/script] for a more complete description of hooks. * * The function browses the database to locate servers acting as feeders, and read the URLs to use. * * A round-robin algorithm is implemented, meaning that servers are polled in sequence throughout successive ticks. * At most 1 feed is parsed on each tick, to limit impact when the "poor-man" cron mechanism is used, * which is the default setting. * * XML feeds are fetched and parsed according to their type. * At the moment YACS is able to process RSS and slashdot feeds. * Link records are created or updated in the database saving as much of possible of provided data. * Item data is reflected in Link, Title, and Description fields. * Channel data is used to populate the Source field. * Stamping information is based on feeding date, and channel title. * Also, the edit action 'link:feed' marks links that are collected from feeders. * The anchor field is set to the category assigned in the server profile. * * At the end of the feeding process, the database is purged from oldest links according to the limit * defined in parameters/feeds.include.php, set through feeds/configure.php. * See Links::purge_old_news(). * * @param boolean if set to true, fetch news on each call; else use normal period of time * @return a string to be displayed in resulting page, if any * * @see control/scan.php * @see feeds/configure.php */ public static function tick_hook($forced = FALSE) { global $context; // load librairies only once include_once $context['path_to_root'] . 'links/links.php'; include_once $context['path_to_root'] . 'servers/servers.php'; include_once $context['path_to_root'] . 'shared/values.php'; // feeds.tick // get feeding parameters Safe::load('parameters/feeds.include.php'); // delay between feeds - minimum is 5 minutes if (!isset($context['minutes_between_feeds']) || $context['minutes_between_feeds'] < 5) { $context['minutes_between_feeds'] = 5; } // do not wait for the end of a feeding cycle if ($forced) { $threshold = gmstrftime('%Y-%m-%d %H:%M:%S'); } else { $threshold = gmstrftime('%Y-%m-%d %H:%M:%S', time() - $context['minutes_between_feeds'] * 60); } // get a batch of feeders if (!($feeders = Servers::list_for_feed(0, 1, 'feed'))) { return 'feeds/feeds.php: no feed has been defined' . BR; } // remember start time $start_time = get_micro_time(); // list banned tokens $banned_pattern = Servers::get_banned_pattern(); // browse each feed $count = 0; foreach ($feeders as $server_id => $attributes) { // get specific feed parameters list($feed_url, $feed_title, $anchor, $stamp) = $attributes; // skip servers processed recently if ($stamp > $threshold) { continue; } // flag this record to enable round-robin even on error Servers::stamp($server_id); // fetch news from the provided link if (!($news = Feeds::get_remote_news_from($feed_url)) || !is_array($news)) { continue; } // no anchor has been defined for this feed if (!$anchor) { // create a default section if necessary if (!($anchor = Sections::lookup('external_news'))) { $fields = array(); $fields['nick_name'] = 'external_news'; $fields['create_date'] = gmstrftime('%Y-%m-%d %H:%M:%S', time()); $fields['edit_date'] = gmstrftime('%Y-%m-%d %H:%M:%S', time()); $fields['index_map'] = 'N'; $fields['locked'] = 'Y'; // no direct contributions $fields['rank'] = 40000; // at the end of the list $fields['title'] = i18n::c('External News'); $fields['description'] = i18n::c('Received from feeding servers'); if (!($fields['id'] = Sections::post($fields))) { Logger::remember('feeds/feeds.php: Impossible to add a section.'); return; } $anchor = 'section:' . $fields['id']; } } // process retrieved links $links = 0; foreach ($news as $item) { // link has to be valid if (!isset($item['link']) || !($item['title'] . $item['description'])) { if (isset($context['debug_feeds']) && $context['debug_feeds'] == 'Y') { Logger::remember('feeds/feeds.php: feed item is invalid', $item, 'debug'); } continue; } // skip banned servers if ($banned_pattern && preg_match($banned_pattern, $item['link'])) { if (isset($context['debug_feeds']) && $context['debug_feeds'] == 'Y') { Logger::remember('feeds/feeds.php: feed host has been banned', $item['link'], 'debug'); } continue; } // one link processed $links++; // link description $fields = array(); $fields['anchor'] = $anchor; $fields['link_url'] = $item['link']; $fields['title'] = $item['title']; $fields['description'] = $item['description']; if ($item['category']) { $fields['description'] .= ' (' . $item['category'] . ')'; } $fields['edit_name'] = $feed_title; $fields['edit_address'] = $feed_url; $fields['edit_action'] = 'link:feed'; if ($item['pubDate']) { $fields['edit_date'] = gmstrftime('%Y-%m-%d %H:%M:%S', strtotime($item['pubDate'])); } // update links that already exist in the database if (Links::have($item['link'], $anchor, $fields)) { continue; } // save link in the database if (!Links::post($fields)) { Logger::remember('feeds/feeds.php: Impossible to save feed link: ' . Logger::error_pop()); } } // one feed has been processed $count += 1; // remember tick date Values::set('feeds.tick.' . $feed_url, $links); } // cap the number of links used for news if (!isset($context['maximum_news']) || !$context['maximum_news']) { $context['maximum_news'] = 1000; } if ($context['maximum_news'] > 10) { include_once $context['path_to_root'] . 'links/links.php'; Links::purge_old_news($context['maximum_news']); } // compute execution time $time = round(get_micro_time() - $start_time, 2); // report on work achieved if ($count > 1) { return 'feeds/feeds.php: ' . $count . ' feeds have been processed (' . $time . ' seconds)' . BR; } elseif ($count == 1) { return 'feeds/feeds.php: 1 feed has been processed (' . $time . ' seconds)' . BR; } else { return 'feeds/feeds.php: nothing to do (' . $time . ' seconds)' . BR; } }