$anchor = 'section:' . $matches[1]; } // we are linking to a category if (preg_match('/\\/categories\\/view.php\\/(\\w+)/', $target, $matches)) { $anchor = 'category:' . $matches[1]; } elseif (preg_match('/\\/categories\\/view.php\\?id=(\\w+)/', $target, $matches)) { $anchor = 'category:' . $matches[1]; } // caller has been banned if (isset($_SERVER['REMOTE_HOST']) && ($server = Servers::get($_SERVER['REMOTE_HOST']) && $server['process_ping'] != 'Y')) { $response = 49; } elseif (!preg_match('/^' . preg_quote($context['url_to_home'], '/') . '/i', $target)) { $response = 33; } elseif (!$anchor) { $response = 33; } elseif (Links::have($source, $anchor)) { $response = 48; } elseif (($content = http::proceed($source)) === FALSE) { $response = 16; } else { // ensure enough execution time Safe::set_time_limit(30); // we have to find a reference to the target here if (($position = strpos($content, $target)) === FALSE) { $response = 17; } else { // try to grab a title if (preg_match("/<h1>(.*)<\\/h1>/i", $content, $matches)) { $fields['title'] = $matches[1]; } elseif (preg_match("/<title>(.*)<\\/title>/i", $content, $matches)) { $fields['title'] = $matches[1];
} // nice display $context['text'] .= '<li>' . Skin::build_link($item['link'], $item['title'], 'external'); if ($item['description']) { $context['text'] .= ' - ' . $item['description']; } if ($item['category']) { $context['text'] .= ' (' . $item['category'] . ')'; } if ($item['pubDate']) { $context['text'] .= ' ' . gmstrftime('%Y-%m-%d %H:%M:%S', strtotime($item['pubDate'])); } // flag links if ($banned_pattern && preg_match($banned_pattern, $item['link'])) { $context['text'] .= BR . '(' . i18n::s('the target server is banned') . ')'; } elseif (Links::have($item['link'], $reference)) { $context['text'] .= BR . '(' . i18n::s('this link exists in the database') . ')'; } else { $context['text'] .= BR . '(' . i18n::s('this link has not been inserted in the database yet') . ')'; } $context['text'] .= '</li>' . "\n"; } $context['text'] .= '</ul>' . "\n"; } } // the related anchor if (is_object($anchor)) { $context['text'] .= '<p>' . sprintf(i18n::s('Related to %s'), Skin::build_link($anchor->get_url(), $anchor->get_title(), 'category')) . "</p>\n"; } // insert anchor suffix if (is_object($anchor)) {
$context['page_title'] = sprintf(i18n::s('Reference: %s'), $title); } // stop crawlers if (Surfer::is_crawler()) { Safe::header('Status: 401 Unauthorized', TRUE, 401); Logger::error(i18n::s('You are not allowed to perform this operation.')); // process uploaded data } elseif (isset($_SERVER['REQUEST_METHOD']) && $_SERVER['REQUEST_METHOD'] == 'POST') { // save the request if debug mode if (isset($context['debug_trackback']) && $context['debug_trackback'] == 'Y') { Logger::remember('links/trackback.php: trackback request', $_REQUEST, 'debug'); } // do we have a valid target to track? if (!$anchor || !is_object($anchor)) { $response = array('faultCode' => 1, 'faultString' => 'Nothing to track'); } elseif (Links::have($source, $anchor->get_reference())) { $response = array('faultCode' => 1, 'faultString' => 'The source has already been registered'); } elseif (($content = http::proceed($source)) === FALSE) { $response = array('faultCode' => 1, 'faultString' => 'Cannot read source address ' . $source); } else { // ensure enough execution time Safe::set_time_limit(30); // we are coming from this form -- stop robots if (strpos($_SERVER['HTTP_REFERER'], $context['script_url']) !== FALSE) { if (Surfer::may_be_a_robot()) { $response = array('faultCode' => 1, 'faultString' => 'Please prove you are not a robot'); } // remote call -- get network address a.b.c.d of caller } elseif (!isset($_SERVER['REMOTE_ADDR']) || !($ip = preg_replace('/[^0-9.]/', '', $_SERVER['REMOTE_ADDR']))) { $response = array('faultCode' => 1, 'faultString' => 'Invalid request'); } elseif (!($items = @parse_url($source)) || !isset($items['host'])) {
/**
 * get news from remote servers
 *
 * This function queries remote sources and populates the table of links based on fetched news.
 *
 * On tick, the including hook calls [code]Feeds::tick_hook()[/code].
 * See [script]control/scan.php[/script] for a more complete description of hooks.
 *
 * The function asks [code]Servers::list_for_feed()[/code] for one feeder, and reads the URL to use.
 *
 * A round-robin algorithm is implemented, meaning that servers are polled in sequence throughout successive ticks.
 * At most 1 feed is parsed on each tick, to limit impact when the "poor-man" cron mechanism is used,
 * which is the default setting. Every visited server is stamped before its feed is fetched,
 * so that a failing feed does not block the rotation.
 *
 * Feeds are fetched and parsed by [code]Feeds::get_remote_news_from()[/code].
 * Link records are created or updated in the database, saving as much as possible of provided data.
 * Item data is reflected in Link, Title, and Description fields; the item category, if any,
 * is appended to the description.
 * The feed title and URL are saved as editor name and address.
 * The edit date is the publication date of the item, when it is provided and can be parsed.
 * Also, the edit action 'link:feed' marks links that are collected from feeders.
 * The anchor field is set to the category assigned in the server profile, or to
 * the section nicknamed 'external_news', which is created on first use.
 *
 * At the end of the feeding process, the database is purged from oldest links according to the limit
 * defined in parameters/feeds.include.php, set through feeds/configure.php.
 * See Links::purge_old_news().
 *
 * @param boolean if set to true, fetch news on each call; else use normal period of time
 * @return a string to be displayed in resulting page, if any; nothing is returned
 * if the default section cannot be created
 *
 * @see control/scan.php
 * @see feeds/configure.php
 */
public static function tick_hook($forced = FALSE) {
	global $context;

	// load libraries only once
	include_once $context['path_to_root'] . 'links/links.php';
	include_once $context['path_to_root'] . 'servers/servers.php';
	include_once $context['path_to_root'] . 'shared/values.php'; // feeds.tick

	// get feeding parameters
	Safe::load('parameters/feeds.include.php');

	// delay between feeds - minimum is 5 minutes
	if (!isset($context['minutes_between_feeds']) || $context['minutes_between_feeds'] < 5) {
		$context['minutes_between_feeds'] = 5;
	}

	// same layout as database stamps, to allow for string comparison
	// gmdate() is used because gmstrftime() is deprecated as of PHP 8.1
	$stamp_format = 'Y-m-d H:i:s';

	// do not wait for the end of a feeding cycle
	if ($forced) {
		$threshold = gmdate($stamp_format);
	} else {
		$threshold = gmdate($stamp_format, time() - $context['minutes_between_feeds'] * 60);
	}

	// get a batch of feeders
	if (!($feeders = Servers::list_for_feed(0, 1, 'feed'))) {
		return 'feeds/feeds.php: no feed has been defined' . BR;
	}

	// remember start time
	$start_time = get_micro_time();

	// list banned tokens
	$banned_pattern = Servers::get_banned_pattern();

	// shall we trace rejected items?
	$debug = isset($context['debug_feeds']) && ($context['debug_feeds'] == 'Y');

	// browse each feed
	$count = 0;
	foreach ($feeders as $server_id => $attributes) {

		// get specific feed parameters
		list($feed_url, $feed_title, $anchor, $stamp) = $attributes;

		// skip servers processed recently
		if ($stamp > $threshold) {
			continue;
		}

		// flag this record to enable round-robin even on error
		Servers::stamp($server_id);

		// fetch news from the provided link
		if (!($news = Feeds::get_remote_news_from($feed_url)) || !is_array($news)) {
			continue;
		}

		// no anchor has been defined for this feed
		if (!$anchor) {

			// create a default section if necessary
			if (!($anchor = Sections::lookup('external_news'))) {
				$now = gmdate($stamp_format, time());

				$fields = array();
				$fields['nick_name'] = 'external_news';
				$fields['create_date'] = $now;
				$fields['edit_date'] = $now;
				$fields['index_map'] = 'N';
				$fields['locked'] = 'Y'; // no direct contributions
				$fields['rank'] = 40000; // at the end of the list
				$fields['title'] = i18n::c('External News');
				$fields['description'] = i18n::c('Received from feeding servers');
				if (!($fields['id'] = Sections::post($fields))) {
					Logger::remember('feeds/feeds.php: Impossible to add a section.');
					return;
				}
				$anchor = 'section:' . $fields['id'];
			}
		}

		// process retrieved links
		$links = 0;
		foreach ($news as $item) {

			// some feeds do not provide every attribute
			$title = isset($item['title']) ? $item['title'] : '';
			$description = isset($item['description']) ? $item['description'] : '';

			// link has to be valid
			if (!isset($item['link']) || !($title . $description)) {
				if ($debug) {
					Logger::remember('feeds/feeds.php: feed item is invalid', $item, 'debug');
				}
				continue;
			}

			// skip banned servers
			if ($banned_pattern && preg_match($banned_pattern, $item['link'])) {
				if ($debug) {
					Logger::remember('feeds/feeds.php: feed host has been banned', $item['link'], 'debug');
				}
				continue;
			}

			// one link processed
			$links++;

			// link description
			$fields = array();
			$fields['anchor'] = $anchor;
			$fields['link_url'] = $item['link'];
			$fields['title'] = $title;
			$fields['description'] = $description;
			if (!empty($item['category'])) {
				$fields['description'] .= ' (' . $item['category'] . ')';
			}
			$fields['edit_name'] = $feed_title;
			$fields['edit_address'] = $feed_url;
			$fields['edit_action'] = 'link:feed';

			// use the publication date only if it can be parsed -- else the link would be dated from 1970
			if (!empty($item['pubDate']) && (($published = strtotime($item['pubDate'])) !== FALSE)) {
				$fields['edit_date'] = gmdate($stamp_format, $published);
			}

			// update links that already exist in the database
			if (Links::have($item['link'], $anchor, $fields)) {
				continue;
			}

			// save link in the database
			if (!Links::post($fields)) {
				Logger::remember('feeds/feeds.php: Impossible to save feed link: ' . Logger::error_pop());
			}
		}

		// one feed has been processed
		$count += 1;

		// remember the number of links processed for this feed
		Values::set('feeds.tick.' . $feed_url, $links);
	}

	// cap the number of links used for news
	if (!isset($context['maximum_news']) || !$context['maximum_news']) {
		$context['maximum_news'] = 1000;
	}

	// links/links.php has already been loaded at the top of this function
	if ($context['maximum_news'] > 10) {
		Links::purge_old_news($context['maximum_news']);
	}

	// compute execution time
	$time = round(get_micro_time() - $start_time, 2);

	// report on work achieved
	if ($count > 1) {
		return 'feeds/feeds.php: ' . $count . ' feeds have been processed (' . $time . ' seconds)' . BR;
	} elseif ($count == 1) {
		return 'feeds/feeds.php: 1 feed has been processed (' . $time . ' seconds)' . BR;
	} else {
		return 'feeds/feeds.php: nothing to do (' . $time . ' seconds)' . BR;
	}
}