Esempio n. 1
0
File: test.php Progetto: rair/yacs
 // insert anchor prefix
 if (is_object($anchor)) {
     $context['text'] .= $anchor->get_prefix();
 }
 // main url
 if ($item['main_url']) {
     $context['text'] .= '<p>' . sprintf(i18n::s('Main URL: %s'), Skin::build_link($item['main_url'], NULL, 'external')) . "</p>\n";
 }
 // no feed url
 if (!$item['feed_url']) {
     Logger::error(i18n::s('No feed url has been configured for this server profile.'));
 } else {
     // display the feed URL
     $context['text'] .= '<p>' . sprintf(i18n::s('Feed URL: %s'), Skin::build_link($item['feed_url'], NULL, 'external')) . "</p>\n";
     // fetch news from the provided link
     if (!($news = Feeds::get_remote_news_from($item['feed_url'])) || !is_array($news)) {
         $context['text'] .= '<p>' . i18n::s('Nothing to read from this feed.') . "</p>\n";
     } else {
         // statistics
         $context['text'] .= '<p>' . sprintf(i18n::s('%d elements have been read'), count($news)) . "</p>\n";
         // list banned hosts
         $banned_pattern = Servers::get_banned_pattern();
         // where links should be anchored
         $reference = '';
         if (is_object($anchor)) {
             $reference = $anchor->get_reference();
         }
         // process retrieved links
         $context['text'] .= '<ul>' . "\n";
         foreach ($news as $item) {
             // debug
Esempio n. 2
0
File: feeds.php Progetto: rair/yacs
 /**
  * get news from remote servers
  *
  * This function queries remote sources and populates the table of links based on fetched news.
  *
  * On tick, the including hook calls [code]Feeds::tick_hook()[/code].
  * See [script]control/scan.php[/script] for a more complete description of hooks.
  *
  * The function browses the database to locate servers acting as feeders, and reads the URLs to use.
  *
  * A round-robin algorithm is implemented, meaning that servers are polled in sequence throughout successive ticks.
  * At most 1 feed is parsed on each tick, to limit impact when the "poor-man" cron mechanism is used,
  * which is the default setting.
  *
  * XML feeds are fetched and parsed according to their type.
  * At the moment YACS is able to process RSS and slashdot feeds.
  * Link records are created or updated in the database, saving as much as possible of provided data.
  * Item data is reflected in Link, Title, and Description fields.
  * Channel data is used to populate the Source field.
  * Stamping information is based on feeding date, and channel title.
  * Also, the edit action 'link:feed' marks links that are collected from feeders.
  * The anchor field is set to the category assigned in the server profile; when no
  * anchor is configured, links are attached to the 'external_news' section, which is
  * created on the fly if it does not exist yet.
  *
  * Feed items are skipped when they have no link, when they have neither a title nor a
  * description, or when their link matches the pattern of banned hosts.
  *
  * At the end of the feeding process, the database is purged from oldest links according to the limit
  * defined in parameters/feeds.include.php, set through feeds/configure.php.
  * See Links::purge_old_news().
  *
  * @param boolean $forced if set to TRUE, fetch news on each call; else use normal period of time
  * @return string|NULL some text to be displayed in resulting page; nothing is returned
  * if the default section for external news cannot be created
  *
  * @see control/scan.php
  * @see feeds/configure.php
  */
 public static function tick_hook($forced = FALSE)
 {
     global $context;
     // load libraries only once
     include_once $context['path_to_root'] . 'links/links.php';
     include_once $context['path_to_root'] . 'servers/servers.php';
     include_once $context['path_to_root'] . 'shared/values.php';
     // get feeding parameters
     Safe::load('parameters/feeds.include.php');
     // delay between feeds - minimum is 5 minutes
     if (!isset($context['minutes_between_feeds']) || $context['minutes_between_feeds'] < 5) {
         $context['minutes_between_feeds'] = 5;
     }
     // servers stamped after this GMT date are considered as processed recently;
     // gmdate() is used because gmstrftime() is deprecated as of PHP 8.1
     if ($forced) {
         // do not wait for the end of a feeding cycle
         $threshold = gmdate('Y-m-d H:i:s');
     } else {
         $threshold = gmdate('Y-m-d H:i:s', time() - $context['minutes_between_feeds'] * 60);
     }
     // get a batch of feeders
     if (!($feeders = Servers::list_for_feed(0, 1, 'feed'))) {
         return 'feeds/feeds.php: no feed has been defined' . BR;
     }
     // remember start time
     $start_time = get_micro_time();
     // list banned tokens
     $banned_pattern = Servers::get_banned_pattern();
     // trace skipped items only if explicitly asked for
     $debug = isset($context['debug_feeds']) && $context['debug_feeds'] == 'Y';
     // browse each feed
     $count = 0;
     foreach ($feeders as $server_id => $attributes) {
         // get specific feed parameters
         list($feed_url, $feed_title, $anchor, $stamp) = $attributes;
         // skip servers processed recently
         if ($stamp > $threshold) {
             continue;
         }
         // flag this record to enable round-robin even on error
         Servers::stamp($server_id);
         // fetch news from the provided link
         if (!($news = Feeds::get_remote_news_from($feed_url)) || !is_array($news)) {
             continue;
         }
         // no anchor has been defined for this feed
         if (!$anchor) {
             // create a default section if necessary
             if (!($anchor = Sections::lookup('external_news'))) {
                 $now = gmdate('Y-m-d H:i:s');
                 $fields = array();
                 $fields['nick_name'] = 'external_news';
                 $fields['create_date'] = $now;
                 $fields['edit_date'] = $now;
                 $fields['index_map'] = 'N';
                 // no direct contributions
                 $fields['locked'] = 'Y';
                 // at the end of the list
                 $fields['rank'] = 40000;
                 $fields['title'] = i18n::c('External News');
                 $fields['description'] = i18n::c('Received from feeding servers');
                 if (!($fields['id'] = Sections::post($fields))) {
                     // nowhere to anchor links -- give up on this tick
                     Logger::remember('feeds/feeds.php: Impossible to add a section.');
                     return;
                 }
                 $anchor = 'section:' . $fields['id'];
             }
         }
         // process retrieved links
         $links = 0;
         foreach ($news as $item) {
             // title and description are optional in feeds -- do not trigger notices on missing keys
             $title = isset($item['title']) ? $item['title'] : '';
             $description = isset($item['description']) ? $item['description'] : '';
             // link has to be valid, and at least a title or a description is required
             if (!isset($item['link']) || !($title . $description)) {
                 if ($debug) {
                     Logger::remember('feeds/feeds.php: feed item is invalid', $item, 'debug');
                 }
                 continue;
             }
             // skip banned servers
             if ($banned_pattern && preg_match($banned_pattern, $item['link'])) {
                 if ($debug) {
                     Logger::remember('feeds/feeds.php: feed host has been banned', $item['link'], 'debug');
                 }
                 continue;
             }
             // one link processed
             $links++;
             // link description
             $fields = array();
             $fields['anchor'] = $anchor;
             $fields['link_url'] = $item['link'];
             $fields['title'] = $title;
             $fields['description'] = $description;
             if (!empty($item['category'])) {
                 $fields['description'] .= ' (' . $item['category'] . ')';
             }
             $fields['edit_name'] = $feed_title;
             $fields['edit_address'] = $feed_url;
             $fields['edit_action'] = 'link:feed';
             // use the publication date only if it can be parsed; on failure strtotime()
             // returns FALSE, which would otherwise be formatted as the Unix epoch
             if (!empty($item['pubDate']) && ($published = strtotime($item['pubDate'])) !== FALSE) {
                 $fields['edit_date'] = gmdate('Y-m-d H:i:s', $published);
             }
             // update links that already exist in the database
             if (Links::have($item['link'], $anchor, $fields)) {
                 continue;
             }
             // save link in the database
             if (!Links::post($fields)) {
                 Logger::remember('feeds/feeds.php: Impossible to save feed link: ' . Logger::error_pop());
             }
         }
         // one feed has been processed
         $count += 1;
         // remember the number of links accepted from this feed
         Values::set('feeds.tick.' . $feed_url, $links);
     }
     // cap the number of links used for news
     if (!isset($context['maximum_news']) || !$context['maximum_news']) {
         $context['maximum_news'] = 1000;
     }
     // links/links.php has already been loaded at the top of this function
     if ($context['maximum_news'] > 10) {
         Links::purge_old_news($context['maximum_news']);
     }
     // compute execution time
     $time = round(get_micro_time() - $start_time, 2);
     // report on work achieved
     if ($count > 1) {
         return 'feeds/feeds.php: ' . $count . ' feeds have been processed (' . $time . ' seconds)' . BR;
     } elseif ($count == 1) {
         return 'feeds/feeds.php: 1 feed has been processed (' . $time . ' seconds)' . BR;
     } else {
         return 'feeds/feeds.php: nothing to do (' . $time . ' seconds)' . BR;
     }
 }