コード例 #1
0
ファイル: referrals.php プロジェクト: rair/yacs
 /**
  * process one single HTTP request
  *
  * This function removes any PHPSESSID data in the query string, if any
  *
  * @return void
  *
  * @see agents/referrals_hook.php
  */
 public static function check_request()
 {
     global $context;
     // don't bother with HEAD requests
     if (isset($_SERVER['REQUEST_METHOD']) && $_SERVER['REQUEST_METHOD'] == 'HEAD') {
         return;
     }
     // the target url
     if (!isset($_SERVER['REQUEST_URI']) || !($url = $_SERVER['REQUEST_URI'])) {
         return;
     }
     // only remember viewed pages and index pages
     if (!preg_match('/\\/(index|view).php/', $url)) {
         return;
     }
     // continue only if we have a referer
     if (!isset($_SERVER['HTTP_REFERER']) || !($referer = $_SERVER['HTTP_REFERER'])) {
         return;
     }
     // do not memorize cache referrals
     if (preg_match('/cache:/i', $referer)) {
         return;
     }
     // block pernicious attacks
     $referer = strip_tags($referer);
     // only remember external referrals
     if (preg_match('/\\b' . preg_quote(str_replace('www.', '', $context['host_name']), '/') . '\\b/i', $referer)) {
         return;
     }
     // stop crawlers
     if (Surfer::is_crawler()) {
         return;
     }
     // avoid banned sources
     include_once $context['path_to_root'] . 'servers/servers.php';
     if (preg_match(Servers::get_banned_pattern(), $referer)) {
         return;
     }
     // normalize the referral, extract keywords, and domain
     list($referer, $domain, $keywords) = Referrals::normalize($referer);
     // if a record exists for this url
     $query = "SELECT id FROM " . SQL::table_name('referrals') . " AS referrals" . " WHERE referrals.url LIKE '" . SQL::escape($url) . "' AND referrals.referer LIKE '" . SQL::escape($referer) . "'";
     if (!($item = SQL::query_first($query))) {
         return;
     }
     // update figures
     if (isset($item['id'])) {
         $query = "UPDATE " . SQL::table_name('referrals') . " SET" . " hits=hits+1," . " stamp='" . gmstrftime('%Y-%m-%d %H:%M:%S') . "'" . " WHERE id = " . $item['id'];
         // create a new record
     } else {
         // ensure the referer is accessible
         if (($content = http::proceed($referer)) === FALSE) {
             return;
         }
         // we have to find a reference to ourself in this page
         if (strpos($content, $context['url_to_home']) === FALSE) {
             return;
         }
         $query = "INSERT INTO " . SQL::table_name('referrals') . " SET" . " url='" . SQL::escape($url) . "'," . " referer='" . SQL::escape($referer) . "'," . " domain='" . SQL::escape($domain) . "'," . " keywords='" . SQL::escape($keywords) . "'," . " hits=1," . " stamp='" . gmstrftime('%Y-%m-%d %H:%M:%S') . "'";
     }
     // actual database update
     if (SQL::query($query) === FALSE) {
         return;
     }
     // prune with a probability of 1/100
     if (rand(1, 100) != 50) {
         return;
     }
     // purge oldest records -- 100 days = 8640000 seconds
     $query = "DELETE FROM " . SQL::table_name('referrals') . " WHERE stamp < '" . gmstrftime('%Y-%m-%d %H:%M:%S', time() - 8640000) . "'";
     SQL::query($query);
 }
コード例 #2
0
ファイル: check.php プロジェクト: rair/yacs
    } elseif ($links_offset) {
        $context['text'] .= '<p>' . i18n::s('No broken link has been found.') . '</p>';
    }
    // display the execution time
    $time = round(get_micro_time() - $context['start_time'], 2);
    $context['text'] .= '<p>' . sprintf(i18n::s('Script terminated in %.2f seconds.'), $time) . '</p>';
    // forward to the index page
    $menu = array('links/' => i18n::s('Links'));
    $context['text'] .= Skin::build_list($menu, 'menu_bar');
    // check referrals through the Internet
} elseif (isset($_REQUEST['action']) && $_REQUEST['action'] == 'referrals') {
    // scan links
    $context['text'] .= '<p>' . sprintf(i18n::s('Analyzing table %s...'), SQL::table_name('links')) . "</p>\n";
    // avoid banned sources
    include_once $context['path_to_root'] . 'servers/servers.php';
    $banned_pattern = Servers::get_banned_pattern();
    // process all links
    $links_offset = 0;
    while ($links_offset < MAXIMUM_SIZE) {
        // seek the database and check newest referrals
        include_once '../agents/referrals.php';
        if (($rows = Referrals::list_by_dates($links_offset, CHUNK_SIZE)) && count($rows)) {
            // analyze each link
            foreach ($rows as $item) {
                $url = $item['referer'];
                // avoid banned sources
                if (preg_match($banned_pattern, $url)) {
                    $context['text'] .= 'x';
                    $banned[] = $url;
                    // delete the referral from the database
                    Referrals::delete($url);
コード例 #3
0
ファイル: feeds.php プロジェクト: rair/yacs
 /**
  * get news from remote servers
  *
  * This function queries remote sources and populate the table of links based on fetched news.
  *
  * On tick, the including hook calls [code]Feeds::tick_hook()[/code].
  * See [script]control/scan.php[/script] for a more complete description of hooks.
  *
  * The function browses the database to locate servers acting as feeders, and read the URLs to use.
  *
  * A round-robin algorithm is implemented, meaning that servers are polled in sequence throughout successive ticks.
  * At most 1 feed is parsed on each tick, to limit impact when the "poor-man" cron mechanism is used,
  * which is the default setting.
  *
  * XML feeds are fetched and parsed according to their type.
  * At the moment YACS is able to process RSS and slashdot feeds.
  * Link records are created or updated in the database saving as much of possible of provided data.
  * Item data is reflected in Link, Title, and Description fields.
  * Channel	data is used to populate the Source field.
  * Stamping information is based on feeding date, and channel title.
  * Also, the edit action 'link:feed' marks links that are collected from feeders.
  * The anchor field is set to the category assigned in the server profile.
  *
  * At the end of the feeding process, the database is purged from oldest links according to the limit
  * defined in parameters/feeds.include.php, set through feeds/configure.php.
  * See Links::purge_old_news().
  *
  * @param boolean if set to true, fetch news on each call; else use normal period of time
  * @return a string to be displayed in resulting page, if any
  *
  * @see control/scan.php
  * @see feeds/configure.php
  */
 public static function tick_hook($forced = FALSE)
 {
     global $context;
     // load librairies only once
     include_once $context['path_to_root'] . 'links/links.php';
     include_once $context['path_to_root'] . 'servers/servers.php';
     include_once $context['path_to_root'] . 'shared/values.php';
     // feeds.tick
     // get feeding parameters
     Safe::load('parameters/feeds.include.php');
     // delay between feeds - minimum is 5 minutes
     if (!isset($context['minutes_between_feeds']) || $context['minutes_between_feeds'] < 5) {
         $context['minutes_between_feeds'] = 5;
     }
     // do not wait for the end of a feeding cycle
     if ($forced) {
         $threshold = gmstrftime('%Y-%m-%d %H:%M:%S');
     } else {
         $threshold = gmstrftime('%Y-%m-%d %H:%M:%S', time() - $context['minutes_between_feeds'] * 60);
     }
     // get a batch of feeders
     if (!($feeders = Servers::list_for_feed(0, 1, 'feed'))) {
         return 'feeds/feeds.php: no feed has been defined' . BR;
     }
     // remember start time
     $start_time = get_micro_time();
     // list banned tokens
     $banned_pattern = Servers::get_banned_pattern();
     // browse each feed
     $count = 0;
     foreach ($feeders as $server_id => $attributes) {
         // get specific feed parameters
         list($feed_url, $feed_title, $anchor, $stamp) = $attributes;
         // skip servers processed recently
         if ($stamp > $threshold) {
             continue;
         }
         // flag this record to enable round-robin even on error
         Servers::stamp($server_id);
         // fetch news from the provided link
         if (!($news = Feeds::get_remote_news_from($feed_url)) || !is_array($news)) {
             continue;
         }
         // no anchor has been defined for this feed
         if (!$anchor) {
             // create a default section if necessary
             if (!($anchor = Sections::lookup('external_news'))) {
                 $fields = array();
                 $fields['nick_name'] = 'external_news';
                 $fields['create_date'] = gmstrftime('%Y-%m-%d %H:%M:%S', time());
                 $fields['edit_date'] = gmstrftime('%Y-%m-%d %H:%M:%S', time());
                 $fields['index_map'] = 'N';
                 $fields['locked'] = 'Y';
                 // no direct contributions
                 $fields['rank'] = 40000;
                 // at the end of the list
                 $fields['title'] = i18n::c('External News');
                 $fields['description'] = i18n::c('Received from feeding servers');
                 if (!($fields['id'] = Sections::post($fields))) {
                     Logger::remember('feeds/feeds.php: Impossible to add a section.');
                     return;
                 }
                 $anchor = 'section:' . $fields['id'];
             }
         }
         // process retrieved links
         $links = 0;
         foreach ($news as $item) {
             // link has to be valid
             if (!isset($item['link']) || !($item['title'] . $item['description'])) {
                 if (isset($context['debug_feeds']) && $context['debug_feeds'] == 'Y') {
                     Logger::remember('feeds/feeds.php: feed item is invalid', $item, 'debug');
                 }
                 continue;
             }
             // skip banned servers
             if ($banned_pattern && preg_match($banned_pattern, $item['link'])) {
                 if (isset($context['debug_feeds']) && $context['debug_feeds'] == 'Y') {
                     Logger::remember('feeds/feeds.php: feed host has been banned', $item['link'], 'debug');
                 }
                 continue;
             }
             // one link processed
             $links++;
             // link description
             $fields = array();
             $fields['anchor'] = $anchor;
             $fields['link_url'] = $item['link'];
             $fields['title'] = $item['title'];
             $fields['description'] = $item['description'];
             if ($item['category']) {
                 $fields['description'] .= ' (' . $item['category'] . ')';
             }
             $fields['edit_name'] = $feed_title;
             $fields['edit_address'] = $feed_url;
             $fields['edit_action'] = 'link:feed';
             if ($item['pubDate']) {
                 $fields['edit_date'] = gmstrftime('%Y-%m-%d %H:%M:%S', strtotime($item['pubDate']));
             }
             // update links that already exist in the database
             if (Links::have($item['link'], $anchor, $fields)) {
                 continue;
             }
             // save link in the database
             if (!Links::post($fields)) {
                 Logger::remember('feeds/feeds.php: Impossible to save feed link: ' . Logger::error_pop());
             }
         }
         // one feed has been processed
         $count += 1;
         // remember tick date
         Values::set('feeds.tick.' . $feed_url, $links);
     }
     // cap the number of links used for news
     if (!isset($context['maximum_news']) || !$context['maximum_news']) {
         $context['maximum_news'] = 1000;
     }
     if ($context['maximum_news'] > 10) {
         include_once $context['path_to_root'] . 'links/links.php';
         Links::purge_old_news($context['maximum_news']);
     }
     // compute execution time
     $time = round(get_micro_time() - $start_time, 2);
     // report on work achieved
     if ($count > 1) {
         return 'feeds/feeds.php: ' . $count . ' feeds have been processed (' . $time . ' seconds)' . BR;
     } elseif ($count == 1) {
         return 'feeds/feeds.php: 1 feed has been processed (' . $time . ' seconds)' . BR;
     } else {
         return 'feeds/feeds.php: nothing to do (' . $time . ' seconds)' . BR;
     }
 }