set_sanitize_class() public method

Set which class SimplePie uses for data sanitization
public set_sanitize_class ( $class = 'SimplePie_Sanitize' )
 /**
  * Build a SimplePie object for a feed URL, parsing the response as a feed
  * regardless of the content type it is served with.
  *
  * @param mixed $url URL of the feed to retrieve.
  *
  * @return WP_Error|SimplePie WP_Error when SimplePie reports an error, otherwise the feed object.
  */
 function fetch_feed2($url)
 {
     require_once ABSPATH . WPINC . '/class-feed.php';

     $simplepie = new SimplePie();

     // Registering the class name is not enough: the constructor has already
     // created a sanitizer, so the instance is replaced by hand as well.
     $simplepie->set_sanitize_class('WP_SimplePie_Sanitize_KSES');
     $simplepie->sanitize = new WP_SimplePie_Sanitize_KSES();

     $simplepie->set_cache_class('WP_Feed_Cache');
     $simplepie->set_file_class('WP_SimplePie_File');
     $simplepie->set_feed_url($url);
     $simplepie->force_feed(true);

     /** This filter is documented in wp-includes/class-feed.php */
     $lifetime = apply_filters('wp_feed_cache_transient_lifetime', 12 * HOUR_IN_SECONDS, $url);
     $simplepie->set_cache_duration($lifetime);

     /**
      * Fires just before processing the SimplePie feed object.
      *
      * @since 3.0.0
      *
      * @param object &$feed SimplePie feed object, passed by reference.
      * @param mixed  $url   URL of feed to retrieve. If an array of URLs, the feeds are merged.
      */
     do_action_ref_array('wp_feed_options', array(&$simplepie, $url));

     $simplepie->init();
     $simplepie->handle_content_type();

     $failure = $simplepie->error();

     return $failure ? new WP_Error('simplepie-error', $failure) : $simplepie;
 }
/**
 * Fetch the newest items of a news feed.
 *
 * The feed is first requested through fetch_feed(). When that returns a
 * WP_Error, the feed body is downloaded with powerpress_remote_fopen() and
 * parsed by a manually configured SimplePie object instead. When the parsed
 * feed yields no items, the 'feed_' / 'feed_mod_' transients for the URL are
 * deleted and the feed is fetched one more time.
 *
 * @param string $feed_url URL of the feed to read.
 * @param int    $limit    Maximum number of items to return.
 *
 * @return array|false Feed items (may be empty), or false when the feed could not be loaded.
 */
function powerpress_get_news($feed_url, $limit = 10)
{
    include_once ABSPATH . WPINC . '/feed.php';
    $rss = fetch_feed($feed_url);
    // If feed doesn't work...
    if (is_wp_error($rss)) {
        require_once ABSPATH . WPINC . '/class-feed.php';
        // Try fetching the feed using CURL directly...
        $content = powerpress_remote_fopen($feed_url, false, array(), 3, false, true);
        if (!$content) {
            return false;
        }
        // Load the content in a fetch_feed object...
        $rss = new SimplePie();
        $rss->set_sanitize_class('WP_SimplePie_Sanitize_KSES');
        // We must manually overwrite $rss->sanitize because SimplePie's
        // constructor sets it before we have a chance to set the sanitization class
        $rss->sanitize = new WP_SimplePie_Sanitize_KSES();
        $rss->set_cache_class('WP_Feed_Cache');
        $rss->set_file_class('WP_SimplePie_File');
        $rss->set_raw_data($content);
        $rss->set_cache_duration(apply_filters('wp_feed_cache_transient_lifetime', 12 * HOUR_IN_SECONDS, $feed_url));
        do_action_ref_array('wp_feed_options', array(&$rss, $feed_url));
        $rss->init();
        $rss->set_output_encoding(get_option('blog_charset'));
        $rss->handle_content_type();
        if ($rss->error()) {
            return false;
        }
    }
    $rss_items = $rss->get_items(0, $rss->get_item_quantity($limit));
    // If the feed came back without items, drop the stored transients and
    // try one fresh fetch.
    if (!$rss_items) {
        // This is a plain function, so the URL and limit come from the
        // parameters; there is no $this and no $num in scope here.
        $md5 = md5($feed_url);
        delete_transient('feed_' . $md5);
        delete_transient('feed_mod_' . $md5);
        $rss->__destruct();
        unset($rss);
        $rss = fetch_feed($feed_url);
        // fetch_feed() hands back a WP_Error on failure; it has no get_items().
        if (is_wp_error($rss)) {
            return false;
        }
        $rss_items = $rss->get_items(0, $rss->get_item_quantity($limit));
        $rss->__destruct();
        unset($rss);
    }
    return $rss_items;
}
 /**
  * Build a SimplePie object for a feed URL, requesting it with a desktop
  * browser user agent string.
  *
  * @param mixed $url URL of the feed to retrieve.
  *
  * @return WP_Error|SimplePie WP_Error when SimplePie reports an error, otherwise the feed object.
  */
 function get_rss_feed($url)
 {
     require_once ABSPATH . WPINC . '/class-feed.php';

     $simplepie = new SimplePie();

     // The sanitizer instance is assigned directly in addition to registering
     // its class name.
     $simplepie->set_sanitize_class('WP_SimplePie_Sanitize_KSES');
     $simplepie->sanitize = new WP_SimplePie_Sanitize_KSES();

     $simplepie->set_useragent('Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36');
     $simplepie->set_cache_class('WP_Feed_Cache');
     $simplepie->set_file_class('WP_SimplePie_File');
     $simplepie->set_feed_url($url);

     $lifetime = apply_filters('wp_feed_cache_transient_lifetime', 12 * HOUR_IN_SECONDS, $url);
     $simplepie->set_cache_duration($lifetime);

     // Let listeners adjust the feed object before it is initialised.
     do_action_ref_array('wp_feed_options', array(&$simplepie, $url));

     $simplepie->init();
     $simplepie->handle_content_type();

     $failure = $simplepie->error();

     return $failure ? new WP_Error('simplepie-error', $failure) : $simplepie;
 }
Example #4
0
/**
 * Create a SimplePie object from the RSS or Atom feed found at a URL.
 *
 * @since 2.8
 *
 * @param mixed $url URL of the feed to retrieve. When an array of URLs is given, the feeds
 * are merged through SimplePie's multifeed feature.
 * See also {@link http://simplepie.org/wiki/faq/typical_multifeed_gotchas}
 *
 * @return WP_Error|SimplePie WP_Error object on failure or SimplePie object on success
 */
function fetch_feed($url)
{
    require_once ABSPATH . WPINC . '/class-feed.php';

    $simplepie = new SimplePie();

    // SimplePie's constructor creates its sanitizer before the class can be
    // swapped, so the instance has to be overwritten by hand as well.
    $simplepie->set_sanitize_class('WP_SimplePie_Sanitize_KSES');
    $simplepie->sanitize = new WP_SimplePie_Sanitize_KSES();

    $simplepie->set_cache_class('WP_Feed_Cache');
    $simplepie->set_file_class('WP_SimplePie_File');
    $simplepie->set_feed_url($url);

    $lifetime = apply_filters('wp_feed_cache_transient_lifetime', 12 * HOUR_IN_SECONDS, $url);
    $simplepie->set_cache_duration($lifetime);

    // Let listeners adjust the feed object before it is initialised.
    do_action_ref_array('wp_feed_options', array(&$simplepie, $url));

    $simplepie->init();
    $simplepie->handle_content_type();

    $failure = $simplepie->error();

    return $failure ? new WP_Error('simplepie-error', $failure) : $simplepie;
}
}
////////////////////////////////
// Get RSS/Atom feed
////////////////////////////////
// Unless $accept is 'html', first try to treat the URL as a feed.
// NOTE(review): the closing brace of this block is outside the visible excerpt.
if ($accept !== 'html') {
    debug('--------');
    debug("Attempting to process URL as feed");
    // Send user agent header showing PHP (prevents a HTML response from feedburner)
    $http->userAgentDefault = HumbleHttpAgent::UA_PHP;
    // configure SimplePie HTTP extension class to use our HumbleHttpAgent instance
    SimplePie_HumbleHttpAgent::set_agent($http);
    $feed = new SimplePie();
    // some feeds use the text/html content type - force_feed tells SimplePie to process anyway
    $feed->force_feed(true);
    $feed->set_file_class('SimplePie_HumbleHttpAgent');
    $feed->set_sanitize_class('DisableSimplePieSanitize');
    // need to assign this manually it seems
    $feed->sanitize = new DisableSimplePieSanitize();
    // The URL is written straight to the property instead of going through
    // set_feed_url(), for the reason given on the disabled call below.
    //$feed->set_feed_url($url); // colons appearing in the URL's path get encoded
    $feed->feed_url = $url;
    // No autodiscovery: the URL is used as given.
    $feed->set_autodiscovery_level(SIMPLEPIE_LOCATOR_NONE);
    $feed->set_timeout(20);
    // SimplePie's cache is switched off for this request.
    $feed->enable_cache(false);
    $feed->set_stupidly_fast(true);
    $feed->enable_order_by_date(false);
    // we don't want to do anything to the feed
    $feed->set_url_replacements(array());
    // initialise the feed
    // the @ suppresses notices which on some servers causes a 500 internal server error
    $result = @$feed->init();
    //$feed->handle_content_type();
Example #6
0
 /**
  * Import the posts, then the comments, of one Blogger blog.
  *
  * Progress is kept in $this->blogs[$blogID] ('posts_start_index',
  * 'comments_start_index', 'mode') and persisted with save_vars() after each
  * batch, so a later call can resume where this one stopped. Batches of
  * MAX_RESULTS entries are requested through oauth_get() and parsed with
  * SimplePie.
  *
  * The request may also carry one of the GET flags 'authors', 'status' or
  * 'saveauthors', which are answered before any importing happens.
  *
  * @param mixed $blogID Key of the blog inside $this->blogs.
  *
  * @return mixed The result of print when the author form is requested.
  *               NOTE(review): every other path ends in self::ajax_die(),
  *               which presumably terminates the request - confirm.
  */
 function import_blog($blogID)
 {
     global $importing_blog;
     $importing_blog = $blogID;
     if (isset($_GET['authors'])) {
         return print $this->get_author_form();
     }
     if (isset($_GET['status'])) {
         self::ajax_die($this->get_js_status());
     }
     if (isset($_GET['saveauthors'])) {
         self::ajax_die($this->save_authors());
     }
     //Simpler counting for posts as we load them forwards
     if (isset($this->blogs[$importing_blog]['posts_start_index'])) {
         $start_index = (int) $this->blogs[$importing_blog]['posts_start_index'];
     } else {
         $start_index = 1;
     }
     // This will be positive until we have finished importing posts
     if ($start_index > 0) {
         // Grab all the posts
         $this->blogs[$importing_blog]['mode'] = 'posts';
         do {
             $url = $this->blogs[$importing_blog]['posts_url'];
             $response = $this->oauth_get($url, array('max-results' => MAX_RESULTS, 'start-index' => $start_index));
             if ($response == false) {
                 break;
             }
             // parse the feed
             $feed = new SimplePie();
             $feed->set_item_class('WP_SimplePie_Blog_Item');
             $feed->set_sanitize_class('Blogger_Importer_Sanitize');
             $feed->set_raw_data($response);
             $feed->init();
             // The cast turns a null result (no items) into an empty list.
             foreach ((array) $feed->get_items() as $item) {
                 $blogentry = new BloggerEntry();
                 $blogentry->id = $item->get_id();
                 $blogentry->published = $item->get_published();
                 $blogentry->updated = $item->get_updated();
                 $blogentry->isDraft = $item->get_draft_status($item);
                 $blogentry->title = $item->get_title();
                 $blogentry->content = $item->get_content();
                 // get_author() yields null for an entry without an author.
                 $author = $item->get_author();
                 if ($author) {
                     $blogentry->author = $author->get_name();
                 }
                 $blogentry->geotags = $item->get_geotags();
                 $linktypes = array('replies', 'edit', 'self', 'alternate');
                 foreach ($linktypes as $type) {
                     $links = $item->get_links($type);
                     if (!is_null($links)) {
                         foreach ($links as $link) {
                             $blogentry->links[] = array('rel' => $type, 'href' => $link);
                         }
                     }
                 }
                 $cats = $item->get_categories();
                 if (!is_null($cats)) {
                     foreach ($cats as $cat) {
                         $blogentry->categories[] = $cat->term;
                     }
                 }
                 $this->import_post($blogentry);
                 //Ref: Not importing properly http://core.trac.wordpress.org/ticket/19096
                 //Simplified this section to count what is loaded rather than parsing the results again
                 $start_index++;
             }
             $this->blogs[$importing_blog]['posts_start_index'] = $start_index;
             $this->save_vars();
         } while ($this->blogs[$importing_blog]['total_posts'] > $start_index && $this->have_time());
         //have time function will "die" if it's out of time
     }
     if (isset($this->blogs[$importing_blog]['comments_start_index'])) {
         $start_index = (int) $this->blogs[$importing_blog]['comments_start_index'];
     } else {
         $start_index = 1;
     }
     if ($start_index > 0 && $this->blogs[$importing_blog]['total_comments'] > 0) {
         $this->blogs[$importing_blog]['mode'] = 'comments';
         do {
             //So we can link up the comments as we go we need to load them in reverse order
             //Reverse the start index as the GData Blogger feed can't be sorted
             $batch = floor(($this->blogs[$importing_blog]['total_comments'] - $start_index) / MAX_RESULTS) * MAX_RESULTS + 1;
             $response = $this->oauth_get($this->blogs[$importing_blog]['comments_url'], array('max-results' => MAX_RESULTS, 'start-index' => $batch));
             // Same handling as the posts loop: a failed request ends the
             // batch loop instead of re-requesting the same batch over and
             // over while $start_index never advances.
             if ($response == false) {
                 break;
             }
             // parse the feed
             $feed = new SimplePie();
             $feed->set_item_class('WP_SimplePie_Blog_Item');
             // Use the standard "stricter" sanitize class for comments
             $feed->set_raw_data($response);
             $feed->init();
             //Reverse the batch so we load the oldest comments first and hence can link up nested comments
             // The cast keeps array_reverse() from receiving null when there are no items.
             $comments = array_reverse((array) $feed->get_items());
             foreach ($comments as $item) {
                 $blogentry = new BloggerEntry();
                 $blogentry->id = $item->get_id();
                 $blogentry->updated = $item->get_updated();
                 $blogentry->content = $item->get_content();
                 // get_author() yields null for an entry without an author.
                 $author = $item->get_author();
                 if ($author) {
                     $blogentry->author = $author->get_name();
                     $blogentry->authoruri = $author->get_link();
                     $blogentry->authoremail = $author->get_email();
                 }
                 // get_item_tags() yields null when the entry has no in-reply-to element.
                 $temp = $item->get_item_tags('http://purl.org/syndication/thread/1.0', 'in-reply-to');
                 if (is_array($temp)) {
                     foreach ($temp as $t) {
                         if (isset($t['attribs']['']['source'])) {
                             $blogentry->source = $t['attribs']['']['source'];
                         }
                     }
                 }
                 //Get the links
                 $linktypes = array('edit', 'self', 'alternate', 'related');
                 foreach ($linktypes as $type) {
                     $links = $item->get_links($type);
                     if (!is_null($links)) {
                         foreach ($links as $link) {
                             $blogentry->links[] = array('rel' => $type, 'href' => $link);
                         }
                     }
                 }
                 $this->import_comment($blogentry);
                 $start_index++;
             }
             $this->blogs[$importing_blog]['comments_start_index'] = $start_index;
             $this->save_vars();
         } while ($this->blogs[$importing_blog]['total_comments'] > $start_index && $this->have_time());
     }
     $this->blogs[$importing_blog]['mode'] = 'authors';
     $this->save_vars();
     if (!$this->blogs[$importing_blog]['posts_done'] && !$this->blogs[$importing_blog]['comments_done']) {
         self::ajax_die('nothing');
     }
     do_action('import_done', 'blogger');
     self::ajax_die('done');
 }
function update_rss_feed($link, $feed, $ignore_daemon = false, $no_cache = false, $override_url = false)
{
    require_once "lib/simplepie/simplepie.inc";
    $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug'];
    if ($debug_enabled) {
        _debug("update_rss_feed: start");
    }
    $result = db_query($link, "SELECT id,update_interval,auth_login,\n\t\t\tfeed_url,auth_pass,cache_images,last_updated,\n\t\t\tmark_unread_on_update, owner_uid,\n\t\t\tpubsub_state\n\t\t\tFROM ttrss_feeds WHERE id = '{$feed}'");
    if (db_num_rows($result) == 0) {
        if ($debug_enabled) {
            _debug("update_rss_feed: feed {$feed} NOT FOUND/SKIPPED");
        }
        return false;
    }
    $last_updated = db_fetch_result($result, 0, "last_updated");
    $owner_uid = db_fetch_result($result, 0, "owner_uid");
    $mark_unread_on_update = sql_bool_to_bool(db_fetch_result($result, 0, "mark_unread_on_update"));
    $pubsub_state = db_fetch_result($result, 0, "pubsub_state");
    db_query($link, "UPDATE ttrss_feeds SET last_update_started = NOW()\n\t\t\tWHERE id = '{$feed}'");
    $auth_login = db_fetch_result($result, 0, "auth_login");
    $auth_pass = db_fetch_result($result, 0, "auth_pass");
    $cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images"));
    $fetch_url = db_fetch_result($result, 0, "feed_url");
    $feed = db_escape_string($feed);
    /* if ($auth_login && $auth_pass ){
    			$url_parts = array();
    			preg_match("/(^[^:]*):\/\/(.*)/", $fetch_url, $url_parts);
    
    			if ($url_parts[1] && $url_parts[2]) {
    				$fetch_url = $url_parts[1] . "://$auth_login:$auth_pass@" . $url_parts[2];
    			}
    		} */
    if ($override_url) {
        $fetch_url = $override_url;
    }
    if ($debug_enabled) {
        _debug("update_rss_feed: fetching [{$fetch_url}]...");
    }
    // Ignore cache if new feed or manual update.
    $cache_age = is_null($last_updated) || $last_updated == '1970-01-01 00:00:00' ? -1 : get_feed_update_interval($link, $feed) * 60;
    $simplepie_cache_dir = CACHE_DIR . "/simplepie";
    if (!is_dir($simplepie_cache_dir)) {
        mkdir($simplepie_cache_dir);
    }
    $feed_data = fetch_file_contents($fetch_url, false, $auth_login, $auth_pass, false, $no_cache ? 15 : 45);
    if (!$feed_data) {
        global $fetch_last_error;
        if ($debug_enabled) {
            _debug("update_rss_feed: unable to fetch: {$fetch_last_error}");
        }
        $error_escaped = db_escape_string($fetch_last_error);
        db_query($link, "UPDATE ttrss_feeds SET last_error = '{$error_escaped}',\n\t\t\t\t\tlast_updated = NOW() WHERE id = '{$feed}'");
        return;
    }
    $pluginhost = new PluginHost($link);
    $pluginhost->set_debug($debug_enabled);
    $user_plugins = get_pref($link, "_ENABLED_PLUGINS", $owner_uid);
    $pluginhost->load(PLUGINS, $pluginhost::KIND_ALL);
    $pluginhost->load($user_plugins, $pluginhost::KIND_USER, $owner_uid);
    $pluginhost->load_data();
    foreach ($pluginhost->get_hooks($pluginhost::HOOK_FEED_FETCHED) as $plugin) {
        $feed_data = $plugin->hook_feed_fetched($feed_data);
    }
    if ($debug_enabled) {
        _debug("update_rss_feed: fetch done, parsing...");
    }
    $rss = new SimplePie();
    $rss->set_sanitize_class("SanitizeDummy");
    // simplepie ignores the above and creates default sanitizer anyway,
    // so let's override it...
    $rss->sanitize = new SanitizeDummy();
    $rss->set_output_encoding('UTF-8');
    $rss->set_raw_data($feed_data);
    if ($debug_enabled) {
        _debug("feed update interval (sec): " . get_feed_update_interval($link, $feed) * 60);
    }
    $rss->enable_cache(!$no_cache);
    if (!$no_cache) {
        $rss->set_cache_location($simplepie_cache_dir);
        $rss->set_cache_duration($cache_age);
    }
    @$rss->init();
    //		print_r($rss);
    $feed = db_escape_string($feed);
    if (!$rss->error()) {
        // We use local pluginhost here because we need to load different per-user feed plugins
        $pluginhost->run_hooks($pluginhost::HOOK_FEED_PARSED, "hook_feed_parsed", $rss);
        if ($debug_enabled) {
            _debug("update_rss_feed: processing feed data...");
        }
        //			db_query($link, "BEGIN");
        if (DB_TYPE == "pgsql") {
            $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'";
        } else {
            $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)";
        }
        $result = db_query($link, "SELECT title,site_url,owner_uid,\n\t\t\t\t(favicon_last_checked IS NULL OR {$favicon_interval_qpart}) AS\n\t\t\t\t\t\tfavicon_needs_check\n\t\t\t\tFROM ttrss_feeds WHERE id = '{$feed}'");
        $registered_title = db_fetch_result($result, 0, "title");
        $orig_site_url = db_fetch_result($result, 0, "site_url");
        $favicon_needs_check = sql_bool_to_bool(db_fetch_result($result, 0, "favicon_needs_check"));
        $owner_uid = db_fetch_result($result, 0, "owner_uid");
        $site_url = db_escape_string(mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245));
        if ($debug_enabled) {
            _debug("update_rss_feed: checking favicon...");
        }
        if ($favicon_needs_check) {
            check_feed_favicon($site_url, $feed, $link);
            db_query($link, "UPDATE ttrss_feeds SET favicon_last_checked = NOW()\n\t\t\t\t\tWHERE id = '{$feed}'");
        }
        if (!$registered_title || $registered_title == "[Unknown]") {
            $feed_title = db_escape_string($rss->get_title());
            if ($debug_enabled) {
                _debug("update_rss_feed: registering title: {$feed_title}");
            }
            db_query($link, "UPDATE ttrss_feeds SET\n\t\t\t\t\ttitle = '{$feed_title}' WHERE id = '{$feed}'");
        }
        if ($site_url && $orig_site_url != $site_url) {
            db_query($link, "UPDATE ttrss_feeds SET\n\t\t\t\t\tsite_url = '{$site_url}' WHERE id = '{$feed}'");
        }
        if ($debug_enabled) {
            _debug("update_rss_feed: loading filters & labels...");
        }
        $filters = load_filters($link, $feed, $owner_uid);
        $labels = get_all_labels($link, $owner_uid);
        if ($debug_enabled) {
            //print_r($filters);
            _debug("update_rss_feed: " . count($filters) . " filters loaded.");
        }
        $items = $rss->get_items();
        if (!is_array($items)) {
            if ($debug_enabled) {
                _debug("update_rss_feed: no articles found.");
            }
            db_query($link, "UPDATE ttrss_feeds\n\t\t\t\t\tSET last_updated = NOW(), last_error = '' WHERE id = '{$feed}'");
            return;
            // no articles
        }
        if ($pubsub_state != 2 && PUBSUBHUBBUB_ENABLED) {
            if ($debug_enabled) {
                _debug("update_rss_feed: checking for PUSH hub...");
            }
            $feed_hub_url = false;
            $links = $rss->get_links('hub');
            if ($links && is_array($links)) {
                foreach ($links as $l) {
                    $feed_hub_url = $l;
                    break;
                }
            }
            if ($debug_enabled) {
                _debug("update_rss_feed: feed hub url: {$feed_hub_url}");
            }
            if ($feed_hub_url && function_exists('curl_init') && !ini_get("open_basedir")) {
                require_once 'lib/pubsubhubbub/subscriber.php';
                $callback_url = get_self_url_prefix() . "/public.php?op=pubsub&id={$feed}";
                $s = new Subscriber($feed_hub_url, $callback_url);
                $rc = $s->subscribe($fetch_url);
                if ($debug_enabled) {
                    _debug("update_rss_feed: feed hub url found, subscribe request sent.");
                }
                db_query($link, "UPDATE ttrss_feeds SET pubsub_state = 1\n\t\t\t\t\t\tWHERE id = '{$feed}'");
            }
        }
        if ($debug_enabled) {
            _debug("update_rss_feed: processing articles...");
        }
        foreach ($items as $item) {
            if ($_REQUEST['xdebug'] == 3) {
                print_r($item);
            }
            $entry_guid = $item->get_id();
            if (!$entry_guid) {
                $entry_guid = $item->get_link();
            }
            if (!$entry_guid) {
                $entry_guid = make_guid_from_title($item->get_title());
            }
            if ($debug_enabled) {
                _debug("update_rss_feed: guid {$entry_guid}");
            }
            if (!$entry_guid) {
                continue;
            }
            $entry_guid = "{$owner_uid},{$entry_guid}";
            $entry_timestamp = "";
            $entry_timestamp = strtotime($item->get_date());
            if ($entry_timestamp == -1 || !$entry_timestamp) {
                $entry_timestamp = time();
                $no_orig_date = 'true';
            } else {
                $no_orig_date = 'false';
            }
            $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
            if ($debug_enabled) {
                _debug("update_rss_feed: date {$entry_timestamp} [{$entry_timestamp_fmt}]");
            }
            $entry_title = $item->get_title();
            $entry_link = rewrite_relative_url($site_url, $item->get_link());
            if ($debug_enabled) {
                _debug("update_rss_feed: title {$entry_title}");
                _debug("update_rss_feed: link {$entry_link}");
            }
            if (!$entry_title) {
                $entry_title = date("Y-m-d H:i:s", $entry_timestamp);
            }
            $entry_content = $item->get_content();
            if (!$entry_content) {
                $entry_content = $item->get_description();
            }
            if ($_REQUEST["xdebug"] == 2) {
                print "update_rss_feed: content: ";
                print $entry_content;
                print "\n";
            }
            $entry_comments = $item->data["comments"];
            if ($item->get_author()) {
                $entry_author_item = $item->get_author();
                $entry_author = $entry_author_item->get_name();
                if (!$entry_author) {
                    $entry_author = $entry_author_item->get_email();
                }
                $entry_author = db_escape_string($entry_author);
            }
            $entry_guid = db_escape_string(mb_substr($entry_guid, 0, 245));
            $entry_comments = db_escape_string(mb_substr($entry_comments, 0, 245));
            $entry_author = db_escape_string(mb_substr($entry_author, 0, 245));
            $num_comments = $item->get_item_tags('http://purl.org/rss/1.0/modules/slash/', 'comments');
            if (is_array($num_comments) && is_array($num_comments[0])) {
                $num_comments = (int) $num_comments[0]["data"];
            } else {
                $num_comments = 0;
            }
            if ($debug_enabled) {
                _debug("update_rss_feed: num_comments: {$num_comments}");
                _debug("update_rss_feed: looking for tags [1]...");
            }
            // parse <category> entries into tags
            $additional_tags = array();
            $additional_tags_src = $item->get_categories();
            if (is_array($additional_tags_src)) {
                foreach ($additional_tags_src as $tobj) {
                    array_push($additional_tags, $tobj->get_term());
                }
            }
            if ($debug_enabled) {
                _debug("update_rss_feed: category tags:");
                print_r($additional_tags);
            }
            if ($debug_enabled) {
                _debug("update_rss_feed: looking for tags [2]...");
            }
            $entry_tags = array_unique($additional_tags);
            for ($i = 0; $i < count($entry_tags); $i++) {
                $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
            }
            if ($debug_enabled) {
                //_debug("update_rss_feed: unfiltered tags found:");
                //print_r($entry_tags);
            }
            if ($debug_enabled) {
                _debug("update_rss_feed: done collecting data.");
            }
            // TODO: less memory-hungry implementation
            if ($debug_enabled) {
                _debug("update_rss_feed: applying plugin filters..");
            }
            // FIXME not sure if owner_uid is a good idea here, we may have a base entry without user entry (?)
            $result = db_query($link, "SELECT plugin_data,title,content,link,tag_cache,author FROM ttrss_entries, ttrss_user_entries\n\t\t\t\t\tWHERE ref_id = id AND guid = '" . db_escape_string($entry_guid) . "' AND owner_uid = {$owner_uid}");
            if (db_num_rows($result) != 0) {
                $entry_plugin_data = db_fetch_result($result, 0, "plugin_data");
                $stored_article = array("title" => db_fetch_result($result, 0, "title"), "content" => db_fetch_result($result, 0, "content"), "link" => db_fetch_result($result, 0, "link"), "tags" => explode(",", db_fetch_result($result, 0, "tag_cache")), "author" => db_fetch_result($result, 0, "author"));
            } else {
                $entry_plugin_data = "";
                $stored_article = array();
            }
            $article = array("owner_uid" => $owner_uid, "guid" => $entry_guid, "title" => $entry_title, "content" => $entry_content, "link" => $entry_link, "tags" => $entry_tags, "plugin_data" => $entry_plugin_data, "author" => $entry_author, "stored" => $stored_article);
            foreach ($pluginhost->get_hooks($pluginhost::HOOK_ARTICLE_FILTER) as $plugin) {
                $article = $plugin->hook_article_filter($article);
            }
            $entry_tags = $article["tags"];
            $entry_guid = db_escape_string($entry_guid);
            $entry_content = db_escape_string($article["content"], false);
            $entry_title = db_escape_string($article["title"]);
            $entry_author = db_escape_string($article["author"]);
            $entry_link = db_escape_string($article["link"]);
            $entry_plugin_data = db_escape_string($article["plugin_data"]);
            if ($debug_enabled) {
                _debug("update_rss_feed: plugin data: {$entry_plugin_data}");
            }
            if ($cache_images && is_writable(CACHE_DIR . '/images')) {
                $entry_content = cache_images($entry_content, $site_url, $debug_enabled);
            }
            $content_hash = "SHA1:" . sha1($entry_content);
            db_query($link, "BEGIN");
            $result = db_query($link, "SELECT id FROM\tttrss_entries\n\t\t\t\t\tWHERE guid = '{$entry_guid}'");
            if (db_num_rows($result) == 0) {
                if ($debug_enabled) {
                    _debug("update_rss_feed: base guid [{$entry_guid}] not found");
                }
                // base post entry does not exist, create it
                $result = db_query($link, "INSERT INTO ttrss_entries\n\t\t\t\t\t\t\t(title,\n\t\t\t\t\t\t\tguid,\n\t\t\t\t\t\t\tlink,\n\t\t\t\t\t\t\tupdated,\n\t\t\t\t\t\t\tcontent,\n\t\t\t\t\t\t\tcontent_hash,\n\t\t\t\t\t\t\tcached_content,\n\t\t\t\t\t\t\tno_orig_date,\n\t\t\t\t\t\t\tdate_updated,\n\t\t\t\t\t\t\tdate_entered,\n\t\t\t\t\t\t\tcomments,\n\t\t\t\t\t\t\tnum_comments,\n\t\t\t\t\t\t\tplugin_data,\n\t\t\t\t\t\t\tauthor)\n\t\t\t\t\t\tVALUES\n\t\t\t\t\t\t\t('{$entry_title}',\n\t\t\t\t\t\t\t'{$entry_guid}',\n\t\t\t\t\t\t\t'{$entry_link}',\n\t\t\t\t\t\t\t'{$entry_timestamp_fmt}',\n\t\t\t\t\t\t\t'{$entry_content}',\n\t\t\t\t\t\t\t'{$content_hash}',\n\t\t\t\t\t\t\t'',\n\t\t\t\t\t\t\t{$no_orig_date},\n\t\t\t\t\t\t\tNOW(),\n\t\t\t\t\t\t\tNOW(),\n\t\t\t\t\t\t\t'{$entry_comments}',\n\t\t\t\t\t\t\t'{$num_comments}',\n\t\t\t\t\t\t\t'{$entry_plugin_data}',\n\t\t\t\t\t\t\t'{$entry_author}')");
                $article_labels = array();
            } else {
                // we keep encountering the entry in feeds, so we need to
                // update date_updated column so that we don't get horrible
                // dupes when the entry gets purged and reinserted again e.g.
                // in the case of SLOW SLOW OMG SLOW updating feeds
                $base_entry_id = db_fetch_result($result, 0, "id");
                db_query($link, "UPDATE ttrss_entries SET date_updated = NOW()\n\t\t\t\t\t\tWHERE id = '{$base_entry_id}'");
                $article_labels = get_article_labels($link, $base_entry_id, $owner_uid);
            }
            // now it should exist, if not - bad luck then
            $result = db_query($link, "SELECT\n\t\t\t\t\t\tid,content_hash,no_orig_date,title,plugin_data,\n\t\t\t\t\t\t" . SUBSTRING_FOR_DATE . "(date_updated,1,19) as date_updated,\n\t\t\t\t\t\t" . SUBSTRING_FOR_DATE . "(updated,1,19) as updated,\n\t\t\t\t\t\tnum_comments\n\t\t\t\t\tFROM\n\t\t\t\t\t\tttrss_entries\n\t\t\t\t\tWHERE guid = '{$entry_guid}'");
            $entry_ref_id = 0;
            $entry_int_id = 0;
            if (db_num_rows($result) == 1) {
                if ($debug_enabled) {
                    _debug("update_rss_feed: base guid [{$entry_guid}] found, checking for user record");
                }
                // this will be used below in update handler
                $orig_content_hash = db_fetch_result($result, 0, "content_hash");
                $orig_title = db_fetch_result($result, 0, "title");
                $orig_num_comments = db_fetch_result($result, 0, "num_comments");
                $orig_date_updated = strtotime(db_fetch_result($result, 0, "date_updated"));
                $orig_plugin_data = db_fetch_result($result, 0, "plugin_data");
                $ref_id = db_fetch_result($result, 0, "id");
                $entry_ref_id = $ref_id;
                // check for user post link to main table
                // do we allow duplicate posts with same GUID in different feeds?
                if (get_pref($link, "ALLOW_DUPLICATE_POSTS", $owner_uid, false)) {
                    $dupcheck_qpart = "AND (feed_id = '{$feed}' OR feed_id IS NULL)";
                } else {
                    $dupcheck_qpart = "";
                }
                /* Collect article tags here so we could filter by them: */
                $article_filters = get_article_filters($filters, $entry_title, $entry_content, $entry_link, $entry_timestamp, $entry_author, $entry_tags);
                if ($debug_enabled) {
                    _debug("update_rss_feed: article filters: ");
                    if (count($article_filters) != 0) {
                        print_r($article_filters);
                    }
                }
                if (find_article_filter($article_filters, "filter")) {
                    db_query($link, "COMMIT");
                    // close transaction in progress
                    continue;
                }
                $score = calculate_article_score($article_filters);
                if ($debug_enabled) {
                    _debug("update_rss_feed: initial score: {$score}");
                }
                $query = "SELECT ref_id, int_id FROM ttrss_user_entries WHERE\n\t\t\t\t\t\t\tref_id = '{$ref_id}' AND owner_uid = '{$owner_uid}'\n\t\t\t\t\t\t\t{$dupcheck_qpart}";
                //					if ($_REQUEST["xdebug"]) print "$query\n";
                $result = db_query($link, $query);
                // okay it doesn't exist - create user entry
                if (db_num_rows($result) == 0) {
                    if ($debug_enabled) {
                        _debug("update_rss_feed: user record not found, creating...");
                    }
                    if ($score >= -500 && !find_article_filter($article_filters, 'catchup')) {
                        $unread = 'true';
                        $last_read_qpart = 'NULL';
                    } else {
                        $unread = 'false';
                        $last_read_qpart = 'NOW()';
                    }
                    if (find_article_filter($article_filters, 'mark') || $score > 1000) {
                        $marked = 'true';
                    } else {
                        $marked = 'false';
                    }
                    if (find_article_filter($article_filters, 'publish')) {
                        $published = 'true';
                    } else {
                        $published = 'false';
                    }
                    // N-grams
                    if (DB_TYPE == "pgsql" and defined('_NGRAM_TITLE_DUPLICATE_THRESHOLD')) {
                        $result = db_query($link, "SELECT COUNT(*) AS similar FROM\n\t\t\t\t\t\t\t\t\tttrss_entries,ttrss_user_entries\n\t\t\t\t\t\t\t\tWHERE ref_id = id AND updated >= NOW() - INTERVAL '7 day'\n\t\t\t\t\t\t\t\t\tAND similarity(title, '{$entry_title}') >= " . _NGRAM_TITLE_DUPLICATE_THRESHOLD . "\n\t\t\t\t\t\t\t\t\tAND owner_uid = {$owner_uid}");
                        $ngram_similar = db_fetch_result($result, 0, "similar");
                        if ($debug_enabled) {
                            _debug("update_rss_feed: N-gram similar results: {$ngram_similar}");
                        }
                        if ($ngram_similar > 0) {
                            $unread = 'false';
                        }
                    }
                    $result = db_query($link, "INSERT INTO ttrss_user_entries\n\t\t\t\t\t\t\t\t(ref_id, owner_uid, feed_id, unread, last_read, marked,\n\t\t\t\t\t\t\t\t\tpublished, score, tag_cache, label_cache, uuid)\n\t\t\t\t\t\t\tVALUES ('{$ref_id}', '{$owner_uid}', '{$feed}', {$unread},\n\t\t\t\t\t\t\t\t{$last_read_qpart}, {$marked}, {$published}, '{$score}', '', '', '')");
                    if (PUBSUBHUBBUB_HUB && $published == 'true') {
                        $rss_link = get_self_url_prefix() . "/public.php?op=rss&id=-2&key=" . get_feed_access_key($link, -2, false, $owner_uid);
                        $p = new Publisher(PUBSUBHUBBUB_HUB);
                        $pubsub_result = $p->publish_update($rss_link);
                    }
                    $result = db_query($link, "SELECT int_id FROM ttrss_user_entries WHERE\n\t\t\t\t\t\t\t\tref_id = '{$ref_id}' AND owner_uid = '{$owner_uid}' AND\n\t\t\t\t\t\t\t\tfeed_id = '{$feed}' LIMIT 1");
                    if (db_num_rows($result) == 1) {
                        $entry_int_id = db_fetch_result($result, 0, "int_id");
                    }
                } else {
                    if ($debug_enabled) {
                        _debug("update_rss_feed: user record FOUND");
                    }
                    $entry_ref_id = db_fetch_result($result, 0, "ref_id");
                    $entry_int_id = db_fetch_result($result, 0, "int_id");
                }
                if ($debug_enabled) {
                    _debug("update_rss_feed: RID: {$entry_ref_id}, IID: {$entry_int_id}");
                }
                $post_needs_update = false;
                $update_insignificant = false;
                if ($orig_num_comments != $num_comments) {
                    $post_needs_update = true;
                    $update_insignificant = true;
                }
                if ($entry_plugin_data != $orig_plugin_data) {
                    $post_needs_update = true;
                    $update_insignificant = true;
                }
                if ($content_hash != $orig_content_hash) {
                    $post_needs_update = true;
                    $update_insignificant = false;
                }
                if (db_escape_string($orig_title) != $entry_title) {
                    $post_needs_update = true;
                    $update_insignificant = false;
                }
                // if post needs update, update it and mark all user entries
                // linking to this post as updated
                if ($post_needs_update) {
                    if (defined('DAEMON_EXTENDED_DEBUG')) {
                        _debug("update_rss_feed: post {$entry_guid} needs update...");
                    }
                    //						print "<!-- post $orig_title needs update : $post_needs_update -->";
                    db_query($link, "UPDATE ttrss_entries\n\t\t\t\t\t\t\tSET title = '{$entry_title}', content = '{$entry_content}',\n\t\t\t\t\t\t\t\tcontent_hash = '{$content_hash}',\n\t\t\t\t\t\t\t\tupdated = '{$entry_timestamp_fmt}',\n\t\t\t\t\t\t\t\tnum_comments = '{$num_comments}',\n\t\t\t\t\t\t\t\tplugin_data = '{$entry_plugin_data}'\n\t\t\t\t\t\t\tWHERE id = '{$ref_id}'");
                    if (!$update_insignificant) {
                        if ($mark_unread_on_update) {
                            db_query($link, "UPDATE ttrss_user_entries\n\t\t\t\t\t\t\t\t\tSET last_read = null, unread = true WHERE ref_id = '{$ref_id}'");
                        }
                    }
                }
            }
            db_query($link, "COMMIT");
            if ($debug_enabled) {
                _debug("update_rss_feed: assigning labels...");
            }
            assign_article_to_label_filters($link, $entry_ref_id, $article_filters, $owner_uid, $article_labels);
            if ($debug_enabled) {
                _debug("update_rss_feed: looking for enclosures...");
            }
            // enclosures
            $enclosures = array();
            $encs = $item->get_enclosures();
            if (is_array($encs)) {
                foreach ($encs as $e) {
                    $e_item = array($e->link, $e->type, $e->length);
                    array_push($enclosures, $e_item);
                }
            }
            if ($debug_enabled) {
                _debug("update_rss_feed: article enclosures:");
                print_r($enclosures);
            }
            db_query($link, "BEGIN");
            foreach ($enclosures as $enc) {
                $enc_url = db_escape_string($enc[0]);
                $enc_type = db_escape_string($enc[1]);
                $enc_dur = db_escape_string($enc[2]);
                $result = db_query($link, "SELECT id FROM ttrss_enclosures\n\t\t\t\t\t\tWHERE content_url = '{$enc_url}' AND post_id = '{$entry_ref_id}'");
                if (db_num_rows($result) == 0) {
                    db_query($link, "INSERT INTO ttrss_enclosures\n\t\t\t\t\t\t\t(content_url, content_type, title, duration, post_id) VALUES\n\t\t\t\t\t\t\t('{$enc_url}', '{$enc_type}', '', '{$enc_dur}', '{$entry_ref_id}')");
                }
            }
            db_query($link, "COMMIT");
            // check for manual tags (we have to do it here since they're loaded from filters)
            foreach ($article_filters as $f) {
                if ($f["type"] == "tag") {
                    $manual_tags = trim_array(explode(",", $f["param"]));
                    foreach ($manual_tags as $tag) {
                        if (tag_is_valid($tag)) {
                            array_push($entry_tags, $tag);
                        }
                    }
                }
            }
            // Skip boring tags
            $boring_tags = trim_array(explode(",", mb_strtolower(get_pref($link, 'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8')));
            $filtered_tags = array();
            $tags_to_cache = array();
            if ($entry_tags && is_array($entry_tags)) {
                foreach ($entry_tags as $tag) {
                    if (array_search($tag, $boring_tags) === false) {
                        array_push($filtered_tags, $tag);
                    }
                }
            }
            $filtered_tags = array_unique($filtered_tags);
            if ($debug_enabled) {
                _debug("update_rss_feed: filtered article tags:");
                print_r($filtered_tags);
            }
            // Save article tags in the database
            if (count($filtered_tags) > 0) {
                db_query($link, "BEGIN");
                foreach ($filtered_tags as $tag) {
                    $tag = sanitize_tag($tag);
                    $tag = db_escape_string($tag);
                    if (!tag_is_valid($tag)) {
                        continue;
                    }
                    $result = db_query($link, "SELECT id FROM ttrss_tags\n\t\t\t\t\t\t\tWHERE tag_name = '{$tag}' AND post_int_id = '{$entry_int_id}' AND\n\t\t\t\t\t\t\towner_uid = '{$owner_uid}' LIMIT 1");
                    if ($result && db_num_rows($result) == 0) {
                        db_query($link, "INSERT INTO ttrss_tags\n\t\t\t\t\t\t\t\t\t(owner_uid,tag_name,post_int_id)\n\t\t\t\t\t\t\t\t\tVALUES ('{$owner_uid}','{$tag}', '{$entry_int_id}')");
                    }
                    array_push($tags_to_cache, $tag);
                }
                /* update the cache */
                $tags_to_cache = array_unique($tags_to_cache);
                $tags_str = db_escape_string(join(",", $tags_to_cache));
                db_query($link, "UPDATE ttrss_user_entries\n\t\t\t\t\t\tSET tag_cache = '{$tags_str}' WHERE ref_id = '{$entry_ref_id}'\n\t\t\t\t\t\tAND owner_uid = {$owner_uid}");
                db_query($link, "COMMIT");
            }
            if (get_pref($link, "AUTO_ASSIGN_LABELS", $owner_uid, false)) {
                if ($debug_enabled) {
                    _debug("update_rss_feed: auto-assigning labels...");
                }
                foreach ($labels as $label) {
                    $caption = $label["caption"];
                    if (preg_match("/\\b{$caption}\\b/i", "{$tags_str} " . strip_tags($entry_content) . " {$entry_title}")) {
                        if (!labels_contains_caption($article_labels, $caption)) {
                            label_add_article($link, $entry_ref_id, $caption, $owner_uid);
                        }
                    }
                }
            }
            if ($debug_enabled) {
                _debug("update_rss_feed: article processed");
            }
        }
        if (!$last_updated) {
            if ($debug_enabled) {
                _debug("update_rss_feed: new feed, catching it up...");
            }
            catchup_feed($link, $feed, false, $owner_uid);
        }
        if ($debug_enabled) {
            _debug("purging feed...");
        }
        purge_feed($link, $feed, 0, $debug_enabled);
        db_query($link, "UPDATE ttrss_feeds\n\t\t\t\tSET last_updated = NOW(), last_error = '' WHERE id = '{$feed}'");
        //			db_query($link, "COMMIT");
    } else {
        $error_msg = db_escape_string(mb_substr($rss->error(), 0, 245));
        if ($debug_enabled) {
            _debug("update_rss_feed: error fetching feed: {$error_msg}");
        }
        db_query($link, "UPDATE ttrss_feeds SET last_error = '{$error_msg}',\n\t\t\t\t\tlast_updated = NOW() WHERE id = '{$feed}'");
    }
    unset($rss);
    if ($debug_enabled) {
        _debug("update_rss_feed: done");
    }
}
Beispiel #8
0
// Load every configured feed row from the lylina_feeds table.
$sql = 'SELECT * FROM lylina_feeds';
$feeds = runSQL($sql);
// Bookkeeping for the feeds handled below; both start out empty.
// NOTE(review): how these two are filled is not visible in this excerpt.
$feeds_parse = array();
$feed_count = 0;
// Earlier column-oriented layout of $feeds_parse, kept disabled.
//$feeds_parse['url'] = array();
//$feeds_parse['curl'] = array();
//$feeds_parse['data'] = array();
//$feeds_parse['id'] = array();
//$feeds_parse['mirror_url'] = array();
// cURL multi handle, presumably used to fetch the feeds in parallel further down — TODO confirm.
$master_curl = curl_multi_init();
//$data = new SimplePie_Cache_Extras();
// One SimplePie instance is configured here, before the per-feed loop:
// caching enabled with a 300 second lifetime, stored under MAGPIE_CACHE_DIR.
$data = new SimplePie();
$data->set_cache_duration(300);
$data->set_cache_location(MAGPIE_CACHE_DIR);
$data->enable_cache(true);
// Use the 'SimplePie_Sanitize_Null' class instead of SimplePie's default sanitizer.
// NOTE(review): by its name this class performs no sanitization — confirm output is cleaned elsewhere.
$data->set_sanitize_class('SimplePie_Sanitize_Null');
// Enable every autodiscovery strategy SimplePie offers when locating the feed.
$data->set_autodiscovery_level(SIMPLEPIE_LOCATOR_ALL);
//$data->set_stupidly_fast(true);
// Don't need this
$data->enable_order_by_date(false);
foreach ($feeds as $feed) {
    if ($conf['debug'] == 'true') {
        print 'Fetching ' . $feed['url'] . " ";
    }
    //	if($conf['debug'] == 'true') flush();
    $enc = '';
    //	$data = fetch_rss($feed['url']);
    /*	if(file_exists("mirror/" . md5($feed['url']) . ".xml")) {
    //		$data = new SimplePie_Cache_Extras("mirror/" . md5($feed['url']) . ".xml");
    		$data->set_feed_url("mirror/" . md5($feed['url']) . ".xml");
    //		$data->set_cache_duration(300);
Beispiel #9
0
 /**
  * Refresh the local feed mirrors and process every feed whose content changed.
  *
  * Steps:
  *  1. Read all rows of lylina_feeds and select those whose mirror file
  *     (cache/<md5 of url>.xml) is missing or older than 300 seconds.
  *  2. Download the selected feeds through Curl_Get::multi_get(). New content
  *     is written to the mirror; an empty response only refreshes the
  *     mirror's modification time; a Curl error is logged.
  *  3. For each feed that returned content and whose mirror MD5 differs from
  *     the one recorded in step 1, parse the mirror with SimplePie, update
  *     the stored feed metadata and pass the result to update_favicon()
  *     and insert_items().
  *
  * Takes no arguments and returns nothing.
  *
  * NOTE(review): multi_get() is assumed to return an array indexed like its
  * argument, each entry holding optional 'error' and 'data' keys — this is
  * inferred from how the result is read here; confirm against Curl_Get.
  */
 function get()
 {
     $purifier_config = HTMLPurifier_Config::createDefault();
     $purifier_config->set('Cache.SerializerPath', 'cache');
     // TODO: This feature is very nice, but breaks titles now that we purify them. Titles only need their entities fixed, so we shouldn't really purify them allowing us to turn this back on
     #       $purifier_config->set('AutoFormat.Linkify', true);
     // Allow flash embeds in newer versions of purifier
     $purifier_config->set('HTML.SafeObject', true);
     $purifier_config->set('Output.FlashCompat', true);
     $purifier_config->set('HTML.FlashAllowFullScreen', true);
     $purifier = new HTMLPurifier($purifier_config);

     $query = 'SELECT * FROM lylina_feeds';
     $feeds = $this->db->GetAll($query);

     // A single parser instance is reused for every feed.
     $pie = new SimplePie();
     $pie->enable_cache(false);
     $pie->set_sanitize_class('SimplePie_Sanitize_Null');
     $pie->set_autodiscovery_level(SIMPLEPIE_LOCATOR_ALL);
     $pie->enable_order_by_date(false);

     // Feeds which need to be fetched and parsed, with their metadata
     $feeds_parse = array();
     foreach ($feeds as $feed) {
         $cache_path = 'cache/' . md5($feed['url']) . '.xml';
         if (file_exists($cache_path)) {
             $mod_time = @filemtime($cache_path);
             $filemd5 = @md5_file($cache_path);
         } else {
             $mod_time = -1;
             $filemd5 = 0;
         }
         // If our cache is older than 5 minutes, or doesn't exist, fetch new feeds
         if ($mod_time == -1 || time() - $mod_time > 300) {
             // Appending keeps the keys sequential from 0, which is how the
             // fetch results are matched back to their feed below.
             $feeds_parse[] = array(
                 'url' => $feed['url'],
                 'id' => $feed['id'],
                 'name' => $feed['name'],
                 'icon' => $feed['favicon_url'],
                 'cache_path' => $cache_path,
                 'filemd5' => $filemd5,
                 'mod' => $mod_time
             );
         }
     }

     // Get the data for feeds we need to parse
     $curl = new Curl_Get();
     $feeds_data = $curl->multi_get($feeds_parse);

     // Store the downloaded data in the mirror files
     foreach ($feeds_parse as $n => $info) {
         // A missing result entry is treated like an empty response instead
         // of triggering undefined-index notices.
         $data = isset($feeds_data[$n]) ? $feeds_data[$n] : array();
         $has_body = isset($data['data']) && $data['data'] != NULL;
         if (isset($data['error']) && $data['error'] > 0) {
             // Should be logged
             error_log("Curl error: " . $data['error']);
         } elseif ($has_body) {
             // The feed has been retrieved with content, save it
             file_put_contents($info['cache_path'], $data['data']);
         } else {
             // Error on the feed, or nothing new: just freshen the cache
             touch($info['cache_path']);
         }
     }

     // Clear the file stat cache so we get good data on feed mirror size changes
     clearstatcache();

     foreach ($feeds_parse as $n => $info) {
         $data = isset($feeds_data[$n]) ? $feeds_data[$n] : array();
         if (!isset($data['data']) || $data['data'] == NULL) {
             // Nothing was downloaded for this feed
             // TODO: Provide debugging
             continue;
         }
         // After a Curl error the mirror may never have been written; there
         // is nothing to hash or parse in that case.
         if (!file_exists($info['cache_path']) || md5_file($info['cache_path']) === $info['filemd5']) {
             continue;
         }

         $pie->set_feed_url($info['cache_path']);
         $pie->init();

         // If SimplePie finds a new RSS URL, let's update our cache
         // NOTE(review): this statement targets lylina_items but is keyed by
         // the feed's id, and feed_url was just set to the mirror path above,
         // so the comparison with the remote URL looks always-true. Left
         // unchanged because the schema is not visible here — confirm.
         if ($pie->feed_url != $info['url']) {
             $this->db->Execute('UPDATE lylina_items SET url=?, fallback_url=? WHERE id=?', array($pie->feed_url, $info['url'], $info['id']));
         }

         // Update the real feed title - users who already have the feed added won't see the change
         // This is to prevent garbage names from OPML imports, which eventually won't be a problem,
         // but it's probably a good idea to keep the global title current anyway
         $title = $pie->get_title();
         // get_title() yields null when the mirror could not be parsed; never
         // overwrite a stored name with an empty value.
         if ($title !== null && $title !== '' && $title != $info['name']) {
             $this->db->Execute('UPDATE lylina_feeds SET name=? WHERE id=?', array($title, $info['id']));
         }

         // TODO: Favicon handling isn't real pretty
         // If we have a new favicon URL, no cache, or stale cache, update cache
         $icon_path = 'cache/' . md5($info['url']) . '.ico';
         $icon_max_age = 7 * 24 * 60 * 60;
         if (!file_exists($icon_path) || time() - filemtime($icon_path) > $icon_max_age || $pie->get_favicon() != $info['icon']) {
             $this->update_favicon($info, $pie);
         }

         // If we can successfully parse the file, format them
         if ($pie->get_items()) {
             $this->insert_items($info, $pie, $purifier);
         }
     }
 }