Ejemplo n.º 1
0
 /**
  * Refresh a single feed and store the entries it links to.
  *
  * Fetches the feed URL, parses it as microformats2, (re-)subscribes to a PuSH hub
  * when one is advertised, then fetches every entry URL found in the feed and
  * creates or updates the matching rows in `entries`, `entry_tags`,
  * `entry_syndications` and `channel_entries`. Progress is echoed as plain text.
  *
  * The feed's `refresh_in_progress` flag is set for the duration of the run and is
  * always cleared again, even when processing fails.
  *
  * @param mixed $feed_id Identifier of the feed; passed to db\get_feed() and matched
  *                       against `channel_sources.feed_id`.
  * @return void
  */
 public static function refresh_feed($feed_id)
 {
     $feed = db\get_feed($feed_id);
     if (!$feed) {
         // Nothing to refresh (and no flag to clear) when the lookup comes back empty
         echo "Feed {$feed_id} could not be loaded, skipping refresh\n";
         return;
     }
     echo "Refreshing feed " . $feed->feed_url . " ({$feed_id})\n\n";
     // If this feed is already being refreshed, re-queue the job to give the first job a chance to finish.
     // NOTE(review): an earlier comment here said "30 seconds" but the last argument passed to
     // DeferredTask::queue is 5 — confirm the unit and value that call expects.
     if ($feed->refresh_in_progress) {
         echo "This feed is already being processed, re-queuing for later\n";
         DeferredTask::queue('FeedTask', 'refresh_feed', $feed_id, 5);
         return;
     }
     // mark that this feed is currently being refreshed
     $feed->refresh_started = date('Y-m-d H:i:s');
     $feed->refresh_in_progress = 1;
     $feed->save();
     // only deal with mf2 feeds for now
     try {
         $response = request\get_url($feed->feed_url, true);
         $header_rels = IndieWeb\http_rels($response['headers']);
         $html = $response['body'];
         $mf2 = feeds\parse_mf2($html, $feed->feed_url);
         // A hub advertised in the HTTP headers wins over one found in the HTML
         $hub_url = false;
         if (k($header_rels, 'hub')) {
             $hub_url = $header_rels['hub'][0];
             $hub_url_source = 'http';
         } elseif (k($mf2, 'rels') && k($mf2['rels'], 'hub')) {
             $hub_url = $mf2['rels']['hub'][0];
             $hub_url_source = 'html';
         }
         // check for PuSH info and subscribe to the hub if found
         if ($hub_url) {
             // The topic URL follows the same precedence, falling back to the feed URL itself
             if (k($header_rels, 'self')) {
                 $self_url = $header_rels['self'][0];
                 $self_url_source = 'http';
             } elseif (k($mf2, 'rels') && k($mf2['rels'], 'self')) {
                 $self_url = $mf2['rels']['self'][0];
                 $self_url_source = 'html';
             } else {
                 $self_url = $feed->feed_url;
                 $self_url_source = 'default';
             }
             // Keep track of what the hub URL was last time we saw it
             $last_hub_url = $feed->push_hub_url;
             // Store the new hub and topic
             $feed->push_hub_url = $hub_url;
             $feed->push_topic_url = $self_url;
             // (re-)subscribe if we are not subscribed yet, if the hub has changed,
             // or if the subscription expires within the next 300 seconds
             $expires_soon = $feed->push_expiration && strtotime($feed->push_expiration) - 300 < time();
             if ($feed->push_subscribed == 0 || $hub_url != $last_hub_url || $expires_soon) {
                 echo "Attempting to subscribe to the hub!\n";
                 echo "Hub: " . $feed->push_hub_url . " (found in {$hub_url_source})\n";
                 echo "Topic: " . $feed->push_topic_url . " (found in {$self_url_source})\n";
                 // This will cause the hub to make a GET request to the callback URL, which we will need to verify
                 $response = request\post($feed->push_hub_url, ['hub.mode' => 'subscribe', 'hub.topic' => $feed->push_topic_url, 'hub.callback' => 'http://' . Config::$hostname . '/push/feed/' . $feed->hash]);
                 echo "Hub responded:\n";
                 echo $response['status'] . "\n";
                 echo $response['body'] . "\n";
             }
             $feed->save();
         }
         // check if there are any h-entry posts
         $info = feeds\find_feed_info($mf2);
         if ($info) {
             foreach ($info['entries'] as $i => $feed_entry) {
                 echo "\nProcessing entry {$i}\n";
                 // Find the canonical URL for the entry and fetch the page
                 $entry_url = Mf2\getPlaintext($feed_entry, 'url');
                 if (!$entry_url) {
                     echo "No URL was found for this entry\n";
                     continue;
                 }
                 echo $entry_url . "\n";
                 // Parse the entry for all required info and store in the "entries" table
                 $entry_html = request\get_url($entry_url);
                 if (!$entry_html) {
                     // Bad response returned, might be 410 deleted
                     // TODO: Figure out if it's a deleted post or just temporary error
                     continue;
                 }
                 $entry_mf2 = feeds\parse_mf2($entry_html, $entry_url);
                 $entries = Mf2\findMicroformatsByType($entry_mf2['items'], 'h-entry');
                 // The page may contain no h-entry at all, so check before taking the first one
                 if (!isset($entries[0]) || !Mf2\isMicroformat($entries[0])) {
                     echo "Does not appear to be a microformat\n";
                     continue;
                 }
                 $entry_mf2 = $entries[0];
                 if (!in_array('h-entry', $entry_mf2['type'])) {
                     print_r($entry_mf2);
                     continue;
                 }
                 // Update the existing row for this feed + URL, or start a new one
                 $entry = ORM::for_table('entries')->where('feed_id', $feed->id)->where('url', $entry_url)->find_one();
                 if (!$entry) {
                     $entry = ORM::for_table('entries')->create();
                     $entry->feed_id = $feed->id;
                     $entry->url = $entry_url;
                 }
                 // Decide whether to store the name, summary and content depending on whether they are unique
                 $name = Mf2\getPlaintext($entry_mf2, 'name');
                 $summary = Mf2\getPlaintext($entry_mf2, 'summary');
                 $content = Mf2\getHtml($entry_mf2, 'content');
                 $content_text = Mf2\getPlaintext($entry_mf2, 'content');
                 // Store the name if it's different from the summary and the content
                 if (!feeds\content_is_equal($name, $summary) && !feeds\content_is_equal($name, $content_text)) {
                     $entry->name = $name;
                     echo "Entry has a name: {$name}\n";
                 } else {
                     $entry->name = '';
                 }
                 // Store the summary if it's different from the content
                 if ($summary && !feeds\content_is_equal($summary, $content_text)) {
                     $entry->summary = $summary;
                     echo "Entry has a summary\n";
                 } else {
                     $entry->summary = '';
                 }
                 $entry->content = $content;
                 $date_string = Mf2\getPlaintext($entry_mf2, 'published');
                 if ($date_string) {
                     try {
                         $date = new DateTime($date_string);
                         // Record the author's UTC offset (in seconds) before normalising to UTC
                         $entry->timezone_offset = $date->format('Z');
                         $date->setTimeZone(new DateTimeZone('UTC'));
                         $entry->date_published = $date->format('Y-m-d H:i:s');
                         echo "Published: {$entry->date_published}\n";
                     } catch (Exception $date_error) {
                         echo "Error parsing date: {$date_string}\n";
                     }
                 }
                 // Set the date published to now if none was found in the entry
                 if (!$entry->date_published) {
                     $entry->date_published = date('Y-m-d H:i:s');
                 }
                 // Copy simple URL-valued properties onto their columns when present
                 $url_columns = ['like-of' => 'like_of_url', 'repost-of' => 'repost_of_url', 'photo' => 'photo_url', 'video' => 'video_url', 'audio' => 'audio_url'];
                 foreach ($url_columns as $property => $column) {
                     $value = Mf2\getPlaintext($entry_mf2, $property);
                     if ($value) {
                         $entry->{$column} = $value;
                     }
                 }
                 // TODO: move this to a helper
                 // finds the URL for a property if the property is a plain string or a nested h-cite
                 $reply_text = Mf2\getPlaintext($entry_mf2, 'in-reply-to');
                 if ($reply_text) {
                     $reply = $entry_mf2['properties']['in-reply-to'][0];
                     if (Mf2\isMicroformat($reply)) {
                         // A nested microformat is not guaranteed to carry a url property
                         $entry->in_reply_to_url = isset($reply['properties']['url'][0]) ? $reply['properties']['url'][0] : null;
                     } else {
                         $entry->in_reply_to_url = $reply_text;
                     }
                 }
                 // Prefer the entry's own author, falling back to the feed-level author
                 $author_mf2 = false;
                 if (Mf2\hasProp($entry_mf2, 'author')) {
                     $author_mf2 = $entry_mf2['properties']['author'][0];
                 } elseif (Mf2\hasProp($info, 'author')) {
                     $author_mf2 = $info['properties']['author'][0];
                 }
                 if ($author_mf2) {
                     $entry->author_name = Mf2\getPlaintext($author_mf2, 'name');
                     $entry->author_url = Mf2\getPlaintext($author_mf2, 'url');
                     $entry->author_photo = Mf2\getPlaintext($author_mf2, 'photo');
                 } else {
                     echo "NO AUTHOR WAS FOUND!!\n";
                 }
                 // Store how many responses of each kind the entry lists
                 $count_columns = ['like' => 'num_likes', 'repost' => 'num_reposts', 'comment' => 'num_comments', 'rsvp' => 'num_rsvps'];
                 foreach ($count_columns as $property => $column) {
                     if (Mf2\hasProp($entry_mf2, $property)) {
                         $entry->{$column} = count($entry_mf2['properties'][$property]);
                     }
                 }
                 $entry->date_retrieved = date('Y-m-d H:i:s');
                 $entry->date_updated = date('Y-m-d H:i:s');
                 $entry->save();
                 // Add or update all tags for this entry
                 $entry_tags = [];
                 if (Mf2\hasProp($entry_mf2, 'category')) {
                     // A property value can be a nested microformat (an array) rather than a string;
                     // only plain strings can be normalised and stored as tags
                     $categories = array_filter($entry_mf2['properties']['category'], 'is_string');
                     $entry_tags = array_unique(array_map(function ($c) {
                         return strtolower(trim($c, '#'));
                     }, $categories));
                     foreach ($entry_tags as $tag) {
                         if (!ORM::for_table('entry_tags')->where('entry_id', $entry->id)->where('tag', $tag)->find_one()) {
                             $et = ORM::for_table('entry_tags')->create();
                             $et->entry_id = $entry->id;
                             $et->tag = $tag;
                             $et->save();
                         }
                     }
                 }
                 // TODO: Remove tags that are no longer found in the entry
                 // Add syndication URLs
                 if (Mf2\hasProp($entry_mf2, 'syndication')) {
                     // Same reasoning as for categories: skip values that are not plain strings
                     $syndications = array_unique(array_filter($entry_mf2['properties']['syndication'], 'is_string'));
                     foreach ($syndications as $syn) {
                         if (!ORM::for_table('entry_syndications')->where('entry_id', $entry->id)->where('syndication_url', $syn)->find_one()) {
                             $es = ORM::for_table('entry_syndications')->create();
                             $es->entry_id = $entry->id;
                             $es->syndication_url = $syn;
                             $es->save();
                         }
                     }
                 }
                 // TODO: Remove urls that are no longer found in the entry
                 // Run through all the channels that have this feed and add the entry to each channel
                 $searchable_text = $entry->content . "\n" . $entry->name . "\n" . $entry->summary;
                 $sources = ORM::for_table('channel_sources')->where('feed_id', $feed_id)->find_many();
                 foreach ($sources as $source) {
                     // A source without a filter receives every entry; otherwise at least one
                     // filter term must appear as a whole word in the text or as a tag
                     $add = !$source->filter;
                     if ($source->filter) {
                         foreach (explode(',', $source->filter) as $tag) {
                             $tag = trim($tag);
                             if ($tag === '') {
                                 // An empty term would turn into /\b\b/ and match almost anything
                                 continue;
                             }
                             // Quote the term so regex metacharacters (or a "/") in the filter cannot break the pattern
                             $in_text = preg_match('/\\b' . preg_quote($tag, '/') . '\\b/', $searchable_text);
                             if ($in_text || in_array(strtolower($tag), $entry_tags, true)) {
                                 $add = true;
                                 break;
                             }
                         }
                     }
                     if ($add) {
                         $ce = ORM::for_table('channel_entries')->where('channel_id', $source->channel_id)->where('entry_id', $entry->id)->find_one();
                         if (!$ce) {
                             $ce = ORM::for_table('channel_entries')->create();
                             $ce->channel_id = $source->channel_id;
                             $ce->entry_id = $entry->id;
                         }
                         $ce->entry_published = $entry->date_published;
                         // NOTE(review): date_created is also overwritten for rows that already exist — confirm this is intended
                         $ce->date_created = date('Y-m-d H:i:s');
                         $ce->save();
                         echo "Adding to channel\n";
                     }
                 }
             }
         }
         $feed->last_retrieved = date('Y-m-d H:i:s');
     } catch (Exception $e) {
         echo "Error processing feed!\n";
         echo $e->getMessage() . "\n";
         echo $e->getTraceAsString() . "\n";
     } finally {
         // Always clear the in-progress flag, including when something that is not an
         // Exception escapes; otherwise every later job for this feed would only re-queue itself
         $feed->refresh_in_progress = 0;
         $feed->save();
     }
 }
Ejemplo n.º 2
0
        $channel->date_created = date('Y-m-d H:i:s');
        $channel->type = 'feeds';
        $channel->save();
        json_response($app, ['result' => 'ok']);
    }
});
$app->post('/channels/discover', function ($format = 'json') use($app) {
    if ($user = require_login_json($app)) {
        $params = $app->request()->params();
        // $feeds = array(
        //   array('url' => 'http://pk.dev/', 'display_url' => friendly_url('http://pk.dev/'), 'type' => 'microformats2'),
        //   array('url' => 'http://pk.dev/articles.atom', 'display_url' => friendly_url('http://pk.dev/articles.atom'), 'type' => 'atom')
        // );
        $feeds = [];
        // Parse the URL and check for microformats h-entry posts, as well as linked rss or atom feeds
        $html = request\get_url($params['url']);
        $url = normalize_url($params['url']);
        if ($html) {
            $mf2 = feeds\parse_mf2($html, $params['url']);
            // check if there are any h-entry posts
            $feed = feeds\find_feed_info($mf2);
            if ($feed) {
                $feeds[] = ['url' => $url, 'display_url' => friendly_url($url), 'icon' => '<i class="icon-microformats"></i>', 'enabled' => true];
            }
            $alternates = feeds\get_alternates($mf2);
            foreach ($alternates as $alt) {
                $feeds[] = ['url' => $alt['url'], 'display_url' => friendly_url($alt['url']), 'icon' => '<i class="fa fa-rss"></i>', 'enabled' => false];
            }
        }
        json_response($app, ['feeds' => $feeds]);
    }