コード例 #1
0
ファイル: class.YouTubeCrawler.php プロジェクト: dgw/ThinkUp
 /**
  *  Collects and stores information about the user's videos from the YouTube APIs.
  *  Currently collects and stores:
  *   - Basic video information such as title, author, description and location the video was shot in (if available)
  *  - Replies to the video
  *      -- This uses the YouTube V2 API due to the V3 API currently not supporting replies
  *   - All time counts for likes, dislikes, views, average view duration, average view percentage, favorites added,
  *   favorites removed, shares, subscribers gained and subscribers lost
  *     -- The totals for these are stored in the videos table; a history of these totals is stored in the
  *     count_history table under a type of [metric]_all_time with today's date
  *    -- A record of these metrics for individual days is also saved in the count_history table under a type of
  *    [metric], dated with the day the metric represents (usually two days ago, due to a delay in the
  *      availability of data from the Analytics API)
  * @return null
  */
 public function fetchInstanceUserVideos()
 {
     // Overview of one crawl:
     //   1. Read the channel record to get the uploads playlist ID, channel ID and subscriber count.
     //   2. Page through the uploads playlist; for every video store its details, all-time counts,
     //      the per-day counts for two days ago, and any new comments.
     //   3. Mark the post archive as loaded once a crawl completes without timing out, or after
     //      20 attempts at loading it.
     $video_dao = DAOFactory::getDAO('VideoDAO');
     $user_dao = DAOFactory::getDAO('UserDAO');
     $post_dao = DAOFactory::getDAO('PostDAO');
     $count_history_dao = DAOFactory::getDAO('CountHistoryDAO');
     $instance_dao = DAOFactory::getDAO('InstanceDAO');
     // Get the user's upload playlist ID
     $fields_for_ids = array('part' => 'contentDetails,statistics', 'mine' => 'true');
     $various_ids = $this->youtube_api_accessor->apiRequest('channels', $this->access_token, $fields_for_ids);
     $upload_id = $various_ids->items[0]->contentDetails->relatedPlaylists->uploads;
     // Also get their channel ID as we'll need it for the Analytics API requests
     $channel_id = $various_ids->items[0]->id;
     // There are some required attributes about the author that YouTube doesn't return for the videos so we need
     // to query the database for them
     $author_details = $user_dao->getDetails($this->instance->network_user_id, 'youtube');
     $user_id = $this->instance->network_user_id;
     // Update the user's subscriber count
     $subscriber_count = $various_ids->items[0]->statistics->subscriberCount;
     $author_details->follower_count = $subscriber_count;
     $user_dao->updateUser($author_details);
     $count_history_dao->insert($user_id, 'youtube', $subscriber_count, null, 'subscriber_count');
     // Calculate the time at which we should stop fetching videos
     $end_time = time() + $this->max_crawl_time;
     // Keep track of if we finished the crawl early due to timing out
     $had_to_finish_early = false;
     // Check if we already loaded all the old posts for this user.
     // This must be read from $this->instance; there is no local $instance in this method.
     $archive_loaded = $this->instance->is_archive_loaded_posts;
     // Number of attempts made at loading the archive, including this one. Stays 0 when the archive is
     // already loaded so the comparison at the end of the method is always against an integer.
     $archive_attempts = 0;
     // If the archive isn't loaded yet keep track of how many times we've tried to load it
     if (!$archive_loaded) {
         $attempts = $count_history_dao->getLatestCountByNetworkUserIDAndType($user_id, 'youtube', 'youtube_archive_attempts');
         if ($attempts == null) {
             // If this is the first crawler run
             $attempts = array('count' => 0);
         }
         $attempts['count']++;
         $archive_attempts = $attempts['count'];
         $count_history_dao->insert($user_id, 'youtube', $attempts['count'], null, 'youtube_archive_attempts', null);
     }
     // Now page through their videos collecting the data
     $videos_fields = array('part' => 'snippet', 'maxResults' => '25', 'playlistId' => $upload_id, 'pageToken' => null);
     // We may get multiple pages
     do {
         // This is a page of IDs of videos the user has uploaded
         $user_videos = $this->youtube_api_accessor->apiRequest('playlistItems', $this->access_token, $videos_fields);
         // For each video store the relevant details about it
         foreach ($user_videos->items as $video) {
             // If we've hit the max crawl time stop (leaves both the video loop and the paging loop)
             if (time() >= $end_time) {
                 $this->logger->logUserInfo("Stopping this service users crawl because it has exceeded max time of " . $this->max_crawl_time / 60 . " minute(s). ", __METHOD__ . ',' . __LINE__);
                 $had_to_finish_early = true;
                 break 2;
             }
             // Start from a clean slate so optional keys (e.g. 'geo') set for a previous video do not
             // leak into this one
             $video_attributes = array();
             $video_id = $video->snippet->resourceId->videoId;
             // Get the title, description, likes, dislikes, views, and details about where
             // the video was taken from the data API
             $video_fields = array('id' => $video_id, 'part' => 'statistics,id,snippet,recordingDetails,status');
             $video_details = $this->youtube_api_accessor->apiRequest('videos', $this->access_token, $video_fields);
             // Check we haven't used up our quota before touching the payload
             if (isset($video_details->error)) {
                 $this->logger->logError('Error querying YouTube Data API V3 ', __METHOD__ . ',' . __LINE__);
                 break;
             }
             $item = $video_details->items[0];
             $video_attributes['post_text'] = $item->snippet->title;
             $video_attributes['description'] = $item->snippet->description;
             $video_attributes['likes'] = $item->statistics->likeCount;
             $video_attributes['dislikes'] = $item->statistics->dislikeCount;
             $video_attributes['views'] = $item->statistics->viewCount;
             // Keep track of these all time counts
             $count_history_dao->insert($user_id, 'youtube', $video_attributes['likes'], $video_id, 'likes_all_time');
             $count_history_dao->insert($user_id, 'youtube', $video_attributes['dislikes'], $video_id, 'dislikes_all_time');
             $count_history_dao->insert($user_id, 'youtube', $video_attributes['views'], $video_id, 'views_all_time');
             $video_attributes['pub_date'] = $item->snippet->publishedAt;
             $video_attributes['post_id'] = $item->id;
             // The location description is only read when present; otherwise null is stored
             $location_description = isset($item->recordingDetails->locationDescription)
                 ? $item->recordingDetails->locationDescription : null;
             $video_attributes['location'] = $location_description;
             $video_attributes['place'] = $location_description;
             if (isset($item->recordingDetails->latitude)) {
                 $video_attributes['geo'] = $item->recordingDetails->latitude . "," . $item->recordingDetails->longitude;
             }
             $video_attributes['is_protected'] = self::determinePrivacyStatus($item->status->privacyStatus);
             $today = date('Y-m-d');
             $upload_date = substr($item->snippet->publishedAt, 0, 10);
             // Get the favourites added, favourites removed, shares, subscribers gained, subscribers lost
             // estimated minutes watched, average view duration, average view percentage.
             // The result row is read positionally below, in the order the metrics are listed here:
             // 0 favoritesAdded, 1 favoritesRemoved, 2 shares, 3 subscribersGained, 4 subscribersLost,
             // 5 estimatedMinutesWatched, 6 averageViewDuration, 7 averageViewPercentage, 8 views,
             // 9 likes, 10 dislikes
             $analytics_fields = array('ids' => 'channel==' . $channel_id, 'start-date' => $upload_date, 'end-date' => $today, 'metrics' => 'favoritesAdded,favoritesRemoved,shares,subscribersGained,subscribersLost,' . 'estimatedMinutesWatched,averageViewDuration,averageViewPercentage,views,likes,dislikes', 'filters' => 'video==' . $video_id);
             $video_analytics_details = $this->youtube_analytics_api_accessor->apiRequest('reports', $this->access_token, $analytics_fields);
             // Check we haven't used up our quota
             if (isset($video_analytics_details->error)) {
                 $this->logger->logError('Error querying YouTube Analytics API', __METHOD__ . ',' . __LINE__);
                 break;
             }
             $analytics_item = isset($video_analytics_details->rows[0]) ? $video_analytics_details->rows[0] : null;
             // If the video is new we may not get any of these values back, but they can't be null
             if (isset($analytics_item)) {
                 $video_attributes['favorites_added'] = $analytics_item[0];
                 $video_attributes['favorites_removed'] = $analytics_item[1];
                 $video_attributes['shares'] = $analytics_item[2];
                 $video_attributes['subscribers_gained'] = $analytics_item[3];
                 $video_attributes['subscribers_lost'] = $analytics_item[4];
                 $video_attributes['minutes_watched'] = $analytics_item[5];
                 $video_attributes['average_view_duration'] = $analytics_item[6];
                 $video_attributes['average_view_percentage'] = $analytics_item[7];
                 // Keep track of these all time counts
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[0], $video_id, 'favorites_added_all_time');
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[1], $video_id, 'favorites_removed_all_time');
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[2], $video_id, 'shares_all_time');
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[3], $video_id, 'subscribers_gained_all_time');
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[4], $video_id, 'subscribers_lost_all_time');
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[5], $video_id, 'minutes_watched_all_time');
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[6], $video_id, 'average_view_duration_all_time');
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[7], $video_id, 'average_view_percentage_all_time');
             } else {
                 // So set them to 0
                 $video_attributes['favorites_added'] = 0;
                 $video_attributes['favorites_removed'] = 0;
                 $video_attributes['shares'] = 0;
                 $video_attributes['subscribers_gained'] = 0;
                 $video_attributes['subscribers_lost'] = 0;
                 $video_attributes['minutes_watched'] = 0;
                 $video_attributes['average_view_duration'] = 0;
                 $video_attributes['average_view_percentage'] = 0;
             }
             $video_attributes['author_user_id'] = $this->instance->network_user_id;
             $video_attributes['author_username'] = $this->instance->network_username;
             $video_attributes['author_fullname'] = $author_details->full_name;
             $video_attributes['author_avatar'] = $author_details->avatar;
             $video_attributes['source'] = '';
             $video_attributes['network'] = 'youtube';
             $video_dao->addVideo($video_attributes);
             // Now collect per day count data for 2 days ago (testing has shown analytics data is delayed by 2 days)
             $two_days_ago = date('Y-m-d', strtotime("-2 day", strtotime($today)));
             $analytics_fields['start-date'] = $two_days_ago;
             $analytics_fields['end-date'] = $two_days_ago;
             $analytics_today_details = $this->youtube_analytics_api_accessor->apiRequest('reports', $this->access_token, $analytics_fields);
             // Check we haven't used up our quota
             if (isset($analytics_today_details->error)) {
                 $this->logger->logError('Error querying YouTube Analytics API', __METHOD__ . ',' . __LINE__);
                 break;
             }
             $todays_analytics = isset($analytics_today_details->rows[0]) ? $analytics_today_details->rows[0] : null;
             // Check we got data and if not skip this part
             if (isset($todays_analytics)) {
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[0], $video_id, 'favorites_added', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[1], $video_id, 'favorites_removed', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[2], $video_id, 'shares', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[3], $video_id, 'subscribers_gained', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[4], $video_id, 'subscribers_lost', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[5], $video_id, 'minutes_watched', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[6], $video_id, 'average_view_duration', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[7], $video_id, 'average_view_percentage', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[8], $video_id, 'views', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[9], $video_id, 'likes', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[10], $video_id, 'dislikes', $two_days_ago);
             }
             // Check to see how many comments we already have for this video and if there are no new ones skip
             // comment collection as it takes a long time.
             $video_in_db = $video_dao->getVideoByID($video_id, 'youtube');
             $comments_in_db = $video_in_db->reply_count_cache;
             if (!isset($comments_in_db)) {
                 $comments_in_db = 0;
             }
             $api_comments = $item->statistics->commentCount;
             $comments_collected = 0;
             // if this video has any new comments capture those
             if ($api_comments > 0 && $api_comments > $comments_in_db) {
                 // Request the first page of comments for this video
                 $comments_fields = array('alt' => 'json');
                 if (isset($this->developer_key)) {
                     $comments_fields['key'] = $this->developer_key;
                 }
                 $comments = $this->youtube_api_v2_accessor->apiRequest('videos/' . $video_id . '/comments', $comments_fields);
                 // Check we haven't used up our quota
                 if (isset($comments->errors)) {
                     $this->logger->logError('Error querying YouTube Data API V2 ', __METHOD__ . ',' . __LINE__);
                     break;
                 }
                 do {
                     // Iterate through each comment and store the details
                     foreach ($comments->feed->entry as $comment) {
                         // We may have only needed to collect a few new comments so abort if we have everything
                         if ($api_comments == $comments_in_db) {
                             break 2;
                         }
                         // If the user has specified a limit on the number of comments per video to collect each
                         // crawl check we haven't exceeded it
                         if (isset($this->maximum_comments) && $comments_collected >= $this->maximum_comments) {
                             $this->logger->logUserInfo("Stopping collection of comments for video due to reaching " . "limit of " . $this->maximum_comments . " comments.", __METHOD__ . ',' . __LINE__);
                             break 2;
                         }
                         // We may spend a long time collecting comments so also check here if we've exceeded the
                         // max time specified by the user. "break 4" leaves the comment loop, the comment paging
                         // loop, the video loop and the video paging loop.
                         if (time() >= $end_time) {
                             $this->logger->logUserInfo("Stopping this service users crawl because it has exceeded " . "max time of " . $this->max_crawl_time / 60 . " minute(s). ", __METHOD__ . ',' . __LINE__);
                             $had_to_finish_early = true;
                             break 4;
                         }
                         // The id is returned as part of a long URL, we only want the last part of that URL
                         $id_string = explode('/', $comment->id->{'$t'});
                         // This will be the last element of id_string
                         $comment_store['post_id'] = $id_string[sizeof($id_string) - 1];
                         // The post text is the comment they made
                         // Remove byte order markers from the comment text from:
                         // http://stackoverflow.com/questions/3255993/how-do-i-remove-i-from-the-beginning
                         // -of-a-file#comment9330944_3256183
                         $comment_store['post_text'] = preg_replace('/\\x{EF}\\x{BB}\\x{BF}/', '', $comment->content->{'$t'});
                         // The author username is the user's G+ display name which we need to query for.
                         // To get the G+ ID of this commenter we need to visit their YouTube profile page; the ID
                         // needed to get to this user's page is the last element of the author URI
                         $user_id_string = explode('/', $comment->author[0]->uri->{'$t'});
                         $name = $this->youtube_api_v2_accessor->apiRequest('users/' . $user_id_string[sizeof($user_id_string) - 1], $comments_fields);
                         $gplus_id = $name->entry->{'yt$googlePlusUserId'}->{'$t'};
                         // Now we have their G+ ID we can get their details from the G+ API
                         $gplus_fields = array('fields' => 'displayName,id,image,tagline,verified');
                         $user_details = $this->google_plus_api_accessor->apiRequest('people/' . $gplus_id, $this->access_token, $gplus_fields);
                         // Sometimes G+ says the ID is invalid or the user doesn't have a G+ ID.
                         // The error object is only inspected when it exists.
                         if ($gplus_id == '' || (isset($user_details->error) && $user_details->error->code == '404')) {
                             // Use V2 of the YouTube api to get their details
                             $comment_store['author_username'] = $name->entry->{'yt$username'}->{'$t'};
                             $comment_store['author_fullname'] = $name->entry->author[0]->name->{'$t'};
                             $comment_store["author_avatar"] = $name->entry->{'media$thumbnail'}->url;
                             // In this case the user id is their YouTube user ID
                             $comment_store['author_user_id'] = $user_id_string[sizeof($user_id_string) - 1];
                             self::fetchUserFromYouTube($user_id_string[sizeof($user_id_string) - 1], 'youtube_crawler');
                             // If we still didn't get these details we can't store this comment
                             if ($comment_store['author_username'] == null || $comment_store['author_fullname'] == null || $comment_store["author_avatar"] == null) {
                                 continue;
                             }
                         } elseif (isset($user_details->error)) {
                             // Check we haven't exceeded the G+ API quota. This only leaves the loop over the
                             // current page of comments; the paging check below still runs.
                             $this->logger->logError('Error querying Google Plus API ', __METHOD__ . ',' . __LINE__);
                             break;
                         } else {
                             $comment_store['author_username'] = $user_details->displayName;
                             $comment_store['author_fullname'] = $user_details->displayName;
                             $comment_store["author_avatar"] = $user_details->image->url;
                             // The author user id is their G+ ID
                             $comment_store['author_user_id'] = $gplus_id;
                             // Make sure we have this commenter in the database
                             self::fetchUser($gplus_id, 'youtube crawler');
                         }
                         // The date they posted the comment, as "YYYY-MM-DD HH:MM:SS"
                         $comment_store['pub_date'] = substr($comment->published->{'$t'}, 0, 10) . " " . substr($comment->published->{'$t'}, 11, 8);
                         // Source of the comment
                         $comment_store['source'] = "";
                         // Comments can not be private
                         $comment_store['is_protected'] = false;
                         // Set the network to youtube
                         $comment_store['network'] = 'youtube';
                         // The ID of the author of the video
                         $comment_store['in_reply_to_user_id'] = $this->instance->network_user_id;
                         // The ID of the video this comment is a reply to
                         $comment_store['in_reply_to_post_id'] = $video_id;
                         $insert_id = $post_dao->addPost($comment_store);
                         // If the insert id is null and we're not going back to collect the whole archive
                         // we've already captured comments from this point so move on
                         if ($insert_id == null && $archive_loaded) {
                             break 2;
                         }
                         $comments_in_db++;
                         $comments_collected++;
                     }
                     $test = self::determineIfMoreCommentsExist($comments, $video_id);
                     // If there is another page of comments make a request for them
                     if ($test['next']) {
                         $comments = $this->youtube_api_v2_accessor->basicApiRequest($test['url']);
                         // Check we haven't used up our quota
                         if (isset($comments->errors)) {
                             $this->logger->logError('Error querying YouTube Data API V2 ', __METHOD__ . ',' . __LINE__);
                             break;
                         } elseif ($comments == null) {
                             // If the comments come back as null it's because we've been making too many requests
                             // too quickly. The YouTube api doesn't return valid JSON telling us this though so
                             // json_decode returns null so back off for 30 seconds and then try again
                             $error_message = "Querying the YouTube API too often waiting for 30 seconds, to ";
                             $error_message .= "prevent this delay add a developer key.";
                             $this->logger->logError($error_message, __METHOD__ . ',' . __LINE__);
                             sleep(30);
                             $comments = $this->youtube_api_v2_accessor->basicApiRequest($test['url']);
                         }
                     }
                 } while ($test['next']);
             }
         }
         // If we have another page of videos then get the token for that page. This is done once per page,
         // after the video loop, so an empty page or an early "break" out of the video loop can never cause
         // the same page to be requested again.
         if (isset($user_videos->nextPageToken)) {
             $videos_fields['pageToken'] = $user_videos->nextPageToken;
         }
         // If we have another page of videos and haven't loaded all this user's videos yet keep going;
         // if we have loaded all this user's videos then stop after 1 page (25 videos, see maxResults)
     } while (isset($user_videos->nextPageToken) && !$archive_loaded);
     // If we didn't have to finish the crawl early due to timing out we have collected all this user's videos;
     // if we have tried 20 times or more, stop trying to go back and load the post archive
     if (!$had_to_finish_early || $archive_attempts >= 20) {
         $instance_dao->setPostArchiveLoaded($user_id, 'youtube');
     }
 }
コード例 #2
0
 /**
  * Add user auth link or process incoming auth requests.
  * @param array $options Plugin options array
  */
 protected function setUpYouTubeInteractions(array $options)
 {
     // Two jobs: (1) always expose the Google OAuth link and the owner's YouTube instances to the view;
     // (2) when Google has redirected back with ?code=..., exchange the code for tokens, resolve the
     // user's Google+ identity and save the tokens, reporting any failure as an 'authorization' error.

     //get options
     $client_id = $options['youtube_client_id']->option_value;
     $client_secret = $options['youtube_client_secret']->option_value;
     //prep redirect URI (URL-encoded because it is embedded in the OAuth link's query string)
     $redirect_uri = urlencode(Utils::getApplicationURL() . 'account/?p=youtube');
     //create OAuth link
     $oauth_link = "https://accounts.google.com/o/oauth2/auth?client_id=" . $client_id . "&redirect_uri=" . $redirect_uri . "&scope=https://www.googleapis.com/auth/youtube.readonly%20https://www.googleapis.com/auth/plus.me" . "%20https://www.googleapis.com/auth/yt-analytics.readonly&response_type=code&access_type=offline" . "&approval_prompt=force";
     $this->addToView('oauth_link', $oauth_link);
     // Google provided a code to get an access token
     if (isset($_GET['code'])) {
         $code = $_GET['code'];
         $crawler_plugin_registrar = new YouTubeCrawler(null, null, null, null, null);
         $tokens = $crawler_plugin_registrar->getOAuthTokens($client_id, $client_secret, $code, 'authorization_code', $redirect_uri);
         if (isset($tokens->error)) {
             $this->addErrorMessage("Oops! Something went wrong while obtaining OAuth tokens.<br>YouTube says \"" . $tokens->error . ".\" Please double-check your settings and try again.", 'authorization');
         } elseif (isset($tokens->access_token)) {
             // Get user data
             // First we need to query the YouTube API for the user's G+ ID
             $youtube_api_accessor = new YouTubeAPIAccessor();
             $fields = array("part" => "contentDetails", "mine" => "true");
             $gplus_user_id_query = $youtube_api_accessor->apiRequest('channels', $tokens->access_token, $fields);
             if (isset($gplus_user_id_query->error)) {
                 // A 401 "Unauthorized" response gets a dedicated hint about enabling the APIs; anything
                 // else is reported verbatim with its code and message.
                 if ($gplus_user_id_query->error->code == "401" && $gplus_user_id_query->error->message == 'Unauthorized') {
                     $this->addErrorMessage("Oops! Looks like YouTube API access isn't turned on. " . "<a href=\"http://code.google.com/apis/console#access\">In the Google APIs console</a>, " . "in Services, flip the YouTube and YouTube analytics API Status switch to 'On' and try again\n                            .", 'authorization');
                 } else {
                     $this->addErrorMessage("Oops! Something went wrong querying the YouTube API.<br>" . "Google says \"" . $gplus_user_id_query->error->code . ": " . $gplus_user_id_query->error->message . ".\" Please double-check your settings and try again.", 'authorization');
                 }
             } else {
                 // We should have the user's G+ id so we now just need their username from the G+ API.
                 // The id is only read when the response actually carries it; otherwise it stays null and
                 // the "no Google+ ID" message below is shown.
                 $gplus_id = isset($gplus_user_id_query->items[0]->contentDetails->googlePlusUserId)
                     ? $gplus_user_id_query->items[0]->contentDetails->googlePlusUserId : null;
                 $gplus_api_accessor = new GooglePlusAPIAccessor();
                 if (isset($gplus_id)) {
                     $gplus_user = $gplus_api_accessor->apiRequest('people/' . $gplus_id, $tokens->access_token, null);
                     if (isset($gplus_user->error)) {
                         if ($gplus_user->error->code == "403" && $gplus_user->error->message == 'Access Not Configured') {
                             $this->addErrorMessage("Oops! Looks like Google+ API access isn't turned on. " . "<a href=\"http://code.google.com/apis/console#access\">In the Google APIs " . "console</a> in Services, flip the Google+ API Status switch to 'On' and " . "try again.", 'authorization');
                         } else {
                             $this->addErrorMessage("Oops! Something went wrong querying the Google+ API.<br>" . "Google says \"" . $gplus_user->error->code . ": " . $gplus_user->error->message . ".\" Please double-check your settings and try again.", 'authorization');
                         }
                     } elseif (isset($gplus_user->id) && isset($gplus_user->displayName)) {
                         $gplus_user_id = $gplus_user->id;
                         $gplus_username = $gplus_user->displayName;
                         // NOTE(review): presumably the token response can lack refresh_token; null is
                         // passed in that case, matching what the unguarded property read produced.
                         $refresh_token = isset($tokens->refresh_token) ? $tokens->refresh_token : null;
                         //Process tokens
                         $this->saveAccessTokens($gplus_user_id, $gplus_username, $tokens->access_token, $refresh_token);
                     } else {
                         $this->addErrorMessage("Oops! Something went wrong querying the Google+ API.<br>" . "Google says \"" . Utils::varDumpToString($gplus_user) . ".\" Please double-check your settings and try again.", 'authorization');
                     }
                 } else {
                     // It may be possible that the user has not linked their YouTube account to their G+ account
                     // so we might not get a G+ ID
                     $this->addErrorMessage("You don't have a Google+ ID associated with your YouTube account, " . "go to YouTube and link your Google+ account to your YouTube account to use this plugin. " . "For more information click <a href=https://www.thinkup.com/docs/userguide/settings/plugin" . "s/youtube.html>here</a>", 'authorization');
                 }
             }
         }
     }
     // Always list the owner's existing YouTube instances, whether or not an auth request was processed
     $instance_dao = DAOFactory::getDAO('InstanceDAO');
     $owner_instances = $instance_dao->getByOwnerAndNetwork($this->owner, 'youtube');
     $this->addToView('owner_instances', $owner_instances);
 }