public function testReportVersion() { //create new instance $instance = new Instance(); $instance->network_username = '******'; $instance->network = 'twitter'; $_SERVER['HTTP_HOST'] = "mytestthinkup"; //report version $result = Reporter::reportVersion($instance); $this->assertPattern("/http:\\/\\/thinkup.com\\/version.php\\?v\\=/", $result[0]); $this->assertEqual($result[1], "http://mytestthinkup/?u=danica+mckellar&n=twitter"); $this->assertEqual($result[2], 200); }
public function crawl() { $logger = Logger::getInstance(); $config = Config::getInstance(); $instance_dao = DAOFactory::getDAO('InstanceDAO'); $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO'); $owner_dao = DAOFactory::getDAO('OwnerDAO'); $plugin_option_dao = DAOFactory::GetDAO('PluginOptionDAO'); $options = $plugin_option_dao->getOptionsHash('googleplus', true); //get cached $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser()); //crawl Google+ users $instances = $instance_dao->getActiveInstancesStalestFirstForOwnerByNetworkNoAuthError($current_owner, 'google+'); if (isset($options['google_plus_client_id']->option_value) && isset($options['google_plus_client_secret']->option_value)) { foreach ($instances as $instance) { $logger->setUsername(ucwords($instance->network) . ' | ' . $instance->network_username); $logger->logUserSuccess("Starting to collect data for " . $instance->network_username . "'s " . ucwords($instance->network), __METHOD__ . ',' . __LINE__); $tokens = $owner_instance_dao->getOAuthTokens($instance->id); $access_token = $tokens['oauth_access_token']; $refresh_token = $tokens['oauth_access_token_secret']; $instance_dao->updateLastRun($instance->id); $google_plus_crawler = new GooglePlusCrawler($instance, $access_token); $dashboard_module_cacher = new DashboardModuleCacher($instance); try { $google_plus_crawler->initializeInstanceUser($options['google_plus_client_id']->option_value, $options['google_plus_client_secret']->option_value, $access_token, $refresh_token, $current_owner->id); $google_plus_crawler->fetchInstanceUserPosts(); } catch (Exception $e) { $logger->logUserError('EXCEPTION: ' . $e->getMessage(), __METHOD__ . ',' . __LINE__); } $dashboard_module_cacher->cacheDashboardModules(); $instance_dao->save($google_plus_crawler->instance, 0, $logger); Reporter::reportVersion($instance); $logger->logUserSuccess("Finished collecting data for " . $instance->network_username . "'s " . ucwords($instance->network), __METHOD__ . ',' . __LINE__); } } }
public function crawl() { $logger = Logger::getInstance(); $config = Config::getInstance(); $instance_dao = DAOFactory::getDAO('FacebookInstanceDAO'); $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO'); $owner_dao = DAOFactory::getDAO('OwnerDAO'); $plugin_option_dao = DAOFactory::GetDAO('PluginOptionDAO'); $options = $plugin_option_dao->getOptionsHash('facebook', true); //get cached $max_crawl_time = isset($options['max_crawl_time']) ? $options['max_crawl_time']->option_value : 20; //convert to seconds $max_crawl_time = $max_crawl_time * 60; $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser()); //crawl Facebook user profiles and pages $profiles = $instance_dao->getActiveInstancesStalestFirstForOwnerByNetworkNoAuthError($current_owner, 'facebook'); $pages = $instance_dao->getActiveInstancesStalestFirstForOwnerByNetworkNoAuthError($current_owner, 'facebook page'); $instances = array_merge($profiles, $pages); foreach ($instances as $instance) { $logger->setUsername(ucwords($instance->network) . ' | ' . $instance->network_username); $logger->logUserSuccess("Starting to collect data for " . $instance->network_username . "'s " . ucwords($instance->network), __METHOD__ . ',' . __LINE__); $tokens = $owner_instance_dao->getOAuthTokens($instance->id); $access_token = $tokens['oauth_access_token']; $instance_dao->updateLastRun($instance->id); $facebook_crawler = new FacebookCrawler($instance, $access_token, $max_crawl_time); $dashboard_module_cacher = new DashboardModuleCacher($instance); try { $facebook_crawler->fetchPostsAndReplies(); } catch (APIOAuthException $e) { $logger->logUserError(get_class($e) . ": " . $e->getMessage(), __METHOD__ . ',' . __LINE__); //Don't send reauth email if it's app-level API rate limting //https://developers.facebook.com/docs/reference/ads-api/api-rate-limiting/#applimit if (strpos($e->getMessage(), 'Application request limit reached') === false && strpos($e->getMessage(), 'Please retry your request later.') === false) { //The access token is invalid, save in owner_instances table $owner_instance_dao->setAuthErrorByTokens($instance->id, $access_token, '', $e->getMessage()); //Send email alert //Get owner by auth tokens first, then send to that person $owner_email_to_notify = $owner_instance_dao->getOwnerEmailByInstanceTokens($instance->id, $access_token, ''); $email_attempt = $this->sendInvalidOAuthEmailAlert($owner_email_to_notify, $instance->network_username); if ($email_attempt) { $logger->logUserInfo('Sent reauth email to ' . $owner_email_to_notify, __METHOD__ . ',' . __LINE__); } else { $logger->logInfo('Didn\'t send reauth email to ' . $owner_email_to_notify, __METHOD__ . ',' . __LINE__); } } else { $logger->logInfo('Facebook API returned an error: ' . $e->getMessage() . ' Do nothing now and try again later', __METHOD__ . ',' . __LINE__); } } catch (Exception $e) { $logger->logUserError(get_class($e) . ": " . $e->getMessage(), __METHOD__ . ',' . __LINE__); } $dashboard_module_cacher->cacheDashboardModules(); $instance_dao->save($facebook_crawler->instance, 0, $logger); Reporter::reportVersion($instance); $logger->logUserSuccess("Finished collecting data for " . $instance->network_username . "'s " . ucwords($instance->network), __METHOD__ . ',' . __LINE__); } }
public function crawl() { $logger = Logger::getInstance(); $config = Config::getInstance(); $instance_dao = DAOFactory::getDAO('InstagramInstanceDAO'); $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO'); $owner_dao = DAOFactory::getDAO('OwnerDAO'); $plugin_option_dao = DAOFactory::GetDAO('PluginOptionDAO'); $options = $plugin_option_dao->getOptionsHash('instagram', true); //get cached $max_api_calls = isset($options['max_api_calls']) ? $options['max_api_calls']->option_value : 2500; $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser()); //crawl instagram user profiles and pages $instances = $instance_dao->getActiveInstancesStalestFirstForOwnerByNetworkNoAuthError($current_owner, 'instagram'); foreach ($instances as $instance) { $logger->setUsername(ucwords($instance->network) . ' | ' . $instance->network_username); $logger->logUserSuccess("Starting to collect data for " . $instance->network_username . "'s " . ucwords($instance->network), __METHOD__ . ',' . __LINE__); $tokens = $owner_instance_dao->getOAuthTokens($instance->id); $access_token = $tokens['oauth_access_token']; $instance_dao->updateLastRun($instance->id); $dashboard_module_cacher = new DashboardModuleCacher($instance); try { /** * 1. Fetch user info, media + its likes and comments. * 2. Fetch user's likes. * 3. Fetch user's friends, and update stale relationships. * 4. Fetch user's followers, and update stale relationships. * 5. Update stale friends' profiles. */ $instagram_crawler = new InstagramCrawler($instance, $access_token, $max_api_calls); $instagram_crawler->fetchPostsAndReplies(); $instagram_crawler->fetchLikes(); $instagram_crawler->fetchFriends(); $instagram_crawler->fetchFollowers(); $instagram_crawler->updateStaleFriendsProfiles(); } catch (Instagram\Core\ApiAuthException $e) { //The access token is invalid, save in owner_instances table $owner_instance_dao->setAuthErrorByTokens($instance->id, $access_token, '', $e->getMessage()); //Send email alert //Get owner by auth tokens first, then send to that person $owner_email_to_notify = $owner_instance_dao->getOwnerEmailByInstanceTokens($instance->id, $access_token, ''); $email_attempt = $this->sendInvalidOAuthEmailAlert($owner_email_to_notify, $instance->network_username); if ($email_attempt) { $logger->logUserInfo('Sent reauth email to ' . $owner_email_to_notify, __METHOD__ . ',' . __LINE__); } else { $logger->logInfo('Didn\'t send reauth email to ' . $owner_email_to_notify, __METHOD__ . ',' . __LINE__); } $logger->logUserError(get_class($e) . ' ' . $e->getMessage(), __METHOD__ . ',' . __LINE__); } catch (Exception $e) { $logger->logUserError(get_class($e) . ' ' . $e->getMessage(), __METHOD__ . ',' . __LINE__); } $dashboard_module_cacher->cacheDashboardModules(); $instance_dao->save($instagram_crawler->instance, 0, $logger); Reporter::reportVersion($instance); $logger->logUserSuccess("Finished collecting data for " . $instance->network_username . "'s " . ucwords($instance->network), __METHOD__ . ',' . __LINE__); } }
public function crawl() { $logger = Logger::getInstance(); $config = Config::getInstance(); $instance_dao = DAOFactory::getDAO('InstanceDAO'); $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO'); $owner_dao = DAOFactory::getDAO('OwnerDAO'); $plugin_option_dao = DAOFactory::GetDAO('PluginOptionDAO'); $options = $plugin_option_dao->getOptionsHash('facebook', true); //get cached $max_crawl_time = isset($options['max_crawl_time']) ? $options['max_crawl_time']->option_value : 20; //convert to seconds $max_crawl_time = $max_crawl_time * 60; $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser()); //crawl Facebook user profiles and pages $profiles = $instance_dao->getActiveInstancesStalestFirstForOwnerByNetworkNoAuthError($current_owner, 'facebook'); $pages = $instance_dao->getActiveInstancesStalestFirstForOwnerByNetworkNoAuthError($current_owner, 'facebook page'); $instances = array_merge($profiles, $pages); foreach ($instances as $instance) { $logger->setUsername(ucwords($instance->network) . ' | ' . $instance->network_username); $logger->logUserSuccess("Starting to collect data for " . $instance->network_username . "'s " . ucwords($instance->network), __METHOD__ . ',' . __LINE__); $tokens = $owner_instance_dao->getOAuthTokens($instance->id); $access_token = $tokens['oauth_access_token']; $instance_dao->updateLastRun($instance->id); $facebook_crawler = new FacebookCrawler($instance, $access_token, $max_crawl_time); $dashboard_module_cacher = new DashboardModuleCacher($instance); try { $facebook_crawler->fetchPostsAndReplies(); } catch (APIOAuthException $e) { //The access token is invalid, save in owner_instances table $owner_instance_dao->setAuthError($current_owner->id, $instance->id, $e->getMessage()); //Send email alert $this->sendInvalidOAuthEmailAlert($current_owner->email, $instance->network_username); $logger->logUserError('EXCEPTION: ' . $e->getMessage(), __METHOD__ . ',' . __LINE__); } catch (Exception $e) { $logger->logUserError('EXCEPTION: ' . $e->getMessage(), __METHOD__ . ',' . __LINE__); } $dashboard_module_cacher->cacheDashboardModules(); $instance_dao->save($facebook_crawler->instance, 0, $logger); Reporter::reportVersion($instance); $logger->logUserSuccess("Finished collecting data for " . $instance->network_username . "'s " . ucwords($instance->network), __METHOD__ . ',' . __LINE__); } }
public function crawl() { $logger = Logger::getInstance(); $config = Config::getInstance(); $instance_dao = DAOFactory::getDAO('InstanceDAO'); $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO'); $owner_dao = DAOFactory::getDAO('OwnerDAO'); $plugin_option_dao = DAOFactory::GetDAO('PluginOptionDAO'); $options = $plugin_option_dao->getOptionsHash('youtube', true); //get cached $max_crawl_time = isset($options['max_crawl_time']) ? $options['max_crawl_time']->option_value : 20; //convert to seconds $max_crawl_time = $max_crawl_time * 60; $developer_key = isset($options['developer_key']) ? $options['developer_key']->option_value : null; $max_comments = isset($options['max_comments']) ? $options['max_comments']->option_value : null; $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser()); //crawl youtube users $instances = $instance_dao->getActiveInstancesStalestFirstForOwnerByNetworkNoAuthError($current_owner, 'youtube'); if (isset($options['youtube_client_id']->option_value) && isset($options['youtube_client_secret']->option_value)) { foreach ($instances as $instance) { $logger->setUsername(ucwords($instance->network) . ' | ' . $instance->network_username); $logger->logUserSuccess("Starting to collect data for " . $instance->network_username . "'s " . ucwords($instance->network), __METHOD__ . ',' . __LINE__); $tokens = $owner_instance_dao->getOAuthTokens($instance->id); $access_token = $tokens['oauth_access_token']; $refresh_token = $tokens['oauth_access_token_secret']; $instance_dao->updateLastRun($instance->id); $youtube_crawler = new YouTubeCrawler($instance, $access_token, $max_crawl_time, $developer_key, $max_comments); $dashboard_module_cacher = new DashboardModuleCacher($instance); try { $youtube_crawler->initializeInstanceUser($options['youtube_client_id']->option_value, $options['youtube_client_secret']->option_value, $access_token, $refresh_token, $current_owner->id); $youtube_crawler->fetchInstanceUserVideos(); } catch (Exception $e) { $logger->logUserError('EXCEPTION: ' . $e->getMessage(), __METHOD__ . ',' . __LINE__); } $dashboard_module_cacher->cacheDashboardModules(); $instance_dao->save($youtube_crawler->instance, 0, $logger); Reporter::reportVersion($instance); $logger->logUserSuccess("Finished collecting data for " . $instance->network_username . "'s " . ucwords($instance->network), __METHOD__ . ',' . __LINE__); } } }
public function crawl() { $logger = Logger::getInstance(); $config = Config::getInstance(); $instance_dao = DAOFactory::getDAO('InstanceDAO'); $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO'); $owner_dao = DAOFactory::getDAO('OwnerDAO'); $plugin_option_dao = DAOFactory::GetDAO('PluginOptionDAO'); $options = $plugin_option_dao->getOptionsHash('foursquare', true); $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser()); $instances = $instance_dao->getAllActiveInstancesStalestFirstByNetwork('foursquare'); // Check the client id and secret are set or we can't crawl if (isset($options['foursquare_client_id']->option_value) && isset($options['foursquare_client_secret']->option_value)) { // For each instance of foursquare on this install foreach ($instances as $instance) { if (!$owner_instance_dao->doesOwnerHaveAccessToInstance($current_owner, $instance)) { // Owner doesn't have access to this instance; let's not crawl it. continue; } // Set the user name in the log $logger->setUsername(ucwords($instance->network) . ' | ' . $instance->network_username); // Write to the log that we have started to collect data $logger->logUserSuccess("Starting to collect data for " . $instance->network_username . "'s " . ucwords($instance->network), __METHOD__ . ',' . __LINE__); // Get the OAuth tokens for this user $tokens = $owner_instance_dao->getOAuthTokens($instance->id); // Set the access token $access_token = $tokens['oauth_access_token']; // Update the last time we crawled $instance_dao->updateLastRun($instance->id); // Create a new crawler $crawler = new FoursquareCrawler($instance, $access_token); // Check the OAuth tokens we have are valid try { $logger->logInfo("About to initializeInstanceUser", __METHOD__ . ',' . __LINE__); $user = $crawler->initializeInstanceUser($access_token, $current_owner->id); if (isset($user) && $user instanceof User) { $logger->logInfo("User initialized", __METHOD__ . ',' . __LINE__); } // Get the data we want and store it in the database $logger->logInfo("About to fetchInstanceUserCheckins", __METHOD__ . ',' . __LINE__); $crawler->fetchInstanceUserCheckins(); } catch (Exception $e) { // Catch any errors that happen when we check the validity of the OAuth tokens $logger->logUserError('EXCEPTION: ' . $e->getMessage(), __METHOD__ . ',' . __LINE__); } $logger->logInfo("About to cache dashboard modules", __METHOD__ . ',' . __LINE__); // Cache the insights to improve the page load speed of the dashboard $dashboard_module_cacher = new DashboardModuleCacher($instance); $dashboard_module_cacher->cacheDashboardModules(); $instance_dao->save($crawler->instance, 0, $logger); Reporter::reportVersion($instance); // Tell the user crawling was sucessful $logger->logUserSuccess("Finished collecting data for " . $instance->network_username . "'s " . ucwords($instance->network), __METHOD__ . ',' . __LINE__); } } }
public function crawl() { $config = Config::getInstance(); $logger = Logger::getInstance(); $instance_dao = DAOFactory::getDAO('TwitterInstanceDAO'); $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO'); $owner_dao = DAOFactory::getDAO('OwnerDAO'); $instance_hashtag_dao = DAOFactory::getDAO('InstanceHashtagDAO'); // get oauth values $plugin_option_dao = DAOFactory::GetDAO('PluginOptionDAO'); $options = $plugin_option_dao->getOptionsHash('twitter', true); $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser()); $instances = $instance_dao->getActiveInstancesStalestFirstForOwnerByNetworkNoAuthError($current_owner, 'twitter'); foreach ($instances as $instance) { $logger->setUsername($instance->network_username); $logger->logUserSuccess("Starting to collect data for " . $instance->network_username . " on Twitter.", __METHOD__ . ',' . __LINE__); $tokens = $owner_instance_dao->getOAuthTokens($instance->id); $num_twitter_errors = isset($options['num_twitter_errors']) ? $options['num_twitter_errors']->option_value : null; $dashboard_module_cacher = new DashboardModuleCacher($instance); try { if (isset($tokens['oauth_access_token']) && $tokens['oauth_access_token'] != '' && isset($tokens['oauth_access_token_secret']) && $tokens['oauth_access_token_secret'] != '') { $archive_limit = isset($options['archive_limit']->option_value) ? $options['archive_limit']->option_value : 3200; $api = new CrawlerTwitterAPIAccessorOAuth($tokens['oauth_access_token'], $tokens['oauth_access_token_secret'], $options['oauth_consumer_key']->option_value, $options['oauth_consumer_secret']->option_value, $archive_limit, $num_twitter_errors); $twitter_crawler = new TwitterCrawler($instance, $api); $instance_dao->updateLastRun($instance->id); $twitter_crawler->fetchInstanceUserTweets(); $twitter_crawler->fetchInstanceUserMentions(); $twitter_crawler->fetchInstanceUserFriends(); $twitter_crawler->fetchInstanceUserFollowers(); $twitter_crawler->fetchInstanceUserGroups(); $twitter_crawler->fetchRetweetsOfInstanceUser(); $twitter_crawler->fetchInstanceUserFavorites(); $twitter_crawler->updateStaleGroupMemberships(); $twitter_crawler->fetchStrayRepliedToTweets(); $twitter_crawler->fetchUserFriendsByIDs(); $twitter_crawler->fetchUnloadedFriendDetails(); $twitter_crawler->fetchUnloadedFollowerDetails(); $twitter_crawler->cleanUpFollows(); $twitter_crawler->updateFriendsProfiles(); //Retrieve search results for saved keyword/hashtags $instances_hashtags = $instance_hashtag_dao->getByInstance($instance->id); foreach ($instances_hashtags as $instance_hashtag) { $twitter_crawler->fetchInstanceHashtagTweets($instance_hashtag); } } else { throw new Exception('Missing Twitter OAuth tokens.'); } } catch (Exception $e) { $logger->logUserError(get_class($e) . " while crawling " . $instance->network_username . " on Twitter: " . $e->getMessage(), __METHOD__ . ',' . __LINE__); } $dashboard_module_cacher->cacheDashboardModules(); // Save instance if (isset($twitter_crawler->user)) { $instance_dao->save($instance, $twitter_crawler->user->post_count, $logger); } Reporter::reportVersion($instance); $logger->logUserSuccess("Finished collecting data for " . $instance->network_username . " on Twitter.", __METHOD__ . ',' . __LINE__); } }