/**
 * Crawl the Google+ instances of the currently logged-in owner.
 *
 * Does nothing unless both the google_plus_client_id and
 * google_plus_client_secret plugin options are set. For each instance the
 * last-run marker is updated, the crawler is initialised and asked for the
 * instance user's posts, insights are generated and the instance is saved.
 * An exception thrown by the crawler is logged as a user error; the
 * remaining steps for that instance still run.
 */
public function crawl()
 {
     $log = Logger::getInstance();
     // Not read below; kept because obtaining the singleton may have
     // initialisation side effects -- TODO confirm and drop if it has none.
     $config = Config::getInstance();
     $instances_store = DAOFactory::getDAO('InstanceDAO');
     $owner_instances_store = DAOFactory::getDAO('OwnerInstanceDAO');
     $owners_store = DAOFactory::getDAO('OwnerDAO');
     $plugin_options = DAOFactory::GetDAO('PluginOptionDAO')->getOptionsHash('googleplus', true);

     $owner = $owners_store->getByEmail(Session::getLoggedInUser());
     $google_plus_instances = $instances_store->getActiveInstancesStalestFirstForOwnerByNetworkNoAuthError($owner, 'google+');

     // Without API credentials there is nothing to crawl with.
     if (!isset($plugin_options['google_plus_client_id']->option_value)
         || !isset($plugin_options['google_plus_client_secret']->option_value)) {
         return;
     }
     $client_id = $plugin_options['google_plus_client_id']->option_value;
     $client_secret = $plugin_options['google_plus_client_secret']->option_value;

     foreach ($google_plus_instances as $instance) {
         $log->setUsername(ucwords($instance->network) . ' | ' . $instance->network_username);
         $log->logUserSuccess(
             "Starting to collect data for " . $instance->network_username . "'s " . ucwords($instance->network),
             __METHOD__ . ',' . __LINE__
         );

         // The refresh token is stored under the oauth_access_token_secret key.
         $oauth = $owner_instances_store->getOAuthTokens($instance->id);
         $access_token = $oauth['oauth_access_token'];
         $refresh_token = $oauth['oauth_access_token_secret'];

         $instances_store->updateLastRun($instance->id);
         $crawler = new GooglePlusCrawler($instance, $access_token);
         $insights_generator = new InsightsGenerator($instance);

         try {
             $crawler->initializeInstanceUser($client_id, $client_secret, $access_token, $refresh_token, $owner->id);
             $crawler->fetchInstanceUserPosts();
         } catch (Exception $e) {
             // A failed crawl is reported but does not stop insight generation or the save below.
             $log->logUserError('EXCEPTION: ' . $e->getMessage(), __METHOD__ . ',' . __LINE__);
         }

         $insights_generator->generateInsights();
         $instances_store->save($crawler->instance, 0, $log);
         $log->logUserSuccess(
             "Finished collecting data for " . $instance->network_username . "'s " . ucwords($instance->network),
             __METHOD__ . ',' . __LINE__
         );
     }
 }
 /**
  * Crawl the Facebook profiles and pages of the currently logged-in owner.
  *
  * Profiles ('facebook') are crawled before pages ('facebook page'). For each
  * instance the last-run marker is updated, posts and replies are fetched,
  * insights are generated and the instance is saved.
  *
  * The max_crawl_time plugin option is interpreted as minutes and converted
  * to seconds before being handed to the crawler. When the option is missing
  * or not numeric, the default of 20 minutes is used.
  *
  * An APIOAuthException records an auth error for the owner/instance pair and
  * triggers an e-mail alert; any other Exception is only logged. In both
  * cases insight generation and the save still run for that instance.
  */
 public function crawl()
 {
     $logger = Logger::getInstance();
     // Not read below; kept because obtaining the singleton may have
     // initialisation side effects -- TODO confirm and drop if it has none.
     $config = Config::getInstance();
     $instance_dao = DAOFactory::getDAO('InstanceDAO');
     $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO');
     $owner_dao = DAOFactory::getDAO('OwnerDAO');
     $plugin_option_dao = DAOFactory::GetDAO('PluginOptionDAO');
     $options = $plugin_option_dao->getOptionsHash('facebook', true);
     //get cached

     // Only trust the stored value when it is numeric: multiplying an empty or
     // non-numeric string raises a TypeError on PHP 8 and silently yields 0 on
     // older versions, either of which would break the crawl.
     $max_crawl_minutes = 20;
     if (isset($options['max_crawl_time']->option_value) && is_numeric($options['max_crawl_time']->option_value)) {
         $max_crawl_minutes = $options['max_crawl_time']->option_value;
     }
     //convert to seconds
     $max_crawl_time = $max_crawl_minutes * 60;

     $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser());
     //crawl Facebook user profiles and pages
     $profiles = $instance_dao->getActiveInstancesStalestFirstForOwnerByNetworkNoAuthError($current_owner, 'facebook');
     $pages = $instance_dao->getActiveInstancesStalestFirstForOwnerByNetworkNoAuthError($current_owner, 'facebook page');
     $instances = array_merge($profiles, $pages);
     foreach ($instances as $instance) {
         $logger->setUsername(ucwords($instance->network) . ' | ' . $instance->network_username);
         $logger->logUserSuccess("Starting to collect data for " . $instance->network_username . "'s " . ucwords($instance->network), __METHOD__ . ',' . __LINE__);
         $tokens = $owner_instance_dao->getOAuthTokens($instance->id);
         $access_token = $tokens['oauth_access_token'];
         $instance_dao->updateLastRun($instance->id);
         $crawler = new FacebookCrawler($instance, $access_token, $max_crawl_time);
         $insights_generator = new InsightsGenerator($instance);
         try {
             $crawler->fetchPostsAndReplies();
         } catch (APIOAuthException $e) {
             //The access token is invalid, save in owner_instances table
             $owner_instance_dao->setAuthError($current_owner->id, $instance->id, $e->getMessage());
             //Send email alert
             $this->sendInvalidOAuthEmailAlert($current_owner->email, $instance->network_username);
             $logger->logUserError('EXCEPTION: ' . $e->getMessage(), __METHOD__ . ',' . __LINE__);
         } catch (Exception $e) {
             $logger->logUserError('EXCEPTION: ' . $e->getMessage(), __METHOD__ . ',' . __LINE__);
         }
         // Runs even after a failed fetch, so insights reflect whatever data is already stored.
         $insights_generator->generateInsights();
         $instance_dao->save($crawler->instance, 0, $logger);
         $logger->logUserSuccess("Finished collecting data for " . $instance->network_username . "'s " . ucwords($instance->network), __METHOD__ . ',' . __LINE__);
     }
 }
Example #3
0
 /**
  * Crawl every active Twitter instance the logged-in owner has access to.
  *
  * Instances the owner cannot access are skipped. An instance is crawled
  * without user authentication ('NOAUTH' tokens) when it has no non-empty
  * OAuth access token and secret stored. The crawl steps, insight generation
  * and the save only run when the API accessor reports available calls.
  *
  * The crawl stops with a logged error as soon as an accessible instance is
  * found but the oauth_consumer_key / oauth_consumer_secret plugin options
  * are not set, because no API accessor can be built without them.
  *
  * Exceptions thrown by the API accessor or crawler are not caught here.
  */
 public function crawl()
 {
     // Not read below; kept because obtaining the singleton may have
     // initialisation side effects -- TODO confirm and drop if it has none.
     $config = Config::getInstance();
     $logger = Logger::getInstance();
     $instance_dao = DAOFactory::getDAO('TwitterInstanceDAO');
     $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO');
     $owner_dao = DAOFactory::getDAO('OwnerDAO');
     // get oauth values
     $plugin_option_dao = DAOFactory::GetDAO('PluginOptionDAO');
     $options = $plugin_option_dao->getOptionsHash('twitter', true);

     // Plugin-wide settings do not vary per instance, so resolve them once.
     $credentials_configured = isset($options['oauth_consumer_key']->option_value)
         && isset($options['oauth_consumer_secret']->option_value);
     // No default is visible in this method for archive_limit; null is passed when it is unset.
     $archive_limit = isset($options['archive_limit']->option_value) ? $options['archive_limit']->option_value : null;
     $num_twitter_errors = isset($options['num_twitter_errors']) ? $options['num_twitter_errors']->option_value : null;
     $max_api_calls_per_crawl = isset($options['max_api_calls_per_crawl']) ? $options['max_api_calls_per_crawl']->option_value : 350;
     $api_calls_to_leave_unmade_per_minute = isset($options['api_calls_to_leave_unmade_per_minute']) ? $options['api_calls_to_leave_unmade_per_minute']->option_value : 2.0;

     $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser());
     $instances = $instance_dao->getAllActiveInstancesStalestFirstByNetwork('twitter');
     foreach ($instances as $instance) {
         if (!$owner_instance_dao->doesOwnerHaveAccessToInstance($current_owner, $instance)) {
             // Owner doesn't have access to this instance; let's not crawl it.
             continue;
         }
         $logger->setUsername($instance->network_username);
         if (!$credentials_configured) {
             // Checked here rather than up front so nothing is logged when there is nothing to crawl.
             $logger->logUserError('Twitter OAuth consumer key and secret are not configured; skipping crawl.', __METHOD__ . ',' . __LINE__);
             return;
         }
         $logger->logUserSuccess("Starting to collect data for " . $instance->network_username . " on Twitter.", __METHOD__ . ',' . __LINE__);

         $tokens = $owner_instance_dao->getOAuthTokens($instance->id);
         $noauth = !(isset($tokens['oauth_access_token']) && $tokens['oauth_access_token'] != ''
             && isset($tokens['oauth_access_token_secret']) && $tokens['oauth_access_token_secret'] != '');
         // Without stored user tokens the accessor is given the 'NOAUTH' placeholder for both.
         $oauth_token = $noauth ? 'NOAUTH' : $tokens['oauth_access_token'];
         $oauth_token_secret = $noauth ? 'NOAUTH' : $tokens['oauth_access_token_secret'];

         $api = new CrawlerTwitterAPIAccessorOAuth(
             $oauth_token,
             $oauth_token_secret,
             $options['oauth_consumer_key']->option_value,
             $options['oauth_consumer_secret']->option_value,
             $api_calls_to_leave_unmade_per_minute,
             $archive_limit,
             $num_twitter_errors,
             $max_api_calls_per_crawl
         );
         $crawler = new TwitterCrawler($instance, $api);
         $insights_generator = new InsightsGenerator($instance);
         $api->init();
         // budget our twitter calls
         $call_limits = $this->budgetCrawlLimits($api->available_api_calls_for_crawler, $noauth);
         $api->setCallerLimits($call_limits);
         if ($api->available_api_calls_for_crawler > 0) {
             $instance_dao->updateLastRun($instance->id);
             // No auth for public Twitter users
             $crawler->fetchInstanceUserTweets();
             if (!$noauth) {
                 // Auth req'd, for calling user only
                 $crawler->fetchInstanceUserMentions();
                 $crawler->fetchInstanceUserFriends();
                 $crawler->fetchInstanceFavorites();
                 $crawler->fetchInstanceUserFollowers();
                 $crawler->fetchInstanceUserGroups();
                 $crawler->fetchRetweetsOfInstanceUser();
                 $crawler->cleanUpMissedFavsUnFavs();
                 $crawler->updateStaleGroupMemberships();
             }
             $crawler->fetchStrayRepliedToTweets();
             $crawler->fetchUnloadedFollowerDetails();
             $crawler->cleanUpFollows();
             $crawler->fetchFriendTweetsAndFriends();
             // NOTE(review): insights are generated before the NOAUTH search below runs,
             // so search results are not included in this pass -- confirm this is intended.
             $insights_generator->generateInsights();
             if ($noauth) {
                 // No auth req'd
                 $crawler->fetchSearchResults($instance->network_username);
             }
             // Save instance
             if (isset($crawler->user)) {
                 $instance_dao->save($instance, $crawler->user->post_count, $logger);
             }
             $logger->logUserSuccess("Finished collecting data for " . $instance->network_username . " on Twitter.", __METHOD__ . ',' . __LINE__);
         }
     }
 }