コード例 #1
0
 /**
  * Crawl Twitter data for every instance returned for the logged-in owner.
  *
  * For each instance this builds an OAuth API accessor from the stored tokens, runs the
  * TwitterCrawler fetch steps in sequence, then fetches tweets for each hashtag/keyword saved
  * for the instance. Afterwards it caches the dashboard modules, saves the instance (only when
  * this instance's crawler populated a user) and reports the version.
  *
  * Any Exception raised while crawling one instance, including the one thrown here when the
  * OAuth tokens are missing, is logged as a user error; the loop then carries on with the
  * post-crawl steps and the next instance.
  *
  * @return void
  */
 public function crawl()
 {
     // Return value is unused in this method; the call is kept in case it has
     // initialization side effects — TODO confirm and remove.
     Config::getInstance();
     $logger = Logger::getInstance();
     $instance_dao = DAOFactory::getDAO('TwitterInstanceDAO');
     $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO');
     $owner_dao = DAOFactory::getDAO('OwnerDAO');
     $instance_hashtag_dao = DAOFactory::getDAO('InstanceHashtagDAO');

     // The plugin options carry the OAuth consumer credentials and the crawl settings.
     $plugin_option_dao = DAOFactory::getDAO('PluginOptionDAO');
     $options = $plugin_option_dao->getOptionsHash('twitter', true);

     // These settings do not depend on the instance, so they are read once up front.
     $num_twitter_errors = isset($options['num_twitter_errors']) ? $options['num_twitter_errors']->option_value : null;
     $archive_limit = isset($options['archive_limit']->option_value) ? $options['archive_limit']->option_value : 3200;

     $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser());
     $instances = $instance_dao->getActiveInstancesStalestFirstForOwnerByNetworkNoAuthError($current_owner, 'twitter');
     foreach ($instances as $instance) {
         // Reset per instance: if this iteration fails before a crawler is built, the save step
         // below must not pick up the crawler (and post count) left over from the previous instance.
         $twitter_crawler = null;

         $logger->setUsername($instance->network_username);
         $logger->logUserSuccess("Starting to collect data for " . $instance->network_username . " on Twitter.", __METHOD__ . ',' . __LINE__);
         $tokens = $owner_instance_dao->getOAuthTokens($instance->id);
         $dashboard_module_cacher = new DashboardModuleCacher($instance);
         try {
             $has_tokens = isset($tokens['oauth_access_token']) && $tokens['oauth_access_token'] != ''
                 && isset($tokens['oauth_access_token_secret']) && $tokens['oauth_access_token_secret'] != '';
             if (!$has_tokens) {
                 throw new Exception('Missing Twitter OAuth tokens.');
             }

             $api = new CrawlerTwitterAPIAccessorOAuth(
                 $tokens['oauth_access_token'],
                 $tokens['oauth_access_token_secret'],
                 $options['oauth_consumer_key']->option_value,
                 $options['oauth_consumer_secret']->option_value,
                 $archive_limit,
                 $num_twitter_errors
             );
             $twitter_crawler = new TwitterCrawler($instance, $api);
             $instance_dao->updateLastRun($instance->id);

             $twitter_crawler->fetchInstanceUserTweets();
             $twitter_crawler->fetchInstanceUserMentions();
             $twitter_crawler->fetchInstanceUserFriends();
             $twitter_crawler->fetchInstanceUserFollowers();
             $twitter_crawler->fetchInstanceUserGroups();
             $twitter_crawler->fetchRetweetsOfInstanceUser();
             $twitter_crawler->fetchInstanceUserFavorites();
             $twitter_crawler->updateStaleGroupMemberships();
             $twitter_crawler->fetchStrayRepliedToTweets();
             $twitter_crawler->fetchUserFriendsByIDs();
             $twitter_crawler->fetchUnloadedFriendDetails();
             $twitter_crawler->fetchUnloadedFollowerDetails();
             $twitter_crawler->cleanUpFollows();
             $twitter_crawler->updateFriendsProfiles();

             // Retrieve search results for saved keywords/hashtags.
             $instances_hashtags = $instance_hashtag_dao->getByInstance($instance->id);
             foreach ($instances_hashtags as $instance_hashtag) {
                 $twitter_crawler->fetchInstanceHashtagTweets($instance_hashtag);
             }
         } catch (Exception $e) {
             $logger->logUserError(get_class($e) . " while crawling " . $instance->network_username . " on Twitter: " . $e->getMessage(), __METHOD__ . ',' . __LINE__);
         }

         $dashboard_module_cacher->cacheDashboardModules();

         // Save the instance only when this iteration's crawler exists and has a user;
         // isset() is false when $twitter_crawler is still null.
         if (isset($twitter_crawler->user)) {
             $instance_dao->save($instance, $twitter_crawler->user->post_count, $logger);
         }
         Reporter::reportVersion($instance);
         $logger->logUserSuccess("Finished collecting data for " . $instance->network_username . " on Twitter.", __METHOD__ . ',' . __LINE__);
     }
 }
コード例 #2
0
 /**
  * Crawl Twitter data for every active Twitter instance, stalest first.
  *
  * An API accessor is built per instance, using the stored OAuth tokens when both are present
  * and non-empty, or the literal 'NOAUTH' placeholders otherwise. The fetch steps run only when
  * the accessor reports available API calls after init(). Mentions, retweets, friends and
  * followers are fetched only with real tokens; search results are fetched only without them.
  * The logger is closed once all instances have been processed.
  *
  * @return void
  */
 public function crawl()
 {
     global $db;
     $config = Config::getInstance();
     $logger = Logger::getInstance();
     $id = DAOFactory::getDAO('InstanceDAO');
     $oid = new OwnerInstanceDAO($db, $logger);
     $instances = $id->getAllActiveInstancesStalestFirstByNetwork('twitter');
     foreach ($instances as $instance) {
         $logger->setUsername($instance->network_username);
         $tokens = $oid->getOAuthTokens($instance->id);

         // Without both tokens the crawl runs unauthenticated.
         $noauth = !(isset($tokens['oauth_access_token']) && $tokens['oauth_access_token'] != ''
             && isset($tokens['oauth_access_token_secret']) && $tokens['oauth_access_token_secret'] != '');

         $api = new CrawlerTwitterAPIAccessorOAuth(
             $noauth ? 'NOAUTH' : $tokens['oauth_access_token'],
             $noauth ? 'NOAUTH' : $tokens['oauth_access_token_secret'],
             $config->getValue('oauth_consumer_key'),
             $config->getValue('oauth_consumer_secret'),
             $instance,
             $config->getValue('archive_limit')
         );
         $crawler = new TwitterCrawler($instance, $api, $db);
         $api->init();
         if ($api->available_api_calls_for_crawler > 0) {
             $id->updateLastRun($instance->id);
             // No auth req'd
             $crawler->fetchInstanceUserInfo();
             // No auth for public Twitter users
             $crawler->fetchInstanceUserTweets();
             if (!$noauth) {
                 // Auth req'd, for calling user only
                 $crawler->fetchInstanceUserMentions();
                 $crawler->fetchRetweetsOfInstanceUser();
                 $crawler->fetchInstanceUserFriends();
                 $crawler->fetchInstanceUserFollowers();
             }
             $crawler->fetchStrayRepliedToTweets();
             $crawler->fetchUnloadedFollowerDetails();
             $crawler->fetchFriendTweetsAndFriends();
             // TODO: Get direct messages
             // TODO: Gather favorites data
             if ($noauth) {
                 // No auth req'd
                 $crawler->fetchSearchResults($instance->network_username);
             }
             $crawler->cleanUpFollows();

             // Save the instance only when the crawler has an owner object to read the post
             // count from; dereferencing an unset owner_object would pass a bogus count.
             if (isset($crawler->owner_object)) {
                 $id->save($crawler->instance, $crawler->owner_object->post_count, $logger, $api);
             }
         }
     }
     // Close logging
     $logger->close();
 }
コード例 #3
0
 /**
  * With the 'fetchUserTimelineForRetweet' caller limit set to 0 remaining calls, checks that
  * fetchRetweetsOfInstanceUser() leaves the fixture post's retweet_count_cache at 0.
  */
 public function testFetchRetweetsOfInstanceuserBudget()
 {
     self::setUpInstanceUserGinaTrapani();

     // Crawl limit budget: count 2, nothing remaining.
     $this->api->setCallerLimits(array(
         'fetchUserTimelineForRetweet' => array('count' => 2, 'remaining' => 0),
     ));

     $crawler = new TwitterCrawler($this->instance, $this->api);
     $crawler->fetchInstanceUserInfo();

     // The builder is held in a local for the rest of the test; presumably the fixture row
     // is tied to the builder's lifetime — TODO confirm.
     $retweeted_post_id = '14947487415';
     $post_fixture = FixtureBuilder::build('posts', array(
         'post_id' => $retweeted_post_id,
         'author_user_id' => '930061',
         'author_username' => 'ginatrapani',
         'author_fullname' => 'Gina Trapani',
         'post_text' => '"Wearing your new conference tee shirt does NOT count as dressing up."',
         'pub_date' => '-1d',
         'reply_count_cache' => 1,
         'old_retweet_count_cache' => 0,
         'retweet_count_cache' => 0,
         'retweet_count_api' => 0,
     ));

     $posts = DAOFactory::getDAO('PostDAO');
     $crawler->fetchRetweetsOfInstanceUser();

     $stored_post = $posts->getPost($retweeted_post_id, 'twitter');
     $this->assertEqual($stored_post->retweet_count_cache, 0, '0 new-style retweets from cache count');
 }
コード例 #4
0
    /**
     * Runs fetchRetweetsOfInstanceUser() twice against a fixture post and checks that the
     * retweet count cache and the number of loaded retweets are 3 after each run, i.e. the
     * second crawl adds no duplicates. Also checks the single retweet loaded for a second post.
     */
    public function testFetchRetweetsOfInstanceuser() {
        $retweeted_post_id = 14947487415;
        $second_post_id = 12722783896;

        self::setUpInstanceUserGinaTrapani();
        $crawler = new TwitterCrawler($this->instance, $this->api);
        $crawler->fetchInstanceUserInfo();

        // Load the retweeted tweet into the db first. The 'new-style' retweet count is stored in
        // 'retweet_count_cache'; it need not match the number of retweet rows in the database,
        // although in this test case it does.
        $post_fixture = FixtureBuilder::build('posts', array(
            'post_id' => $retweeted_post_id,
            'author_user_id' => 930061,
            'author_username' => 'ginatrapani',
            'author_fullname' => 'Gina Trapani',
            'post_text' => '"Wearing your new conference tee shirt does NOT count as dressing up."',
            'pub_date' => '-1d',
            'reply_count_cache' => 1,
            'old_retweet_count_cache' => 0,
            'retweet_count_cache' => 3,
        ));

        $post_dao = DAOFactory::getDAO('PostDAO');

        // First crawler run.
        $crawler->fetchRetweetsOfInstanceUser();
        $stored_post = $post_dao->getPost($retweeted_post_id, 'twitter');
        $this->assertEqual($stored_post->retweet_count_cache, 3, '3 new-style retweets detected');
        $loaded_retweets = $post_dao->getRetweetsOfPost($retweeted_post_id, 'twitter', true);
        $this->assertEqual(count($loaded_retweets), 3, '3 retweets loaded');

        // Second crawler run: duplicate posts must not go into the db.
        self::setUpInstanceUserGinaTrapani();
        $crawler = new TwitterCrawler($this->instance, $this->api);
        $crawler->fetchInstanceUserInfo();
        $crawler->fetchRetweetsOfInstanceUser();

        $stored_post = $post_dao->getPost($retweeted_post_id, 'twitter');
        $this->assertEqual($stored_post->retweet_count_cache, 3, '3 new-style retweets detected');
        $loaded_retweets = $post_dao->getRetweetsOfPost($retweeted_post_id, 'twitter', true);
        $this->assertEqual(count($loaded_retweets), 3, '3 retweets loaded');

        // A second post should have exactly one retweet, attributed to user 930061.
        $stored_post = $post_dao->getPost($second_post_id, 'twitter');
        $second_post_retweets = $post_dao->getRetweetsOfPost($second_post_id, 'twitter', true);
        $this->assertEqual(count($second_post_retweets), 1, '1 retweet loaded');
        $this->assertEqual($second_post_retweets[0]->in_rt_of_user_id, 930061);
    }
コード例 #5
0
 /**
  * Crawl Twitter data for each active Twitter instance the logged-in owner has access to.
  *
  * Instances the owner cannot access are skipped. For the rest, an API accessor is built from
  * the stored OAuth tokens (or the 'NOAUTH' placeholders when either token is missing or empty)
  * together with the plugin options, and the fetch steps run only if the accessor reports
  * available API calls after init(). The instance is saved when the crawler has a user set.
  *
  * @return void
  */
 public function crawl()
 {
     // Return value is not used in this method.
     Config::getInstance();
     $log = Logger::getInstance();
     $instances_dao = DAOFactory::getDAO('TwitterInstanceDAO');
     $owner_instances_dao = DAOFactory::getDAO('OwnerInstanceDAO');
     $owners_dao = DAOFactory::getDAO('OwnerDAO');

     // Plugin options carry the OAuth consumer values and the crawl settings.
     $options = DAOFactory::GetDAO('PluginOptionDAO')->getOptionsHash('twitter', true);
     $owner = $owners_dao->getByEmail(Session::getLoggedInUser());

     foreach ($instances_dao->getAllActiveInstancesStalestFirstByNetwork('twitter') as $instance) {
         if (!$owner_instances_dao->doesOwnerHaveAccess($owner, $instance)) {
             // Owner doesn't have access to this instance; let's not crawl it.
             continue;
         }

         $log->setUsername($instance->network_username);
         $log->logUserSuccess("Starting to collect data for " . $instance->network_username . " on Twitter.", __METHOD__ . ',' . __LINE__);
         $tokens = $owner_instances_dao->getOAuthTokens($instance->id);

         // Optional settings, each with its fallback when the option is absent.
         $num_twitter_errors = null;
         if (isset($options['num_twitter_errors'])) {
             $num_twitter_errors = $options['num_twitter_errors']->option_value;
         }
         $max_api_calls_per_crawl = 350;
         if (isset($options['max_api_calls_per_crawl'])) {
             $max_api_calls_per_crawl = $options['max_api_calls_per_crawl']->option_value;
         }
         $api_calls_to_leave_unmade_per_minute = 2.0;
         if (isset($options['api_calls_to_leave_unmade_per_minute'])) {
             $api_calls_to_leave_unmade_per_minute = $options['api_calls_to_leave_unmade_per_minute']->option_value;
         }

         // Authenticated only when both tokens are present and non-empty.
         $has_tokens = isset($tokens['oauth_access_token']) && $tokens['oauth_access_token'] != ''
             && isset($tokens['oauth_access_token_secret']) && $tokens['oauth_access_token_secret'] != '';
         $noauth = !$has_tokens;

         $api = new CrawlerTwitterAPIAccessorOAuth(
             $noauth ? 'NOAUTH' : $tokens['oauth_access_token'],
             $noauth ? 'NOAUTH' : $tokens['oauth_access_token_secret'],
             $options['oauth_consumer_key']->option_value,
             $options['oauth_consumer_secret']->option_value,
             $api_calls_to_leave_unmade_per_minute,
             $options['archive_limit']->option_value,
             $num_twitter_errors,
             $max_api_calls_per_crawl
         );
         $crawler = new TwitterCrawler($instance, $api);
         $api->init();

         // Nothing to do for this instance when no API calls are available.
         if (!($api->available_api_calls_for_crawler > 0)) {
             continue;
         }

         $instances_dao->updateLastRun($instance->id);

         // No auth needed for public Twitter users.
         $crawler->fetchInstanceUserTweets();
         if ($has_tokens) {
             // Auth required, for the calling user only.
             $crawler->fetchInstanceUserMentions();
             $crawler->fetchInstanceUserFriends();
             $crawler->fetchInstanceFavorites();
             $crawler->fetchInstanceUserFollowers();
             $crawler->fetchRetweetsOfInstanceUser();
             $crawler->cleanUpMissedFavsUnFavs();
         }
         $crawler->fetchStrayRepliedToTweets();
         $crawler->fetchUnloadedFollowerDetails();
         $crawler->fetchFriendTweetsAndFriends();
         if ($noauth) {
             // No auth required.
             $crawler->fetchSearchResults($instance->network_username);
         }
         $crawler->cleanUpFollows();

         // Save the instance when the crawler has a user to take the post count from.
         if (isset($crawler->user)) {
             $instances_dao->save($instance, $crawler->user->post_count, $log);
         }
         $log->logUserSuccess("Finished collecting data for " . $instance->network_username . " on Twitter.", __METHOD__ . ',' . __LINE__);
     }
 }
コード例 #6
0
 /**
  * Inserts a post row directly into tu_posts, runs fetchRetweetsOfInstanceUser() twice, and
  * checks after each run that the post's retweet_count_cache is 3 and that 3 retweets are
  * loaded for it, i.e. the second crawl adds no duplicates.
  */
 public function testFetchRetweetsOfInstanceuser()
 {
     self::setUpInstanceUserGinaTrapani();
     $tc = new TwitterCrawler($this->instance, $this->api);
     $tc->fetchInstanceUserInfo();

     // First, load the retweeted tweet into the db.
     // The double quotes around the tweet text must be escaped: unescaped they terminate the
     // PHP string literal early and the file fails to parse.
     // reply_count_cache is seeded with a random value from 0 to 4; no assertion below reads it.
     $q = "INSERT INTO tu_posts (post_id, author_user_id, author_username, author_fullname, author_avatar,\n"
         . "        post_text, source, pub_date, reply_count_cache, retweet_count_cache) VALUES (14947487415, 930061, \n"
         . "        'ginatrapani', 'Gina Trapani', 'avatar.jpg', \n"
         . "        '\"Wearing your new conference tee shirt does NOT count as dressing up.\"', 'web', \n"
         . "        '2006-01-01 00:00:00', " . rand(0, 4) . ", 0);";
     $this->db->exec($q);

     $pdao = DAOFactory::getDAO('PostDAO');
     $tc->fetchRetweetsOfInstanceUser();
     $post = $pdao->getPost(14947487415.0, 'twitter');
     $this->assertEqual($post->retweet_count_cache, 3, '3 retweets loaded');
     $retweets = $pdao->getRetweetsOfPost(14947487415.0, 'twitter', true);
     $this->assertEqual(sizeof($retweets), 3, '3 retweets loaded');

     // Make sure duplicate posts aren't going into the db on the next crawler run.
     self::setUpInstanceUserGinaTrapani();
     $tc = new TwitterCrawler($this->instance, $this->api);
     $tc->fetchInstanceUserInfo();
     $tc->fetchRetweetsOfInstanceUser();
     $post = $pdao->getPost(14947487415.0, 'twitter');
     $this->assertEqual($post->retweet_count_cache, 3, '3 retweets loaded');
     $retweets = $pdao->getRetweetsOfPost(14947487415.0, 'twitter', true);
     $this->assertEqual(sizeof($retweets), 3, '3 retweets loaded');
 }
コード例 #7
0
 /**
  * Runs fetchRetweetsOfInstanceUser() twice against a fixture post whose retweet counts start
  * at 0. After each run it expects retweet_count_cache to be 3, retweet_count_api to be 2 and
  * 3 retweets loaded; old_retweet_count_cache is checked to stay 0 after the first run. Also
  * checks the single retweet loaded for a second post.
  */
 public function testFetchRetweetsOfInstanceuser()
 {
     $retweeted_post_id = 14947487415.0;
     $second_post_id = 12722783896.0;

     self::setUpInstanceUserGinaTrapani();
     $crawler = new TwitterCrawler($this->instance, $this->api);
     $crawler->fetchInstanceUserInfo();

     // Load the retweeted tweet into the db first. The 'new-style' retweet count comes from the
     // retweet_count field in the xml and is parsed into 'retweet_count_cache'; it will not
     // necessarily match the number of retweets in the database (but does in this test case).
     $post_fixture = FixtureBuilder::build('posts', array(
         'post_id' => $retweeted_post_id,
         'author_user_id' => 930061,
         'author_username' => 'ginatrapani',
         'author_fullname' => 'Gina Trapani',
         'post_text' => '"Wearing your new conference tee shirt does NOT count as dressing up."',
         'pub_date' => '-1d',
         'reply_count_cache' => 1,
         'old_retweet_count_cache' => 0,
         'retweet_count_cache' => 0,
         'retweet_count_api' => 0,
     ));
     $post_dao = DAOFactory::getDAO('PostDAO');

     // First crawler run.
     $crawler->fetchRetweetsOfInstanceUser();
     $stored_post = $post_dao->getPost($retweeted_post_id, 'twitter');
     $this->assertEqual($stored_post->retweet_count_cache, 3, '3 new-style retweets from cache count');
     // When a processed retweet carries a <retweeted_status> element pointing to the original
     // post, and that element includes a retweet count, the original post is updated in the db
     // with that count. In this test data that count is 2, 'behind' the database info.
     $this->assertEqual($stored_post->retweet_count_api, 2, '2 new-style retweets count from API');
     // No old-style retweets should have been processed here.
     $this->assertEqual($stored_post->old_retweet_count_cache, 0, '0 old-style retweets count from API');
     $loaded_retweets = $post_dao->getRetweetsOfPost($retweeted_post_id, 'twitter', true);
     $this->assertEqual(count($loaded_retweets), 3, '3 retweets loaded');

     // Second crawler run: duplicate posts must not go into the db.
     self::setUpInstanceUserGinaTrapani();
     $crawler = new TwitterCrawler($this->instance, $this->api);
     $crawler->fetchInstanceUserInfo();
     $crawler->fetchRetweetsOfInstanceUser();

     $stored_post = $post_dao->getPost($retweeted_post_id, 'twitter');
     $this->assertEqual($stored_post->retweet_count_cache, 3, '3 new-style retweets detected');
     $this->assertEqual($stored_post->retweet_count_api, 2, '2 new-style retweets count from API');
     $loaded_retweets = $post_dao->getRetweetsOfPost($retweeted_post_id, 'twitter', true);
     $this->assertEqual(count($loaded_retweets), 3, '3 retweets loaded');

     // A second post should have exactly one retweet, attributed to user 930061.
     $stored_post = $post_dao->getPost($second_post_id, 'twitter');
     $second_post_retweets = $post_dao->getRetweetsOfPost($second_post_id, 'twitter', true);
     $this->assertEqual(count($second_post_retweets), 1, '1 retweet loaded');
     $this->assertEqual($second_post_retweets[0]->in_rt_of_user_id, 930061);
 }
コード例 #8
0
 /**
  * Runs fetchRetweetsOfInstanceUser() twice with the API data path set to
  * 'testoftwittercrawler/ginatrapani/'. After each run it expects the fixture post to have
  * retweet_count_cache 1 and retweet_count_api 1, and no retweet rows loaded for the post IDs
  * queried.
  */
 public function testFetchRetweetsOfInstanceUser()
 {
     $this->debug(__METHOD__);

     $fixture_post_id = '300000912989118466';
     $other_post_id = '300000311127457792';
     $data_folder = 'testoftwittercrawler/ginatrapani/';

     self::setUpInstanceUserGinaTrapani();
     $crawler = new TwitterCrawler($this->instance, $this->api);
     $crawler->api->to->setDataPathFolder($data_folder);

     // Load the retweeted tweet into the db first. The 'new-style' retweet count is parsed into
     // 'retweet_count_cache' in the post vals; it will not necessarily match the number of
     // retweets in the database.
     $post_fixture = FixtureBuilder::build('posts', array(
         'post_id' => $fixture_post_id,
         'author_user_id' => '930061',
         'author_username' => 'ginatrapani',
         'author_fullname' => 'Gina Trapani',
         'post_text' => '@jjg unsurprisingly Dykes Lumber in Brooklyn has a thriving t-shirt business',
         'pub_date' => '-1d',
         'reply_count_cache' => 1,
         'old_retweet_count_cache' => 0,
         'retweet_count_cache' => 0,
         'retweet_count_api' => 0,
     ));
     $posts = DAOFactory::getDAO('PostDAO');

     // First crawler run (no fetchInstanceUserInfo() call beforehand).
     $crawler->fetchRetweetsOfInstanceUser();
     $stored_post = $posts->getPost($fixture_post_id, 'twitter');
     $this->assertEqual($stored_post->retweet_count_cache, 1, '1 new-style retweet from count cache');
     // The original post's API retweet count is expected to be 1 after processing.
     $this->assertEqual($stored_post->retweet_count_api, 1, '1 new-style retweet count from API');
     // No old-style retweets should have been processed here.
     $this->assertEqual($stored_post->old_retweet_count_cache, 0, '0 old-style retweets count from API');
     // NOTE(review): this looks up retweets of a different post ID than the fixture post —
     // confirm that is intended.
     $loaded_retweets = $posts->getRetweetsOfPost($other_post_id, 'twitter', true);
     $this->assertEqual(count($loaded_retweets), 0, '0 retweets loaded');

     // Second crawler run: duplicate posts must not go into the db.
     self::setUpInstanceUserGinaTrapani();
     $crawler = new TwitterCrawler($this->instance, $this->api);
     $crawler->api->to->setDataPathFolder($data_folder);
     $crawler->fetchInstanceUserInfo();
     $crawler->fetchRetweetsOfInstanceUser();

     $stored_post = $posts->getPost($fixture_post_id, 'twitter');
     $this->assertEqual($stored_post->retweet_count_cache, 1, '1 new-style retweet from count cache');
     $this->assertEqual($stored_post->retweet_count_api, 1, '1 new-style retweet count from API');
     $loaded_retweets = $posts->getRetweetsOfPost($fixture_post_id, 'twitter', true);
     $this->assertEqual(count($loaded_retweets), 0, '0 retweets loaded');

     $stored_post = $posts->getPost($other_post_id, 'twitter');
     $other_post_retweets = $posts->getRetweetsOfPost($other_post_id, 'twitter', true);
     $this->assertEqual(count($other_post_retweets), 0, '0 retweets loaded');
     // With zero retweets expected there is no first element whose in_rt_of_user_id
     // could be checked.
 }
コード例 #9
0
 /**
  * Crawl Twitter data for each active Twitter instance the logged-in owner has access to.
  *
  * Instances the owner cannot access are skipped. For the rest, an API accessor is built from
  * the stored OAuth tokens (or the 'NOAUTH' placeholders when either token is missing or empty)
  * plus the plugin options, and the fetch steps run only if the accessor reports available API
  * calls after init(). The instance is saved when the crawler has an owner object set. The
  * logger is closed after all instances have been processed.
  *
  * @return void
  */
 public function crawl()
 {
     // Return value is not used in this method.
     Config::getInstance();
     $log = Logger::getInstance();
     $instance_dao = DAOFactory::getDAO('InstanceDAO');
     $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO');
     $owner_dao = DAOFactory::getDAO('OwnerDAO');

     // Plugin options carry the OAuth consumer values and the crawl settings.
     $options = DAOFactory::GetDAO('PluginOptionDAO')->getOptionsHash('twitter', true);
     $owner = $owner_dao->getByEmail(Session::getLoggedInUser());

     foreach ($instance_dao->getAllActiveInstancesStalestFirstByNetwork('twitter') as $instance) {
         if (!$owner_instance_dao->doesOwnerHaveAccess($owner, $instance)) {
             // Owner doesn't have access to this instance; let's not crawl it.
             continue;
         }

         $log->setUsername($instance->network_username);
         $tokens = $owner_instance_dao->getOAuthTokens($instance->id);

         $num_twitter_errors = null;
         if (isset($options['num_twitter_errors'])) {
             $num_twitter_errors = $options['num_twitter_errors']->option_value;
         }

         // Authenticated only when both tokens are present and non-empty.
         $has_tokens = isset($tokens['oauth_access_token']) && $tokens['oauth_access_token'] != ''
             && isset($tokens['oauth_access_token_secret']) && $tokens['oauth_access_token_secret'] != '';
         $noauth = !$has_tokens;

         $api = new CrawlerTwitterAPIAccessorOAuth(
             $noauth ? 'NOAUTH' : $tokens['oauth_access_token'],
             $noauth ? 'NOAUTH' : $tokens['oauth_access_token_secret'],
             $options['oauth_consumer_key']->option_value,
             $options['oauth_consumer_secret']->option_value,
             $instance,
             $options['archive_limit']->option_value,
             $num_twitter_errors
         );
         $crawler = new TwitterCrawler($instance, $api);
         $api->init();

         // Nothing to do for this instance when no API calls are available.
         if (!($api->available_api_calls_for_crawler > 0)) {
             continue;
         }

         $instance_dao->updateLastRun($instance->id);

         // No auth needed for public Twitter users.
         $crawler->fetchInstanceUserTweets();
         if ($has_tokens) {
             // Auth required, for the calling user only.
             $crawler->fetchInstanceUserMentions();
             $crawler->fetchRetweetsOfInstanceUser();
             $crawler->fetchInstanceUserFriends();
             $crawler->fetchInstanceUserFollowers();
         }
         $crawler->fetchStrayRepliedToTweets();
         $crawler->fetchUnloadedFollowerDetails();
         $crawler->fetchFriendTweetsAndFriends();
         //@TODO Gather favorites data
         if ($noauth) {
             // No auth required.
             $crawler->fetchSearchResults($instance->network_username);
         }
         $crawler->cleanUpFollows();

         // Save the instance when the crawler has an owner object to take the post count from.
         if (isset($crawler->owner_object)) {
             $instance_dao->save($instance, $crawler->owner_object->post_count, $log);
         }
     }
     // Close logging
     $log->close();
 }