/**
 * Checks that the accessor exposes the expected next-page cursor after an
 * XML response for followers/ids has been parsed.
 *
 * The response is obtained through TwitterOAuth::oAuthRequest(); the Instance
 * passed to the accessor is a placeholder row with zeroed counters.
 */
 function testIDsList()
 {
     global $THINKTANK_CFG;

     $to = new TwitterOAuth('', '', '', '');
     $result = $to->oAuthRequest('https://twitter.com/followers/ids.xml', array(), 'GET');

     // Placeholder instance row; key order matches the original fixture.
     $r = array(
         'id' => 0,
         'network_username' => '******',
         'network_user_id' => 0,
         'last_status_id' => 0,
         'last_page_fetched_replies' => 0,
         'last_page_fetched_tweets' => 0,
         'total_posts_in_system' => 0,
         'total_replies_in_system' => 0,
         'total_follows_in_system' => 0,
         'total_users_in_system' => 0,
         'is_archive_loaded_replies' => 0,
         'is_archive_loaded_follows' => 0,
         'crawler_last_run' => '1/1/2007',
         'earliest_reply_in_system' => 0,
         'api_calls_to_leave_unmade_per_minute' => 5,
         'avg_replies_per_day' => 0,
         'is_public' => 1,
         'is_active' => 1,
         'network' => 'twitter',
     );
     $i = new Instance($r);

     $api = new CrawlerTwitterAPIAccessorOAuth('111', '222', $THINKTANK_CFG['oauth_consumer_key'], $THINKTANK_CFG['oauth_consumer_secret'], $i, $THINKTANK_CFG['archive_limit']);

     // The parsed users are not inspected here. NOTE(review): presumably
     // parseXML() is what records the cursor read below - confirm.
     $api->parseXML($result);

     // assertEqual (same loose comparison as before) reports both values when
     // the check fails, which assertTrue on a pre-evaluated comparison cannot.
     $this->assertEqual($api->getNextCursor(), '1326272872342936860');
 }
 /**
  * Verifies that initializeEndpointRateLimits() fills in the "mentions"
  * endpoint's remaining-call count and reset timestamp, using the data path
  * folder configured on the accessor's underlying client.
  */
 public function testInitializeEndpointRateLimits()
 {
     $this->debug(__METHOD__);

     // Constructor arguments, named in the order they are passed.
     $oauth_token = '111';
     $oauth_token_secret = '222';
     $oauth_consumer_key = 1234;
     $oauth_consumer_secret = 4567;
     $archive_limit = 3200;
     $num_twitter_errors = 5;

     $api = new CrawlerTwitterAPIAccessorOAuth(
         $oauth_token,
         $oauth_token_secret,
         $oauth_consumer_key,
         $oauth_consumer_secret,
         $archive_limit,
         $num_twitter_errors
     );
     $api->to->setDataPathFolder('testofcrawlertwitterapiaccessoroauth/testinitializeendpointratelimits/');
     $api->initializeEndpointRateLimits();

     $mentions_endpoint = $api->endpoints["mentions"];
     $this->assertEqual($mentions_endpoint->getRemaining(), 15);
     $this->assertEqual($mentions_endpoint->getReset(), 1361069069);
 }
Пример #3
0
/**
 * Crawls every active Twitter instance, stalest first.
 *
 * For each instance an API accessor is created with the owner's stored OAuth
 * tokens, or with the 'NOAUTH' placeholders when either token is missing or
 * empty. If the accessor reports API calls available for the crawler, the
 * instance's last-run marker is updated, the fetch steps run in a fixed
 * order and the instance is saved. The logger is closed once all instances
 * have been processed.
 *
 * Reads the globals $THINKTANK_CFG and $db.
 */
function twitter_crawl()
{
    global $THINKTANK_CFG;
    global $db;
    global $conn;

    $logger = new Logger($THINKTANK_CFG['log_location']);
    $instance_dao = new InstanceDAO($db, $logger);
    $owner_instance_dao = new OwnerInstanceDAO($db, $logger);

    foreach ($instance_dao->getAllActiveInstancesStalestFirstByNetwork('twitter') as $instance) {
        $logger->setUsername($instance->network_username);
        $tokens = $owner_instance_dao->getOAuthTokens($instance->id);

        // Authenticated crawling needs both the token and its secret to be set and non-empty.
        $has_tokens = isset($tokens['oauth_access_token']) && $tokens['oauth_access_token'] != ''
            && isset($tokens['oauth_access_token_secret']) && $tokens['oauth_access_token_secret'] != '';
        $noauth = !$has_tokens;

        $access_token = $noauth ? 'NOAUTH' : $tokens['oauth_access_token'];
        $access_token_secret = $noauth ? 'NOAUTH' : $tokens['oauth_access_token_secret'];
        $api = new CrawlerTwitterAPIAccessorOAuth($access_token, $access_token_secret, $THINKTANK_CFG['oauth_consumer_key'], $THINKTANK_CFG['oauth_consumer_secret'], $instance, $THINKTANK_CFG['archive_limit']);

        $crawler = new TwitterCrawler($instance, $logger, $api, $db);
        // Not used further in this function; kept because construction may have side effects (unverified).
        $cfg = new Config($instance->network_username, $instance->network_user_id);
        $api->init($logger);

        // Nothing to do for this instance when no API calls are left for the crawler.
        if (!($api->available_api_calls_for_crawler > 0)) {
            continue;
        }

        $instance_dao->updateLastRun($instance->id);

        // No auth req'd
        $crawler->fetchInstanceUserInfo();
        // No auth for public Twitter users
        $crawler->fetchInstanceUserTweets();

        if (!$noauth) {
            // Auth req'd, for calling user only (applies to all four calls)
            $crawler->fetchInstanceUserRetweetsByMe();
            $crawler->fetchInstanceUserMentions();
            $crawler->fetchInstanceUserFriends();
            $crawler->fetchInstanceUserFollowers();
        }

        $crawler->fetchStrayRepliedToTweets();
        $crawler->fetchUnloadedFollowerDetails();
        $crawler->fetchFriendTweetsAndFriends();
        // TODO: Get direct messages
        // TODO: Gather favorites data

        if ($noauth) {
            // No auth req'd
            $crawler->fetchSearchResults($instance->network_username);
        }

        $crawler->cleanUpFollows();
        // Save instance
        $instance_dao->save($crawler->instance, $crawler->owner_object->post_count, $logger, $api);
    }

    // Close logging
    $logger->close();
}
 /**
  * Checks that the accessor exposes the expected next-page cursor after an
  * XML response for followers/ids has been parsed.
  *
  * $THINKTANK_CFG is declared global and the Instance is built locally:
  * without that, both were undefined inside this method and the accessor
  * was constructed from null values.
  */
 function testIDsList()
 {
     global $THINKTANK_CFG;

     $to = new TwitterOAuth('', '', '', '');
     $result = $to->oAuthRequest('https://twitter.com/followers/ids.xml', array(), 'GET');

     // Placeholder instance row with zeroed counters.
     // NOTE(review): field list assumed from the Instance row used by other
     // tests of this accessor - confirm against the Instance constructor.
     $i = new Instance(array(
         'id' => 0,
         'network_username' => '******',
         'network_user_id' => 0,
         'last_status_id' => 0,
         'last_page_fetched_replies' => 0,
         'last_page_fetched_tweets' => 0,
         'total_posts_in_system' => 0,
         'total_replies_in_system' => 0,
         'total_follows_in_system' => 0,
         'total_users_in_system' => 0,
         'is_archive_loaded_replies' => 0,
         'is_archive_loaded_follows' => 0,
         'crawler_last_run' => '1/1/2007',
         'earliest_reply_in_system' => 0,
         'api_calls_to_leave_unmade_per_minute' => 5,
         'avg_replies_per_day' => 0,
         'is_public' => 1,
         'is_active' => 1,
         'network' => 'twitter',
     ));

     $api = new CrawlerTwitterAPIAccessorOAuth('111', '222', $THINKTANK_CFG['oauth_consumer_key'], $THINKTANK_CFG['oauth_consumer_secret'], $i, $THINKTANK_CFG['archive_limit']);

     // The parsed users are not inspected here. NOTE(review): presumably
     // parseXML() is what records the cursor read below - confirm.
     $api->parseXML($result);

     // assertEqual reports both values on failure; the comparison stays loose.
     $this->assertEqual($api->getNextCursor(), '1326272872342936860');
 }
 /**
  * Parses a search.json response for "@whitehouse" and checks the post id
  * of the first parsed result.
  */
 public function testSearchResults()
 {
     global $THINKUP_CFG;

     $oauth_client = new TwitterOAuth('', '', '', '');
     $search_response = $oauth_client->http('http://search.twitter.com/search.json?q=%40whitehouse&result_type=recent');

     $accessor = new CrawlerTwitterAPIAccessorOAuth('111', '222', 1234, 1234, $this->getTestInstance(), 3200);
     $parsed_posts = $accessor->parseJSON($search_response);

     $this->assertEqual($parsed_posts[0]['post_id'], 11837318124);
 }
Пример #6
0
/**
 * Crawls every active instance, stalest first, using each owner's stored
 * OAuth tokens.
 *
 * For each instance whose accessor reports API calls available for the
 * crawler, the last-run marker is updated, the fetch steps run in a fixed
 * order (the tweet-fetching steps receive the long-URL and Flickr accessors)
 * and the instance is saved. Afterwards the logger is closed and, when the
 * global $conn is set, the database connection is closed too.
 *
 * Reads the globals $THINKTANK_CFG, $db and $conn.
 */
function twitter_crawl()
{
    global $THINKTANK_CFG;
    global $db;
    global $conn;

    $logger = new Logger($THINKTANK_CFG['log_location']);
    $instance_dao = new InstanceDAO($db, $logger);
    $owner_instance_dao = new OwnerInstanceDAO($db, $logger);
    $long_url_api = new LongUrlAPIAccessor($THINKTANK_CFG['app_title']);
    $flickr_api = new FlickrAPIAccessor($THINKTANK_CFG['flickr_api_key']);

    foreach ($instance_dao->getAllActiveInstancesStalestFirst() as $instance) {
        $logger->setUsername($instance->network_username);
        $tokens = $owner_instance_dao->getOAuthTokens($instance->id);

        $api = new CrawlerTwitterAPIAccessorOAuth($tokens['oauth_access_token'], $tokens['oauth_access_token_secret'], $THINKTANK_CFG['oauth_consumer_key'], $THINKTANK_CFG['oauth_consumer_secret'], $instance, $THINKTANK_CFG['archive_limit']);
        $crawler = new TwitterCrawler($instance, $logger, $api, $db);
        // Not used further in this function; kept because construction may have side effects (unverified).
        $cfg = new Config($instance->network_username, $instance->network_user_id);
        $api->init($logger);

        // Skip the instance when no API calls are left for the crawler.
        if (!($api->available_api_calls_for_crawler > 0)) {
            continue;
        }

        $instance_dao->updateLastRun($instance->id);

        $crawler->fetchInstanceUserInfo();
        $crawler->fetchInstanceUserTweets($long_url_api, $flickr_api);
        $crawler->fetchInstanceUserRetweetsByMe($long_url_api, $flickr_api);
        $crawler->fetchInstanceUserMentions($long_url_api, $flickr_api);
        $crawler->fetchInstanceUserFriends();
        $crawler->fetchInstanceUserFollowers();
        $crawler->fetchStrayRepliedToTweets($long_url_api, $flickr_api);
        $crawler->fetchUnloadedFollowerDetails();
        $crawler->fetchFriendTweetsAndFriends($long_url_api, $flickr_api);
        // TODO: Get direct messages
        // TODO: Gather favorites data
        $crawler->cleanUpFollows();

        // Save instance
        $instance_dao->save($crawler->instance, $crawler->owner_object->post_count, $logger, $api);
    }

    // Close logging
    $logger->close();

    // Clean up
    if (isset($conn)) {
        $db->closeConnection($conn);
    }
}
 /**
  * Exercises per-caller API call limits on the accessor.
  *
  * Without caller limits, repeated failing requests never raise. With a
  * limit of 2 for this method, "/bad_url" and "403" requests are expected to
  * raise APICallLimitExceededException once the loop counter has reached 2.
  * Requests for statuses 405, 500, 502 and 504 are expected not to raise even
  * with a limit of 1.
  *
  * NOTE(review): the limits are keyed by the string 'testAPILimit', which
  * presumably identifies the calling method - keep every apiRequest() call
  * directly inside this method unless that is confirmed otherwise.
  */
 public function testAPILimit()
 {
     $api_calls_to_leave_unmade_per_minute = 100;
     $archive_limit = 100;
     $num_twitter_errors = 100;
     $max_api_calls_per_crawl = 100;
     $api = new CrawlerTwitterAPIAccessorOAuth('an_oauth_access_token', 'an_oauth_access_token_secret', 'an_oauth_consumer_key', 'oauth_consumer_secret', $api_calls_to_leave_unmade_per_minute, $archive_limit, $num_twitter_errors, $max_api_calls_per_crawl);
     $api->init();

     // no caller limits: all ten requests go through
     for ($calls = 1; $calls <= 10; $calls++) {
         $api->apiRequest("/bad_url");
     }
     $this->assertEqual($calls, 11);

     // with caller limits, 404 ("/bad_url") and 403 errors do count against limit
     foreach (array("/bad_url", "403") as $counted_request) {
         $api->setCallerLimits(array('testAPILimit' => array('count' => 2, 'remaining' => 2)));
         try {
             for ($calls = 0; $calls <= 10; $calls++) {
                 $api->apiRequest($counted_request);
             }
             $this->fail("should throw APICallLimitExceededException");
         } catch (APICallLimitExceededException $e) {
             // The counter still holds the index of the request that raised.
             $this->assertEqual($calls, 2);
         }
     }

     // all other errors shouldn't count against caller limits
     foreach (array(405, 500, 502, 504) as $uncounted_status) {
         $api->setCallerLimits(array('testAPILimit' => array('count' => 1, 'remaining' => 1)));
         for ($calls = 0; $calls <= 1; $calls++) {
             $api->apiRequest($uncounted_status);
         }
         $this->assertEqual($calls, 2);
     }
 }
Пример #8
0
// Crawler script: loads the crawler configuration, bootstraps the application
// and crawls every instance returned by getAllInstancesStalestFirst().
// NOTE(review): this snippet ends inside the foreach body; the remainder of
// the loop is not visible here.
require_once 'config.crawler.inc.php';
// Append $INCLUDE_PATH to PHP's include path
// (presumably defined by config.crawler.inc.php - confirm).
ini_set("include_path", ini_get("include_path") . PATH_SEPARATOR . $INCLUDE_PATH);
require_once "init.php";
// Shared services used for every instance.
$db = new Database($THINKTANK_CFG);
$conn = $db->getConnection();
$logger = new Logger($THINKTANK_CFG['log_location']);
$id = new InstanceDAO($db, $logger);
$oid = new OwnerInstanceDAO($db, $logger);
$lurlapi = new LongUrlAPIAccessor($THINKTANK_CFG['app_title']);
$flickrapi = new FlickrAPIAccessor($THINKTANK_CFG['flickr_api_key']);
$instances = $id->getAllInstancesStalestFirst();
foreach ($instances as $i) {
    $logger->setUsername($i->twitter_username);
    // Build the API accessor from the instance owner's stored OAuth tokens.
    $tokens = $oid->getOAuthTokens($i->id);
    $api = new CrawlerTwitterAPIAccessorOAuth($tokens['oauth_access_token'], $tokens['oauth_access_token_secret'], $THINKTANK_CFG['oauth_consumer_key'], $THINKTANK_CFG['oauth_consumer_secret'], $i, $THINKTANK_CFG['archive_limit']);
    $crawler = new Crawler($i, $logger, $api, $db);
    // $cfg is not used in the visible part of the loop.
    $cfg = new Config($i->twitter_username, $i->twitter_user_id);
    $api->init($logger);
    // Only crawl when the accessor reports API calls available for the crawler.
    if ($api->available_api_calls_for_crawler > 0) {
        $id->updateLastRun($i->id);
        $crawler->fetchInstanceUserInfo();
        $crawler->fetchInstanceUserTweets($lurlapi, $flickrapi);
        $crawler->fetchInstanceUserRetweetsByMe($lurlapi, $flickrapi);
        $crawler->fetchInstanceUserMentions($lurlapi, $flickrapi);
        $crawler->fetchInstanceUserFriends();
        $crawler->fetchInstanceUserFollowers();
        $crawler->fetchStrayRepliedToTweets($lurlapi, $flickrapi);
        $crawler->fetchUnloadedFollowerDetails();
        $crawler->fetchFriendTweetsAndFriends($lurlapi, $flickrapi);
        // TODO: Get direct messages
Пример #9
0
 /**
  * Collects Twitter data for every active Twitter instance, stalest first,
  * that the currently logged-in owner has access to.
  *
  * The accessor for each instance is created with the stored OAuth tokens,
  * or with the 'NOAUTH' placeholders when either token is missing or empty.
  * Consumer key/secret, archive limit and the optional tuning values come
  * from the 'twitter' plugin options. When the accessor reports API calls
  * available for the crawler, the fetch steps run in a fixed order and the
  * instance is saved if the crawler has a user set.
  */
 public function crawl()
 {
     $config = Config::getInstance();
     $logger = Logger::getInstance();
     $instance_dao = DAOFactory::getDAO('TwitterInstanceDAO');
     $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO');
     $owner_dao = DAOFactory::getDAO('OwnerDAO');

     // get oauth values
     $plugin_option_dao = DAOFactory::getDAO('PluginOptionDAO');
     $options = $plugin_option_dao->getOptionsHash('twitter', true);
     $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser());

     foreach ($instance_dao->getAllActiveInstancesStalestFirstByNetwork('twitter') as $instance) {
         // Owner doesn't have access to this instance; let's not crawl it.
         if (!$owner_instance_dao->doesOwnerHaveAccess($current_owner, $instance)) {
             continue;
         }

         $logger->setUsername($instance->network_username);
         $logger->logUserSuccess("Starting to collect data for " . $instance->network_username . " on Twitter.", __METHOD__ . ',' . __LINE__);

         $tokens = $owner_instance_dao->getOAuthTokens($instance->id);

         // Optional plugin settings, with the defaults used when an option is not set.
         $num_twitter_errors = null;
         if (isset($options['num_twitter_errors'])) {
             $num_twitter_errors = $options['num_twitter_errors']->option_value;
         }
         $max_api_calls_per_crawl = 350;
         if (isset($options['max_api_calls_per_crawl'])) {
             $max_api_calls_per_crawl = $options['max_api_calls_per_crawl']->option_value;
         }
         $api_calls_to_leave_unmade_per_minute = 2.0;
         if (isset($options['api_calls_to_leave_unmade_per_minute'])) {
             $api_calls_to_leave_unmade_per_minute = $options['api_calls_to_leave_unmade_per_minute']->option_value;
         }

         // Authenticated crawling needs both the token and its secret to be set and non-empty.
         $has_tokens = isset($tokens['oauth_access_token']) && $tokens['oauth_access_token'] != ''
             && isset($tokens['oauth_access_token_secret']) && $tokens['oauth_access_token_secret'] != '';
         $noauth = !$has_tokens;

         $access_token = $noauth ? 'NOAUTH' : $tokens['oauth_access_token'];
         $access_token_secret = $noauth ? 'NOAUTH' : $tokens['oauth_access_token_secret'];
         $api = new CrawlerTwitterAPIAccessorOAuth($access_token, $access_token_secret, $options['oauth_consumer_key']->option_value, $options['oauth_consumer_secret']->option_value, $api_calls_to_leave_unmade_per_minute, $options['archive_limit']->option_value, $num_twitter_errors, $max_api_calls_per_crawl);

         $crawler = new TwitterCrawler($instance, $api);
         $api->init();

         // Skip the instance when no API calls are left for the crawler.
         if (!($api->available_api_calls_for_crawler > 0)) {
             continue;
         }

         $instance_dao->updateLastRun($instance->id);

         // No auth req'd
         //$crawler->fetchInstanceUserInfo();
         // No auth for public Twitter users
         $crawler->fetchInstanceUserTweets();

         if (!$noauth) {
             // Auth req'd, for calling user only
             $crawler->fetchInstanceUserMentions();
             $crawler->fetchInstanceUserFriends();
             $crawler->fetchInstanceFavorites();
             $crawler->fetchInstanceUserFollowers();
             $crawler->fetchRetweetsOfInstanceUser();
             $crawler->cleanUpMissedFavsUnFavs();
         }

         $crawler->fetchStrayRepliedToTweets();
         $crawler->fetchUnloadedFollowerDetails();
         $crawler->fetchFriendTweetsAndFriends();

         if ($noauth) {
             // No auth req'd
             $crawler->fetchSearchResults($instance->network_username);
         }

         $crawler->cleanUpFollows();

         // Save instance
         if (isset($crawler->user)) {
             $instance_dao->save($instance, $crawler->user->post_count, $logger);
         }
         $logger->logUserSuccess("Finished collecting data for " . $instance->network_username . " on Twitter.", __METHOD__ . ',' . __LINE__);
     }
 }
 /**
  * Parses the response for a user_timeline request on a nonexistent user
  * and checks the 'error' and 'request' fields returned by parseError().
  */
 public function testParseError()
 {
     $oauth_client = new TwitterOAuth('', '', '', '');

     //Public statuses
     $error_response = $oauth_client->http('https://twitter.com/statuses/user_timeline/ginatrasdfasdfasdapani.xml?count=100');

     $accessor = new CrawlerTwitterAPIAccessorOAuth('111', '222', 1234, 1234, 5, 3200, 5, 350);
     $parsed_error = $accessor->parseError($error_response);
     $this->debug(Utils::varDumpToString($parsed_error));

     $this->assertEqual($parsed_error['error'], 'Not found');
     $this->assertEqual($parsed_error['request'], '/statuses/user_timeline/ginatrasdfasdfasdapani.xml?count=100');
 }
 /**
  * Parses a search.json response for "@whitehouse" and checks the post id
  * of the first parsed result.
  *
  * The accessor is built around a placeholder Instance row with zeroed
  * counters and the OAuth consumer settings from the global configuration.
  */
 function testSearchResults()
 {
     global $THINKTANK_CFG;

     // Placeholder instance row; key order matches the original fixture.
     $instance_row = array(
         "id" => 0,
         'network_username' => '******',
         'network_user_id' => 0,
         'last_status_id' => 0,
         'last_page_fetched_replies' => 0,
         'last_page_fetched_tweets' => 0,
         'total_posts_in_system' => 0,
         'total_replies_in_system' => 0,
         'total_follows_in_system' => 0,
         'total_users_in_system' => 0,
         'is_archive_loaded_replies' => 0,
         'is_archive_loaded_follows' => 0,
         'crawler_last_run' => '1/1/2007',
         'earliest_reply_in_system' => 0,
         'api_calls_to_leave_unmade_per_minute' => 5,
         'avg_replies_per_day' => 0,
         'is_public' => 1,
         'is_active' => 1,
         'network' => 'twitter',
     );
     $instance = new Instance($instance_row);

     $oauth_client = new TwitterOAuth('', '', '', '');
     $search_response = $oauth_client->noAuthRequest('http://search.twitter.com/search.json?q=%40whitehouse&result_type=recent');

     $accessor = new CrawlerTwitterAPIAccessorOAuth('111', '222', $THINKTANK_CFG['oauth_consumer_key'], $THINKTANK_CFG['oauth_consumer_secret'], $instance, $THINKTANK_CFG['archive_limit']);
     $parsed_posts = $accessor->parseJSON($search_response);

     $this->assertEqual($parsed_posts[0]['post_id'], 11837318124.0);
 }
 /**
  * Parses a public user_timeline XML response and checks the first parsed
  * post: its id, and that 'is_protected' is the integer 0.
  */
 public function testParseXMLStatusesPublic()
 {
     $oauth_client = new TwitterOAuth('', '', '', '');

     //Public statuses
     $timeline_xml = $oauth_client->http('https://twitter.com/statuses/user_timeline/ginatrapani.xml?count=100');

     $accessor = new CrawlerTwitterAPIAccessorOAuth('111', '222', 1234, 1234, 5, 3200, 5, 350);
     $parsed_posts = $accessor->parseXML($timeline_xml);
     $this->debug(Utils::varDumpToString($parsed_posts));

     $first_post = $parsed_posts[0];
     $this->assertEqual($first_post['post_id'], '14846078418');
     $this->assertIsa($first_post['is_protected'], 'int');
     $this->assertEqual($first_post['is_protected'], 0);
 }
Пример #13
0
// Load the crawler configuration
// ($INCLUDE_PATH is presumably defined there - confirm).
require_once 'config.crawler.inc.php';
// Append $INCLUDE_PATH using PATH_SEPARATOR rather than a literal ":" so the
// resulting include path is also valid on Windows, where entries are
// separated by ";".
ini_set("include_path", ini_get("include_path") . PATH_SEPARATOR . $INCLUDE_PATH);
require_once "init.php";
// Instantiate and initialize needed objects
// NOTE(review): this snippet ends inside the foreach body; the remainder of
// the loop is not visible here.
$db = new Database();
$conn = $db->getConnection();
$id = new InstanceDAO();
$oid = new OwnerInstanceDAO();
$instances = $id->getAllInstancesStalestFirst();
foreach ($instances as $i) {
    // Per-instance collaborators: crawler, config and a logger created from the instance's username.
    $crawler = new Crawler($i);
    $cfg = new Config($i->twitter_username, $i->twitter_user_id);
    $logger = new Logger($i->twitter_username);
    // Build the API accessor from the instance owner's stored OAuth tokens.
    $tokens = $oid->getOAuthTokens($i->id);
    $api = new CrawlerTwitterAPIAccessorOAuth($tokens['oauth_access_token'], $tokens['oauth_access_token_secret'], $cfg, $i);
    $api->init($logger);
    // Only crawl when the accessor reports API calls available for the crawler.
    if ($api->available_api_calls_for_crawler > 0) {
        $id->updateLastRun($i->id);
        // Every crawler step receives the config, API accessor and logger explicitly.
        $crawler->fetchInstanceUserInfo($cfg, $api, $logger);
        $crawler->fetchInstanceUserTweets($cfg, $api, $logger);
        $crawler->fetchInstanceUserReplies($cfg, $api, $logger);
        $crawler->fetchInstanceUserFriends($cfg, $api, $logger);
        $crawler->fetchInstanceUserFollowers($cfg, $api, $logger);
        $crawler->fetchStrayRepliedToTweets($cfg, $api, $logger);
        $crawler->fetchUnloadedFollowerDetails($cfg, $api, $logger);
        $crawler->fetchFriendTweetsAndFriends($cfg, $api, $logger);
        // TODO: Get direct messages
        // TODO: Gather favorites data
        $crawler->cleanUpFollows($cfg, $api, $logger);
        // Save instance
Пример #14
0
 /**
  * Collects Twitter data for every active Twitter instance, stalest first,
  * that the currently logged-in owner has access to, then closes the logger.
  *
  * The accessor for each instance is created with the stored OAuth tokens,
  * or with the 'NOAUTH' placeholders when either token is missing or empty.
  * Consumer key/secret and archive limit come from the 'twitter' plugin
  * options. When the accessor reports API calls available for the crawler,
  * the fetch steps run in a fixed order and the instance is saved if the
  * crawler has an owner object set.
  */
 public function crawl()
 {
     $config = Config::getInstance();
     $logger = Logger::getInstance();
     $instance_dao = DAOFactory::getDAO('InstanceDAO');
     $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO');
     $owner_dao = DAOFactory::getDAO('OwnerDAO');

     // get oauth values
     $plugin_option_dao = DAOFactory::getDAO('PluginOptionDAO');
     $options = $plugin_option_dao->getOptionsHash('twitter', true);
     $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser());

     foreach ($instance_dao->getAllActiveInstancesStalestFirstByNetwork('twitter') as $instance) {
         // Owner doesn't have access to this instance; let's not crawl it.
         if (!$owner_instance_dao->doesOwnerHaveAccess($current_owner, $instance)) {
             continue;
         }

         $logger->setUsername($instance->network_username);
         $tokens = $owner_instance_dao->getOAuthTokens($instance->id);

         // Optional plugin setting; null when the option is not set.
         $num_twitter_errors = null;
         if (isset($options['num_twitter_errors'])) {
             $num_twitter_errors = $options['num_twitter_errors']->option_value;
         }

         // Authenticated crawling needs both the token and its secret to be set and non-empty.
         $has_tokens = isset($tokens['oauth_access_token']) && $tokens['oauth_access_token'] != ''
             && isset($tokens['oauth_access_token_secret']) && $tokens['oauth_access_token_secret'] != '';
         $noauth = !$has_tokens;

         $access_token = $noauth ? 'NOAUTH' : $tokens['oauth_access_token'];
         $access_token_secret = $noauth ? 'NOAUTH' : $tokens['oauth_access_token_secret'];
         $api = new CrawlerTwitterAPIAccessorOAuth($access_token, $access_token_secret, $options['oauth_consumer_key']->option_value, $options['oauth_consumer_secret']->option_value, $instance, $options['archive_limit']->option_value, $num_twitter_errors);

         $crawler = new TwitterCrawler($instance, $api);
         $api->init();

         // Skip the instance when no API calls are left for the crawler.
         if (!($api->available_api_calls_for_crawler > 0)) {
             continue;
         }

         $instance_dao->updateLastRun($instance->id);

         // No auth req'd
         //$crawler->fetchInstanceUserInfo();
         // No auth for public Twitter users
         $crawler->fetchInstanceUserTweets();

         if (!$noauth) {
             // Auth req'd, for calling user only
             $crawler->fetchInstanceUserMentions();
             $crawler->fetchRetweetsOfInstanceUser();
             $crawler->fetchInstanceUserFriends();
             $crawler->fetchInstanceUserFollowers();
         }

         $crawler->fetchStrayRepliedToTweets();
         $crawler->fetchUnloadedFollowerDetails();
         $crawler->fetchFriendTweetsAndFriends();
         //@TODO Gather favorites data

         if ($noauth) {
             // No auth req'd
             $crawler->fetchSearchResults($instance->network_username);
         }

         $crawler->cleanUpFollows();

         // Save instance
         if (isset($crawler->owner_object)) {
             $instance_dao->save($instance, $crawler->owner_object->post_count, $logger);
         }
     }

     // Close logging
     $logger->close();
 }