/**
 * Checks that the next-page cursor is extracted from a followers/ids.xml response.
 *
 * Requests the follower-ID list through TwitterOAuth, feeds the response to
 * CrawlerTwitterAPIAccessorOAuth::parseXML() and then asserts the value
 * reported by getNextCursor().
 *
 * Reads the consumer key/secret and archive limit from the global $THINKTANK_CFG.
 */
function testIDsList() {
    global $THINKTANK_CFG;

    $to = new TwitterOAuth('', '', '', '');
    $result = $to->oAuthRequest('https://twitter.com/followers/ids.xml', array(), 'GET');

    // Field values for the Instance handed to the API accessor.
    $r = array();
    $r["id"] = 0;
    $r['network_username'] = '******';
    $r['network_user_id'] = 0;
    $r['last_status_id'] = 0;
    $r['last_page_fetched_replies'] = 0;
    $r['last_page_fetched_tweets'] = 0;
    $r['total_posts_in_system'] = 0;
    $r['total_replies_in_system'] = 0;
    $r['total_follows_in_system'] = 0;
    $r['total_users_in_system'] = 0;
    $r['is_archive_loaded_replies'] = 0;
    $r['is_archive_loaded_follows'] = 0;
    $r['crawler_last_run'] = '1/1/2007';
    $r['earliest_reply_in_system'] = 0;
    $r['api_calls_to_leave_unmade_per_minute'] = 5;
    $r['avg_replies_per_day'] = 0;
    $r['is_public'] = 1;
    $r['is_active'] = 1;
    $r['network'] = 'twitter';
    $i = new Instance($r);

    $api = new CrawlerTwitterAPIAccessorOAuth('111', '222', $THINKTANK_CFG['oauth_consumer_key'],
        $THINKTANK_CFG['oauth_consumer_secret'], $i, $THINKTANK_CFG['archive_limit']);

    // The parsed result itself is not inspected; parseXML() is called first because
    // it presumably records the cursor that getNextCursor() returns (confirm in the accessor).
    $api->parseXML($result);
    $next_cursor = $api->getNextCursor();

    // assertEqual reports both values on failure, unlike assertTrue($a == $b).
    $this->assertEqual($next_cursor, '1326272872342936860');
}
/**
 * Checks the "mentions" endpoint values after initializeEndpointRateLimits().
 *
 * The accessor's `to` object is pointed at a data path folder before the rate
 * limits are initialized; the test then asserts the remaining-call count and
 * the reset timestamp of the "mentions" endpoint.
 */
public function testInitializeEndpointRateLimits() {
    $this->debug(__METHOD__);

    // Named locals document which constructor argument each value fills.
    $oauth_token = '111';
    $oauth_token_secret = '222';
    $oauth_consumer_key = 1234;
    $oauth_consumer_secret = 4567;
    $archive_limit = 3200;
    $num_twitter_errors = 5;

    $accessor = new CrawlerTwitterAPIAccessorOAuth(
        $oauth_token,
        $oauth_token_secret,
        $oauth_consumer_key,
        $oauth_consumer_secret,
        $archive_limit,
        $num_twitter_errors
    );
    $accessor->to->setDataPathFolder('testofcrawlertwitterapiaccessoroauth/testinitializeendpointratelimits/');
    $accessor->initializeEndpointRateLimits();

    $this->assertEqual($accessor->endpoints["mentions"]->getRemaining(), 15);
    $this->assertEqual($accessor->endpoints["mentions"]->getReset(), 1361069069);
}
/**
 * Crawls Twitter data for every active Twitter instance.
 *
 * Instances are taken from InstanceDAO::getAllActiveInstancesStalestFirstByNetwork('twitter').
 * For each one an API accessor is built, using the 'NOAUTH' placeholder tokens
 * when no non-empty OAuth token pair is stored for the instance. If the accessor
 * reports API calls available for the crawler, the fetch steps run and the
 * instance is saved. The logger is closed once all instances are processed.
 *
 * Reads the globals $THINKTANK_CFG and $db.
 */
function twitter_crawl() {
    global $THINKTANK_CFG;
    global $db;
    global $conn;

    $logger = new Logger($THINKTANK_CFG['log_location']);
    $instance_dao = new InstanceDAO($db, $logger);
    $owner_instance_dao = new OwnerInstanceDAO($db, $logger);

    foreach ($instance_dao->getAllActiveInstancesStalestFirstByNetwork('twitter') as $instance) {
        $logger->setUsername($instance->network_username);
        $tokens = $owner_instance_dao->getOAuthTokens($instance->id);

        // Authenticated crawling needs both the access token and its secret, non-empty.
        $has_tokens = isset($tokens['oauth_access_token'])
            && $tokens['oauth_access_token'] != ''
            && isset($tokens['oauth_access_token_secret'])
            && $tokens['oauth_access_token_secret'] != '';
        $noauth = !$has_tokens;

        $access_token = $has_tokens ? $tokens['oauth_access_token'] : 'NOAUTH';
        $access_token_secret = $has_tokens ? $tokens['oauth_access_token_secret'] : 'NOAUTH';
        $api = new CrawlerTwitterAPIAccessorOAuth($access_token, $access_token_secret,
            $THINKTANK_CFG['oauth_consumer_key'], $THINKTANK_CFG['oauth_consumer_secret'],
            $instance, $THINKTANK_CFG['archive_limit']);

        $crawler = new TwitterCrawler($instance, $logger, $api, $db);
        // Not referenced again in this function; kept because constructing it may
        // have side effects (confirm in Config).
        $cfg = new Config($instance->network_username, $instance->network_user_id);

        $api->init($logger);
        if ($api->available_api_calls_for_crawler <= 0) {
            // Nothing left to spend on this instance.
            continue;
        }

        $instance_dao->updateLastRun($instance->id);

        // These two steps run with or without authentication.
        $crawler->fetchInstanceUserInfo();
        $crawler->fetchInstanceUserTweets();

        if ($has_tokens) {
            // These steps require authentication as the calling user.
            $crawler->fetchInstanceUserRetweetsByMe();
            $crawler->fetchInstanceUserMentions();
            $crawler->fetchInstanceUserFriends();
            $crawler->fetchInstanceUserFollowers();
        }

        $crawler->fetchStrayRepliedToTweets();
        $crawler->fetchUnloadedFollowerDetails();
        $crawler->fetchFriendTweetsAndFriends();

        // TODO: Get direct messages
        // TODO: Gather favorites data

        if ($noauth) {
            // Search needs no authentication; it is only run for unauthenticated instances.
            $crawler->fetchSearchResults($instance->network_username);
        }

        $crawler->cleanUpFollows();

        // Persist the crawl results for this instance.
        $instance_dao->save($crawler->instance, $crawler->owner_object->post_count, $logger, $api);
    }

    $logger->close();
}
/**
 * Checks that the next-page cursor is extracted from a followers/ids.xml response.
 *
 * Requests the follower-ID list through TwitterOAuth, feeds the response to
 * CrawlerTwitterAPIAccessorOAuth::parseXML() and then asserts the value
 * reported by getNextCursor().
 *
 * Reads the consumer key/secret and archive limit from the global $THINKTANK_CFG.
 */
function testIDsList() {
    // Without this declaration $THINKTANK_CFG is an undefined local inside the function.
    global $THINKTANK_CFG;

    $to = new TwitterOAuth('', '', '', '');
    $result = $to->oAuthRequest('https://twitter.com/followers/ids.xml', array(), 'GET');

    // The accessor is constructed with an Instance; build one here instead of
    // passing an undefined variable.
    $r = array();
    $r["id"] = 0;
    $r['network_username'] = '******';
    $r['network_user_id'] = 0;
    $r['last_status_id'] = 0;
    $r['last_page_fetched_replies'] = 0;
    $r['last_page_fetched_tweets'] = 0;
    $r['total_posts_in_system'] = 0;
    $r['total_replies_in_system'] = 0;
    $r['total_follows_in_system'] = 0;
    $r['total_users_in_system'] = 0;
    $r['is_archive_loaded_replies'] = 0;
    $r['is_archive_loaded_follows'] = 0;
    $r['crawler_last_run'] = '1/1/2007';
    $r['earliest_reply_in_system'] = 0;
    $r['api_calls_to_leave_unmade_per_minute'] = 5;
    $r['avg_replies_per_day'] = 0;
    $r['is_public'] = 1;
    $r['is_active'] = 1;
    $r['network'] = 'twitter';
    $i = new Instance($r);

    $api = new CrawlerTwitterAPIAccessorOAuth('111', '222', $THINKTANK_CFG['oauth_consumer_key'],
        $THINKTANK_CFG['oauth_consumer_secret'], $i, $THINKTANK_CFG['archive_limit']);

    // The parsed result itself is not inspected; parseXML() is called first because
    // it presumably records the cursor that getNextCursor() returns (confirm in the accessor).
    $api->parseXML($result);
    $next_cursor = $api->getNextCursor();

    // assertEqual reports both values on failure, unlike assertTrue($a == $b).
    $this->assertEqual($next_cursor, '1326272872342936860');
}
/**
 * Checks that parseJSON() extracts the post ID of the first search result.
 *
 * Fetches the search response through TwitterOAuth::http(), parses it with the
 * API accessor and asserts the 'post_id' of the first parsed entry.
 */
public function testSearchResults() {
    global $THINKUP_CFG;

    $search_url = 'http://search.twitter.com/search.json?q=%40whitehouse&result_type=recent';

    $oauth = new TwitterOAuth('', '', '', '');
    $twitter_data = $oauth->http($search_url);

    $accessor = new CrawlerTwitterAPIAccessorOAuth('111', '222', 1234, 1234, $this->getTestInstance(), 3200);
    $parsed = $accessor->parseJSON($twitter_data);

    $this->assertEqual($parsed[0]['post_id'], 11837318124);
}
/**
 * Crawls Twitter data for every active instance, stalest first.
 *
 * For each instance an API accessor is built from the instance's stored OAuth
 * tokens. If the accessor reports API calls available for the crawler, the
 * fetch steps run and the instance is saved. Afterwards the logger is closed
 * and, when the global $conn is set, the database connection is closed too.
 *
 * Reads the globals $THINKTANK_CFG, $db and $conn.
 */
function twitter_crawl() {
    global $THINKTANK_CFG;
    global $db;
    global $conn;

    $logger = new Logger($THINKTANK_CFG['log_location']);
    $instance_dao = new InstanceDAO($db, $logger);
    $owner_instance_dao = new OwnerInstanceDAO($db, $logger);

    // Accessors handed to the fetch steps that take them as arguments.
    $long_url_api = new LongUrlAPIAccessor($THINKTANK_CFG['app_title']);
    $flickr_api = new FlickrAPIAccessor($THINKTANK_CFG['flickr_api_key']);

    foreach ($instance_dao->getAllActiveInstancesStalestFirst() as $instance) {
        $logger->setUsername($instance->network_username);

        $tokens = $owner_instance_dao->getOAuthTokens($instance->id);
        $api = new CrawlerTwitterAPIAccessorOAuth(
            $tokens['oauth_access_token'],
            $tokens['oauth_access_token_secret'],
            $THINKTANK_CFG['oauth_consumer_key'],
            $THINKTANK_CFG['oauth_consumer_secret'],
            $instance,
            $THINKTANK_CFG['archive_limit']
        );

        $crawler = new TwitterCrawler($instance, $logger, $api, $db);
        // Not referenced again in this function; kept because constructing it may
        // have side effects (confirm in Config).
        $cfg = new Config($instance->network_username, $instance->network_user_id);

        $api->init($logger);
        if ($api->available_api_calls_for_crawler <= 0) {
            // Nothing left to spend on this instance.
            continue;
        }

        $instance_dao->updateLastRun($instance->id);

        $crawler->fetchInstanceUserInfo();
        $crawler->fetchInstanceUserTweets($long_url_api, $flickr_api);
        $crawler->fetchInstanceUserRetweetsByMe($long_url_api, $flickr_api);
        $crawler->fetchInstanceUserMentions($long_url_api, $flickr_api);
        $crawler->fetchInstanceUserFriends();
        $crawler->fetchInstanceUserFollowers();
        $crawler->fetchStrayRepliedToTweets($long_url_api, $flickr_api);
        $crawler->fetchUnloadedFollowerDetails();
        $crawler->fetchFriendTweetsAndFriends($long_url_api, $flickr_api);

        // TODO: Get direct messages
        // TODO: Gather favorites data

        $crawler->cleanUpFollows();

        // Persist the crawl results for this instance.
        $instance_dao->save($crawler->instance, $crawler->owner_object->post_count, $logger, $api);
    }

    $logger->close();

    // Clean up the database connection if one was opened.
    if (isset($conn)) {
        $db->closeConnection($conn);
    }
}
/**
 * Exercises how apiRequest() interacts with per-caller call limits.
 *
 * Four scenarios are covered, in order:
 *  - no caller limits set: ten requests complete without an exception;
 *  - caller limit of 2 with "/bad_url" requests: APICallLimitExceededException
 *    is expected once two requests have been made;
 *  - caller limit of 2 with "403" requests: same expectation;
 *  - caller limit of 1 with 405/500/502/504 requests: two requests complete
 *    without an exception.
 *
 * The limits are keyed by 'testAPILimit', this method's own name, so the
 * requests are issued directly from this method body (presumably the accessor
 * matches the key against its caller; confirm in setCallerLimits/apiRequest).
 */
public function testAPILimit() {
    $api_calls_to_leave_unmade_per_minute = 100;
    $archive_limit = 100;
    $num_twitter_errors = 100;
    $max_api_calls_per_crawl = 100;

    $api = new CrawlerTwitterAPIAccessorOAuth(
        'an_oauth_access_token',
        'an_oauth_access_token_secret',
        'an_oauth_consumer_key',
        'oauth_consumer_secret',
        $api_calls_to_leave_unmade_per_minute,
        $archive_limit,
        $num_twitter_errors,
        $max_api_calls_per_crawl
    );
    $api->init();

    // Scenario 1: no caller limits, so every request goes through.
    $i = 1;
    while ($i <= 10) {
        $api->apiRequest("/bad_url");
        $i++;
    }
    $this->assertEqual($i, 11);

    // Scenario 2: with caller limits, 404 errors count against the limit.
    $api->setCallerLimits(array('testAPILimit' => array('count' => 2, 'remaining' => 2)));
    $i = 0;
    try {
        while ($i <= 10) {
            $api->apiRequest("/bad_url");
            $i++;
        }
        $this->fail("should throw APICallLimitExceededException");
    } catch (APICallLimitExceededException $e) {
        // $i holds the number of requests that completed before the exception.
        $this->assertEqual($i, 2);
    }

    // Scenario 3: with caller limits, 403 errors count against the limit as well.
    $api->setCallerLimits(array('testAPILimit' => array('count' => 2, 'remaining' => 2)));
    $i = 0;
    try {
        while ($i <= 10) {
            $api->apiRequest("403");
            $i++;
        }
        $this->fail("should throw APICallLimitExceededException");
    } catch (APICallLimitExceededException $e) {
        $this->assertEqual($i, 2);
    }

    // Scenario 4: all other errors should not count against caller limits.
    foreach (array(405, 500, 502, 504) as $status) {
        $api->setCallerLimits(array('testAPILimit' => array('count' => 1, 'remaining' => 1)));
        $i = 0;
        while ($i <= 1) {
            $api->apiRequest($status);
            $i++;
        }
        $this->assertEqual($i, 2);
    }
}
require_once 'config.crawler.inc.php'; ini_set("include_path", ini_get("include_path") . PATH_SEPARATOR . $INCLUDE_PATH); require_once "init.php"; $db = new Database($THINKTANK_CFG); $conn = $db->getConnection(); $logger = new Logger($THINKTANK_CFG['log_location']); $id = new InstanceDAO($db, $logger); $oid = new OwnerInstanceDAO($db, $logger); $lurlapi = new LongUrlAPIAccessor($THINKTANK_CFG['app_title']); $flickrapi = new FlickrAPIAccessor($THINKTANK_CFG['flickr_api_key']); $instances = $id->getAllInstancesStalestFirst(); foreach ($instances as $i) { $logger->setUsername($i->twitter_username); $tokens = $oid->getOAuthTokens($i->id); $api = new CrawlerTwitterAPIAccessorOAuth($tokens['oauth_access_token'], $tokens['oauth_access_token_secret'], $THINKTANK_CFG['oauth_consumer_key'], $THINKTANK_CFG['oauth_consumer_secret'], $i, $THINKTANK_CFG['archive_limit']); $crawler = new Crawler($i, $logger, $api, $db); $cfg = new Config($i->twitter_username, $i->twitter_user_id); $api->init($logger); if ($api->available_api_calls_for_crawler > 0) { $id->updateLastRun($i->id); $crawler->fetchInstanceUserInfo(); $crawler->fetchInstanceUserTweets($lurlapi, $flickrapi); $crawler->fetchInstanceUserRetweetsByMe($lurlapi, $flickrapi); $crawler->fetchInstanceUserMentions($lurlapi, $flickrapi); $crawler->fetchInstanceUserFriends(); $crawler->fetchInstanceUserFollowers(); $crawler->fetchStrayRepliedToTweets($lurlapi, $flickrapi); $crawler->fetchUnloadedFollowerDetails(); $crawler->fetchFriendTweetsAndFriends($lurlapi, $flickrapi); // TODO: Get direct messages
/**
 * Crawls Twitter data for each active Twitter instance the logged-in owner can access.
 *
 * Plugin options supply the OAuth consumer credentials, the archive limit and
 * the optional tuning values (num_twitter_errors, max_api_calls_per_crawl,
 * api_calls_to_leave_unmade_per_minute). Instances without a non-empty stored
 * token pair are crawled with the 'NOAUTH' placeholder tokens and skip the
 * steps that need authentication. A start and a finish message are logged per
 * crawled instance.
 */
public function crawl() {
    $config = Config::getInstance();
    $logger = Logger::getInstance();
    $instance_dao = DAOFactory::getDAO('TwitterInstanceDAO');
    $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO');
    $owner_dao = DAOFactory::getDAO('OwnerDAO');

    // OAuth values and crawl settings come from the plugin options.
    $plugin_option_dao = DAOFactory::getDAO('PluginOptionDAO');
    $options = $plugin_option_dao->getOptionsHash('twitter', true);

    $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser());
    $instances = $instance_dao->getAllActiveInstancesStalestFirstByNetwork('twitter');

    foreach ($instances as $instance) {
        if (!$owner_instance_dao->doesOwnerHaveAccess($current_owner, $instance)) {
            // Owner doesn't have access to this instance; don't crawl it.
            continue;
        }

        $logger->setUsername($instance->network_username);
        $logger->logUserSuccess("Starting to collect data for " . $instance->network_username . " on Twitter.", __METHOD__ . ',' . __LINE__);

        $tokens = $owner_instance_dao->getOAuthTokens($instance->id);

        // Optional settings, each with its fallback when the option is absent.
        $num_twitter_errors = null;
        if (isset($options['num_twitter_errors'])) {
            $num_twitter_errors = $options['num_twitter_errors']->option_value;
        }
        $max_api_calls_per_crawl = 350;
        if (isset($options['max_api_calls_per_crawl'])) {
            $max_api_calls_per_crawl = $options['max_api_calls_per_crawl']->option_value;
        }
        $api_calls_to_leave_unmade_per_minute = 2.0;
        if (isset($options['api_calls_to_leave_unmade_per_minute'])) {
            $api_calls_to_leave_unmade_per_minute = $options['api_calls_to_leave_unmade_per_minute']->option_value;
        }

        // Authenticated crawling needs both the access token and its secret, non-empty.
        $has_tokens = isset($tokens['oauth_access_token'])
            && $tokens['oauth_access_token'] != ''
            && isset($tokens['oauth_access_token_secret'])
            && $tokens['oauth_access_token_secret'] != '';
        $noauth = !$has_tokens;

        $access_token = $has_tokens ? $tokens['oauth_access_token'] : 'NOAUTH';
        $access_token_secret = $has_tokens ? $tokens['oauth_access_token_secret'] : 'NOAUTH';
        $api = new CrawlerTwitterAPIAccessorOAuth(
            $access_token,
            $access_token_secret,
            $options['oauth_consumer_key']->option_value,
            $options['oauth_consumer_secret']->option_value,
            $api_calls_to_leave_unmade_per_minute,
            $options['archive_limit']->option_value,
            $num_twitter_errors,
            $max_api_calls_per_crawl
        );

        $crawler = new TwitterCrawler($instance, $api);
        $api->init();

        if ($api->available_api_calls_for_crawler <= 0) {
            // Nothing left to spend on this instance.
            continue;
        }

        $instance_dao->updateLastRun($instance->id);

        // The fetchInstanceUserInfo() call is disabled in this version.
        //$crawler->fetchInstanceUserInfo();

        // Runs with or without authentication (public Twitter users).
        $crawler->fetchInstanceUserTweets();

        if ($has_tokens) {
            // These steps require authentication as the calling user.
            $crawler->fetchInstanceUserMentions();
            $crawler->fetchInstanceUserFriends();
            $crawler->fetchInstanceFavorites();
            $crawler->fetchInstanceUserFollowers();
            $crawler->fetchRetweetsOfInstanceUser();
            $crawler->cleanUpMissedFavsUnFavs();
        }

        $crawler->fetchStrayRepliedToTweets();
        $crawler->fetchUnloadedFollowerDetails();
        $crawler->fetchFriendTweetsAndFriends();

        if ($noauth) {
            // Search needs no authentication; it is only run for unauthenticated instances.
            $crawler->fetchSearchResults($instance->network_username);
        }

        $crawler->cleanUpFollows();

        // Save the instance only when the crawler has a user object to take the post count from.
        if (isset($crawler->user)) {
            $instance_dao->save($instance, $crawler->user->post_count, $logger);
        }

        $logger->logUserSuccess("Finished collecting data for " . $instance->network_username . " on Twitter.", __METHOD__ . ',' . __LINE__);
    }
}
/**
 * Checks that parseError() extracts the error message and the request path
 * from the response for a user timeline that does not exist.
 */
public function testParseError() {
    // Public statuses for a screen name that is not expected to exist.
    $timeline_url = 'https://twitter.com/statuses/user_timeline/ginatrasdfasdfasdapani.xml?count=100';

    $oauth = new TwitterOAuth('', '', '', '');
    $twitter_data = $oauth->http($timeline_url);

    $accessor = new CrawlerTwitterAPIAccessorOAuth('111', '222', 1234, 1234, 5, 3200, 5, 350);
    $parsed = $accessor->parseError($twitter_data);
    $this->debug(Utils::varDumpToString($parsed));

    $this->assertEqual($parsed['error'], 'Not found');
    $this->assertEqual($parsed['request'], '/statuses/user_timeline/ginatrasdfasdfasdapani.xml?count=100');
}
/**
 * Checks that parseJSON() extracts the post ID of the first search result.
 *
 * Builds an Instance from literal field values, fetches the search response
 * with TwitterOAuth::noAuthRequest(), parses it through the API accessor and
 * asserts the 'post_id' of the first parsed entry.
 *
 * Reads the consumer key/secret and archive limit from the global $THINKTANK_CFG.
 */
function testSearchResults() {
    global $THINKTANK_CFG;

    // Field values for the Instance handed to the API accessor.
    $instance_fields = array(
        'id' => 0,
        'network_username' => '******',
        'network_user_id' => 0,
        'last_status_id' => 0,
        'last_page_fetched_replies' => 0,
        'last_page_fetched_tweets' => 0,
        'total_posts_in_system' => 0,
        'total_replies_in_system' => 0,
        'total_follows_in_system' => 0,
        'total_users_in_system' => 0,
        'is_archive_loaded_replies' => 0,
        'is_archive_loaded_follows' => 0,
        'crawler_last_run' => '1/1/2007',
        'earliest_reply_in_system' => 0,
        'api_calls_to_leave_unmade_per_minute' => 5,
        'avg_replies_per_day' => 0,
        'is_public' => 1,
        'is_active' => 1,
        'network' => 'twitter'
    );
    $instance = new Instance($instance_fields);

    $oauth = new TwitterOAuth('', '', '', '');
    $twitter_data = $oauth->noAuthRequest('http://search.twitter.com/search.json?q=%40whitehouse&result_type=recent');

    $accessor = new CrawlerTwitterAPIAccessorOAuth(
        '111',
        '222',
        $THINKTANK_CFG['oauth_consumer_key'],
        $THINKTANK_CFG['oauth_consumer_secret'],
        $instance,
        $THINKTANK_CFG['archive_limit']
    );
    $parsed = $accessor->parseJSON($twitter_data);

    // The expected ID is written as a float literal, as in the original test.
    $this->assertEqual($parsed[0]['post_id'], 11837318124.0);
}
/**
 * Checks parseXML() against a public user timeline response.
 *
 * Asserts the first parsed status's 'post_id', and that its 'is_protected'
 * field is an int equal to 0.
 */
public function testParseXMLStatusesPublic() {
    // Public statuses
    $timeline_url = 'https://twitter.com/statuses/user_timeline/ginatrapani.xml?count=100';

    $oauth = new TwitterOAuth('', '', '', '');
    $twitter_data = $oauth->http($timeline_url);

    $accessor = new CrawlerTwitterAPIAccessorOAuth('111', '222', 1234, 1234, 5, 3200, 5, 350);
    $statuses = $accessor->parseXML($twitter_data);
    $this->debug(Utils::varDumpToString($statuses));

    $this->assertEqual($statuses[0]['post_id'], '14846078418');
    $this->assertIsa($statuses[0]['is_protected'], 'int');
    $this->assertEqual($statuses[0]['is_protected'], 0);
}
require_once 'config.crawler.inc.php'; ini_set("include_path", ini_get("include_path") . ":" . $INCLUDE_PATH); require_once "init.php"; // Instantiate and initialize needed objects $db = new Database(); $conn = $db->getConnection(); $id = new InstanceDAO(); $oid = new OwnerInstanceDAO(); $instances = $id->getAllInstancesStalestFirst(); foreach ($instances as $i) { $crawler = new Crawler($i); $cfg = new Config($i->twitter_username, $i->twitter_user_id); $logger = new Logger($i->twitter_username); $tokens = $oid->getOAuthTokens($i->id); $api = new CrawlerTwitterAPIAccessorOAuth($tokens['oauth_access_token'], $tokens['oauth_access_token_secret'], $cfg, $i); $api->init($logger); if ($api->available_api_calls_for_crawler > 0) { $id->updateLastRun($i->id); $crawler->fetchInstanceUserInfo($cfg, $api, $logger); $crawler->fetchInstanceUserTweets($cfg, $api, $logger); $crawler->fetchInstanceUserReplies($cfg, $api, $logger); $crawler->fetchInstanceUserFriends($cfg, $api, $logger); $crawler->fetchInstanceUserFollowers($cfg, $api, $logger); $crawler->fetchStrayRepliedToTweets($cfg, $api, $logger); $crawler->fetchUnloadedFollowerDetails($cfg, $api, $logger); $crawler->fetchFriendTweetsAndFriends($cfg, $api, $logger); // TODO: Get direct messages // TODO: Gather favorites data $crawler->cleanUpFollows($cfg, $api, $logger); // Save instance
/**
 * Crawls Twitter data for each active Twitter instance the logged-in owner can access.
 *
 * Plugin options supply the OAuth consumer credentials, the archive limit and
 * the optional num_twitter_errors value. Instances without a non-empty stored
 * token pair are crawled with the 'NOAUTH' placeholder tokens and skip the
 * steps that need authentication. The logger is closed once all instances
 * have been processed.
 */
public function crawl() {
    $config = Config::getInstance();
    $logger = Logger::getInstance();
    $instance_dao = DAOFactory::getDAO('InstanceDAO');
    $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO');
    $owner_dao = DAOFactory::getDAO('OwnerDAO');

    // OAuth values come from the plugin options.
    $plugin_option_dao = DAOFactory::getDAO('PluginOptionDAO');
    $options = $plugin_option_dao->getOptionsHash('twitter', true);

    $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser());
    $instances = $instance_dao->getAllActiveInstancesStalestFirstByNetwork('twitter');

    foreach ($instances as $instance) {
        if (!$owner_instance_dao->doesOwnerHaveAccess($current_owner, $instance)) {
            // Owner doesn't have access to this instance; don't crawl it.
            continue;
        }

        $logger->setUsername($instance->network_username);
        $tokens = $owner_instance_dao->getOAuthTokens($instance->id);

        // Optional setting; null when the option is absent.
        $num_twitter_errors = null;
        if (isset($options['num_twitter_errors'])) {
            $num_twitter_errors = $options['num_twitter_errors']->option_value;
        }

        // Authenticated crawling needs both the access token and its secret, non-empty.
        $has_tokens = isset($tokens['oauth_access_token'])
            && $tokens['oauth_access_token'] != ''
            && isset($tokens['oauth_access_token_secret'])
            && $tokens['oauth_access_token_secret'] != '';
        $noauth = !$has_tokens;

        $access_token = $has_tokens ? $tokens['oauth_access_token'] : 'NOAUTH';
        $access_token_secret = $has_tokens ? $tokens['oauth_access_token_secret'] : 'NOAUTH';
        $api = new CrawlerTwitterAPIAccessorOAuth(
            $access_token,
            $access_token_secret,
            $options['oauth_consumer_key']->option_value,
            $options['oauth_consumer_secret']->option_value,
            $instance,
            $options['archive_limit']->option_value,
            $num_twitter_errors
        );

        $crawler = new TwitterCrawler($instance, $api);
        $api->init();

        if ($api->available_api_calls_for_crawler <= 0) {
            // Nothing left to spend on this instance.
            continue;
        }

        $instance_dao->updateLastRun($instance->id);

        // The fetchInstanceUserInfo() call is disabled in this version.
        //$crawler->fetchInstanceUserInfo();

        // Runs with or without authentication (public Twitter users).
        $crawler->fetchInstanceUserTweets();

        if ($has_tokens) {
            // These steps require authentication as the calling user.
            $crawler->fetchInstanceUserMentions();
            $crawler->fetchRetweetsOfInstanceUser();
            $crawler->fetchInstanceUserFriends();
            $crawler->fetchInstanceUserFollowers();
        }

        $crawler->fetchStrayRepliedToTweets();
        $crawler->fetchUnloadedFollowerDetails();
        $crawler->fetchFriendTweetsAndFriends();

        //@TODO Gather favorites data

        if ($noauth) {
            // Search needs no authentication; it is only run for unauthenticated instances.
            $crawler->fetchSearchResults($instance->network_username);
        }

        $crawler->cleanUpFollows();

        // Save the instance only when the crawler has an owner object to take the post count from.
        if (isset($crawler->owner_object)) {
            $instance_dao->save($instance, $crawler->owner_object->post_count, $logger);
        }
    }

    $logger->close();
}