/**
 * Crawl every active Twitter instance returned by
 * InstanceDAO::getAllActiveInstancesStalestFirstByNetwork('twitter').
 *
 * For each instance the stored OAuth tokens are looked up. When either the access token or its
 * secret is missing or empty, the API accessor is created with the 'NOAUTH' placeholders, the
 * calls marked as requiring authentication are skipped, and search results for the instance's
 * username are fetched instead. Instances whose accessor reports no available API calls are left
 * untouched. The logger is closed once all instances have been processed.
 */
function twitter_crawl() {
    global $THINKTANK_CFG;
    global $db;
    global $conn;

    $logger = new Logger($THINKTANK_CFG['log_location']);
    $instance_dao = new InstanceDAO($db, $logger);
    $owner_instance_dao = new OwnerInstanceDAO($db, $logger);

    foreach ($instance_dao->getAllActiveInstancesStalestFirstByNetwork('twitter') as $instance) {
        $logger->setUsername($instance->network_username);

        $tokens = $owner_instance_dao->getOAuthTokens($instance->id);
        $has_tokens = isset($tokens['oauth_access_token'])
            && $tokens['oauth_access_token'] != ''
            && isset($tokens['oauth_access_token_secret'])
            && $tokens['oauth_access_token_secret'] != '';
        $noauth = !$has_tokens;

        // Without a usable token pair the accessor is given placeholder credentials
        $access_token = $noauth ? 'NOAUTH' : $tokens['oauth_access_token'];
        $access_token_secret = $noauth ? 'NOAUTH' : $tokens['oauth_access_token_secret'];
        $api = new CrawlerTwitterAPIAccessorOAuth(
            $access_token,
            $access_token_secret,
            $THINKTANK_CFG['oauth_consumer_key'],
            $THINKTANK_CFG['oauth_consumer_secret'],
            $instance,
            $THINKTANK_CFG['archive_limit']
        );

        $crawler = new TwitterCrawler($instance, $logger, $api, $db);
        // The Config object is not read again in this function; it is still constructed here
        $cfg = new Config($instance->network_username, $instance->network_user_id);

        $api->init($logger);

        // Nothing to do for this instance when no API calls are available
        if (!($api->available_api_calls_for_crawler > 0)) {
            continue;
        }

        $instance_dao->updateLastRun($instance->id);

        // No auth required
        $crawler->fetchInstanceUserInfo();

        // No auth required for public Twitter users
        $crawler->fetchInstanceUserTweets();

        if (!$noauth) {
            // Everything in this branch needs auth and only works for the calling user
            $crawler->fetchInstanceUserRetweetsByMe();
            $crawler->fetchInstanceUserMentions();
            $crawler->fetchInstanceUserFriends();
            $crawler->fetchInstanceUserFollowers();
        }

        $crawler->fetchStrayRepliedToTweets();
        $crawler->fetchUnloadedFollowerDetails();
        $crawler->fetchFriendTweetsAndFriends();

        // TODO: Get direct messages
        // TODO: Gather favorites data

        if ($noauth) {
            // No auth required
            $crawler->fetchSearchResults($instance->network_username);
        }

        $crawler->cleanUpFollows();

        // Persist the crawled instance
        $instance_dao->save($crawler->instance, $crawler->owner_object->post_count, $logger, $api);
    }

    // Close logging
    $logger->close();
}
/**
 * fetchInstanceUserInfo() should store the instance owner's user record.
 *
 * After the crawler call, user 36823 is loaded through UserDAO::getDetails() and its id, user_id,
 * username and found_in fields are compared with the expected values.
 */
function testFetchInstanceUserInfo() {
    $tc = new TwitterCrawler($this->instance, $this->logger, $this->api, $this->db);
    $tc->fetchInstanceUserInfo();

    $udao = new UserDAO($this->db, $this->logger);
    $user = $udao->getDetails(36823);

    // assertEqual (actual, expected) reports both values on failure, whereas
    // assertTrue($a == $b) only reports that the expression was false.
    $this->assertEqual($user->id, 1);
    $this->assertEqual($user->user_id, 36823);
    $this->assertEqual($user->username, 'anildash');
    $this->assertEqual($user->found_in, 'Owner Status');
}
/**
 * fetchSearchResults() should store posts returned for the search term.
 *
 * The API accessor is marked available with a single crawler call, the instance user info is
 * fetched, and a search for '@whitehouse' is run. The test then checks that post 11841192840 is
 * in the database and carries the expected text.
 */
function testFetchSearchResults() {
    $this->api->available = true;
    $this->api->available_api_calls_for_crawler = 1;

    $crawler = new TwitterCrawler($this->instance, $this->logger, $this->api, $this->db);
    $crawler->fetchInstanceUserInfo();
    $crawler->fetchSearchResults('@whitehouse');

    // The post ID is written as a float literal, exactly as the original test passes it
    $expected_post_id = 11841192840.0;
    $expected_text = "RT @CindyPDX: @whitehouse PLS send to my President: http://familiesofautistickids.ning.com/video/through-my-eyes-thanh-bui <Does he remember our son?";

    $post_dao = new PostDAO($this->db, $this->logger);
    $this->assertTrue($post_dao->isPostInDB($expected_post_id));

    $stored_post = $post_dao->getPost($expected_post_id);
    $this->assertEqual($stored_post->post_text, $expected_text);
}
/**
 * Crawl every active instance returned by InstanceDAO::getAllActiveInstancesStalestFirst().
 *
 * One LongUrlAPIAccessor and one FlickrAPIAccessor are created up front and handed to the crawler
 * calls that accept them. For each instance an OAuth API accessor is built from the stored tokens;
 * instances whose accessor reports no available API calls are left untouched. After the loop the
 * logger is closed and, when $conn is set, the database connection is closed through $db.
 */
function twitter_crawl() {
    global $THINKTANK_CFG;
    global $db;
    global $conn;

    $logger = new Logger($THINKTANK_CFG['log_location']);
    $instance_dao = new InstanceDAO($db, $logger);
    $owner_instance_dao = new OwnerInstanceDAO($db, $logger);
    $long_url_api = new LongUrlAPIAccessor($THINKTANK_CFG['app_title']);
    $flickr_api = new FlickrAPIAccessor($THINKTANK_CFG['flickr_api_key']);

    foreach ($instance_dao->getAllActiveInstancesStalestFirst() as $instance) {
        $logger->setUsername($instance->network_username);

        $tokens = $owner_instance_dao->getOAuthTokens($instance->id);
        $api = new CrawlerTwitterAPIAccessorOAuth(
            $tokens['oauth_access_token'],
            $tokens['oauth_access_token_secret'],
            $THINKTANK_CFG['oauth_consumer_key'],
            $THINKTANK_CFG['oauth_consumer_secret'],
            $instance,
            $THINKTANK_CFG['archive_limit']
        );

        $crawler = new TwitterCrawler($instance, $logger, $api, $db);
        // The Config object is not read again in this function; it is still constructed here
        $cfg = new Config($instance->network_username, $instance->network_user_id);

        $api->init($logger);

        // Nothing to do for this instance when no API calls are available
        if (!($api->available_api_calls_for_crawler > 0)) {
            continue;
        }

        $instance_dao->updateLastRun($instance->id);

        $crawler->fetchInstanceUserInfo();
        $crawler->fetchInstanceUserTweets($long_url_api, $flickr_api);
        $crawler->fetchInstanceUserRetweetsByMe($long_url_api, $flickr_api);
        $crawler->fetchInstanceUserMentions($long_url_api, $flickr_api);
        $crawler->fetchInstanceUserFriends();
        $crawler->fetchInstanceUserFollowers();
        $crawler->fetchStrayRepliedToTweets($long_url_api, $flickr_api);
        $crawler->fetchUnloadedFollowerDetails();
        $crawler->fetchFriendTweetsAndFriends($long_url_api, $flickr_api);

        // TODO: Get direct messages
        // TODO: Gather favorites data

        $crawler->cleanUpFollows();

        // Persist the crawled instance
        $instance_dao->save($crawler->instance, $crawler->owner_object->post_count, $logger, $api);
    }

    // Close logging
    $logger->close();

    // Clean up the database connection if one was opened
    if (isset($conn)) {
        $db->closeConnection($conn);
    }
}
/**
 * cleanUpMissedFavsUnFavs() should pick up older favorites across two maintenance traversals.
 *
 * The plugin options favs_older_pages (1) and favs_cleanup_pages (3) are set through fixtures.
 * The first traversal reads the favs_stage3 test data and expects 40 favorites in the system;
 * the second reads favs_stage6 after resetting last_unfav_page_checked to 2 and expects 61.
 */
public function testAddRmOldFavMaintSearch() {
    $this->logger->logInfo("in testAddRmOldFavMaintSearch", __METHOD__ . ',' . __LINE__);

    // Plugin configuration values
    $plugin_namespace = OptionDAO::PLUGIN_OPTIONS . '-1';
    $older_pages_option = FixtureBuilder::build(
        'options',
        array('namespace' => $plugin_namespace, 'option_name' => 'favs_older_pages', 'option_value' => 1)
    );
    $cleanup_pages_option = FixtureBuilder::build(
        'options',
        array('namespace' => $plugin_namespace, 'option_name' => 'favs_cleanup_pages', 'option_value' => 3)
    );

    $instance_dao = DAOFactory::getDAO('TwitterInstanceDAO');
    self::setUpInstanceUserAmygdala();

    // --- First traversal ---
    $this->api->available_api_calls_for_crawler = 3;
    $this->api->to->setDataPath('webapp/plugins/twitter/tests/testdata/favs_tests/favs_stage3/');

    $crawler = new TwitterCrawler($this->instance, $this->api);
    $crawler->fetchInstanceUserInfo();
    $cleaned_up = $crawler->cleanUpMissedFavsUnFavs();
    $this->assertEqual($cleaned_up, true);
    $this->assertEqual($this->instance->last_unfav_page_checked, 3);

    // Save the instance, then reload it so the count below comes from the DAO
    if (isset($crawler->user)) {
        $instance_dao->save($this->instance, $crawler->user->post_count, $this->logger);
    }
    $this->instance = $instance_dao->getByUsernameOnNetwork("amygdala", "twitter");

    // Favorite count after the first traversal
    $this->assertEqual($this->instance->owner_favs_in_system, 40);

    $this->logger->logInfo("in testAddRmOldFavMaintSearch, second traversal", __METHOD__ . ',' . __LINE__);

    // --- Second traversal: an additional older fav is added and one removed ---
    $this->api->available_api_calls_for_crawler = 3;
    $this->instance->last_unfav_page_checked = 2;
    $this->api->to->setDataPath('webapp/plugins/twitter/tests/testdata/favs_tests/favs_stage6/');

    $crawler = new TwitterCrawler($this->instance, $this->api);
    $crawler->fetchInstanceUserInfo();
    $cleaned_up = $crawler->cleanUpMissedFavsUnFavs();
    $this->assertEqual($cleaned_up, true);

    // Save and reload again
    if (isset($crawler->user)) {
        $instance_dao->save($this->instance, $crawler->user->post_count, $this->logger);
    }
    $this->instance = $instance_dao->getByUsernameOnNetwork("amygdala", "twitter");

    // Removing 2 and adding 21 would give 59, but because of an issue with the Twitter API
    // un-favs are currently not removed from the database, so the expected count is 61.
    $this->assertEqual($this->instance->owner_favs_in_system, 61);

    // Drop the fixture builders
    $older_pages_option = null;
    $cleanup_pages_option = null;
}
/**
 * fetchStrayRepliedToTweets() should load a replied-to post that is not yet in the database.
 *
 * With four API calls available, the instance user's info and tweets are fetched first; after the
 * stray replied-to fetch, post 15752814831 must exist and have a reply_count_cache of 1.
 */
public function testFetchStrayRepliedToTweets() {
    self::setUpInstanceUserAnilDash();
    $this->api->available_api_calls_for_crawler = 4;

    $crawler = new TwitterCrawler($this->instance, $this->api);
    $crawler->fetchInstanceUserInfo();
    $crawler->fetchInstanceUserTweets();

    $post_dao = DAOFactory::getDAO('PostDAO');
    // The returned posts are not inspected below; the lookup is kept so the
    // sequence of DAO calls stays the same.
    $owner_posts = $post_dao->getAllPostsByUsername('anildash', 'twitter');

    $crawler->fetchStrayRepliedToTweets();

    $replied_to_post = $post_dao->getPost(15752814831.0, 'twitter');
    $this->assertTrue(isset($replied_to_post));
    $this->assertEqual($replied_to_post->reply_count_cache, 1);
}
/**
 * fetchRetweetsOfInstanceUser() should load the retweets of a stored post exactly once.
 *
 * The retweeted post 14947487415 is inserted directly into tt_posts. After the first crawler run
 * the post must show a retweet_count_cache of 3 and have 3 retweets; a second run with a freshly
 * set-up instance must leave both numbers unchanged, i.e. no duplicates are stored.
 */
function testFetchRetweetsOfInstanceuser() {
    self::setUpInstanceUserGinaTrapani();

    $tc = new TwitterCrawler($this->instance, $this->api, $this->db);
    $tc->fetchInstanceUserInfo();

    // First, load the retweeted tweet into the db.
    // The double quotes around the tweet text are escaped: left bare inside this double-quoted
    // literal they terminate the string and the file does not parse.
    // mention_count_cache is seeded with an arbitrary value via rand(); no assertion reads it.
    $q = "INSERT INTO tt_posts (post_id, author_user_id, author_username, author_fullname, author_avatar, "
        . "post_text, source, pub_date, mention_count_cache, retweet_count_cache) "
        . "VALUES (14947487415, 930061, 'ginatrapani', 'Gina Trapani', 'avatar.jpg', "
        . "'\"Wearing your new conference tee shirt does NOT count as dressing up.\"', "
        . "'web', '2006-01-01 00:00:00', " . rand(0, 4) . ", 0);";
    $this->db->exec($q);

    $pdao = DAOFactory::getDAO('PostDAO');

    $tc->fetchRetweetsOfInstanceUser();
    $post = $pdao->getPost(14947487415.0);
    $this->assertEqual($post->retweet_count_cache, 3, '3 retweets loaded');
    $retweets = $pdao->getRetweetsOfPost(14947487415.0, true);
    $this->assertEqual(count($retweets), 3, '3 retweets loaded');

    // Make sure duplicate posts aren't going into the db on the next crawler run
    self::setUpInstanceUserGinaTrapani();
    $tc = new TwitterCrawler($this->instance, $this->api, $this->db);
    $tc->fetchInstanceUserInfo();
    $tc->fetchRetweetsOfInstanceUser();

    $post = $pdao->getPost(14947487415.0);
    $this->assertEqual($post->retweet_count_cache, 3, '3 retweets loaded');
    $retweets = $pdao->getRetweetsOfPost(14947487415.0, true);
    $this->assertEqual(count($retweets), 3, '3 retweets loaded');
}
/**
 * fetchRetweetsOfInstanceUser() should update the new-style retweet counts of a stored post
 * without loading old-style retweets, and should not create duplicates on a second run.
 *
 * Post 300000912989118466 is created through a fixture with all retweet counters at 0. After each
 * crawler run its retweet_count_cache and retweet_count_api must be 1, and no retweets may be
 * stored for either post ID queried below.
 */
public function testFetchRetweetsOfInstanceUser() {
    $this->debug(__METHOD__);
    self::setUpInstanceUserGinaTrapani();

    $retweeted_post_id = '300000912989118466';
    $other_post_id = '300000311127457792';

    $crawler = new TwitterCrawler($this->instance, $this->api);
    $crawler->api->to->setDataPathFolder('testoftwittercrawler/ginatrapani/');

    // First, load the retweeted tweet into the db.
    // The 'new-style' retweet count now comes from the retweet_count field in the xml and is
    // parsed into 'retweet_count_cache' in the post vals. It no longer has to match the number
    // of retweets in the database (in this test case it does).
    $post_fixture = FixtureBuilder::build(
        'posts',
        array(
            'post_id' => '300000912989118466',
            'author_user_id' => '930061',
            'author_username' => 'ginatrapani',
            'author_fullname' => 'Gina Trapani',
            'post_text' => '@jjg unsurprisingly Dykes Lumber in Brooklyn has a thriving t-shirt business',
            'pub_date' => '-1d',
            'reply_count_cache' => 1,
            'old_retweet_count_cache' => 0,
            'retweet_count_cache' => 0,
            'retweet_count_api' => 0
        )
    );

    $post_dao = DAOFactory::getDAO('PostDAO');

    // --- First crawler run ---
    $crawler->fetchRetweetsOfInstanceUser();
    $stored_post = $post_dao->getPost($retweeted_post_id, 'twitter');
    $this->assertEqual($stored_post->retweet_count_cache, 1, '1 new-style retweet from count cache');

    // While processing the retweets of the post, a <retweeted_status> element that points to the
    // original post and includes a retweet count causes the original post in the db to be updated
    // with that count.
    // NOTE(review): the original comment states that this count is 2 in the test data ("behind"
    // the database info), yet the assertion expects 1 -- confirm against the fixture data.
    $this->assertEqual($stored_post->retweet_count_api, 1, '1 new-style retweet count from API');

    // No old-style retweets should have been processed here
    $this->assertEqual($stored_post->old_retweet_count_cache, 0, '0 old-style retweets count from API');
    $loaded_retweets = $post_dao->getRetweetsOfPost($other_post_id, 'twitter', true);
    $this->assertEqual(count($loaded_retweets), 0, '0 retweets loaded');

    // --- Second crawler run: duplicate posts must not go into the db ---
    self::setUpInstanceUserGinaTrapani();
    $crawler = new TwitterCrawler($this->instance, $this->api);
    $crawler->api->to->setDataPathFolder('testoftwittercrawler/ginatrapani/');
    $crawler->fetchInstanceUserInfo();
    $crawler->fetchRetweetsOfInstanceUser();

    $stored_post = $post_dao->getPost($retweeted_post_id, 'twitter');
    $this->assertEqual($stored_post->retweet_count_cache, 1, '1 new-style retweet from count cache');
    $this->assertEqual($stored_post->retweet_count_api, 1, '1 new-style retweet count from API');
    $loaded_retweets = $post_dao->getRetweetsOfPost($retweeted_post_id, 'twitter', true);
    $this->assertEqual(count($loaded_retweets), 0, '0 retweets loaded');

    // The fetched post is not inspected; the lookup is kept so the DAO call sequence is unchanged
    $stored_post = $post_dao->getPost($other_post_id, 'twitter');
    $other_retweets = $post_dao->getRetweetsOfPost($other_post_id, 'twitter', true);
    $this->assertEqual(count($other_retweets), 0, '0 retweets loaded');
    // Disabled in the original: assert that $other_retweets[0]->in_rt_of_user_id equals '930061'
}