コード例 #1
0
ファイル: twitter.php プロジェクト: ukd1/thinktank
/**
 * Crawl Twitter on behalf of every active Twitter instance, stalest first.
 *
 * For each instance an OAuth API accessor is built (with 'NOAUTH' placeholder
 * credentials when no usable token pair is stored), and, if the accessor
 * reports API calls available for the crawler, the individual fetch steps are
 * run and the instance is saved.
 *
 * Reads the globals $THINKTANK_CFG and $db.
 */
function twitter_crawl()
{
    global $THINKTANK_CFG;
    global $db;
    global $conn;

    $logger = new Logger($THINKTANK_CFG['log_location']);
    $instance_dao = new InstanceDAO($db, $logger);
    $owner_instance_dao = new OwnerInstanceDAO($db, $logger);

    foreach ($instance_dao->getAllActiveInstancesStalestFirstByNetwork('twitter') as $instance) {
        $logger->setUsername($instance->network_username);
        $tokens = $owner_instance_dao->getOAuthTokens($instance->id);

        // Authenticated crawling needs both a non-empty access token and a non-empty secret.
        $has_token = isset($tokens['oauth_access_token']) && $tokens['oauth_access_token'] != '';
        $has_secret = isset($tokens['oauth_access_token_secret']) && $tokens['oauth_access_token_secret'] != '';
        $noauth = !($has_token && $has_secret);

        // Without stored credentials the accessor is given literal 'NOAUTH' placeholders.
        $access_token = $noauth ? 'NOAUTH' : $tokens['oauth_access_token'];
        $access_token_secret = $noauth ? 'NOAUTH' : $tokens['oauth_access_token_secret'];
        $api = new CrawlerTwitterAPIAccessorOAuth(
            $access_token,
            $access_token_secret,
            $THINKTANK_CFG['oauth_consumer_key'],
            $THINKTANK_CFG['oauth_consumer_secret'],
            $instance,
            $THINKTANK_CFG['archive_limit']
        );

        $crawler = new TwitterCrawler($instance, $logger, $api, $db);
        // NOTE(review): $cfg is never read in this function; kept in case the
        // Config constructor has side effects -- confirm before removing.
        $cfg = new Config($instance->network_username, $instance->network_user_id);
        $api->init($logger);

        // Nothing to do for this instance when no API calls are left for the crawler.
        if (!($api->available_api_calls_for_crawler > 0)) {
            continue;
        }

        $instance_dao->updateLastRun($instance->id);

        // These two steps run whether or not we are authenticated.
        $crawler->fetchInstanceUserInfo();
        $crawler->fetchInstanceUserTweets();

        // These steps are only attempted with real credentials.
        if (!$noauth) {
            $crawler->fetchInstanceUserRetweetsByMe();
            $crawler->fetchInstanceUserMentions();
            $crawler->fetchInstanceUserFriends();
            $crawler->fetchInstanceUserFollowers();
        }

        $crawler->fetchStrayRepliedToTweets();
        $crawler->fetchUnloadedFollowerDetails();
        $crawler->fetchFriendTweetsAndFriends();

        // TODO: Get direct messages
        // TODO: Gather favorites data

        // Unauthenticated runs additionally search for the instance's username.
        if ($noauth) {
            $crawler->fetchSearchResults($instance->network_username);
        }

        $crawler->cleanUpFollows();

        // Persist the instance together with the owner's post count.
        $instance_dao->save($crawler->instance, $crawler->owner_object->post_count, $logger, $api);
    }

    // Close logging
    $logger->close();
}
コード例 #2
0
 /**
  * Checks that fetchInstanceUserInfo() results in a user record for user id
  * 36823 whose stored fields match the expected owner details.
  *
  * Uses assertEqual rather than assertTrue on a comparison so that a failure
  * reports the actual value instead of just "false".
  */
 function testFetchInstanceUserInfo()
 {
     $tc = new TwitterCrawler($this->instance, $this->logger, $this->api, $this->db);
     $tc->fetchInstanceUserInfo();

     $udao = new UserDAO($this->db, $this->logger);
     $user = $udao->getDetails(36823);

     $this->assertEqual($user->id, 1);
     $this->assertEqual($user->user_id, 36823);
     $this->assertEqual($user->username, 'anildash');
     $this->assertEqual($user->found_in, 'Owner Status');
 }
コード例 #3
0
 /**
  * Checks that fetchSearchResults('@whitehouse') stores post 11841192840
  * with the expected text.
  */
 function testFetchSearchResults()
 {
     // Mark the API as available with a single call for the crawler.
     $this->api->available = true;
     $this->api->available_api_calls_for_crawler = 1;

     $crawler = new TwitterCrawler($this->instance, $this->logger, $this->api, $this->db);
     $crawler->fetchInstanceUserInfo();
     $crawler->fetchSearchResults('@whitehouse');

     // The post id is written as a float literal, as in the rest of this test suite.
     $expected_post_id = 11841192840.0;
     $post_dao = new PostDAO($this->db, $this->logger);
     $this->assertTrue($post_dao->isPostInDB($expected_post_id));

     $stored_post = $post_dao->getPost($expected_post_id);
     $this->assertEqual($stored_post->post_text, "RT @CindyPDX: @whitehouse PLS send to my President: http://familiesofautistickids.ning.com/video/through-my-eyes-thanh-bui  <Does he remember our son?");
 }
コード例 #4
0
ファイル: twitter.php プロジェクト: jrunning/thinktank
/**
 * Crawl Twitter on behalf of every active instance, stalest first.
 *
 * For each instance an OAuth API accessor is built from the stored tokens;
 * if it reports API calls available for the crawler, the fetch steps are run
 * (passing the long-URL and Flickr accessors to the steps that take them) and
 * the instance is saved. Afterwards the log is closed and, when the global
 * $conn is set, the database connection is closed.
 *
 * Reads the globals $THINKTANK_CFG, $db and $conn.
 */
function twitter_crawl()
{
    global $THINKTANK_CFG;
    global $db;
    global $conn;

    $logger = new Logger($THINKTANK_CFG['log_location']);
    $instance_dao = new InstanceDAO($db, $logger);
    $owner_instance_dao = new OwnerInstanceDAO($db, $logger);
    $long_url_api = new LongUrlAPIAccessor($THINKTANK_CFG['app_title']);
    $flickr_api = new FlickrAPIAccessor($THINKTANK_CFG['flickr_api_key']);

    foreach ($instance_dao->getAllActiveInstancesStalestFirst() as $instance) {
        $logger->setUsername($instance->network_username);
        $tokens = $owner_instance_dao->getOAuthTokens($instance->id);

        $api = new CrawlerTwitterAPIAccessorOAuth(
            $tokens['oauth_access_token'],
            $tokens['oauth_access_token_secret'],
            $THINKTANK_CFG['oauth_consumer_key'],
            $THINKTANK_CFG['oauth_consumer_secret'],
            $instance,
            $THINKTANK_CFG['archive_limit']
        );
        $crawler = new TwitterCrawler($instance, $logger, $api, $db);
        // NOTE(review): $cfg is never read in this function; kept in case the
        // Config constructor has side effects -- confirm before removing.
        $cfg = new Config($instance->network_username, $instance->network_user_id);
        $api->init($logger);

        // Skip this instance when no API calls are left for the crawler.
        if (!($api->available_api_calls_for_crawler > 0)) {
            continue;
        }

        $instance_dao->updateLastRun($instance->id);

        $crawler->fetchInstanceUserInfo();
        $crawler->fetchInstanceUserTweets($long_url_api, $flickr_api);
        $crawler->fetchInstanceUserRetweetsByMe($long_url_api, $flickr_api);
        $crawler->fetchInstanceUserMentions($long_url_api, $flickr_api);
        $crawler->fetchInstanceUserFriends();
        $crawler->fetchInstanceUserFollowers();
        $crawler->fetchStrayRepliedToTweets($long_url_api, $flickr_api);
        $crawler->fetchUnloadedFollowerDetails();
        $crawler->fetchFriendTweetsAndFriends($long_url_api, $flickr_api);

        // TODO: Get direct messages
        // TODO: Gather favorites data

        $crawler->cleanUpFollows();

        // Persist the instance together with the owner's post count.
        $instance_dao->save($crawler->instance, $crawler->owner_object->post_count, $logger, $api);
    }

    // Close logging
    $logger->close();

    // Clean up the database connection if one was opened.
    if (isset($conn)) {
        $db->closeConnection($conn);
    }
}
コード例 #5
0
 /**
  * Runs cleanUpMissedFavsUnFavs() twice for the "amygdala" instance, against
  * the favs_stage3 and then the favs_stage6 test data, and checks the
  * owner_favs_in_system count stored on the instance after each pass.
  */
 public function testAddRmOldFavMaintSearch()
 {
     $this->logger->logInfo("in testAddRmOldFavMaintSearch", __METHOD__ . ',' . __LINE__);

     // Plugin option fixtures: how many older pages / cleanup pages to use.
     $plugin_namespace = OptionDAO::PLUGIN_OPTIONS . '-1';
     $older_pages_option = FixtureBuilder::build('options', array(
         'namespace' => $plugin_namespace,
         'option_name' => 'favs_older_pages',
         'option_value' => 1
     ));
     $cleanup_pages_option = FixtureBuilder::build('options', array(
         'namespace' => $plugin_namespace,
         'option_name' => 'favs_cleanup_pages',
         'option_value' => 3
     ));

     $instance_dao = DAOFactory::getDAO('TwitterInstanceDAO');
     self::setUpInstanceUserAmygdala();

     // --- First traversal: stage 3 data ---
     $this->api->available_api_calls_for_crawler = 3;
     $this->api->to->setDataPath('webapp/plugins/twitter/tests/testdata/favs_tests/favs_stage3/');
     $crawler = new TwitterCrawler($this->instance, $this->api);
     $crawler->fetchInstanceUserInfo();
     $cleanup_result = $crawler->cleanUpMissedFavsUnFavs();
     $this->assertEqual($cleanup_result, true);
     $this->assertEqual($this->instance->last_unfav_page_checked, 3);

     // Save the instance, then reload it from the DAO.
     if (isset($crawler->user)) {
         $instance_dao->save($this->instance, $crawler->user->post_count, $this->logger);
     }
     $this->instance = $instance_dao->getByUsernameOnNetwork("amygdala", "twitter");

     // Fav count after the first pass.
     $this->assertEqual($this->instance->owner_favs_in_system, 40);

     $this->logger->logInfo("in testAddRmOldFavMaintSearch, second traversal", __METHOD__ . ',' . __LINE__);

     // --- Second traversal: stage 6 data (an older fav added, one removed) ---
     $this->api->available_api_calls_for_crawler = 3;
     $this->instance->last_unfav_page_checked = 2;
     $this->api->to->setDataPath('webapp/plugins/twitter/tests/testdata/favs_tests/favs_stage6/');
     $crawler = new TwitterCrawler($this->instance, $this->api);
     $crawler->fetchInstanceUserInfo();
     $cleanup_result = $crawler->cleanUpMissedFavsUnFavs();
     $this->assertEqual($cleanup_result, true);

     // Save the instance, then reload it from the DAO.
     if (isset($crawler->user)) {
         $instance_dao->save($this->instance, $crawler->user->post_count, $this->logger);
     }
     $this->instance = $instance_dao->getByUsernameOnNetwork("amygdala", "twitter");

     // Fav count: should have removed 2 and added 21 (59), but due to an issue
     // with the Twitter API un-favs are not currently removed from the
     // database, so 61 is expected instead.
     // $this->assertEqual($this->instance->owner_favs_in_system, 59);
     $this->assertEqual($this->instance->owner_favs_in_system, 61);

     // Release the option fixtures.
     $older_pages_option = null;
     $cleanup_pages_option = null;
 }
コード例 #6
0
 /**
  * Checks that, after the instance user's tweets are fetched,
  * fetchStrayRepliedToTweets() leaves post 15752814831 in the database with
  * a reply_count_cache of 1.
  */
 public function testFetchStrayRepliedToTweets()
 {
     self::setUpInstanceUserAnilDash();
     $this->api->available_api_calls_for_crawler = 4;

     $crawler = new TwitterCrawler($this->instance, $this->api);
     $crawler->fetchInstanceUserInfo();
     $crawler->fetchInstanceUserTweets();

     $post_dao = DAOFactory::getDAO('PostDAO');
     // NOTE(review): the result of this lookup is never inspected; the call is
     // kept as-is because its side effects cannot be ruled out from here.
     $tweets = $post_dao->getAllPostsByUsername('anildash', 'twitter');

     $crawler->fetchStrayRepliedToTweets();

     $replied_to_post = $post_dao->getPost(15752814831.0, 'twitter');
     $this->assertTrue(isset($replied_to_post));
     $this->assertEqual($replied_to_post->reply_count_cache, 1);
 }
コード例 #7
0
 /**
  * Checks that fetchRetweetsOfInstanceUser() loads 3 retweets of post
  * 14947487415, and that running the crawler a second time does not insert
  * duplicates (the counts stay at 3).
  */
 function testFetchRetweetsOfInstanceuser()
 {
     self::setUpInstanceUserGinaTrapani();
     $tc = new TwitterCrawler($this->instance, $this->api, $this->db);
     $tc->fetchInstanceUserInfo();

     // First, load the retweeted tweet into the db.
     // The post text itself contains double quotes, so they must be escaped
     // inside this double-quoted PHP string; unescaped they end the string
     // early and the file fails to parse.
     // mention_count_cache is seeded with a random 0-4; it is not asserted on below.
     $q = "INSERT INTO tt_posts (post_id, author_user_id, author_username, author_fullname, author_avatar, post_text, source, pub_date, mention_count_cache, retweet_count_cache) VALUES (14947487415, 930061, 'ginatrapani', 'Gina Trapani', 'avatar.jpg', '\"Wearing your new conference tee shirt does NOT count as dressing up.\"', 'web', '2006-01-01 00:00:00', " . rand(0, 4) . ", 0);";
     $this->db->exec($q);

     $pdao = DAOFactory::getDAO('PostDAO');
     $tc->fetchRetweetsOfInstanceUser();
     $post = $pdao->getPost(14947487415.0);
     $this->assertEqual($post->retweet_count_cache, 3, '3 retweets loaded');
     $retweets = $pdao->getRetweetsOfPost(14947487415.0, true);
     $this->assertEqual(sizeof($retweets), 3, '3 retweets loaded');

     // Make sure duplicate posts aren't going into the db on the next crawler run.
     self::setUpInstanceUserGinaTrapani();
     $tc = new TwitterCrawler($this->instance, $this->api, $this->db);
     $tc->fetchInstanceUserInfo();
     $tc->fetchRetweetsOfInstanceUser();
     $post = $pdao->getPost(14947487415.0);
     $this->assertEqual($post->retweet_count_cache, 3, '3 retweets loaded');
     $retweets = $pdao->getRetweetsOfPost(14947487415.0, true);
     $this->assertEqual(sizeof($retweets), 3, '3 retweets loaded');
 }
コード例 #8
0
 /**
  * Seeds one post by ginatrapani, runs fetchRetweetsOfInstanceUser() and
  * checks the retweet counters stored on that post; then repeats the crawl
  * to confirm the counters and loaded retweets do not change on a second run.
  */
 public function testFetchRetweetsOfInstanceUser()
 {
     $this->debug(__METHOD__);

     $seeded_post_id = '300000912989118466';
     $other_post_id = '300000311127457792';

     self::setUpInstanceUserGinaTrapani();
     $crawler = new TwitterCrawler($this->instance, $this->api);
     $crawler->api->to->setDataPathFolder('testoftwittercrawler/ginatrapani/');

     // First, load the retweeted tweet into the db.
     // The 'new-style' retweet count comes from the retweet_count field in the
     // xml and is parsed into 'retweet_count_cache' in the post vals. It will
     // not necessarily match the number of retweets in the database any more
     // (but does in this test case).
     $seeded_post = FixtureBuilder::build('posts', array(
         'post_id' => $seeded_post_id,
         'author_user_id' => '930061',
         'author_username' => 'ginatrapani',
         'author_fullname' => 'Gina Trapani',
         'post_text' => '@jjg unsurprisingly Dykes Lumber in Brooklyn has a thriving t-shirt business',
         'pub_date' => '-1d',
         'reply_count_cache' => 1,
         'old_retweet_count_cache' => 0,
         'retweet_count_cache' => 0,
         'retweet_count_api' => 0
     ));
     $posts = DAOFactory::getDAO('PostDAO');

     $crawler->fetchRetweetsOfInstanceUser();

     $post = $posts->getPost($seeded_post_id, 'twitter');
     $this->assertEqual($post->retweet_count_cache, 1, '1 new-style retweet from count cache');
     // When a processed retweet carries a <retweeted_status> element pointing
     // at the original post, and that element includes a retweet count, the
     // original post in the db is updated with that count.
     $this->assertEqual($post->retweet_count_api, 1, '1 new-style retweet count from API');
     // No old-style retweets should have been processed here.
     $this->assertEqual($post->old_retweet_count_cache, 0, '0 old-style retweets count from API');
     $retweets = $posts->getRetweetsOfPost($other_post_id, 'twitter', true);
     $this->assertEqual(count($retweets), 0, '0 retweets loaded');

     // Make sure duplicate posts aren't going into the db on the next crawler run.
     self::setUpInstanceUserGinaTrapani();
     $crawler = new TwitterCrawler($this->instance, $this->api);
     $crawler->api->to->setDataPathFolder('testoftwittercrawler/ginatrapani/');
     $crawler->fetchInstanceUserInfo();
     $crawler->fetchRetweetsOfInstanceUser();

     $post = $posts->getPost($seeded_post_id, 'twitter');
     $this->assertEqual($post->retweet_count_cache, 1, '1 new-style retweet from count cache');
     $this->assertEqual($post->retweet_count_api, 1, '1 new-style retweet count from API');
     $retweets = $posts->getRetweetsOfPost($seeded_post_id, 'twitter', true);
     $this->assertEqual(count($retweets), 0, '0 retweets loaded');

     // NOTE(review): this fetched post is not asserted on; kept as in the original.
     $post = $posts->getPost($other_post_id, 'twitter');
     $other_retweets = $posts->getRetweetsOfPost($other_post_id, 'twitter', true);
     $this->assertEqual(count($other_retweets), 0, '0 retweets loaded');
     //$this->assertEqual($other_retweets[0]->in_rt_of_user_id, '930061');
 }