/**
 * Runs cleanUpMissedFavsUnFavs() twice for the "amygdala" Twitter instance and checks the
 * favorites count stored on the instance after each run.
 *
 * Pass 1 reads the favs_stage3 test data and expects 40 favorites in the system.
 * Pass 2 rewinds last_unfav_page_checked to 2, reads the favs_stage6 test data and expects 61.
 */
public function testAddRmOldFavMaintSearch() {
    $this->logger->logInfo("in testAddRmOldFavMaintSearch", __METHOD__ . ',' . __LINE__);

    // Plugin option fixtures; the builder objects are held until the end of the test and
    // released explicitly there.
    $namespace = OptionDAO::PLUGIN_OPTIONS . '-1';
    $older_pages_option = FixtureBuilder::build('options', array(
        'namespace' => $namespace,
        'option_name' => 'favs_older_pages',
        'option_value' => 1));
    $cleanup_pages_option = FixtureBuilder::build('options', array(
        'namespace' => $namespace,
        'option_name' => 'favs_cleanup_pages',
        'option_value' => 3));

    $dao = DAOFactory::getDAO('TwitterInstanceDAO');
    self::setUpInstanceUserAmygdala();

    // ---- First traversal ----
    $this->api->available_api_calls_for_crawler = 3;
    $this->api->to->setDataPath('webapp/plugins/twitter/tests/testdata/favs_tests/favs_stage3/');

    $crawler = new TwitterCrawler($this->instance, $this->api);
    $crawler->fetchInstanceUserInfo();

    $this->assertEqual($crawler->cleanUpMissedFavsUnFavs(), true);
    $this->assertEqual($this->instance->last_unfav_page_checked, 3);

    // Persist the instance (only when the crawler has a user), then reload it from storage.
    if (isset($crawler->user)) {
        $dao->save($this->instance, $crawler->user->post_count, $this->logger);
    }
    $this->instance = $dao->getByUsernameOnNetwork("amygdala", "twitter");

    // Favorites count after the first pass.
    $this->assertEqual($this->instance->owner_favs_in_system, 40);

    // ---- Second traversal ----
    $this->logger->logInfo("in testAddRmOldFavMaintSearch, second traversal",
        __METHOD__ . ',' . __LINE__);

    // Now add an additional older fav, remove one, and traverse again.
    $this->api->available_api_calls_for_crawler = 3;
    $this->instance->last_unfav_page_checked = 2;
    $this->api->to->setDataPath('webapp/plugins/twitter/tests/testdata/favs_tests/favs_stage6/');

    $crawler = new TwitterCrawler($this->instance, $this->api);
    $crawler->fetchInstanceUserInfo();

    $this->assertEqual($crawler->cleanUpMissedFavsUnFavs(), true);

    // Persist and reload again.
    if (isset($crawler->user)) {
        $dao->save($this->instance, $crawler->user->post_count, $this->logger);
    }
    $this->instance = $dao->getByUsernameOnNetwork("amygdala", "twitter");

    // Favorites count: should have removed 2 and added 21...
    // Update: due to an issue with TwitterAPI, un-favs are not currently removed from the
    // database, so the expected total is 61 rather than 59.
    // $this->assertEqual($this->instance->owner_favs_in_system, 59);
    $this->assertEqual($this->instance->owner_favs_in_system, 61);

    // Release the option fixtures.
    $older_pages_option = null;
    $cleanup_pages_option = null;
}
/**
 * Crawl Twitter data for every active Twitter instance the logged-in owner has access to.
 *
 * Instances are visited in the order returned by
 * getAllActiveInstancesStalestFirstByNetwork('twitter'). An instance without a stored,
 * non-empty OAuth access token and secret is crawled with the 'NOAUTH' placeholder
 * credentials; it skips the auth-only fetches and runs the username search instead.
 * Nothing is fetched for an instance when the API accessor reports no available calls
 * after init().
 */
public function crawl() {
    $logger = Logger::getInstance();
    $instance_dao = DAOFactory::getDAO('TwitterInstanceDAO');
    $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO');
    $owner_dao = DAOFactory::getDAO('OwnerDAO');

    // Plugin settings (OAuth consumer credentials and crawl limits).
    $plugin_option_dao = DAOFactory::getDAO('PluginOptionDAO');
    $options = $plugin_option_dao->getOptionsHash('twitter', true);

    // Optional settings are the same for every instance, so resolve them once up front.
    $num_twitter_errors = isset($options['num_twitter_errors'])
        ? $options['num_twitter_errors']->option_value : null;
    $max_api_calls_per_crawl = isset($options['max_api_calls_per_crawl'])
        ? $options['max_api_calls_per_crawl']->option_value : 350;
    $api_calls_to_leave_unmade_per_minute = isset($options['api_calls_to_leave_unmade_per_minute'])
        ? $options['api_calls_to_leave_unmade_per_minute']->option_value : 2.0;

    $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser());
    $instances = $instance_dao->getAllActiveInstancesStalestFirstByNetwork('twitter');

    foreach ($instances as $instance) {
        if (!$owner_instance_dao->doesOwnerHaveAccess($current_owner, $instance)) {
            // Owner doesn't have access to this instance; let's not crawl it.
            continue;
        }

        $logger->setUsername($instance->network_username);
        $logger->logUserSuccess("Starting to collect data for " . $instance->network_username . " on Twitter.",
            __METHOD__ . ',' . __LINE__);

        // Authenticated crawling needs both a non-empty access token and a non-empty secret.
        $tokens = $owner_instance_dao->getOAuthTokens($instance->id);
        $noauth = !(isset($tokens['oauth_access_token']) && $tokens['oauth_access_token'] != ''
            && isset($tokens['oauth_access_token_secret']) && $tokens['oauth_access_token_secret'] != '');

        // Only the first two constructor arguments depend on the auth state.
        $access_token = $noauth ? 'NOAUTH' : $tokens['oauth_access_token'];
        $access_token_secret = $noauth ? 'NOAUTH' : $tokens['oauth_access_token_secret'];

        $api = new CrawlerTwitterAPIAccessorOAuth(
            $access_token,
            $access_token_secret,
            $options['oauth_consumer_key']->option_value,
            $options['oauth_consumer_secret']->option_value,
            $api_calls_to_leave_unmade_per_minute,
            $options['archive_limit']->option_value,
            $num_twitter_errors,
            $max_api_calls_per_crawl);

        $crawler = new TwitterCrawler($instance, $api);
        $api->init();

        if ($api->available_api_calls_for_crawler > 0) {
            $instance_dao->updateLastRun($instance->id);

            // No auth req'd
            // NOTE(review): this call is disabled, yet the save at the end of this branch only
            // runs when $crawler->user is set -- presumably another fetch populates it; confirm.
            //$crawler->fetchInstanceUserInfo();

            // No auth for public Twitter users
            $crawler->fetchInstanceUserTweets();

            if (!$noauth) {
                // Auth req'd, for calling user only
                $crawler->fetchInstanceUserMentions();
                $crawler->fetchInstanceUserFriends();
                $crawler->fetchInstanceFavorites();
                $crawler->fetchInstanceUserFollowers();
                $crawler->fetchRetweetsOfInstanceUser();
                $crawler->cleanUpMissedFavsUnFavs();
            }

            $crawler->fetchStrayRepliedToTweets();
            $crawler->fetchUnloadedFollowerDetails();
            $crawler->fetchFriendTweetsAndFriends();

            if ($noauth) {
                // No auth req'd
                $crawler->fetchSearchResults($instance->network_username);
            }

            $crawler->cleanUpFollows();

            // Save instance
            if (isset($crawler->user)) {
                $instance_dao->save($instance, $crawler->user->post_count, $logger);
            }

            $logger->logUserSuccess("Finished collecting data for " . $instance->network_username . " on Twitter.",
                __METHOD__ . ',' . __LINE__);
        }
    }
}