/**
 * Collect Twitter data for each instance returned for the logged-in owner.
 *
 * For every instance this method:
 *  - loads the instance's OAuth tokens and the 'twitter' plugin options,
 *  - runs the full sequence of TwitterCrawler fetch/cleanup calls, followed by one
 *    fetchInstanceHashtagTweets() call per saved instance hashtag,
 *  - logs (rather than propagates) any Exception raised while crawling, so one failing
 *    instance does not stop the remaining instances from being processed,
 *  - caches the dashboard modules, saves the instance when the crawler exposes a user,
 *    and reports the version.
 *
 * @return void
 */
public function crawl() {
    $logger = Logger::getInstance();
    $instance_dao = DAOFactory::getDAO('TwitterInstanceDAO');
    $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO');
    $owner_dao = DAOFactory::getDAO('OwnerDAO');
    $instance_hashtag_dao = DAOFactory::getDAO('InstanceHashtagDAO');

    // get oauth values
    $plugin_option_dao = DAOFactory::getDAO('PluginOptionDAO');
    $options = $plugin_option_dao->getOptionsHash('twitter', true);

    // These two settings do not depend on the instance, so resolve them once up front.
    $num_twitter_errors = isset($options['num_twitter_errors'])
        ? $options['num_twitter_errors']->option_value
        : null;
    $archive_limit = isset($options['archive_limit']->option_value)
        ? $options['archive_limit']->option_value
        : 3200;

    $current_owner = $owner_dao->getByEmail(Session::getLoggedInUser());
    $instances = $instance_dao->getActiveInstancesStalestFirstForOwnerByNetworkNoAuthError(
        $current_owner,
        'twitter'
    );

    foreach ($instances as $instance) {
        // PHP locals are function-scoped: without this reset, an instance that fails before
        // its crawler is constructed would still see the previous iteration's crawler in the
        // "Save instance" step below and be saved with another account's post count.
        $twitter_crawler = null;

        $logger->setUsername($instance->network_username);
        $logger->logUserSuccess(
            "Starting to collect data for " . $instance->network_username . " on Twitter.",
            __METHOD__ . ',' . __LINE__
        );

        $tokens = $owner_instance_dao->getOAuthTokens($instance->id);
        $dashboard_module_cacher = new DashboardModuleCacher($instance);

        try {
            $has_tokens = isset($tokens['oauth_access_token'])
                && $tokens['oauth_access_token'] != ''
                && isset($tokens['oauth_access_token_secret'])
                && $tokens['oauth_access_token_secret'] != '';
            if (!$has_tokens) {
                throw new Exception('Missing Twitter OAuth tokens.');
            }

            $api = new CrawlerTwitterAPIAccessorOAuth(
                $tokens['oauth_access_token'],
                $tokens['oauth_access_token_secret'],
                $options['oauth_consumer_key']->option_value,
                $options['oauth_consumer_secret']->option_value,
                $archive_limit,
                $num_twitter_errors
            );
            $twitter_crawler = new TwitterCrawler($instance, $api);

            // The last-run timestamp is updated before any fetch, so it is recorded even if
            // one of the calls below throws.
            $instance_dao->updateLastRun($instance->id);

            $twitter_crawler->fetchInstanceUserTweets();
            $twitter_crawler->fetchInstanceUserMentions();
            $twitter_crawler->fetchInstanceUserFriends();
            $twitter_crawler->fetchInstanceUserFollowers();
            $twitter_crawler->fetchInstanceUserGroups();
            $twitter_crawler->fetchRetweetsOfInstanceUser();
            $twitter_crawler->fetchInstanceUserFavorites();
            $twitter_crawler->updateStaleGroupMemberships();
            $twitter_crawler->fetchStrayRepliedToTweets();
            $twitter_crawler->fetchUserFriendsByIDs();
            $twitter_crawler->fetchUnloadedFriendDetails();
            $twitter_crawler->fetchUnloadedFollowerDetails();
            $twitter_crawler->cleanUpFollows();
            $twitter_crawler->updateFriendsProfiles();

            //Retrieve search results for saved keyword/hashtags
            $instances_hashtags = $instance_hashtag_dao->getByInstance($instance->id);
            foreach ($instances_hashtags as $instance_hashtag) {
                $twitter_crawler->fetchInstanceHashtagTweets($instance_hashtag);
            }
        } catch (Exception $e) {
            // Deliberate best-effort: record the failure for this instance and carry on.
            $logger->logUserError(
                get_class($e) . " while crawling " . $instance->network_username . " on Twitter: "
                    . $e->getMessage(),
                __METHOD__ . ',' . __LINE__
            );
        }

        $dashboard_module_cacher->cacheDashboardModules();

        // Save instance
        // isset() is false when no crawler was built for THIS instance (it is null) or when
        // the crawler has no user set, so nothing is saved in either case.
        if (isset($twitter_crawler->user)) {
            $instance_dao->save($instance, $twitter_crawler->user->post_count, $logger);
        }

        Reporter::reportVersion($instance);
        $logger->logUserSuccess(
            "Finished collecting data for " . $instance->network_username . " on Twitter.",
            __METHOD__ . ',' . __LINE__
        );
    }
}
/**
 * fetchInstanceHashtagTweets() when the tweet's author is already stored but the tweet is not.
 *
 * Flow: assert neither post nor user is stored, build the user fixture, assert only the user
 * is stored, run the hashtag crawl against the 'testoftwittercrawler/searchtweets/' data
 * folder, then assert on the stored posts, users, hashtag, hashtag-post rows and links.
 */
public function testFetchInstanceHashtagTweetsOnlyUserExistInDB() {
    $this->debug(__METHOD__);
    self::setUpInstanceUserEduardCucurella();

    $posts = DAOFactory::getDAO('PostDAO');
    $users = DAOFactory::getDAO('UserDAO');
    $hashtags = DAOFactory::getDAO('HashtagDAO');
    $hashtag_posts = DAOFactory::getDAO('HashtagPostDAO');
    $links = DAOFactory::getDAO('LinkDAO');

    // Before the fixture: neither the post nor its author is stored.
    $this->assertFalse($posts->isPostInDB('307436813180616704', 'twitter'));
    $this->assertFalse($users->isUserInDB('2485041', 'twitter'));
    $this->assertEqual($posts->getTotalPostsByUser('GinaTost', 'twitter'), 0);

    // Held in a local for the rest of the test even though it is never read again.
    // NOTE(review): presumably the builder's lifetime keeps the fixture rows in place - confirm.
    $user_fixture = $this->buildDataUser();

    // After the fixture: the author is stored, the post still is not.
    $this->assertFalse($posts->isPostInDB('307436813180616704', 'twitter'));
    $this->assertTrue($users->isUserInDB('2485041', 'twitter'));
    $this->assertEqual($posts->getTotalPostsByUser('GinaTost', 'twitter'), 0);

    // Crawl the first saved hashtag of instance 1 using the canned search results.
    $crawler = new TwitterCrawler($this->instance, $this->api);
    $crawler->api->to->setDataPathFolder('testoftwittercrawler/searchtweets/');
    $instance_hashtag_dao = DAOFactory::getDAO('InstanceHashtagDAO');
    $saved_hashtags = $instance_hashtag_dao->getByInstance(1);
    $crawler->fetchInstanceHashtagTweets($saved_hashtags[0]);

    // After the crawl: both posts and both authors are stored, one post per author.
    // IDs are kept as array VALUES so they remain strings (numeric-string keys would be cast).
    $expected_posts = array(
        array('307436813180616704', '2485041', 'GinaTost'),
        array('307436651154665473', '251219944', 'XerpaC'),
    );
    foreach ($expected_posts as $expected) {
        $this->assertTrue($posts->isPostInDB($expected[0], 'twitter'));
        $this->assertTrue($users->isUserInDB($expected[1], 'twitter'));
        $this->assertEqual($posts->getTotalPostsByUser($expected[2], 'twitter'), 1);
    }

    // Hashtag row with ID 1.
    $hashtag = $hashtags->getHashtagByID(1);
    $this->assertEqual($hashtag->hashtag, '#mwc2013');
    $this->assertEqual($hashtag->network, 'twitter');
    $this->assertEqual($hashtag->count_cache, 2);

    // Hashtag-to-post rows for hashtag ID 1.
    $hashtag_post_rows = $hashtag_posts->getHashtagPostsByHashtagID(1);
    $this->assertEqual(count($hashtag_post_rows), 2);
    $this->assertEqual($hashtag_post_rows[0]['post_id'], '307436651154665473');
    $this->assertEqual($hashtag_post_rows[1]['post_id'], '307436813180616704');

    // Exactly one link is stored for each crawled post.
    $expected_links = array(
        array('307436813180616704', 'http://t.co/yPMZd3eTNb'),
        array('307436651154665473', 'http://t.co/8yet1gjfDm'),
    );
    foreach ($expected_links as $expected) {
        $post_links = $links->getLinksForPost($expected[0], 'twitter');
        $this->assertEqual(count($post_links), 1);
        $this->assertEqual($post_links[0]->url, $expected[1]);
    }
}