/**
 * Set up a crawler for a single instance.
 *
 * Keeps the instance and the access token on the object, and hands the
 * instance's network username to the Logger returned by Logger::getInstance().
 *
 * @param Instance $instance Instance to crawl; its network_username is passed to the logger.
 * @param mixed $access_token Stored unchanged in $this->access_token.
 */
public function __construct($instance, $access_token) {
    $this->instance = $instance;

    $logger = Logger::getInstance();
    $logger->setUsername($instance->network_username);
    $this->logger = $logger;

    $this->access_token = $access_token;
}
/**
 * Initialise the per-run state shared by insight generation.
 *
 * Points the logger at the instance's network username, records today's date
 * (Y-m-d) as the insight date, fetches the InsightDAO, builds the display
 * username (prefixed with '@' when the network is 'twitter') and creates the
 * InsightTerms for the instance's network.
 *
 * @param Instance $instance Instance the insight is generated for.
 * @param User $user Not read by this method body.
 * @param mixed $last_week_of_posts Not read by this method body.
 * @param mixed $number_days Not read by this method body.
 */
public function generateInsight(Instance $instance, User $user, $last_week_of_posts, $number_days) {
    $logger = Logger::getInstance();
    $logger->setUsername($instance->network_username);
    $this->logger = $logger;

    $this->insight_date = date("Y-m-d");
    $this->insight_dao = DAOFactory::getDAO('InsightDAO');

    // Twitter handles are shown with a leading '@'; other networks use the bare name.
    if ($instance->network == 'twitter') {
        $this->username = '@' . $instance->network_username;
    } else {
        $this->username = $instance->network_username;
    }

    $this->terms = new InsightTerms($instance->network);
}
/**
 * Constructor.
 *
 * Stores the instance and the API accessor, hands the instance's network
 * username to the Logger returned by Logger::getInstance(), and loads the
 * UserDAO plus the option hash of the 'twitter' plugin.
 *
 * @param Instance $instance Instance to crawl; its network_username is passed to the logger.
 * @param CrawlerTwitterAPIAccessorOAuth $api API accessor, stored in $this->api.
 */
public function __construct($instance, $api) {
    $this->instance = $instance;
    $this->api = $api;

    $this->logger = Logger::getInstance();
    $this->logger->setUsername($instance->network_username);

    $this->user_dao = DAOFactory::getDAO('UserDAO');

    // Spelled getDAO (not GetDAO) so both factory lookups in this constructor
    // read the same; PHP resolves method names case-insensitively, so the
    // call target is unchanged.
    $plugin_option_dao = DAOFactory::getDAO('PluginOptionDAO');
    $this->twitter_options = $plugin_option_dao->getOptionsHash('twitter');
}
/**
 * Crawl every instance returned by
 * InstanceDAO::getAllActiveInstancesStalestFirstByNetwork('facebook').
 *
 * For each instance: point the logger at its username, look up its OAuth
 * tokens, mark the instance as run, fetch the user info and the user's posts
 * and replies through a FacebookCrawler, then save the instance.
 * The logger is closed once all instances have been processed.
 *
 * Reads the globals $THINKTANK_CFG and $db.
 */
function facebook_crawl() {
    //TODO Crawl Facebook posts and comments and insert them into the database
    global $THINKTANK_CFG;
    global $db;
    global $conn;

    $logger = new Logger($THINKTANK_CFG['log_location']);
    $instance_dao = new InstanceDAO($db, $logger);
    $owner_instance_dao = new OwnerInstanceDAO($db, $logger);

    $facebook_instances = $instance_dao->getAllActiveInstancesStalestFirstByNetwork('facebook');
    foreach ($facebook_instances as $instance) {
        $logger->setUsername($instance->network_username);

        // The stored OAuth access token is what the crawler calls take as session key.
        $oauth_tokens = $owner_instance_dao->getOAuthTokens($instance->id);
        $session_key = $oauth_tokens['oauth_access_token'];

        $facebook = new Facebook(
            $THINKTANK_CFG['facebook_api_key'],
            $THINKTANK_CFG['facebook_api_secret']
        );
        $config = new Config($instance->network_username, $instance->network_user_id);

        $instance_dao->updateLastRun($instance->id);

        $crawler = new FacebookCrawler($instance, $logger, $facebook, $db);
        $crawler->fetchInstanceUserInfo($instance->network_user_id, $session_key);
        $crawler->fetchUserPostsAndReplies($instance->network_user_id, $session_key);

        $instance_dao->save(
            $crawler->instance,
            $crawler->owner_object->post_count,
            $logger,
            $facebook
        );
    }

    // Close logging
    $logger->close();
}
/**
 * Crawl every instance returned by
 * InstanceDAO::getAllActiveInstancesStalestFirstByNetwork('twitter').
 *
 * An instance without a non-empty OAuth access token and secret is crawled
 * in "no auth" mode: the API accessor is built with the placeholder
 * credentials 'NOAUTH', the calls that need the calling user's authorisation
 * are skipped, and search results for the username are fetched instead.
 * Instances whose accessor reports no available API calls are skipped.
 * The logger is closed once all instances have been processed.
 *
 * Reads the globals $THINKTANK_CFG and $db.
 */
function twitter_crawl() {
    global $THINKTANK_CFG;
    global $db;
    global $conn;

    $logger = new Logger($THINKTANK_CFG['log_location']);
    $instance_dao = new InstanceDAO($db, $logger);
    $owner_instance_dao = new OwnerInstanceDAO($db, $logger);

    $twitter_instances = $instance_dao->getAllActiveInstancesStalestFirstByNetwork('twitter');
    foreach ($twitter_instances as $instance) {
        $logger->setUsername($instance->network_username);
        $tokens = $owner_instance_dao->getOAuthTokens($instance->id);

        // No auth unless both the token and its secret are present and non-empty.
        $noauth = !isset($tokens['oauth_access_token'])
            || $tokens['oauth_access_token'] == ''
            || !isset($tokens['oauth_access_token_secret'])
            || $tokens['oauth_access_token_secret'] == '';

        if ($noauth) {
            $access_token = 'NOAUTH';
            $access_token_secret = 'NOAUTH';
        } else {
            $access_token = $tokens['oauth_access_token'];
            $access_token_secret = $tokens['oauth_access_token_secret'];
        }

        $api = new CrawlerTwitterAPIAccessorOAuth(
            $access_token,
            $access_token_secret,
            $THINKTANK_CFG['oauth_consumer_key'],
            $THINKTANK_CFG['oauth_consumer_secret'],
            $instance,
            $THINKTANK_CFG['archive_limit']
        );

        $crawler = new TwitterCrawler($instance, $logger, $api, $db);
        $config = new Config($instance->network_username, $instance->network_user_id);

        $api->init($logger);

        // Nothing to do for this instance when no API calls are available.
        if (!($api->available_api_calls_for_crawler > 0)) {
            continue;
        }

        $instance_dao->updateLastRun($instance->id);

        // These two run regardless of authorisation.
        $crawler->fetchInstanceUserInfo();
        $crawler->fetchInstanceUserTweets();

        // These four need authorisation and only work for the calling user.
        if (!$noauth) {
            $crawler->fetchInstanceUserRetweetsByMe();
            $crawler->fetchInstanceUserMentions();
            $crawler->fetchInstanceUserFriends();
            $crawler->fetchInstanceUserFollowers();
        }

        $crawler->fetchStrayRepliedToTweets();
        $crawler->fetchUnloadedFollowerDetails();
        $crawler->fetchFriendTweetsAndFriends();

        // TODO: Get direct messages
        // TODO: Gather favorites data

        // Without authorisation, fall back to search results for the username.
        if ($noauth) {
            $crawler->fetchSearchResults($instance->network_username);
        }

        $crawler->cleanUpFollows();

        // Save instance
        $instance_dao->save(
            $crawler->instance,
            $crawler->owner_object->post_count,
            $logger,
            $api
        );
    }

    // Close logging
    $logger->close();
}
/**
 * Check that a new Logger writes status messages to the configured log file,
 * both before and after a username has been set on it.
 *
 * Reads the log path from the global $THINKTANK_CFG['log_location'].
 */
function testCreatingNewLogger() {
    global $THINKTANK_CFG;
    $log_location = $THINKTANK_CFG['log_location'];

    $logger = new Logger($log_location);
    $logger->logStatus('Should write this to the log', get_class($this));
    $this->assertTrue(file_exists($log_location), 'File created');

    $messages = file($log_location);
    $this->assertWantedPattern('/Should write this to the log/', $messages[sizeof($messages) - 1]);

    $logger->setUsername('ginatrapani');
    $logger->logStatus('Should write this to the log with a username', get_class($this));

    // Re-read the file: the array loaded above predates the second
    // logStatus() call, so its last element is still the first message and
    // the username line would never be inspected.
    $messages = file($log_location);

    // NOTE(review): the '|' in this pattern is unescaped, so it is a regex
    // alternation rather than a literal pipe; the assertion passes when either
    // side matches. Left unchanged because the exact log line format is not
    // visible here - confirm and escape it if a literal '|' was intended.
    $this->assertWantedPattern('/ginatrapani | TestOfLogging:Should write this to the log/', $messages[sizeof($messages) - 1]);

    $logger->close();
}
/**
 * Crawl every instance returned by InstanceDAO::getAllActiveInstancesStalestFirst().
 *
 * For each instance an OAuth API accessor is built from the stored tokens.
 * When the accessor reports available API calls, the instance is marked as
 * run, the crawler's fetch steps are executed in order (the tweet-related
 * ones receive the LongUrl and Flickr API accessors), follows are cleaned up
 * and the instance is saved. Afterwards the logger is closed and, when the
 * global $conn is set, the database connection is closed.
 *
 * Reads the globals $THINKTANK_CFG, $db and $conn.
 */
function twitter_crawl() {
    global $THINKTANK_CFG;
    global $db;
    global $conn;

    $logger = new Logger($THINKTANK_CFG['log_location']);
    $instance_dao = new InstanceDAO($db, $logger);
    $owner_instance_dao = new OwnerInstanceDAO($db, $logger);
    $long_url_api = new LongUrlAPIAccessor($THINKTANK_CFG['app_title']);
    $flickr_api = new FlickrAPIAccessor($THINKTANK_CFG['flickr_api_key']);

    $all_instances = $instance_dao->getAllActiveInstancesStalestFirst();
    foreach ($all_instances as $instance) {
        $logger->setUsername($instance->network_username);
        $tokens = $owner_instance_dao->getOAuthTokens($instance->id);

        $api = new CrawlerTwitterAPIAccessorOAuth(
            $tokens['oauth_access_token'],
            $tokens['oauth_access_token_secret'],
            $THINKTANK_CFG['oauth_consumer_key'],
            $THINKTANK_CFG['oauth_consumer_secret'],
            $instance,
            $THINKTANK_CFG['archive_limit']
        );

        $crawler = new TwitterCrawler($instance, $logger, $api, $db);
        $config = new Config($instance->network_username, $instance->network_user_id);

        $api->init($logger);

        // Skip this instance when the accessor has no API calls available.
        if (!($api->available_api_calls_for_crawler > 0)) {
            continue;
        }

        $instance_dao->updateLastRun($instance->id);

        $crawler->fetchInstanceUserInfo();
        $crawler->fetchInstanceUserTweets($long_url_api, $flickr_api);
        $crawler->fetchInstanceUserRetweetsByMe($long_url_api, $flickr_api);
        $crawler->fetchInstanceUserMentions($long_url_api, $flickr_api);
        $crawler->fetchInstanceUserFriends();
        $crawler->fetchInstanceUserFollowers();
        $crawler->fetchStrayRepliedToTweets($long_url_api, $flickr_api);
        $crawler->fetchUnloadedFollowerDetails();
        $crawler->fetchFriendTweetsAndFriends($long_url_api, $flickr_api);

        // TODO: Get direct messages
        // TODO: Gather favorites data

        $crawler->cleanUpFollows();

        // Save instance
        $instance_dao->save(
            $crawler->instance,
            $crawler->owner_object->post_count,
            $logger,
            $api
        );
    }

    // Close logging
    $logger->close();

    // Clean up
    if (isset($conn)) {
        $db->closeConnection($conn);
    }
}
/**
 * Expand Bit.ly links and recheck click count on any links less than 2 days old.
 *
 * For each of the bit.ly, bitly.com and j.mp URL prefixes, asks the short
 * link DAO which links to update, fetches each link's data from the Bitly
 * API, then saves the click count (and title, when present) or records the
 * reported error. Does nothing beyond resetting the logger username and
 * building the API accessor when $this->link_limit is 0.
 *
 * @param str $api_key bitly api key
 * @param str $bit_login bitly login name
 */
public function acquireBitlyClickStats($api_key, $bit_login) {
    $this->logger->setUsername(null);
    $api_accessor = new BitlyAPIAccessor($api_key, $bit_login);

    // The limit does not change while iterating, so check it once up front.
    if ($this->link_limit == 0) {
        return;
    }

    $bitly_urls = array('http://bit.ly/', 'http://bitly.com/', 'http://j.mp/');
    foreach ($bitly_urls as $bitly_url) {
        //all short links first seen in the last 48 hours
        $bitly_links_to_update = $this->short_link_dao->getLinksToUpdate($bitly_url);
        $link_count = count($bitly_links_to_update);

        if ($link_count > 0) {
            $this->logger->logUserInfo($link_count . " {$bitly_url}" . " links to acquire click stats for.",
                __METHOD__ . ',' . __LINE__);
        } else {
            // The location is a single "method,line" string, as in every other
            // log call here; a comma in place of the final dot used to pass
            // __LINE__ as a separate third argument.
            $this->logger->logUserInfo("There are no " . $bitly_url . " links to fetch click stats for.",
                __METHOD__ . ',' . __LINE__);
        }

        $total_links = 0;
        $total_errors = 0;
        $total_updated = 0;

        foreach ($bitly_links_to_update as $link) {
            $this->logger->logInfo("Getting bit.ly click stats for " . ($total_updated + 1) . " of "
                . $link_count . " " . $bitly_url . " links (" . $link->short_url . ")",
                __METHOD__ . ',' . __LINE__);

            $link_data = $api_accessor->getBitlyLinkData($link->short_url);

            if ($link_data["clicks"] != '') {
                //save click total here
                $this->short_link_dao->saveClickCount($link->short_url, $link_data["clicks"]);
                // Save title to links table
                if ($link_data["title"] != '') {
                    $this->link_dao->updateTitle($link->link_id, $link_data["title"]);
                }
                $total_links++;
                $total_updated++;
            } elseif ($link_data["error"] != '') {
                $this->link_dao->saveExpansionError($link->short_url, $link_data["error"]);
                $total_errors++;
                $total_updated++;
            }
        }

        $this->logger->logUserSuccess($total_links . " " . $bitly_url . " link click stats acquired ("
            . $total_errors . " errors)", __METHOD__ . ',' . __LINE__);
    }
}
<?php require_once 'config.crawler.inc.php'; ini_set("include_path", ini_get("include_path") . PATH_SEPARATOR . $INCLUDE_PATH); require_once "init.php"; $db = new Database($THINKTANK_CFG); $conn = $db->getConnection(); $logger = new Logger($THINKTANK_CFG['log_location']); $id = new InstanceDAO($db, $logger); $oid = new OwnerInstanceDAO($db, $logger); $lurlapi = new LongUrlAPIAccessor($THINKTANK_CFG['app_title']); $flickrapi = new FlickrAPIAccessor($THINKTANK_CFG['flickr_api_key']); $instances = $id->getAllInstancesStalestFirst(); foreach ($instances as $i) { $logger->setUsername($i->twitter_username); $tokens = $oid->getOAuthTokens($i->id); $api = new CrawlerTwitterAPIAccessorOAuth($tokens['oauth_access_token'], $tokens['oauth_access_token_secret'], $THINKTANK_CFG['oauth_consumer_key'], $THINKTANK_CFG['oauth_consumer_secret'], $i, $THINKTANK_CFG['archive_limit']); $crawler = new Crawler($i, $logger, $api, $db); $cfg = new Config($i->twitter_username, $i->twitter_user_id); $api->init($logger); if ($api->available_api_calls_for_crawler > 0) { $id->updateLastRun($i->id); $crawler->fetchInstanceUserInfo(); $crawler->fetchInstanceUserTweets($lurlapi, $flickrapi); $crawler->fetchInstanceUserRetweetsByMe($lurlapi, $flickrapi); $crawler->fetchInstanceUserMentions($lurlapi, $flickrapi); $crawler->fetchInstanceUserFriends(); $crawler->fetchInstanceUserFollowers(); $crawler->fetchStrayRepliedToTweets($lurlapi, $flickrapi); $crawler->fetchUnloadedFollowerDetails(); $crawler->fetchFriendTweetsAndFriends($lurlapi, $flickrapi);