/**
 * Checks that RetweetDetector::detectOriginalTweet() maps retweets written in several
 * formats back to the post_id of the matching recent post, and returns false when no
 * recent post matches the retweeted text.
 */
function testDetectRetweets() {
    // Post ID / post text pairs for the author's recent posts; every other field is identical.
    $originals = array(
        array(9021481076, 'guilty pleasure: dropping the "my wife" bomb on unsuspecting straight people, mid-conversation'),
        array(9020176425, "a Google fangirl's take: no doubt Buzz's privacy issues are seriously problematic, but at least they're iterating quickly and openly."),
        array(9031523906, "one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx @voiceofsandiego, @dagnysalas, & @samuelhodgson"),
        array(8925077246, "how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH"),
    );

    $recent_tweets = array();
    foreach ($originals as $original) {
        list($post_id, $post_text) = $original;
        $recent_tweets[] = new Post(array(
            'id' => 1,
            'author_user_id' => 10,
            'author_username' => 'no one',
            'author_fullname' => "No One",
            'author_avatar' => 'yo.jpg',
            'source' => 'TweetDeck',
            'pub_date' => '',
            'adj_pub_date' => '',
            'in_reply_to_user_id' => '',
            'in_reply_to_post_id' => '',
            'reply_count_cache' => '',
            'in_retweet_of_post_id' => '',
            'retweet_count_cache' => '',
            'post_id' => $post_id,
            'post_text' => $post_text,
            'network' => 'twitter',
            'geo' => '',
            'place' => '',
            'location' => '',
            'is_geo_encoded' => 0,
            'is_reply_by_friend' => 0,
            'is_retweet_by_friend' => 0,
            'reply_retweet_distance' => 0,
        ));
    }

    $startwithcolon = "RT @ginatrapani: how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH";
    // Single-quoted so the embedded double quotes around "my wife" do not terminate the literal.
    $nostartnocolon = 'Agreed: RT @ginatrapani guilty pleasure: dropping the "my wife" bomb on unsuspecting straight people, mid-conversation';
    $startwithcolonspaces = "RT @ginatrapani how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH";
    $startwithcoloncutoff = "RT @ginatrapani: one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx.";
    // NOTE(review): this lowercase "rt" fixture has never been asserted against; add an
    // assertion once the expected behavior of detectOriginalTweet() for it is confirmed.
    $lowercase = "rt @ginatrapani: one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx.";
    $nonexistent = "rt @ginatrapani this is a non-existent tweet";

    $this->assertTrue(RetweetDetector::detectOriginalTweet($nostartnocolon, $recent_tweets) == 9021481076);
    $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcolonspaces, $recent_tweets) == 8925077246);
    $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcoloncutoff, $recent_tweets) == 9031523906);
    $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcolon, $recent_tweets) == 8925077246);
    // No recent post contains this text, so the detector must report failure with a strict false.
    $this->assertTrue(RetweetDetector::detectOriginalTweet($nonexistent, $recent_tweets) === false);
}
/**
 * Checks that RetweetDetector::detectOriginalTweet() maps retweets written in several
 * formats back to the status_id of the matching recent tweet, and returns false when no
 * recent tweet matches the retweeted text.
 */
function testDetectRetweets() {
    $recent_tweets = array(
        new Tweet(array(
            'status_id' => 9021481076.0,
            'tweet_text' => 'guilty pleasure: dropping the "my wife" bomb on unsuspecting straight people, mid-conversation',
        )),
        new Tweet(array(
            'status_id' => 9020176425.0,
            'tweet_text' => "a Google fangirl's take: no doubt Buzz's privacy issues are seriously problematic, but at least they're iterating quickly and openly.",
        )),
        new Tweet(array(
            'status_id' => 9031523906.0,
            'tweet_text' => "one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx @voiceofsandiego, @dagnysalas, & @samuelhodgson",
        )),
        new Tweet(array(
            'status_id' => 8925077246.0,
            'tweet_text' => "how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH",
        )),
    );

    $startwithcolon = "RT @ginatrapani: how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH";
    // Single-quoted so the embedded double quotes around "my wife" do not terminate the literal.
    $nostartnocolon = 'Agreed: RT @ginatrapani guilty pleasure: dropping the "my wife" bomb on unsuspecting straight people, mid-conversation';
    $startwithcolonspaces = "RT @ginatrapani how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH";
    $startwithcoloncutoff = "RT @ginatrapani: one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx.";
    // NOTE(review): this lowercase "rt" fixture has never been asserted against; add an
    // assertion once the expected behavior of detectOriginalTweet() for it is confirmed.
    $lowercase = "rt @ginatrapani: one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx.";
    $nonexistent = "rt @ginatrapani this is a non-existent tweet";

    $this->assertTrue(RetweetDetector::detectOriginalTweet($nostartnocolon, $recent_tweets) == 9021481076.0);
    $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcolonspaces, $recent_tweets) == 8925077246.0);
    $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcoloncutoff, $recent_tweets) == 9031523906.0);
    $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcolon, $recent_tweets) == 8925077246.0);
    // No recent tweet contains this text, so the detector must report failure with a strict false.
    $this->assertTrue(RetweetDetector::detectOriginalTweet($nonexistent, $recent_tweets) === false);
}
/**
 * Pull the instance user's mentions from Twitter, one page per API call, and save them.
 *
 * Every mention is run through RetweetDetector. A mention that retweets the instance user
 * is tagged with in_rt_of_user_id and, when the original can be matched against the user's
 * recent posts, with in_retweet_of_post_id before it is handed to PostDAO::addPost().
 * Paging stops on a non-200 response, an exhausted API call budget, an empty older page,
 * or once the instance reports is_archive_loaded_replies.
 */
public function fetchInstanceUserMentions() {
    if (!isset($this->user)) {
        $this->fetchInstanceUserInfo();
    }
    // Bail out when there is still no user, or the crawler has no API calls left.
    if (!isset($this->user) || !($this->api->available_api_calls_for_crawler > 0)) {
        return;
    }

    $first_page_done = false;
    $keep_paging = true;
    while ($this->api->available && $this->api->available_api_calls_for_crawler > 0 && $keep_paging) {
        $endpoint = $this->api->cURL_source['mentions'];
        if (isset($this->twitter_options['tweet_count_per_call'])) {
            $per_call = $this->twitter_options['tweet_count_per_call']->option_value;
        } else {
            $per_call = 100;
        }
        $params = array('count' => $per_call, 'include_rts' => 'true');
        if ($first_page_done) {
            // The newest page is already in; step one page further back.
            $this->instance->last_page_fetched_replies++;
            $params['page'] = $this->instance->last_page_fetched_replies;
        }

        try {
            list($http_status, $payload) = $this->api->apiRequest($endpoint, $params);
        } catch (APICallLimitExceededException $e) {
            break;
        }
        if ($http_status > 200) {
            $keep_paging = false;
            continue;
        }

        $mentions = $this->api->parseXML($payload);
        if (count($mentions) == 0 && $first_page_done) {
            // An older page came back empty: reset the page cursor and stop paging.
            $this->instance->last_page_fetched_replies = 1;
            $keep_paging = false;
            $this->instance->is_archive_loaded_mentions = true;
            $this->logger->logInfo('Paged back but not finding new mentions; moving on.', __METHOD__ . ',' . __LINE__);
        }

        $dao = DAOFactory::getDAO('PostDAO');
        if (!isset($recent_posts)) {
            // Loaded on first use and reused for retweet matching on later pages.
            $recent_posts = $dao->getAllPosts($this->user->user_id, 'twitter', 100);
        }

        $saved = 0;
        foreach ($mentions as $mention) {
            $is_retweet = RetweetDetector::isRetweet($mention['post_text'], $this->user->username);
            if ($is_retweet) {
                $this->logger->logInfo("Retweet found, " . substr($mention['post_text'], 0, 50) . "... ", __METHOD__ . ',' . __LINE__);
                // Record whose post was retweeted even when the original post ID can't be found.
                $mention['in_rt_of_user_id'] = $this->user->user_id;
                $original_id = RetweetDetector::detectOriginalTweet($mention['post_text'], $recent_posts);
                if ($original_id != false) {
                    $mention['in_retweet_of_post_id'] = $original_id;
                    $this->logger->logInfo("Retweet original status ID found: " . $original_id, __METHOD__ . ',' . __LINE__);
                }
            }

            if ($dao->addPost($mention, $this->user, $this->logger) === false) {
                continue;
            }
            $saved++;
            // Expand and insert the links contained in the saved mention.
            URLProcessor::processPostURLs($mention['post_text'], $mention['post_id'], 'twitter', $this->logger);
            if ($mention['user_id'] != $this->user->user_id) {
                // Don't overwrite the instance user's own details from a mention.
                $this->user_dao->updateUser(new User($mention, 'mentions'));
            }
        }

        if ($saved > 0) {
            if ($first_page_done) {
                $summary = count($mentions) . " mentions on page " . $this->instance->last_page_fetched_replies . " and " . $saved . " saved";
            } else {
                $summary = count($mentions) . " mentions found and " . $saved . " saved";
            }
            $this->logger->logUserSuccess($summary, __METHOD__ . ',' . __LINE__);
        } elseif (!$first_page_done) {
            $this->logger->logUserInfo("No new mentions found.", __METHOD__ . ',' . __LINE__);
        }

        $first_page_done = true;
        if ($this->instance->is_archive_loaded_replies) {
            $keep_paging = false;
            $this->logger->logInfo('Retrieved newest mentions; Archive loaded; Stopping reply fetch.', __METHOD__ . ',' . __LINE__);
        }
    }
}
/**
 * Fetch the owner's mentions from Twitter and store them through PostDAO.
 *
 * The newest page is requested first; later iterations page backwards via
 * last_page_fetched_mentions. Each mention is checked with RetweetDetector and, when the
 * original tweet can be matched against the owner's recent posts, tagged with
 * in_retweet_of_post_id before being saved. Fetching stops on a non-200 response, an
 * empty older page, or once the instance reports is_archive_loaded_replies.
 */
public function fetchInstanceUserMentions() {
    if (!isset($this->owner_object)) {
        $this->fetchInstanceUserInfo();
    }
    if (isset($this->owner_object)) {
        // Get owner's mentions
        if ($this->api->available_api_calls_for_crawler > 0) {
            $got_newest_mentions = false;
            $continue_fetching = true;
            while ($this->api->available && $this->api->available_api_calls_for_crawler > 0 && $continue_fetching) {
                // Get the most recent mentions
                $mentions = $this->api->cURL_source['mentions'];
                $args = array();
                $args['count'] = 200;
                $args['include_rts'] = 'true';
                if ($got_newest_mentions) {
                    $this->last_page_fetched_mentions++;
                    $args['page'] = $this->last_page_fetched_mentions;
                }
                list($cURL_status, $twitter_data) = $this->api->apiRequest($mentions, $args);
                if ($cURL_status > 200) {
                    $continue_fetching = false;
                } else {
                    try {
                        $tweets = $this->api->parseXML($twitter_data);
                        if (count($tweets) == 0 && $got_newest_mentions) {
                            // You're paged back and there are no new tweets
                            $this->last_page_fetched_mentions = 1;
                            $continue_fetching = false;
                            $this->instance->is_archive_loaded_mentions = true;
                            $this->logger->logStatus('Paged back but not finding new mentions; moving on.', get_class($this));
                        }

                        $pd = DAOFactory::getDAO('PostDAO');
                        if (!isset($recentTweets)) {
                            // Loaded on first use and reused for retweet matching on later pages.
                            $recentTweets = $pd->getAllPosts($this->owner_object->user_id, 'twitter', 100);
                        }

                        $count = 0;
                        foreach ($tweets as $tweet) {
                            // Figure out if the mention is a retweet
                            if (RetweetDetector::isRetweet($tweet['post_text'], $this->owner_object->username)) {
                                $this->logger->logStatus("Retweet found, " . substr($tweet['post_text'], 0, 50) . "... ", get_class($this));
                                $originalTweetId = RetweetDetector::detectOriginalTweet($tweet['post_text'], $recentTweets);
                                if ($originalTweetId != false) {
                                    $tweet['in_retweet_of_post_id'] = $originalTweetId;
                                    $this->logger->logStatus("Retweet original status ID found: " . $originalTweetId, get_class($this));
                                }
                            }
                            if ($pd->addPost($tweet, $this->owner_object, $this->logger) > 0) {
                                $count++;
                                // Expand and insert links contained in tweet
                                $this->processTweetURLs($tweet);
                                if ($tweet['user_id'] != $this->owner_object->user_id) {
                                    // Don't update owner info from reply
                                    $u = new User($tweet, 'mentions');
                                    $this->user_dao->updateUser($u);
                                }
                            }
                        }

                        $this->logger->logStatus(count($tweets) . " mentions found and {$count} saved", get_class($this));
                        $got_newest_mentions = true;

                        if ($this->instance->is_archive_loaded_replies) {
                            $continue_fetching = false;
                            $this->logger->logStatus('Retrieved newest mentions; Archive loaded; Stopping reply fetch.', get_class($this));
                        }
                    } catch (Exception $e) {
                        // Concatenate the username: a single-quoted literal would log the
                        // text "$this->owner_object->username" instead of its value.
                        // NOTE(review): $continue_fetching stays true here, so the same
                        // request is retried while API calls remain - confirm that is intended.
                        $this->logger->logStatus('Could not parse mentions XML for ' . $this->owner_object->username . ': ' . $e->getMessage(), get_class($this));
                    }
                }
            }
        } else {
            // Only log when there is an actual error message; nothing is pending after the loop.
            $this->logger->logStatus('Crawler API error: either call limit exceeded or API returned an error.', get_class($this));
        }
    } else {
        $this->logger->logStatus("Cannot fetch mentions; Owner object has not been set.", get_class($this));
    }
}
/**
 * Fetch the owner's mentions from Twitter and store them through TweetDAO.
 *
 * The newest page is requested first; later iterations page backwards via
 * last_page_fetched_mentions. Each mention is checked with RetweetDetector and, when the
 * original tweet can be matched against the owner's recent tweets, tagged with
 * in_retweet_of_status_id before being saved. Fetching stops on a non-200 response, an
 * empty older page, or once the instance reports is_archive_loaded_replies.
 *
 * @param mixed $lurl Passed through unchanged to processTweetURLs() for each saved tweet.
 * @param mixed $fa   Passed through unchanged to processTweetURLs() for each saved tweet.
 */
function fetchInstanceUserMentions($lurl, $fa) {
    // Get owner's mentions
    if ($this->api->available_api_calls_for_crawler > 0) {
        $got_newest_mentions = false;
        $continue_fetching = true;
        while ($this->api->available && $this->api->available_api_calls_for_crawler > 0 && $continue_fetching) {
            // Get the most recent mentions
            $mentions = str_replace("[id]", $this->owner_object->username, $this->api->cURL_source['mentions']);
            $args = array();
            $args['count'] = 200;
            if ($got_newest_mentions) {
                $this->last_page_fetched_mentions++;
                $args['page'] = $this->last_page_fetched_mentions;
            }
            list($cURL_status, $twitter_data) = $this->api->apiRequest($mentions, $this->logger, $args);
            if ($cURL_status > 200) {
                $continue_fetching = false;
            } else {
                try {
                    $tweets = $this->api->parseXML($twitter_data);
                    if (count($tweets) == 0 && $got_newest_mentions) {
                        // You're paged back and there are no new tweets
                        $this->last_page_fetched_mentions = 1;
                        $continue_fetching = false;
                        $this->instance->is_archive_loaded_mentions = true;
                        $this->logger->logStatus('Paged back but not finding new mentions; moving on.', get_class($this));
                    }

                    $td = new TweetDAO($this->db, $this->logger);
                    if (!isset($recentTweets)) {
                        // Loaded on first use and reused for retweet matching on later pages.
                        $recentTweets = $td->getAllTweets($this->owner_object->id, 15);
                    }

                    $count = 0;
                    foreach ($tweets as $tweet) {
                        // Figure out if the mention is a retweet
                        if (RetweetDetector::isRetweet($tweet['tweet_text'], $this->owner_object->username)) {
                            $this->logger->logStatus("Retweet found, " . substr($tweet['tweet_text'], 0, 50) . "... ", get_class($this));
                            $originalTweetId = RetweetDetector::detectOriginalTweet($tweet['tweet_text'], $recentTweets);
                            if ($originalTweetId != false) {
                                $tweet['in_retweet_of_status_id'] = $originalTweetId;
                                $this->logger->logStatus("Retweet original status ID found: " . $originalTweetId, get_class($this));
                            }
                        }
                        if ($td->addTweet($tweet, $this->owner_object, $this->logger) > 0) {
                            $count++;
                            // Expand and insert links contained in tweet
                            $this->processTweetURLs($tweet, $lurl, $fa);
                            if ($tweet['user_id'] != $this->owner_object->id) {
                                // Don't update owner info from reply
                                $u = new User($tweet, 'mentions');
                                $this->ud->updateUser($u, $this->logger);
                            }
                        }
                    }

                    $this->logger->logStatus(count($tweets) . " mentions found and {$count} saved", get_class($this));
                    $got_newest_mentions = true;

                    if ($this->instance->is_archive_loaded_replies) {
                        $continue_fetching = false;
                        $this->logger->logStatus('Retrieved newest mentions; Reply archive loaded; Stopping reply fetch.', get_class($this));
                    }
                } catch (Exception $e) {
                    // Concatenate the username: a single-quoted literal would log the
                    // text "$this->owner_object->username" instead of its value.
                    // NOTE(review): $continue_fetching stays true here, so the same
                    // request is retried while API calls remain - confirm that is intended.
                    $this->logger->logStatus('Could not parse mentions XML for ' . $this->owner_object->username . ': ' . $e->getMessage(), get_class($this));
                }
            }
        }
    } else {
        // Only log when there is an actual error message; nothing is pending after the loop.
        $this->logger->logStatus('Crawler API call limit exceeded.', get_class($this));
    }
}