/**
 * Check that RetweetDetector::detectOriginalTweet() maps old-style retweet text ("RT @user ...")
 * to the status ID of the matching entry in a list of recent tweets, and yields false when no
 * recent tweet matches.
 */
function testDetectRetweets() {
    $recent_tweets = array(
        new Tweet(array(
            'status_id' => 9021481076.0,
            'tweet_text' => 'guilty pleasure: dropping the "my wife" bomb on unsuspecting straight people, mid-conversation',
        )),
        new Tweet(array(
            'status_id' => 9020176425.0,
            'tweet_text' => "a Google fangirl's take: no doubt Buzz's privacy issues are seriously problematic, but at least they're iterating quickly and openly.",
        )),
        new Tweet(array(
            'status_id' => 9031523906.0,
            'tweet_text' => "one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx @voiceofsandiego, @dagnysalas, & @samuelhodgson",
        )),
        new Tweet(array(
            'status_id' => 8925077246.0,
            'tweet_text' => "how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH",
        )),
    );

    $startwithcolon = "RT @ginatrapani: how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH";
    // Single-quoted so the double quotes around "my wife" are part of the text instead of
    // terminating the literal (the double-quoted form was a parse error).
    $nostartnocolon = 'Agreed: RT @ginatrapani guilty pleasure: dropping the "my wife" bomb on unsuspecting straight people, mid-conversation';
    $startwithcolonspaces = "RT @ginatrapani how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH";
    $startwithcoloncutoff = "RT @ginatrapani: one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx.";
    // NOTE(review): this lower-case "rt" fixture is defined but no assertion uses it yet.
    $lowwercase = "rt @ginatrapani: one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx.";
    $nonexistent = "rt @ginatrapani this is a non-existent tweet";

    $this->assertTrue(RetweetDetector::detectOriginalTweet($nostartnocolon, $recent_tweets) == 9021481076.0);
    $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcolonspaces, $recent_tweets) == 8925077246.0);
    $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcoloncutoff, $recent_tweets) == 9031523906.0);
    $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcolon, $recent_tweets) == 8925077246.0);
    // Strict comparison: a miss must be boolean false, not merely a falsy ID.
    $this->assertTrue(RetweetDetector::detectOriginalTweet($nonexistent, $recent_tweets) === false);
}
/**
 * Check that RetweetDetector::detectOriginalTweet() maps old-style retweet text ("RT @user ...")
 * to the post ID of the matching entry in a list of recent posts, and yields false when no
 * recent post matches.
 */
function testDetectRetweets() {
    // Every fixture post shares these values; only 'post_id' and 'post_text' differ. The two
    // placeholders keep the keys in their original position within the array.
    $post_template = array(
        'id' => 1,
        'author_user_id' => 10,
        'author_username' => 'no one',
        'author_fullname' => "No One",
        'author_avatar' => 'yo.jpg',
        'source' => 'TweetDeck',
        'pub_date' => '',
        'adj_pub_date' => '',
        'in_reply_to_user_id' => '',
        'in_reply_to_post_id' => '',
        'reply_count_cache' => '',
        'in_retweet_of_post_id' => '',
        'retweet_count_cache' => '',
        'post_id' => null,
        'post_text' => null,
        'network' => 'twitter',
        'geo' => '',
        'place' => '',
        'location' => '',
        'is_geo_encoded' => 0,
        'is_reply_by_friend' => 0,
        'is_retweet_by_friend' => 0,
        'reply_retweet_distance' => 0,
    );
    // (post_id, post_text) pairs, in the same order as the original fixture list.
    $originals = array(
        array(9021481076, 'guilty pleasure: dropping the "my wife" bomb on unsuspecting straight people, mid-conversation'),
        array(9020176425, "a Google fangirl's take: no doubt Buzz's privacy issues are seriously problematic, but at least they're iterating quickly and openly."),
        array(9031523906, "one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx @voiceofsandiego, @dagnysalas, & @samuelhodgson"),
        array(8925077246, "how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH"),
    );
    $recent_tweets = array();
    foreach ($originals as $original) {
        $fields = $post_template;
        $fields['post_id'] = $original[0];
        $fields['post_text'] = $original[1];
        $recent_tweets[] = new Post($fields);
    }

    $startwithcolon = "RT @ginatrapani: how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH";
    // Single-quoted so the double quotes around "my wife" are part of the text instead of
    // terminating the literal (the double-quoted form was a parse error).
    $nostartnocolon = 'Agreed: RT @ginatrapani guilty pleasure: dropping the "my wife" bomb on unsuspecting straight people, mid-conversation';
    $startwithcolonspaces = "RT @ginatrapani how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH";
    $startwithcoloncutoff = "RT @ginatrapani: one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx.";
    // NOTE(review): this lower-case "rt" fixture is defined but no assertion uses it yet.
    $lowwercase = "rt @ginatrapani: one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx.";
    $nonexistent = "rt @ginatrapani this is a non-existent tweet";

    $this->assertTrue(RetweetDetector::detectOriginalTweet($nostartnocolon, $recent_tweets) == 9021481076);
    $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcolonspaces, $recent_tweets) == 8925077246);
    $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcoloncutoff, $recent_tweets) == 9031523906);
    $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcolon, $recent_tweets) == 8925077246);
    // Strict comparison: a miss must be boolean false, not merely a falsy ID.
    $this->assertTrue(RetweetDetector::detectOriginalTweet($nonexistent, $recent_tweets) === false);
}
/**
 * Retrieve a retweeting user's timeline and store the posts found in it.
 *
 * Posts that the parser has not already marked with 'in_retweet_of_post_id' are run through
 * RetweetDetector; when one turns out to be a retweet of $retweeted_status it is linked to that
 * post before being saved. Nothing is fetched when the API is flagged unavailable or the crawler
 * has no API calls left.
 *
 * @param array $retweeted_status Original post; its 'post_id' and 'post_text' are read.
 * @param User $user_with_retweet User whose timeline is fetched; handed to the DAOs with each post.
 */
private function fetchUserTimelineForRetweet($retweeted_status, $user_with_retweet) {
    $retweeted_status_id = $retweeted_status["post_id"];

    if (!$this->api->available || !($this->api->available_api_calls_for_crawler > 0)) {
        return;
    }

    $stream_with_retweet = str_replace("[id]", $user_with_retweet->username,
        $this->api->cURL_source['user_timeline']);
    $args = array(
        'count' => isset($this->twitter_options['tweet_count_per_call'])
            ? $this->twitter_options['tweet_count_per_call']->option_value
            : 100,
        'include_rts' => "true",
    );

    try {
        list($cURL_status, $twitter_data) = $this->api->apiRequest($stream_with_retweet, $args);
    } catch (APICallLimitExceededException $e) {
        // Record why the fetch was abandoned instead of returning silently.
        $this->logger->logInfo($e->getMessage(), __METHOD__ . ',' . __LINE__);
        return;
    }

    if ($cURL_status != 200) {
        if ($cURL_status == 401) {
            //not authorized to see user timeline
            //don't set API to unavailable just because a private user retweeted
            $this->api->available = true;
            $status_message = 'Not authorized to see ' . $user_with_retweet->username . "'s timeline;moving on.";
            $this->logger->logInfo($status_message, __METHOD__ . ',' . __LINE__);
        }
        return;
    }

    $tweets = $this->api->parseXML($twitter_data);
    if (count($tweets) == 0) {
        return;
    }

    $count = 0;
    $post_dao = DAOFactory::getDAO('PostDAO');
    foreach ($tweets as $tweet) {
        // If the parser already recognised this tweet as a native retweet, 'in_retweet_of_post_id'
        // is set and the text-based detection below is skipped; it could not unset the value
        // anyway, so skipping is purely an efficiency measure. (An else branch here could log
        // diagnostics for natively-detected retweets.)
        if (empty($tweet['in_retweet_of_post_id'])
            && RetweetDetector::isRetweet($tweet['post_text'], $this->user->username)) {
            $this->logger->logInfo("Retweet by " . $tweet['user_name'] . " found, " .
                substr($tweet['post_text'], 0, 50) . "... ", __METHOD__ . ',' . __LINE__);
            if (RetweetDetector::isRetweetOfTweet($tweet["post_text"], $retweeted_status["post_text"])) {
                $tweet['in_retweet_of_post_id'] = $retweeted_status_id;
                $this->logger->logInfo("Retweet by " . $tweet['user_name'] . " of " .
                    $this->user->username . " original status ID found: " . $retweeted_status_id,
                    __METHOD__ . ',' . __LINE__);
            } else {
                $this->logger->logInfo("Retweet by " . $tweet['user_name'] . " of " .
                    $this->user->username . " original status ID NOT found: " .
                    $retweeted_status["post_text"] . " NOT a RT of: " . $tweet["post_text"],
                    __METHOD__ . ',' . __LINE__);
            }
        }

        $inserted_post_key = $post_dao->addPost($tweet, $user_with_retweet, $this->logger);
        if ($inserted_post_key !== false) {
            $count++;
            //expand and insert links contained in tweet
            URLProcessor::processPostURLs($tweet['post_text'], $tweet['post_id'], 'twitter', $this->logger);
            $this->user_dao->updateUser($user_with_retweet);
        }
    }
    $this->logger->logInfo(count($tweets) . " tweet(s) found in usertimeline via retweet for " .
        $user_with_retweet->username . " and {$count} saved", __METHOD__ . ',' . __LINE__);
}
/**
 * Retrieve a retweeting user's timeline and store the posts found in it.
 *
 * Each timeline post that RetweetDetector identifies as a retweet of $retweeted_status is linked
 * to that post before being saved. A status line is logged only when there is something to
 * report (authorization failure, API error code, or unavailable API).
 *
 * @param array $retweeted_status Original post; its 'post_id' and 'post_text' are read.
 * @param User $user_with_retweet User whose timeline is fetched; handed to the DAOs with each post.
 */
private function fetchUserTimelineForRetweet($retweeted_status, $user_with_retweet) {
    $retweeted_status_id = $retweeted_status["post_id"];
    $status_message = "";

    if ($this->api->available && $this->api->available_api_calls_for_crawler > 0) {
        $stream_with_retweet = str_replace("[id]", $user_with_retweet->username,
            $this->api->cURL_source['user_timeline']);
        $args = array();
        $args["count"] = 200;
        $args["include_rts"] = "true";
        list($cURL_status, $twitter_data) = $this->api->apiRequest($stream_with_retweet, $args);

        if ($cURL_status == 200) {
            try {
                $count = 0;
                $tweets = $this->api->parseXML($twitter_data);
                if (count($tweets) > 0) {
                    $pd = DAOFactory::getDAO('PostDAO');
                    foreach ($tweets as $tweet) {
                        if (RetweetDetector::isRetweet($tweet['post_text'], $this->owner_object->username)) {
                            $this->logger->logStatus("Retweet by " . $tweet['user_name'] . " found, " .
                                substr($tweet['post_text'], 0, 50) . "... ", get_class($this));
                            if (RetweetDetector::isRetweetOfTweet($tweet["post_text"],
                                $retweeted_status["post_text"])) {
                                $tweet['in_retweet_of_post_id'] = $retweeted_status_id;
                                $this->logger->logStatus("Retweet by " . $tweet['user_name'] . " of " .
                                    $this->owner_object->username . " original status ID found: " .
                                    $retweeted_status_id, get_class($this));
                            } else {
                                $this->logger->logStatus("Retweet by " . $tweet['user_name'] . " of " .
                                    $this->owner_object->username . " original status ID NOT found: " .
                                    $retweeted_status["post_text"] . " NOT a RT of: " . $tweet["post_text"],
                                    get_class($this));
                            }
                        }
                        if ($pd->addPost($tweet, $user_with_retweet, $this->logger) > 0) {
                            $count++;
                            //expand and insert links contained in tweet
                            $this->processTweetURLs($tweet);
                            $this->user_dao->updateUser($user_with_retweet);
                        }
                    }
                    $this->logger->logStatus(count($tweets) . " tweet(s) found in usertimeline via retweet for " .
                        $user_with_retweet->username . " and {$count} saved", get_class($this));
                }
            } catch (Exception $e) {
                $this->logger->logStatus($e->getMessage(), get_class($this));
                $this->logger->logStatus('Could not parse timeline for retweets XML for ' .
                    $user_with_retweet->username, get_class($this));
            }
        } elseif ($cURL_status == 401) {
            //not authorized to see user timeline
            //don't set API to unavailable just because a private user retweeted
            $this->api->available = true;
            $status_message .= 'Not authorized to see ' . $user_with_retweet->username . "'s timeline;moving on.";
        } else {
            $status_message .= 'API returned error code ' . $cURL_status;
        }
    } else {
        $status_message .= 'Crawler API error: either call limit exceeded or API returned an error.';
    }

    // The success path leaves the message empty; skip the call so no blank status line is logged.
    if ($status_message !== "") {
        $this->logger->logStatus($status_message, get_class($this));
    }
}
/**
 * Fetch the current instance user's mentions from Twitter and store them in the database.
 *
 * The newest mentions are requested first; later requests page backwards by passing the
 * instance's last_reply_id as max_id. A mention that RetweetDetector recognises as a retweet of
 * the instance user is tagged with 'in_rt_of_user_id' and, when the original can be found among
 * the user's recent posts, with 'in_retweet_of_post_id'.
 */
public function fetchInstanceUserMentions() {
    if (!isset($this->user)) {
        $this->fetchInstanceUserInfo();
    }
    if (!isset($this->user)) {
        return;
    }

    $endpoint = $this->api->endpoints['mentions'];
    $args = array(
        "count" => isset($this->twitter_options['tweet_count_per_call'])
            ? $this->twitter_options['tweet_count_per_call']->option_value
            : 100,
    );
    $newest_fetched = false;
    $recent_posts = null;

    do {
        $keep_paging = true;
        if ($newest_fetched && $this->instance->last_reply_id !== "") {
            $args['max_id'] = $this->instance->last_reply_id;
        }

        try {
            list($http_status, $payload) = $this->api->apiRequest($endpoint, $args);
            if ($http_status > 200) {
                break;
            }

            $mentions = $this->api->parseJSONTweets($payload);
            if ($newest_fetched && count($mentions) == 0) {
                // you're paged back and no new tweets
                $keep_paging = false;
                $this->instance->is_archive_loaded_mentions = true;
                $this->logger->logInfo('Paged back but not finding new mentions; moving on.',
                    __METHOD__ . ',' . __LINE__);
            }

            $post_dao = DAOFactory::getDAO('PostDAO');
            $mention_dao = DAOFactory::getDAO('MentionDAO');
            if ($recent_posts === null) {
                // Looked up once and reused on later pages as the pool of possible originals.
                $recent_posts = $post_dao->getAllPosts($this->user->user_id, 'twitter', 100);
            }

            $count = 0;
            foreach ($mentions as $mention) {
                // Figure out if the mention is a retweet
                if (RetweetDetector::isRetweet($mention['post_text'], $this->user->username)) {
                    $this->logger->logInfo("Retweet found, " . substr($mention['post_text'], 0, 50) . "... ",
                        __METHOD__ . ',' . __LINE__);
                    // if did find retweet, add in_rt_of_user_id info
                    // even if can't find original post id
                    $mention['in_rt_of_user_id'] = $this->user->user_id;
                    $original_post_id = RetweetDetector::detectOriginalTweet($mention['post_text'], $recent_posts);
                    if ($original_post_id != false) {
                        $mention['in_retweet_of_post_id'] = $original_post_id;
                        $this->logger->logInfo("Retweet original status ID found: " . $original_post_id,
                            __METHOD__ . ',' . __LINE__);
                    }
                }

                $inserted_post_key = $post_dao->addPost($mention, $this->user, $this->logger);
                if ($inserted_post_key !== false) {
                    $count++;
                    //expand and insert links contained in tweet
                    URLProcessor::processPostURLs($mention['post_text'], $mention['post_id'], 'twitter',
                        $this->logger);
                    if ($mention['user_id'] != $this->user->user_id) {
                        //don't update owner info from reply
                        $author = new User($mention, 'mentions');
                        $this->user_dao->updateUser($author);
                    }
                    $mention_dao->insertMention($this->user->user_id, $this->user->username,
                        $mention['post_id'], $mention['author_user_id'], 'twitter');
                }

                // Track the paging cursor: the first ID seen when none is stored yet, and every
                // ID once paging back has begun.
                if ($newest_fetched || $this->instance->last_reply_id == "") {
                    $this->instance->last_reply_id = $mention['post_id'];
                }
            }

            if (!$newest_fetched) {
                if ($count == 0) {
                    $this->logger->logUserInfo("No new mentions found.", __METHOD__ . ',' . __LINE__);
                } else {
                    $this->logger->logUserSuccess(count($mentions) . " mentions found and {$count} saved",
                        __METHOD__ . ',' . __LINE__);
                }
            } elseif ($count > 0) {
                $this->logger->logUserSuccess(count($mentions) . " mentions past reply ID " .
                    $this->instance->last_reply_id . " and {$count} saved", __METHOD__ . ',' . __LINE__);
            }

            $newest_fetched = true;

            // NOTE(review): the flag set above is is_archive_loaded_mentions, but the one checked
            // here is is_archive_loaded_replies -- confirm this is intended.
            if ($this->instance->is_archive_loaded_replies) {
                $keep_paging = false;
                $this->logger->logInfo('Retrieved newest mentions; Archive loaded; Stopping reply fetch.',
                    __METHOD__ . ',' . __LINE__);
            }

            // The cursor did not move during this page, so another request would repeat it.
            $cursor_unchanged = isset($args['max_id'])
                && $args['max_id'] !== ""
                && $this->instance->last_reply_id !== ""
                && $args['max_id'] == $this->instance->last_reply_id;
            if ($cursor_unchanged) {
                $keep_paging = false;
            }
        } catch (APICallLimitExceededException $e) {
            $this->logger->logInfo($e->getMessage(), __METHOD__ . ',' . __LINE__);
            break;
        }
    } while ($keep_paging);
}
/**
 * Fetch the owner's mentions from Twitter and store them in the database.
 *
 * The newest page is requested first; subsequent requests advance last_page_fetched_mentions and
 * pass it as the 'page' argument. A mention that RetweetDetector recognises as a retweet of the
 * owner is linked to the original tweet when it can be found among the owner's recent tweets.
 *
 * @param mixed $lurl Passed through unchanged to processTweetURLs().
 * @param mixed $fa Passed through unchanged to processTweetURLs().
 */
function fetchInstanceUserMentions($lurl, $fa) {
    // Get owner's mentions
    if (!($this->api->available_api_calls_for_crawler > 0)) {
        $this->logger->logStatus('Crawler API call limit exceeded.', get_class($this));
        return;
    }

    $got_newest_mentions = false;
    $continue_fetching = true;
    while ($this->api->available && $this->api->available_api_calls_for_crawler > 0 && $continue_fetching) {
        # Get the most recent mentions
        $mentions = str_replace("[id]", $this->owner_object->username, $this->api->cURL_source['mentions']);
        $args = array();
        $args['count'] = 200;
        if ($got_newest_mentions) {
            $this->last_page_fetched_mentions++;
            $args['page'] = $this->last_page_fetched_mentions;
        }

        list($cURL_status, $twitter_data) = $this->api->apiRequest($mentions, $this->logger, $args);
        if ($cURL_status > 200) {
            $continue_fetching = false;
            continue;
        }

        try {
            $tweets = $this->api->parseXML($twitter_data);
            if (count($tweets) == 0 && $got_newest_mentions) {
                # you're paged back and no new tweets
                $this->last_page_fetched_mentions = 1;
                $continue_fetching = false;
                $this->instance->is_archive_loaded_mentions = true;
                $this->logger->logStatus('Paged back but not finding new mentions; moving on.', get_class($this));
            }

            $td = new TweetDAO($this->db, $this->logger);
            if (!isset($recentTweets)) {
                // Looked up once and reused on later pages as the pool of possible originals.
                $recentTweets = $td->getAllTweets($this->owner_object->id, 15);
            }

            $count = 0;
            foreach ($tweets as $tweet) {
                // Figure out if the mention is a retweet
                if (RetweetDetector::isRetweet($tweet['tweet_text'], $this->owner_object->username)) {
                    $this->logger->logStatus("Retweet found, " . substr($tweet['tweet_text'], 0, 50) . "... ",
                        get_class($this));
                    $originalTweetId = RetweetDetector::detectOriginalTweet($tweet['tweet_text'], $recentTweets);
                    if ($originalTweetId != false) {
                        $tweet['in_retweet_of_status_id'] = $originalTweetId;
                        $this->logger->logStatus("Retweet original status ID found: " . $originalTweetId,
                            get_class($this));
                    }
                }
                if ($td->addTweet($tweet, $this->owner_object, $this->logger) > 0) {
                    $count++;
                    //expand and insert links contained in tweet
                    $this->processTweetURLs($tweet, $lurl, $fa);
                    if ($tweet['user_id'] != $this->owner_object->id) {
                        //don't update owner info from reply
                        $u = new User($tweet, 'mentions');
                        $this->ud->updateUser($u, $this->logger);
                    }
                }
            }

            $this->logger->logStatus(count($tweets) . " mentions found and {$count} saved", get_class($this));
            $got_newest_mentions = true;

            // NOTE(review): the flag set above is is_archive_loaded_mentions, but the one checked
            // here is is_archive_loaded_replies -- confirm this is intended.
            if ($this->instance->is_archive_loaded_replies) {
                $continue_fetching = false;
                $this->logger->logStatus('Retrieved newest mentions; Reply archive loaded; Stopping reply fetch.',
                    get_class($this));
            }
        } catch (Exception $e) {
            // Concatenated: inside single quotes the username expression was logged literally.
            $this->logger->logStatus('Could not parse mentions XML for ' . $this->owner_object->username,
                get_class($this));
            // Stop here; looping again would re-send the identical request and spend the
            // remaining API calls on the same failing page.
            $continue_fetching = false;
        }
    }
}
/**
 * Convert one decoded tweet into post, entity and user arrays.
 *
 * When the tweet carries a 'retweeted_status', that original is parsed recursively and attached
 * under $post['retweeted_post']. Otherwise the text is checked for an old-style "RT @user"
 * retweet of the first mentioned user. Exceptions raised while parsing are logged and whatever
 * was collected up to that point is returned.
 *
 * @param array $content Decoded tweet; reads 'user', 'id_str', 'text', 'source', 'favorited',
 *                       'coordinates', 'place', 'created_at', the 'in_reply_to_*_str' keys and,
 *                       when present, 'entities', 'retweet_count' and 'retweeted_status'.
 * @return array array($post, $entities, $user_array), where $entities has the keys 'urls',
 *               'mentions', 'hashtags' and 'place' (which may be null).
 */
private function parsePost($content) {
    $logger = Logger::getInstance('stream_log_location');
    $post = array();
    $mentions = array();
    $urls = array();
    $hashtags = array();
    $entities = array();
    $user_array = array();
    // Defined up front so the 'place' entity below exists even when an exception interrupts
    // parsing before the tweet's place has been read.
    $place = null;

    try {
        $post['is_rt'] = false;
        $post['in_rt_of_user_id'] = '';
        $user = $content['user'];

        // parse info into user and post arrays
        $post['post_id'] = $content['id_str'];
        $post['author_user_id'] = $user['id_str'];
        $post['author_username'] = $user['screen_name'];
        $post['author_fullname'] = $user['name'];
        $post['author_avatar'] = $user['profile_image_url'];
        $post['author_follower_count'] = $user['followers_count'];
        $post['post_text'] = $content['text'];
        $post['is_protected'] = $user['protected'];
        $post['source'] = $content['source'];
        $post['location'] = $user['location'];
        $post['description'] = $user['description'];
        $post['url'] = $user['url'];
        $post['author_friends_count'] = $user['friends_count'];
        $post['author_post_count'] = $user['statuses_count'];
        $post['author_joined'] = gmdate("Y-m-d H:i:s", strtotime($user['created_at']));
        $post['favorited'] = $content['favorited'];
        $user_array['url'] = $user['url'];

        // for now, retain existing 'place' handling, where a place is set in the post.
        // Set new place_id field as well, and add point coord information if it exists
        $logger->logDebug("point coords: " . print_r($content['coordinates'], true),
            __METHOD__ . ',' . __LINE__);
        $place = $content['place'];
        if ($place != null) {
            $post['place'] = $place['full_name'];
            $post['place_id'] = $place['id'];
            if (isset($content['coordinates'])) {
                $place['point_coords'] = $content['coordinates'];
            }
        } else {
            $post['place'] = null;
            $post['place_id'] = null;
            // it's possible for the point coords to be set even if the place is not.
            if (isset($content['coordinates'])) {
                $place = array();
                $place['point_coords'] = $content['coordinates'];
            }
        }

        $post['pub_date'] = gmdate("Y-m-d H:i:s", strtotime($content['created_at']));
        $post['in_reply_to_user_id'] = $content['in_reply_to_user_id_str'];
        $post['in_reply_to_post_id'] = $content['in_reply_to_status_id_str'];
        $post['network'] = 'twitter';
        $post['reply_count_cache'] = 0;

        if (isset($content['entities'])) {
            foreach ($content['entities']['user_mentions'] as $m) {
                $mention_info = array();
                $mention_info['user_id'] = $m['id_str'];
                $mention_info['user_name'] = $m['screen_name'];
                $mentions[] = $mention_info;
            }
            // get urls: only a flat list is needed, preferring the expanded form when present
            foreach ($content['entities']['urls'] as $u) {
                if (isset($u['expanded_url'])) {
                    array_push($urls, $u['expanded_url']);
                } elseif (isset($u['url'])) {
                    array_push($urls, $u['url']);
                }
            }
            // get hashtags
            foreach ($content['entities']['hashtags'] as $h) {
                $hashtags[] = $h['text'];
            }
        }

        $logger->logDebug($post['post_text'] . " -- " . $post['author_username'], __METHOD__ . ',' . __LINE__);

        if (!isset($content['retweeted_status'])) {
            if (isset($content['retweet_count'])) {
                // do this only for the original post (rt will have rt count too)
                $retweet_count_api = $content['retweet_count'];
                $pos = strrpos($content['retweet_count'], '+');
                if ($pos != false) {
                    // remove '+', e.g. '100+' -- so currently 100 is max that can be indicated
                    $retweet_count_api = substr($content['retweet_count'], 0, $pos);
                }
                $post['retweet_count_api'] = $retweet_count_api;
                // NOTE(review): this call and the logInfo below go through $this->logger while the
                // rest of the method uses the local $logger -- confirm both are intended.
                $this->logger->logDebug($content['id_str'] . " is not a retweet but orig., count is: " .
                    $content['retweet_count'] . "/ " . $retweet_count_api, __METHOD__ . ',' . __LINE__);
            }
            // parse to see if 'old-style' retweet "RT @..." for first 'mention'-- if so, set that information
            if (count($mentions) > 0) {
                $first_mention = $mentions[0]['user_name'];
                $logger->logDebug("first mention: {$first_mention}", __METHOD__ . ',' . __LINE__);
                if (RetweetDetector::isRetweet($post['post_text'], $first_mention)) {
                    $post['is_rt'] = true;
                    $post['in_rt_of_user_id'] = $mentions[0]['user_id'];
                    $logger->logDebug("detected retweet of: " . $post['in_rt_of_user_id'] . ", " . $first_mention,
                        __METHOD__ . ',' . __LINE__);
                }
            }
        } else {
            // then this is a retweet.
            // Process its original too.
            $this->logger->logInfo("this is a retweet, will first process original post " .
                $content['retweeted_status']['id_str'] . " from user " .
                $content['retweeted_status']['user']['id_str'], __METHOD__ . ',' . __LINE__);
            list($orig_post, $orig_entities, $orig_user_array) = $this->parsePost($content['retweeted_status']);
            $rtp = array();
            $rtp['content'] = $orig_post;
            $rtp['entities'] = $orig_entities;
            $rtp['user_array'] = $orig_user_array;
            $post['retweeted_post'] = $rtp;
            $post['in_retweet_of_post_id'] = $content['retweeted_status']['id_str'];
            $post['in_rt_of_user_id'] = $content['retweeted_status']['user']['id_str'];
        }

        $user_array = $this->parseUser($user, $post['pub_date']);
    } catch (Exception $e) {
        // Was misspelled "logErrro", so the handler itself failed on an undefined method.
        $logger->logError("exception: {$e}", __METHOD__ . ',' . __LINE__);
    }

    $entities['urls'] = $urls;
    $entities['mentions'] = $mentions;
    $entities['hashtags'] = $hashtags;
    $entities['place'] = $place; // add 'place' object to entities array; may be null
    return array($post, $entities, $user_array);
}
/**
 * Retrieve a retweeting user's timeline and store the posts found in it.
 *
 * Each timeline post that RetweetDetector identifies as a retweet of $retweeted_status is linked
 * to that post before being saved. Nothing is fetched when the API is flagged unavailable or the
 * crawler has no API calls left.
 *
 * @param array $retweeted_status Original post; its 'post_id' and 'post_text' are read.
 * @param User $user_with_retweet User whose timeline is fetched; handed to the DAOs with each post.
 */
private function fetchUserTimelineForRetweet($retweeted_status, $user_with_retweet) {
    $retweeted_status_id = $retweeted_status["post_id"];

    $api_usable = $this->api->available && $this->api->available_api_calls_for_crawler > 0;
    if (!$api_usable) {
        return;
    }

    $timeline_url = str_replace("[id]", $user_with_retweet->username,
        $this->api->cURL_source['user_timeline']);
    $request_args = array("count" => 200, "include_rts" => "true");
    list($cURL_status, $twitter_data) = $this->api->apiRequest($timeline_url, $request_args);

    if ($cURL_status != 200) {
        if ($cURL_status == 401) {
            //not authorized to see user timeline
            //don't set API to unavailable just because a private user retweeted
            $this->api->available = true;
            $not_authorized = 'Not authorized to see '.$user_with_retweet->username."'s timeline;moving on.";
            $this->logger->logInfo($not_authorized, __METHOD__.','.__LINE__);
        }
        return;
    }

    $timeline_posts = $this->api->parseXML($twitter_data);
    if (!(count($timeline_posts) > 0)) {
        return;
    }

    $count = 0;
    $post_dao = DAOFactory::getDAO('PostDAO');
    $owner_name = $this->user->username;
    foreach ($timeline_posts as $timeline_post) {
        if (RetweetDetector::isRetweet($timeline_post['post_text'], $owner_name)) {
            $retweeter = "Retweet by ".$timeline_post['user_name'];
            $this->logger->logInfo($retweeter." found, ".substr($timeline_post['post_text'], 0, 50)."... ",
                __METHOD__.','.__LINE__);

            $matches_original = RetweetDetector::isRetweetOfTweet($timeline_post["post_text"],
                $retweeted_status["post_text"]);
            if ($matches_original) {
                $timeline_post['in_retweet_of_post_id'] = $retweeted_status_id;
                $this->logger->logInfo($retweeter." of ".$owner_name." original status ID found: ".
                    $retweeted_status_id, __METHOD__.','.__LINE__);
            } else {
                $this->logger->logInfo($retweeter." of ".$owner_name." original status ID NOT found: ".
                    $retweeted_status["post_text"]." NOT a RT of: ".$timeline_post["post_text"],
                    __METHOD__.','.__LINE__);
            }
        }

        $was_saved = $post_dao->addPost($timeline_post, $user_with_retweet, $this->logger) > 0;
        if ($was_saved) {
            $count++;
            //expand and insert links contained in tweet
            URLProcessor::processTweetURLs($this->logger, $timeline_post);
            $this->user_dao->updateUser($user_with_retweet);
        }
    }

    $this->logger->logInfo(count($timeline_posts)." tweet(s) found in usertimeline via retweet for ".
        $user_with_retweet->username." and $count saved", __METHOD__.','.__LINE__);
}