Пример #1
0
 /**
  * Check that RetweetDetector::detectOriginalTweet() maps several retweet
  * phrasings back to the post_id of the matching Post in a list of recent
  * posts, and returns exactly false when no recent post matches.
  */
 function testDetectRetweets()
 {
     // Candidate originals as (post_id, post_text) pairs. Pairs are used instead of
     // post_id => text keys so the large ids are never coerced into array keys.
     $originals = array(
         array(9021481076, 'guilty pleasure: dropping the "my wife" bomb on unsuspecting straight people, mid-conversation'),
         array(9020176425, "a Google fangirl's take: no doubt Buzz's privacy issues are seriously problematic, but at least they're iterating quickly and openly."),
         array(9031523906, "one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx @voiceofsandiego, @dagnysalas, & @samuelhodgson"),
         array(8925077246, "how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH")
     );

     // Every fixture post shares the same author/metadata; only id and text vary.
     $recent_tweets = array();
     foreach ($originals as $original) {
         $recent_tweets[] = new Post(array(
             'id' => 1,
             'author_user_id' => 10,
             'author_username' => 'no one',
             'author_fullname' => "No One",
             'author_avatar' => 'yo.jpg',
             'source' => 'TweetDeck',
             'pub_date' => '',
             'adj_pub_date' => '',
             'in_reply_to_user_id' => '',
             'in_reply_to_post_id' => '',
             'reply_count_cache' => '',
             'in_retweet_of_post_id' => '',
             'retweet_count_cache' => '',
             'post_id' => $original[0],
             'post_text' => $original[1],
             'network' => 'twitter',
             'geo' => '',
             'place' => '',
             'location' => '',
             'is_geo_encoded' => 0,
             'is_reply_by_friend' => 0,
             'is_retweet_by_friend' => 0,
             'reply_retweet_distance' => 0
         ));
     }

     $startwithcolon = "RT @ginatrapani: how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH";
     // Single-quoted so the embedded double quotes around "my wife" need no escaping.
     $nostartnocolon = 'Agreed: RT @ginatrapani guilty pleasure: dropping the "my wife" bomb on unsuspecting straight people, mid-conversation';
     $startwithcolonspaces = "RT @ginatrapani    how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH";
     $startwithcoloncutoff = "RT @ginatrapani: one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx.";
     $nonexistent = "rt @ginatrapani this is a non-existent tweet";
     // NOTE(review): a lowercase "rt @ginatrapani: ..." variant used to be defined here
     // but was never asserted; add an assertion once the expected result is confirmed.

     $this->assertTrue(RetweetDetector::detectOriginalTweet($nostartnocolon, $recent_tweets) == 9021481076);
     $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcolonspaces, $recent_tweets) == 8925077246);
     $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcoloncutoff, $recent_tweets) == 9031523906);
     $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcolon, $recent_tweets) == 8925077246);
     // Strict comparison: a miss must be boolean false, not merely a falsy value.
     $this->assertTrue(RetweetDetector::detectOriginalTweet($nonexistent, $recent_tweets) === false);
 }
 /**
  * Check that RetweetDetector::detectOriginalTweet() maps several retweet
  * phrasings back to the status_id of the matching Tweet in a list of recent
  * tweets, and returns exactly false when no recent tweet matches.
  */
 function testDetectRetweets()
 {
     // Recent tweets the detector may match against; status ids are written as floats.
     $recent_tweets = array(
         new Tweet(array(
             'status_id' => 9021481076.0,
             'tweet_text' => 'guilty pleasure: dropping the "my wife" bomb on unsuspecting straight people, mid-conversation'
         )),
         new Tweet(array(
             'status_id' => 9020176425.0,
             'tweet_text' => "a Google fangirl's take: no doubt Buzz's privacy issues are seriously problematic, but at least they're iterating quickly and openly."
         )),
         new Tweet(array(
             'status_id' => 9031523906.0,
             'tweet_text' => "one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx @voiceofsandiego, @dagnysalas, & @samuelhodgson"
         )),
         new Tweet(array(
             'status_id' => 8925077246.0,
             'tweet_text' => "how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH"
         ))
     );

     $startwithcolon = "RT @ginatrapani: how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH";
     // Single-quoted so the embedded double quotes around "my wife" need no escaping.
     $nostartnocolon = 'Agreed: RT @ginatrapani guilty pleasure: dropping the "my wife" bomb on unsuspecting straight people, mid-conversation';
     $startwithcolonspaces = "RT @ginatrapani    how to do (almost) everything in Google Buzz, including turn it off http://bit.ly/bfQTQH";
     $startwithcoloncutoff = "RT @ginatrapani: one of the most fun photo shoots & interviews I've ever done http://bit.ly/9ldYNw thx.";
     $nonexistent = "rt @ginatrapani this is a non-existent tweet";
     // NOTE(review): a lowercase "rt @ginatrapani: ..." variant used to be defined here
     // but was never asserted; add an assertion once the expected result is confirmed.

     $this->assertTrue(RetweetDetector::detectOriginalTweet($nostartnocolon, $recent_tweets) == 9021481076.0);
     $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcolonspaces, $recent_tweets) == 8925077246.0);
     $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcoloncutoff, $recent_tweets) == 9031523906.0);
     $this->assertTrue(RetweetDetector::detectOriginalTweet($startwithcolon, $recent_tweets) == 8925077246.0);
     // Strict comparison: a miss must be boolean false, not merely a falsy value.
     $this->assertTrue(RetweetDetector::detectOriginalTweet($nonexistent, $recent_tweets) === false);
 }
Пример #3
0
 /**
  * Pull the instance user's mentions from the Twitter API and save them.
  *
  * Every mention is run through RetweetDetector. A mention recognised as a retweet
  * of the instance user is tagged with in_rt_of_user_id and, when the original can
  * be matched against the user's recent posts, with in_retweet_of_post_id.
  * Fetching stops when the API reports a status above 200, when the call limit is
  * hit, when a paged request comes back empty, or once the newest page has been
  * processed and the instance's is_archive_loaded_replies flag is set.
  */
 public function fetchInstanceUserMentions()
 {
     if (!isset($this->user)) {
         $this->fetchInstanceUserInfo();
     }
     // Nothing to do without a user or without remaining crawler API calls.
     if (!isset($this->user) || !($this->api->available_api_calls_for_crawler > 0)) {
         return;
     }

     $fetched_first_page = false;
     $keep_going = true;
     while ($this->api->available && $this->api->available_api_calls_for_crawler > 0 && $keep_going) {
         $endpoint = $this->api->cURL_source['mentions'];
         // Page size comes from the plugin option when present, otherwise 100.
         $per_call = 100;
         if (isset($this->twitter_options['tweet_count_per_call'])) {
             $per_call = $this->twitter_options['tweet_count_per_call']->option_value;
         }
         $request_args = array('count' => $per_call, 'include_rts' => 'true');
         if ($fetched_first_page) {
             // After the first (newest) request, walk back one page per iteration.
             $this->instance->last_page_fetched_replies++;
             $request_args['page'] = $this->instance->last_page_fetched_replies;
         }

         try {
             list($http_status, $payload) = $this->api->apiRequest($endpoint, $request_args);
         } catch (APICallLimitExceededException $e) {
             break;
         }
         if ($http_status > 200) {
             $keep_going = false;
             continue;
         }

         $mentions_page = $this->api->parseXML($payload);
         if (count($mentions_page) == 0 && $fetched_first_page) {
             // Paged back and found nothing: reset paging and mark the archive loaded.
             $this->instance->last_page_fetched_replies = 1;
             $keep_going = false;
             $this->instance->is_archive_loaded_mentions = true;
             $this->logger->logInfo('Paged back but not finding new mentions; moving on.', __METHOD__ . ',' . __LINE__);
         }

         $post_dao = DAOFactory::getDAO('PostDAO');
         // Recent posts are loaded lazily and reused on later iterations.
         if (!isset($recent_posts)) {
             $recent_posts = $post_dao->getAllPosts($this->user->user_id, 'twitter', 100);
         }

         $saved_count = 0;
         foreach ($mentions_page as $mention) {
             if (RetweetDetector::isRetweet($mention['post_text'], $this->user->username)) {
                 $this->logger->logInfo("Retweet found, " . substr($mention['post_text'], 0, 50) . "... ", __METHOD__ . ',' . __LINE__);
                 // Record who was retweeted even when the original post cannot be found.
                 $mention['in_rt_of_user_id'] = $this->user->user_id;
                 $original_post_id = RetweetDetector::detectOriginalTweet($mention['post_text'], $recent_posts);
                 if ($original_post_id != false) {
                     $mention['in_retweet_of_post_id'] = $original_post_id;
                     $this->logger->logInfo("Retweet original status ID found: " . $original_post_id, __METHOD__ . ',' . __LINE__);
                 }
             }

             if ($post_dao->addPost($mention, $this->user, $this->logger) === false) {
                 // Not inserted: skip link processing and the author update.
                 continue;
             }
             $saved_count++;
             // Expand and store the links contained in the mention.
             URLProcessor::processPostURLs($mention['post_text'], $mention['post_id'], 'twitter', $this->logger);
             // The instance user's own record is never refreshed from a mention.
             if ($mention['user_id'] != $this->user->user_id) {
                 $this->user_dao->updateUser(new User($mention, 'mentions'));
             }
         }

         if ($fetched_first_page) {
             // Paged requests only report when something was actually saved.
             if ($saved_count > 0) {
                 $this->logger->logUserSuccess(count($mentions_page) . " mentions on page " . $this->instance->last_page_fetched_replies . " and {$saved_count} saved", __METHOD__ . ',' . __LINE__);
             }
         } elseif ($saved_count == 0) {
             $this->logger->logUserInfo("No new mentions found.", __METHOD__ . ',' . __LINE__);
         } else {
             $this->logger->logUserSuccess(count($mentions_page) . " mentions found and {$saved_count} saved", __METHOD__ . ',' . __LINE__);
         }

         $fetched_first_page = true;
         // NOTE(review): this reads is_archive_loaded_replies while the paged-back branch
         // above sets is_archive_loaded_mentions; confirm the flag mix is intentional.
         if ($this->instance->is_archive_loaded_replies) {
             $keep_going = false;
             $this->logger->logInfo('Retrieved newest mentions; Archive loaded; Stopping reply fetch.', __METHOD__ . ',' . __LINE__);
         }
     }
 }
Пример #4
0
 /**
  * Fetch the owner's mentions from the Twitter API and store them through PostDAO.
  *
  * Requests up to 200 mentions per call (retweets included), paging back on each
  * iteration after the first. Every mention is checked with RetweetDetector; when
  * the original post is found among the owner's recent posts, the mention is tagged
  * with in_retweet_of_post_id before being saved. Progress and failures are reported
  * through $this->logger->logStatus().
  */
 public function fetchInstanceUserMentions()
 {
     if (!isset($this->owner_object)) {
         $this->fetchInstanceUserInfo();
     }
     if (isset($this->owner_object)) {
         $status_message = "";
         // Get owner's mentions
         if ($this->api->available_api_calls_for_crawler > 0) {
             $got_newest_mentions = false;
             $continue_fetching = true;
             while ($this->api->available && $this->api->available_api_calls_for_crawler > 0 && $continue_fetching) {
                 // Get the most recent mentions
                 $mentions = $this->api->cURL_source['mentions'];
                 $args = array();
                 $args['count'] = 200;
                 $args['include_rts'] = 'true';
                 if ($got_newest_mentions) {
                     // After the first (newest) request, walk back one page per iteration.
                     $this->last_page_fetched_mentions++;
                     $args['page'] = $this->last_page_fetched_mentions;
                 }
                 list($cURL_status, $twitter_data) = $this->api->apiRequest($mentions, $args);
                 if ($cURL_status > 200) {
                     $continue_fetching = false;
                 } else {
                     try {
                         $tweets = $this->api->parseXML($twitter_data);
                         if (count($tweets) == 0 && $got_newest_mentions) {
                             // you're paged back and no new tweets
                             $this->last_page_fetched_mentions = 1;
                             $continue_fetching = false;
                             $this->instance->is_archive_loaded_mentions = true;
                             $status_message = 'Paged back but not finding new mentions; moving on.';
                             $this->logger->logStatus($status_message, get_class($this));
                             $status_message = "";
                         }
                         $pd = DAOFactory::getDAO('PostDAO');
                         // Recent posts are loaded lazily and reused on later iterations.
                         if (!isset($recentTweets)) {
                             $recentTweets = $pd->getAllPosts($this->owner_object->user_id, 'twitter', 100);
                         }
                         $count = 0;
                         foreach ($tweets as $tweet) {
                             // Figure out if the mention is a retweet
                             if (RetweetDetector::isRetweet($tweet['post_text'], $this->owner_object->username)) {
                                 $this->logger->logStatus("Retweet found, " . substr($tweet['post_text'], 0, 50) . "... ", get_class($this));
                                 $originalTweetId = RetweetDetector::detectOriginalTweet($tweet['post_text'], $recentTweets);
                                 if ($originalTweetId != false) {
                                     $tweet['in_retweet_of_post_id'] = $originalTweetId;
                                     $this->logger->logStatus("Retweet original status ID found: " . $originalTweetId, get_class($this));
                                 }
                             }
                             if ($pd->addPost($tweet, $this->owner_object, $this->logger) > 0) {
                                 $count++;
                                 //expand and insert links contained in tweet
                                 $this->processTweetURLs($tweet);
                                 if ($tweet['user_id'] != $this->owner_object->user_id) {
                                     //don't update owner info from reply
                                     $u = new User($tweet, 'mentions');
                                     $this->user_dao->updateUser($u);
                                 }
                             }
                         }
                         $status_message .= count($tweets) . " mentions found and {$count} saved";
                         $this->logger->logStatus($status_message, get_class($this));
                         $status_message = "";
                         $got_newest_mentions = true;
                         // NOTE(review): this reads is_archive_loaded_replies while the paged-back
                         // branch above sets is_archive_loaded_mentions; confirm this is intended.
                         if ($got_newest_mentions && $this->instance->is_archive_loaded_replies) {
                             $continue_fetching = false;
                             $status_message .= 'Retrieved newest mentions; Archive loaded; Stopping reply fetch.';
                             $this->logger->logStatus($status_message, get_class($this));
                             $status_message = "";
                         }
                     } catch (Exception $e) {
                         // Concatenate the username: variables are not expanded inside single quotes.
                         // NOTE(review): this catches any Exception raised while parsing or saving,
                         // and leaves $continue_fetching untouched, so the loop carries on.
                         $status_message = 'Could not parse mentions XML for ' . $this->owner_object->username;
                         $this->logger->logStatus($status_message, get_class($this));
                         $status_message = "";
                     }
                 }
             }
         } else {
             $status_message = 'Crawler API error: either call limit exceeded or API returned an error.';
         }
         // Empty unless the branch above set the API error message.
         $this->logger->logStatus($status_message, get_class($this));
         $status_message = "";
     } else {
         $this->logger->logStatus("Cannot fetch search results; Owner object has not been set.", get_class($this));
     }
 }
Пример #5
0
 /**
  * Fetch the owner's mentions from the Twitter API and store them through TweetDAO.
  *
  * Requests up to 200 mentions per call, paging back on each iteration after the
  * first. Every mention is checked with RetweetDetector; when the original tweet is
  * found among the owner's recent tweets, the mention is tagged with
  * in_retweet_of_status_id before being saved. Progress and failures are reported
  * through $this->logger->logStatus().
  *
  * @param mixed $lurl Passed through unchanged to processTweetURLs().
  * @param mixed $fa   Passed through unchanged to processTweetURLs().
  */
 function fetchInstanceUserMentions($lurl, $fa)
 {
     $status_message = "";
     // Get owner's mentions
     if ($this->api->available_api_calls_for_crawler > 0) {
         $got_newest_mentions = false;
         $continue_fetching = true;
         while ($this->api->available && $this->api->available_api_calls_for_crawler > 0 && $continue_fetching) {
             // Get the most recent mentions; the URL template carries an [id] placeholder.
             $mentions = str_replace("[id]", $this->owner_object->username, $this->api->cURL_source['mentions']);
             $args = array();
             $args['count'] = 200;
             if ($got_newest_mentions) {
                 // After the first (newest) request, walk back one page per iteration.
                 $this->last_page_fetched_mentions++;
                 $args['page'] = $this->last_page_fetched_mentions;
             }
             list($cURL_status, $twitter_data) = $this->api->apiRequest($mentions, $this->logger, $args);
             if ($cURL_status > 200) {
                 $continue_fetching = false;
             } else {
                 try {
                     $tweets = $this->api->parseXML($twitter_data);
                     if (count($tweets) == 0 && $got_newest_mentions) {
                         // you're paged back and no new tweets
                         $this->last_page_fetched_mentions = 1;
                         $continue_fetching = false;
                         $this->instance->is_archive_loaded_mentions = true;
                         $status_message = 'Paged back but not finding new mentions; moving on.';
                         $this->logger->logStatus($status_message, get_class($this));
                         $status_message = "";
                     }
                     $td = new TweetDAO($this->db, $this->logger);
                     // Recent tweets are loaded lazily and reused on later iterations.
                     if (!isset($recentTweets)) {
                         $recentTweets = $td->getAllTweets($this->owner_object->id, 15);
                     }
                     $count = 0;
                     foreach ($tweets as $tweet) {
                         // Figure out if the mention is a retweet
                         if (RetweetDetector::isRetweet($tweet['tweet_text'], $this->owner_object->username)) {
                             $this->logger->logStatus("Retweet found, " . substr($tweet['tweet_text'], 0, 50) . "... ", get_class($this));
                             $originalTweetId = RetweetDetector::detectOriginalTweet($tweet['tweet_text'], $recentTweets);
                             if ($originalTweetId != false) {
                                 $tweet['in_retweet_of_status_id'] = $originalTweetId;
                                 $this->logger->logStatus("Retweet original status ID found: " . $originalTweetId, get_class($this));
                             }
                         }
                         if ($td->addTweet($tweet, $this->owner_object, $this->logger) > 0) {
                             $count++;
                             //expand and insert links contained in tweet
                             $this->processTweetURLs($tweet, $lurl, $fa);
                             if ($tweet['user_id'] != $this->owner_object->id) {
                                 //don't update owner info from reply
                                 $u = new User($tweet, 'mentions');
                                 $this->ud->updateUser($u, $this->logger);
                             }
                         }
                     }
                     $status_message .= count($tweets) . " mentions found and {$count} saved";
                     $this->logger->logStatus($status_message, get_class($this));
                     $status_message = "";
                     $got_newest_mentions = true;
                     // NOTE(review): this reads is_archive_loaded_replies while the paged-back
                     // branch above sets is_archive_loaded_mentions; confirm this is intended.
                     if ($got_newest_mentions && $this->instance->is_archive_loaded_replies) {
                         $continue_fetching = false;
                         $status_message .= 'Retrieved newest mentions; Reply archive loaded; Stopping reply fetch.';
                         $this->logger->logStatus($status_message, get_class($this));
                         $status_message = "";
                     }
                 } catch (Exception $e) {
                     // Concatenate the username: variables are not expanded inside single quotes.
                     // NOTE(review): this catches any Exception raised while parsing or saving,
                     // and leaves $continue_fetching untouched, so the loop carries on.
                     $status_message = 'Could not parse mentions XML for ' . $this->owner_object->username;
                     $this->logger->logStatus($status_message, get_class($this));
                     $status_message = "";
                 }
             }
         }
     } else {
         $status_message = 'Crawler API call limit exceeded.';
     }
     // Empty unless the branch above set the call-limit message.
     $this->logger->logStatus($status_message, get_class($this));
     $status_message = "";
 }