/**
  * Refresh the instance user's stalest friends: their recent tweets, profile data and friend lists.
  *
  * Keeps asking the follow DAO for the stalest friend while the API reports itself available
  * with crawler calls remaining, and stops as soon as no stale friend is returned.
  */
 public function fetchFriendTweetsAndFriends()
 {
     if (!isset($this->user)) {
         $this->fetchInstanceUserInfo();
     }
     if (!isset($this->user)) {
         // Without the instance user's record there is nobody whose friends could be crawled.
         return;
     }
     $follow_dao = DAOFactory::getDAO('FollowDAO');
     $post_dao = DAOFactory::getDAO('PostDAO');
     // Fields copied verbatim from a fetched tweet onto the friend's user record.
     $profile_fields = array(
         'full_name', 'avatar', 'location', 'description', 'url',
         'is_protected', 'follower_count', 'friend_count', 'post_count',
     );
     while ($this->api->available && $this->api->available_api_calls_for_crawler > 0) {
         $friend = $follow_dao->getStalestFriend($this->user->user_id, 'twitter');
         if ($friend == null) {
             $this->logger->logInfo('No friend staler than 1 day', __METHOD__ . ',' . __LINE__);
             break;
         }
         $this->logger->logInfo($friend->username . " is friend most need of update", __METHOD__ . ',' . __LINE__);
         $timeline_endpoint = str_replace("[id]", $friend->username, $this->api->cURL_source['user_timeline']);
         // Page size comes from the plugin option when present, otherwise 100 tweets per call.
         $request_args = array(
             'count' => isset($this->twitter_options['tweet_count_per_call'])
                 ? $this->twitter_options['tweet_count_per_call']->option_value
                 : 100,
         );
         if ($friend->last_post_id > 0) {
             // Only ask for tweets newer than the last one already recorded for this friend.
             $request_args['since_id'] = $friend->last_post_id;
         }
         list($http_status, $response_body) = $this->api->apiRequest($timeline_endpoint, $request_args);
         if ($http_status == 200) {
             $saved_total = 0;
             $tweets = $this->api->parseXML($response_body);
             if (count($tweets) > 0) {
                 $profile_refreshed = false;
                 foreach ($tweets as $tweet) {
                     if ($post_dao->addPost($tweet, $friend, $this->logger) > 0) {
                         $saved_total++;
                         // Expand and insert links contained in the tweet.
                         URLProcessor::processTweetURLs($this->logger, $tweet);
                     }
                     if ($profile_refreshed) {
                         continue;
                     }
                     // The first tweet in the batch carries the profile data used to refresh the friend.
                     foreach ($profile_fields as $field) {
                         $friend->{$field} = $tweet[$field];
                     }
                     $friend->joined = date_format(date_create($tweet['joined']), "Y-m-d H:i:s");
                     if ($tweet['post_id'] > $friend->last_post_id) {
                         $friend->last_post_id = $tweet['post_id'];
                     }
                     $this->user_dao->updateUser($friend);
                     $profile_refreshed = true;
                 }
             } else {
                 // No tweets came back, so refresh the friend through the user lookup instead.
                 $this->fetchAndAddUser($friend->user_id, "Friends");
             }
             $this->logger->logInfo(count($tweets) . " tweet(s) found for " . $friend->username . " and " . $saved_total . " saved", __METHOD__ . ',' . __LINE__);
             $this->fetchUserFriendsByIDs($friend->user_id, $follow_dao);
         } elseif ($http_status == 401 || $http_status == 404) {
             // Record the failure against this friend; fall back to the raw payload when no
             // 'error' entry could be parsed out of it.
             $parsed_error = $this->api->parseError($response_body);
             $user_error_dao = DAOFactory::getDAO('UserErrorDAO');
             $error_text = isset($parsed_error['error']) ? $parsed_error['error'] : $response_body;
             $user_error_dao->insertError($friend->user_id, $http_status, $error_text, $this->user->user_id, 'twitter');
             $this->logger->logInfo('User error saved', __METHOD__ . ',' . __LINE__);
         }
     }
 }
 /**
  * Check that processTweetURLs() stores one link per supported image service with the
  * expected expanded URL, owning post id, network and image flag.
  */
 public function testProcessTweetURLs()
 {
     // One row per scenario. 'new_dao' mirrors where a fresh LinkMySQLDAO is created before
     // the lookup; the second instagr.am scenario reuses the DAO of the previous one.
     $scenarios = array(
         // Twitpic
         array('post_id' => 100, 'text' => "This is a Twitpic post http://twitpic.com/blah Yay!",
             'url' => 'http://twitpic.com/blah', 'expanded' => 'http://twitpic.com/show/thumb/blah', 'new_dao' => true),
         // Yfrog
         array('post_id' => 101, 'text' => "This is a Yfrog post http://yfrog.com/blah Yay!",
             'url' => 'http://yfrog.com/blah', 'expanded' => 'http://yfrog.com/blah.th.jpg', 'new_dao' => true),
         // Twitgoo
         array('post_id' => 102, 'text' => "This is a Twitgoo post http://twitgoo.com/blah Yay!",
             'url' => 'http://twitgoo.com/blah', 'expanded' => 'http://twitgoo.com/show/thumb/blah', 'new_dao' => true),
         // Picplz
         array('post_id' => 103, 'text' => "This is a Picplz post http://picplz.com/blah Yay!",
             'url' => 'http://picplz.com/blah', 'expanded' => 'http://picplz.com/blah/thumb/', 'new_dao' => true),
         // instagr.am, with the ending slash the URLs 'should' include
         array('post_id' => 104, 'text' => "This is an instagram post http:/instagr.am/blah/ Yay!",
             'url' => 'http://instagr.am/blah/', 'expanded' => 'http://instagr.am/blah/media/', 'new_dao' => true),
         // instagr.am, without the ending slash just in case
         array('post_id' => 105, 'text' => "This is an instagram post http:/instagr.am/blah Yay!",
             'url' => 'http://instagr.am/blah', 'expanded' => 'http://instagr.am/blah/media/', 'new_dao' => false),
         // Flic.kr
         array('post_id' => 106, 'text' => "This is a Flickr post http://flic.kr/blah Yay!",
             'url' => 'http://flic.kr/blah', 'expanded' => '', 'new_dao' => true),
     );
     $tweet = array();
     $link_dao = null;
     foreach ($scenarios as $scenario) {
         $tweet["post_id"] = $scenario['post_id'];
         $tweet['post_text'] = $scenario['text'];
         URLProcessor::processTweetURLs($this->logger, $tweet);
         if ($scenario['new_dao']) {
             $link_dao = new LinkMySQLDAO();
         }
         $result = $link_dao->getLinkByUrl($scenario['url']);
         $this->assertIsA($result, "Link");
         $this->assertEqual($result->url, $scenario['url']);
         $this->assertEqual($result->expanded_url, $scenario['expanded']);
         $this->assertEqual($result->title, '');
         $this->assertEqual($result->post_id, $scenario['post_id']);
         $this->assertEqual($result->network, 'twitter');
         $this->assertTrue($result->is_image);
     }
 }
 /**
  * Add a post and, when addPost() reports a stored post, save the data that accompanies it.
  *
  * For a stored post this updates the author record (when $user_array is given), processes
  * the post's URLs, saves any photo links listed in $vals['photos'], and inserts the
  * mentions, hashtags and place found in $entities.
  *
  * @param array $vals Post values; reads 'post_text', 'post_id', 'author_user_id', 'network'
  *                    and, when present, 'photos'.
  * @param array|null $entities Optional entity data; the keys 'urls', 'mentions', 'hashtags'
  *                             and 'place' are used when set.
  * @param array|null $user_array Optional data handed to the User constructor for the author update.
  * @return mixed Whatever addPost() returned; a falsy value means no follow-up processing ran.
  */
 public function addPostAndAssociatedInfo(array $vals, $entities = null, $user_array = null)
 {
     $urls = null;
     // first add post
     $new_post_key = $this->addPost($vals);
     // if post did not already exist
     if ($new_post_key) {
         if ($user_array) {
             $u = new User($user_array);
             $user_dao = DAOFactory::getDAO('UserDAO');
             $user_dao->setLoggerInstance($this->logger);
             $user_dao->updateUser($u);
         }
         if ($entities && isset($entities['urls'])) {
             $urls = $entities['urls'];
         }
         // URLs are always filed under the 'twitter' network here, unlike the entity inserts
         // below, which use $vals['network'].
         URLProcessor::processPostURLs($vals['post_text'], $vals['post_id'], 'twitter', $this->logger, $urls);
         // 'photos' is optional: test with empty() so a post without that key does not raise
         // an undefined-index notice.
         if (!empty($vals['photos'])) {
             $link_dao = DAOFactory::getDAO('LinkDAO');
             foreach ($vals['photos'] as $photo) {
                 // Photos may arrive as arrays; the cast allows uniform property access.
                 $photo = (object) $photo;
                 $link_dao->saveExpandedURL($photo->url, $photo->expanded_url, null, $photo->media_url, null);
             }
         }
         if (isset($entities)) {
             if (isset($entities['mentions'])) {
                 $mention_dao = DAOFactory::getDAO('MentionDAO');
                 $mention_dao->setLoggerInstance($this->logger);
                 $mention_dao->insertMentions($entities['mentions'], $vals['post_id'], $vals['author_user_id'], $vals['network']);
             }
             if (isset($entities['hashtags'])) {
                 $hashtagpost_dao = DAOFactory::getDAO('HashtagPostDAO');
                 $hashtagpost_dao->setLoggerInstance($this->logger);
                 $hashtagpost_dao->insertHashtagPosts($entities['hashtags'], $vals['post_id'], $vals['network']);
             }
             if (isset($entities['place'])) {
                 $place = $entities['place'];
                 if ($place) {
                     $place_dao = DAOFactory::getDAO('PlaceDAO');
                     $place_dao->setLoggerInstance($this->logger);
                     $place_dao->insertPlace($place, $vals['post_id'], $vals['network']);
                 }
             }
         }
     }
     return $new_post_key;
 }
 /**
  * Check that processPostURLs() stores, for each supported service, a link with the expected
  * URL, expanded URL, image source and owning post key.
  */
 public function testProcessPostURLs()
 {
     // Fixtures are collected here so they stay alive until the test method returns.
     $builders = array();
     $network = 'twitter';
     // One row per scenario. 'new_dao' mirrors where a fresh LinkMySQLDAO is created before
     // the lookup; the second instagr.am scenario reuses the DAO of the previous one.
     $scenarios = array(
         // Twitpic
         array('post_id' => 100, 'text' => "This is a Twitpic post http://twitpic.com/blah Yay!",
             'url' => 'http://twitpic.com/blah', 'expanded' => 'http://twitpic.com/blah',
             'image' => 'http://twitpic.com/show/thumb/blah', 'new_dao' => true),
         // Yfrog
         array('post_id' => 101, 'text' => "This is a Yfrog post http://yfrog.com/blah Yay!",
             'url' => 'http://yfrog.com/blah', 'expanded' => 'http://yfrog.com/blah',
             'image' => 'http://yfrog.com/blah.th.jpg', 'new_dao' => true),
         // Twitgoo
         array('post_id' => 102, 'text' => "This is a Twitgoo post http://twitgoo.com/blah Yay!",
             'url' => 'http://twitgoo.com/blah', 'expanded' => 'http://twitgoo.com/blah',
             'image' => 'http://twitgoo.com/show/thumb/blah', 'new_dao' => true),
         // Picplz
         array('post_id' => 103, 'text' => "This is a Picplz post http://picplz.com/blah Yay!",
             'url' => 'http://picplz.com/blah', 'expanded' => 'http://picplz.com/blah',
             'image' => 'http://picplz.com/blah/thumb/', 'new_dao' => true),
         // instagr.am, with the ending slash the URLs 'should' include
         array('post_id' => 104, 'text' => "This is an instagram post http:/instagr.am/blah/ Yay!",
             'url' => 'http://instagr.am/blah/', 'expanded' => 'http://instagr.am/blah/',
             'image' => 'http://instagr.am/blah/media/', 'new_dao' => true),
         // instagr.am, without the ending slash just in case
         array('post_id' => 105, 'text' => "This is an instagram post http:/instagr.am/blah Yay!",
             'url' => 'http://instagr.am/blah', 'expanded' => 'http://instagr.am/blah',
             'image' => 'http://instagr.am/blah/media/', 'new_dao' => false),
         // Flic.kr -- still needs its link expanded, hence the empty expectations
         array('post_id' => 106, 'text' => "This is a Flickr post http://flic.kr/blah Yay!",
             'url' => 'http://flic.kr/blah', 'expanded' => '',
             'image' => '', 'new_dao' => true),
         // t.co link followed by plain text
         array('post_id' => 107, 'text' => "This is a post with a curly quote closing the link http://t.co/2JVSpi5 yo",
             'url' => 'http://t.co/2JVSpi5', 'expanded' => '',
             'image' => '', 'new_dao' => true),
         // Lockerz
         array('post_id' => 108, 'text' => "This is a lockerz post http://lockerz.com/s/138376416 Yay!",
             'url' => 'http://lockerz.com/s/138376416', 'expanded' => 'http://lockerz.com/s/138376416',
             'image' => 'http://api.plixi.com/api/tpapi.svc/imagefromurl?url=http://plixi.com/p/138376416&size=thumbnail',
             'new_dao' => true),
     );
     $link_dao = null;
     foreach ($scenarios as $scenario) {
         $post_id = $scenario['post_id'];
         $post_text = $scenario['text'];
         $builders[] = FixtureBuilder::build('posts', array('id' => $post_id, 'post_id' => $post_id, 'network' => 'twitter', 'post_text' => $post_text));
         URLProcessor::processPostURLs($post_text, $post_id, $network, $this->logger);
         if ($scenario['new_dao']) {
             $link_dao = new LinkMySQLDAO();
         }
         $result = $link_dao->getLinkByUrl($scenario['url']);
         $this->assertIsA($result, "Link");
         $this->assertEqual($result->url, $scenario['url']);
         $this->assertEqual($result->expanded_url, $scenario['expanded']);
         $this->assertEqual($result->image_src, $scenario['image']);
         $this->assertEqual($result->title, '');
         $this->assertEqual($result->post_key, $post_id);
     }
     //test facebook
     // No facebook assertions follow; the assignment is kept as the original left it.
     $network = 'facebook';
 }
 /**
  * Abstraction for pulling data from a file or url.
  *
  * A location whose first character is "/" or "." is read as a local file with
  * file_get_contents(); anything else is resolved through URLProcessor::getFinalURL() and
  * fetched with Utils::getURLContents().
  *
  * @throws exception Nothing is thrown directly here; presumably raised by the URL helpers -- TODO confirm.
  * @param str $url File path or URL to read.
  * @return mixed The fetched contents; a null result is normalised to false.
  */
 private function fetchURLData($url)
 {
     // strpos() returns false when the character is absent, and false == 0 is true in PHP.
     // The strict comparison makes this match only a leading "/" or ".", so URLs that merely
     // lack a dot or slash are no longer mistaken for file paths.
     if (strpos($url, "/") === 0 || strpos($url, ".") === 0) {
         // we are a file path, so use file_get_contents
         $contents = file_get_contents($url);
     } else {
         // else we are a url, so use our Util::getURLContents
         $contents = Utils::getURLContents(URLProcessor::getFinalURL($url));
     }
     if (is_null($contents)) {
         $contents = false;
     }
     return $contents;
 }
 /**
  * Add a post and, when addPost() reports a stored post, process the URLs it contains.
  *
  * @param array $vals Post values handed to addPost() and to the URL processor.
  * @param array|null $entities Entity data; its 'urls' entry is forwarded when set.
  * @return mixed Whatever addPost() returned.
  */
 public function addPostAndEntities($vals, $entities)
 {
     $post_result = $this->addPost($vals);
     if (!$post_result) {
         // Nothing was stored, so there are no URLs to process for this post.
         return $post_result;
     }
     // When no entity URLs are supplied, null is passed and the processor extracts them
     // from the tweet content itself.
     $entity_urls = isset($entities, $entities['urls']) ? $entities['urls'] : null;
     URLProcessor::processTweetURLs($this->logger, $vals, $entity_urls);
     return $post_result;
 }
Beispiel #7
0
 /**
  * Save expanded version of all unexpanded URLs to data store, as well as intermediary short links.
  *
  * Loads up to $this->link_limit links from the link DAO and, for each one that passes
  * Utils::validateURL(), follows URLExpander::expandURL() hop by hop until the URL stops
  * changing, comes back empty, is 256 characters or longer, or the hop counter exceeds
  * self::EXPANSION_CAP. Each intermediate hop is stored through the short link DAO.
  * Failures are recorded with saveExpansionError() and counted for the closing summary log.
  *
  * @param mixed $flickr_api_key Optional Flickr API key (presumably a string -- TODO confirm);
  *                              when set, http://flic.kr/ links are handed to
  *                              expandFlickrThumbnail() and skip the generic save step.
  */
 public function expandOriginalURLs($flickr_api_key = null)
 {
     $links_to_expand = $this->link_dao->getLinksToExpand($this->link_limit);
     $this->logger->logUserInfo(count($links_to_expand) . " links to expand. Please wait. Working...", __METHOD__ . ',' . __LINE__);
     $total_expanded = 0;
     $total_errors = 0;
     // Set while the current link was handled by the Flickr branch; reset after every link.
     $has_expanded_flickr_link = false;
     foreach ($links_to_expand as $index => $link) {
         if (Utils::validateURL($link->url)) {
             $endless_loop_prevention_counter = 0;
             // NOTE(review): the progress number is based on $total_expanded, not $index, so it
             // does not advance past links that fail to expand.
             $this->logger->logInfo("Expanding " . ($total_expanded + 1) . " of " . count($links_to_expand) . " (" . $link->url . ")", __METHOD__ . ',' . __LINE__);
             //make sure shortened short links--like t.co--get fully expanded
             $fully_expanded = false;
             $short_link = $link->url;
             while (!$fully_expanded) {
                 //begin Flickr thumbnail processing
                 if (isset($flickr_api_key) && substr($short_link, 0, strlen('http://flic.kr/')) == 'http://flic.kr/') {
                     self::expandFlickrThumbnail($flickr_api_key, $short_link, $link->url);
                     $has_expanded_flickr_link = true;
                     $fully_expanded = true;
                 }
                 //end Flickr thumbnail processing
                 // The Flickr branch above does not skip this call; expandURL() still runs once
                 // in the same iteration before the loop exits.
                 $expanded_url = URLExpander::expandURL($short_link, $link->url, $index, count($links_to_expand), $this->link_dao, $this->logger);
                 if ($expanded_url == $short_link || $expanded_url == '' || $endless_loop_prevention_counter > self::EXPANSION_CAP) {
                     // No further hop, a dead end, or too many hops: stop following this link.
                     $fully_expanded = true;
                 } else {
                     // The URL changed, so $short_link was an intermediate hop worth recording.
                     try {
                         $this->short_link_dao->insert($link->id, $short_link);
                     } catch (DataExceedsColumnWidthException $e) {
                         $this->logger->logError($short_link . " short link record exceeds column width, cannot save", __METHOD__ . ',' . __LINE__);
                         $fully_expanded = true;
                     }
                 }
                 // Only URLs shorter than 256 characters are carried into the next hop; longer
                 // ones end the expansion.
                 if (strlen($expanded_url) < 256) {
                     $short_link = $expanded_url;
                 } else {
                     $fully_expanded = true;
                 }
                 $endless_loop_prevention_counter++;
             }
             // Links handled by the Flickr branch skip the generic save and error paths below.
             if (!$has_expanded_flickr_link) {
                 if ($expanded_url != '') {
                     $image_src = URLProcessor::getImageSource($expanded_url);
                     $url_details = URLExpander::getWebPageDetails($expanded_url);
                     try {
                         $this->link_dao->saveExpandedUrl($link->url, $expanded_url, $url_details['title'], $image_src, $url_details['description']);
                         $total_expanded = $total_expanded + 1;
                     } catch (DataExceedsColumnWidthException $e) {
                         $this->logger->logError($link->url . " record exceeds column width, cannot save", __METHOD__ . ',' . __LINE__);
                         $this->link_dao->saveExpansionError($link->url, "URL exceeds column width");
                         $total_errors = $total_errors + 1;
                     }
                 } else {
                     // Expansion ended on an empty URL, so record the link as a failure.
                     $this->logger->logError($link->url . " not a valid URL - relocates to nowhere", __METHOD__ . ',' . __LINE__);
                     $this->link_dao->saveExpansionError($link->url, "Invalid URL - relocates to nowhere");
                     $total_errors = $total_errors + 1;
                 }
             }
         } else {
             // The stored URL failed validation; record the error without attempting expansion.
             $this->logger->logError($link->url . " not a valid URL", __METHOD__ . ',' . __LINE__);
             $this->link_dao->saveExpansionError($link->url, "Invalid URL");
             $total_errors = $total_errors + 1;
         }
         $has_expanded_flickr_link = false;
     }
     $this->logger->logUserSuccess($total_expanded . " URLs successfully expanded (" . $total_errors . " errors).", __METHOD__ . ',' . __LINE__);
 }
 /**
  * Check that a post containing two URLs yields both of them, in order, from
  * processPostURLs() and from the links stored for the post.
  */
 public function testProcessPostMultipleURLs()
 {
     $expected_urls = array('http://awesome.com/', 'http://cool.com/');
     $post_id = 100;
     $network = 'twitter';
     $post_text = "http://awesome.com/ http://cool.com/";
     // Held in a local so the fixture stays alive until the test method returns.
     $post_fixture = FixtureBuilder::build('posts', array('id' => $post_id, 'post_id' => $post_id, 'network' => 'twitter', 'post_text' => $post_text));

     // The processor itself reports both URLs.
     $urls = URLProcessor::processPostURLs($post_text, $post_id, $network, $this->logger);
     $this->assertEqual(count($urls), 2);
     foreach ($expected_urls as $position => $expected_url) {
         $this->assertEqual($urls[$position], $expected_url);
     }

     // Both URLs were also stored as links of the post.
     $link_dao = DAOFactory::getDAO('LinkDAO');
     $result = $link_dao->getLinksForPost($post_id, 'twitter');
     $this->assertEqual(count($result), 2);
     $this->assertIsA($result[0], 'Link');
     foreach ($expected_urls as $position => $expected_url) {
         $this->assertEqual($result[$position]->url, $expected_url);
     }
 }
 /**
  * Fetch instance user's favorites since the last favorite stored.
  *
  * Calls getFavorites() repeatedly, starting from the instance's last_favorite_id, and stores
  * every favorite not already saved, together with the links in its text. The loop ends when
  * a page comes back empty or the HTTP status is anything other than 200.
  *
  * NOTE(review): the highest favorite id seen is tracked in a local only; nothing in this
  * method writes it back to the instance -- confirm that happens elsewhere.
  */
 public function fetchInstanceUserFavorites()
 {
     if (!isset($this->user)) {
         $this->fetchInstanceUserInfo();
     }
     $this->logger->logUserInfo("Checking for new favorites.", __METHOD__ . ',' . __LINE__);
     $last_fav_id = $this->instance->last_favorite_id;
     $this->logger->logInfo("Owner favs: " . $this->user->favorites_count . ", instance owner favs in system: " . $this->instance->owner_favs_in_system, __METHOD__ . ',' . __LINE__);
     // Running total of favorites newly stored by this call. Initialised up front so the
     // first increment does not read an undefined variable.
     $fcount = 0;
     $continue = true;
     while ($continue) {
         list($tweets, $http_status, $payload) = $this->getFavorites($last_fav_id);
         if ($http_status == 200) {
             if (sizeof($tweets) == 0) {
                 // then done -- this should happen when we have run out of favs
                 $this->logger->logInfo("It appears that we have run out of favorites to process", __METHOD__ . ',' . __LINE__);
                 $continue = false;
             } else {
                 $post_dao = DAOFactory::getDAO('FavoritePostDAO');
                 foreach ($tweets as $tweet) {
                     $tweet['network'] = 'twitter';
                     if ($post_dao->addFavorite($this->user->user_id, $tweet) > 0) {
                         URLProcessor::processPostURLs($tweet['post_text'], $tweet['post_id'], 'twitter', $this->logger);
                         $this->logger->logInfo("found new fav: " . $tweet['post_id'], __METHOD__ . ',' . __LINE__);
                         $fcount++;
                         $this->logger->logInfo("fcount: {$fcount}", __METHOD__ . ',' . __LINE__);
                         $this->logger->logInfo("added favorite: " . $tweet['post_id'], __METHOD__ . ',' . __LINE__);
                     } else {
                         // fav was already stored, so take no action. This could happen both because some
                         // of the favs on the given page were processed last time, or because a separate process,
                         // such as a UserStream process, is also watching for and storing favs.
                         $status_message = "have already stored fav " . $tweet['post_id'];
                         $this->logger->logDebug($status_message, __METHOD__ . ',' . __LINE__);
                     }
                     // keep track of the highest fav id we've encountered
                     if ($tweet['post_id'] > $last_fav_id) {
                         $last_fav_id = $tweet['post_id'];
                     }
                 }
                 // end foreach
             }
         } else {
             // Any non-200 response ends the crawl for this run.
             $continue = false;
         }
     }
 }
 /**
  * Save expanded version of all unexpanded URLs to data store.
  *
  * Loads up to $this->link_limit links from the link DAO and follows each valid one through
  * untinyurl() until the URL stops changing, comes back empty, or revisits a URL already seen
  * on this chain. A non-empty final URL is saved with its image source; every other outcome
  * is counted as an error for the closing summary log.
  */
 public function expandRemainingURLs()
 {
     $logger = Logger::getInstance();
     $link_dao = DAOFactory::getDAO('LinkDAO');
     $links_to_expand = $link_dao->getLinksToExpand($this->link_limit);
     $logger->logUserInfo(count($links_to_expand) . " links to expand. Please wait. Working...", __METHOD__ . ',' . __LINE__);
     $total_expanded = 0;
     $total_errors = 0;
     foreach ($links_to_expand as $index => $link) {
         if (Utils::validateURL($link)) {
             $logger->logInfo("Expanding " . ($total_expanded + 1) . " of " . count($links_to_expand) . " (" . $link . ")", __METHOD__ . ',' . __LINE__);
             //make sure shortened short links--like t.co--get fully expanded
             $fully_expanded = false;
             $short_link = $link;
             // URLs already visited while expanding this link. Without this guard a redirect
             // cycle (A -> B -> A) would keep the loop below running forever.
             $visited_links = array();
             while (!$fully_expanded) {
                 $visited_links[$short_link] = true;
                 $expanded_url = self::untinyurl($short_link, $link_dao, $link, $index, count($links_to_expand));
                 if ($expanded_url == $short_link || $expanded_url == '' || isset($visited_links[$expanded_url])) {
                     $fully_expanded = true;
                 }
                 $short_link = $expanded_url;
             }
             if ($expanded_url != '') {
                 $image_src = URLProcessor::getImageSource($expanded_url);
                 $link_dao->saveExpandedUrl($link, $expanded_url, '', $image_src);
                 $total_expanded = $total_expanded + 1;
             } else {
                 // Expansion ended on an empty URL; only the error count records this case.
                 $total_errors = $total_errors + 1;
             }
         } else {
             $total_errors = $total_errors + 1;
             $logger->logError($link . " not a valid URL", __METHOD__ . ',' . __LINE__);
             $link_dao->saveExpansionError($link, "Invalid URL");
         }
     }
     $logger->logUserSuccess($total_expanded . " URLs successfully expanded (" . $total_errors . " errors).", __METHOD__ . ',' . __LINE__);
 }
Beispiel #11
0
 /**
  * Add a post and, when addPost() reports a stored post, save the data that accompanies it:
  * the author record, the post's URLs, and any mentions, hashtags and place in $entities.
  *
  * @param array $vals Post values; reads 'post_text', 'post_id', 'author_user_id' and 'network'.
  * @param array|null $entities Optional entity data; the keys 'urls', 'mentions', 'hashtags'
  *                             and 'place' are used when set.
  * @param array|null $user_array Optional data handed to the User constructor for the author update.
  * @return mixed Whatever addPost() returned.
  */
 public function addPostAndAssociatedInfo(array $vals, $entities = null, $user_array = null)
 {
     $post_result = $this->addPost($vals);
     if (!$post_result) {
         // Nothing was stored, so there is no associated data to save.
         return $post_result;
     }

     if ($user_array) {
         $author = new User($user_array);
         $user_dao = DAOFactory::getDAO('UserDAO');
         $user_dao->setLoggerInstance($this->logger);
         $user_dao->updateUser($author);
     }

     // Entity-supplied URLs are forwarded when present; otherwise null is passed.
     $entity_urls = ($entities && isset($entities['urls'])) ? $entities['urls'] : null;
     URLProcessor::processPostURLs($vals['post_text'], $vals['post_id'], 'twitter', $this->logger, $entity_urls);

     if (!isset($entities)) {
         return $post_result;
     }

     if (isset($entities['mentions'])) {
         $mention_dao = DAOFactory::getDAO('MentionDAO');
         $mention_dao->setLoggerInstance($this->logger);
         $mention_dao->insertMentions($entities['mentions'], $vals['post_id'], $vals['author_user_id'], $vals['network']);
     }

     if (isset($entities['hashtags'])) {
         $hashtag_dao = DAOFactory::getDAO('HashtagDAO');
         $hashtag_dao->setLoggerInstance($this->logger);
         $hashtag_dao->insertHashtags($entities['hashtags'], $vals['post_id'], $vals['network']);
     }

     // A place is stored only when the entry is both set and truthy.
     if (!empty($entities['place'])) {
         $place_dao = DAOFactory::getDAO('PlaceDAO');
         $place_dao->setLoggerInstance($this->logger);
         $place_dao->insertPlace($entities['place'], $vals['post_id'], $vals['network']);
     }

     return $post_result;
 }
Beispiel #12
0
 /**
  * Checks that URLProcessor::processPostURLs() stores one Link per sample post, with the expected expanded URL
  * and image source, first for Twitter posts and then for Facebook posts.
  */
 public function testProcessPostURLs()
 {
     // One row per sample post. 'fresh_dao' records whether a new LinkMySQLDAO is built before the lookup;
     // the second instagr.am row reuses the DAO left over from the row before it.
     $samples = array(
         //Twitpic
         array(
             'network' => 'twitter',
             'post_id' => 100,
             'post_text' => "This is a Twitpic post http://twitpic.com/blah Yay!",
             'url' => 'http://twitpic.com/blah',
             'expanded_url' => 'http://twitpic.com/blah',
             'image_src' => 'http://twitpic.com/show/thumb/blah',
             'fresh_dao' => true,
         ),
         //Yfrog
         array(
             'network' => 'twitter',
             'post_id' => 101,
             'post_text' => "This is a Yfrog post http://yfrog.com/blah Yay!",
             'url' => 'http://yfrog.com/blah',
             'expanded_url' => 'http://yfrog.com/blah',
             'image_src' => 'http://yfrog.com/blah.th.jpg',
             'fresh_dao' => true,
         ),
         //Twitgoo
         array(
             'network' => 'twitter',
             'post_id' => 102,
             'post_text' => "This is a Twitgoo post http://twitgoo.com/blah Yay!",
             'url' => 'http://twitgoo.com/blah',
             'expanded_url' => 'http://twitgoo.com/blah',
             'image_src' => 'http://twitgoo.com/show/thumb/blah',
             'fresh_dao' => true,
         ),
         //test facebook
         //Picplz
         array(
             'network' => 'facebook',
             'post_id' => 103,
             'post_text' => "This is a Picplz post http://picplz.com/blah Yay!",
             'url' => 'http://picplz.com/blah',
             'expanded_url' => 'http://picplz.com/blah',
             'image_src' => 'http://picplz.com/blah/thumb/',
             'fresh_dao' => true,
         ),
         // instagr.am
         // check first with ending slash in URL (which the URLs 'should' include)
         array(
             'network' => 'facebook',
             'post_id' => 104,
             'post_text' => "This is an instagram post http:/instagr.am/blah/ Yay!",
             'url' => 'http://instagr.am/blah/',
             'expanded_url' => 'http://instagr.am/blah/',
             'image_src' => 'http://instagr.am/blah/media/',
             'fresh_dao' => true,
         ),
         // check w/out ending slash also just in case
         array(
             'network' => 'facebook',
             'post_id' => 105,
             'post_text' => "This is an instagram post http:/instagr.am/blah Yay!",
             'url' => 'http://instagr.am/blah',
             'expanded_url' => 'http://instagr.am/blah',
             'image_src' => 'http://instagr.am/blah/media/',
             'fresh_dao' => false,
         ),
         //Flic.kr
         //still need to expand the flic.kr link
         array(
             'network' => 'facebook',
             'post_id' => 106,
             'post_text' => "This is a Flickr post http://flic.kr/blah Yay!",
             'url' => 'http://flic.kr/blah',
             'expanded_url' => '',
             'image_src' => '',
             'fresh_dao' => true,
         ),
         // curly quote directly after the link
         array(
             'network' => 'facebook',
             'post_id' => 107,
             'post_text' => "This is a post with a curly quote closing the link http://t.co/2JVSpi5Ó yo",
             'url' => 'http://t.co/2JVSpi5',
             'expanded_url' => '',
             'image_src' => '',
             'fresh_dao' => true,
         ),
     );

     foreach ($samples as $sample) {
         URLProcessor::processPostURLs(
             $sample['post_text'],
             $sample['post_id'],
             $sample['network'],
             $this->logger
         );
         if ($sample['fresh_dao']) {
             $link_dao = new LinkMySQLDAO();
         }
         $stored = $link_dao->getLinkByUrl($sample['url']);
         $this->assertIsA($stored, "Link");
         $this->assertEqual($stored->url, $sample['url']);
         $this->assertEqual($stored->expanded_url, $sample['expanded_url']);
         $this->assertEqual($stored->image_src, $sample['image_src']);
         $this->assertEqual($stored->title, '');
         $this->assertEqual($stored->post_id, $sample['post_id']);
         $this->assertEqual($stored->network, $sample['network']);
     }
 }
 /**
  * Checks that URLProcessor::processTweetURLs() stores one image Link per sample tweet on the 'twitter' network,
  * with the expected expanded (thumbnail) URL.
  */
 public function testProcessTweetURLs()
 {
     // Each row: post ID, tweet text, URL expected in storage, expected expanded URL.
     $samples = array(
         //Twitpic
         array(100, "This is a Twitpic post http://twitpic.com/blah Yay!",
             'http://twitpic.com/blah', 'http://twitpic.com/show/thumb/blah'),
         //Yfrog
         array(101, "This is a Yfrog post http://yfrog.com/blah Yay!",
             'http://yfrog.com/blah', 'http://yfrog.com/blah.th.jpg'),
         //Twitgoo
         array(102, "This is a Twitgoo post http://twitgoo.com/blah Yay!",
             'http://twitgoo.com/blah', 'http://twitgoo.com/show/thumb/blah'),
         //Picplz
         array(103, "This is a Picplz post http://picplz.com/blah Yay!",
             'http://picplz.com/blah', 'http://picplz.com/blah/thumb/'),
         //Flic.kr
         array(104, "This is a Flickr post http://flic.kr/blah Yay!",
             'http://flic.kr/blah', ''),
     );

     foreach ($samples as $sample) {
         list($post_id, $post_text, $url, $expanded_url) = $sample;
         $tweet = array('post_id' => $post_id, 'post_text' => $post_text);
         URLProcessor::processTweetURLs($this->logger, $tweet);

         $link_dao = new LinkMySQLDAO();
         $stored = $link_dao->getLinkByUrl($url);
         $this->assertIsA($stored, "Link");
         $this->assertEqual($stored->url, $url);
         $this->assertEqual($stored->expanded_url, $expanded_url);
         $this->assertEqual($stored->title, '');
         $this->assertEqual($stored->post_id, $post_id);
         $this->assertEqual($stored->network, 'twitter');
         $this->assertTrue($stored->is_image);
     }
 }
 /**
  * cleanUpMissedFavsUnFavs pages back through the older pages of favs, checking for favs that are not yet in
  * the database, as well as favs that were added to the db but are no longer returned by Twitter's API.
  * However, that latter calculation, for un-fav'd tweets, is currently not reliable due to a bug on Twitter's end,
  * and so such tweets are not currently removed from the database.
  * Due to the same issue with the API, it's not clear whether all favs of older tweets are going to be actually
  * returned from Twitter (that is, it is currently not returning some actually-favorited tweets in a given range).
  * So, we may miss some older tweets that were in fact favorited, until Twitter fixes this.
  * The number of pages to page back for each run of the crawler is set by favs_cleanup_pages option.
  *
  * @return bool Always true, including when no API calls are available.
  * @throws Exception If a page of favorites comes back with a non-200 status, or getFavsPage reports -1 tweets.
  */
 public function cleanUpMissedFavsUnFavs()
 {
     // first, check that we have the resources to do work
     if (!($this->api->available && $this->api->available_api_calls_for_crawler > 0)) {
         $this->logger->logInfo("terminating cleanUpMissedFavsUnFavs-- no API calls available", __METHOD__ . ',' . __LINE__);
         return true;
     }
     $this->logger->logInfo("In cleanUpMissedFavsUnFavs", __METHOD__ . ',' . __LINE__);
     $this->logger->logInfo("User id: " . $this->user->user_id . "\n", __METHOD__ . ',' . __LINE__);
     $fcount = 0;
     $topt = $this->twitter_options;

     // default number of pages to process each time the crawler runs
     $favs_cleanup_pages = 1;
     // get plugin option value if it exists & is a positive number, otherwise use default.
     // is_numeric() rejects non-numeric strings before the int cast, so a bad option value can neither
     // slip through the "> 0" comparison nor reach the loop bound below as a string.
     if (isset($topt['favs_cleanup_pages'])) {
         $conf_favs_cleanup_pages = $topt['favs_cleanup_pages']->option_value;
         $this->logger->logInfo("conf_favs_cleanup_pages: {$conf_favs_cleanup_pages} ", __METHOD__ . ',' . __LINE__);
         if (is_numeric($conf_favs_cleanup_pages) && (int) $conf_favs_cleanup_pages > 0) {
             $favs_cleanup_pages = (int) $conf_favs_cleanup_pages;
         }
     }
     $this->logger->logInfo("favs_cleanup_pages: {$favs_cleanup_pages} ", __METHOD__ . ',' . __LINE__);
     $fpd = DAOFactory::getDAO('FavoritePostDAO');
     // number of favs per page retrieved from the API call... (tbd: any way to get
     //this from the API?)
     $pagesize = 20;

     // get 'favs_older_pages' plugin option value if it exists & is a positive number.  Use it to calculate
     // default start page if set, otherwise use default value.
     $default_start_page = 2;
     if (isset($topt['favs_older_pages'])) {
         $conf_older_favs_pages = $topt['favs_older_pages']->option_value;
         if (is_numeric($conf_older_favs_pages) && (int) $conf_older_favs_pages > 0) {
             $default_start_page = (int) $conf_older_favs_pages + 1;
         }
     }
     $this->logger->logInfo("default start page: {$default_start_page} ", __METHOD__ . ',' . __LINE__);
     $last_page_of_favs = round($this->api->archive_limit / $pagesize);
     // resume one page after the last page checked on a previous run, if there was one
     $last_unfav_page_checked = $this->instance->last_unfav_page_checked;
     $start_page = $last_unfav_page_checked > 0 ? $last_unfav_page_checked + 1 : $default_start_page;
     $this->logger->logInfo("start page: {$start_page}, with {$favs_cleanup_pages} cleanup pages", __METHOD__ . ',' . __LINE__);
     $count = 0;
     $page = $start_page;
     while ($count < $favs_cleanup_pages && $this->api->available && $this->api->available_api_calls_for_crawler > 0) {
         // get the favs from that page
         try {
             list($tweets, $cURL_status, $twitter_data) = $this->getFavsPage($page);
         } catch (APICallLimitExceededException $e) {
             break;
         }
         if ($cURL_status != 200 || $tweets == -1) {
             // todo - handle more informatively
             $this->logger->logInfo("in cleanUpMissedFavsUnFavs, error with: {$twitter_data}", __METHOD__ . ',' . __LINE__);
             throw new Exception("in cleanUpUnFavs: error parsing favs");
         }
         if (sizeof($tweets) == 0) {
             // then done paging backwards through the favs.
             // reset pointer so that we start at the recent favs again next time through.
             $this->instance->last_unfav_page_checked = 0;
             break;
         }
         // the first entry of the page is used as the upper bound and the last entry as the lower bound
         $min_tweet = $tweets[sizeof($tweets) - 1]['post_id'];
         $max_tweet = $tweets[0]['post_id'];
         $this->logger->logInfo("in cleanUpUnFavs, page {$page} min and max: {$min_tweet}, {$max_tweet}", __METHOD__ . ',' . __LINE__);
         foreach ($tweets as $fav) {
             $fav['network'] = 'twitter';
             // check whether the tweet is in the db-- if not, add it.
             if ($fpd->addFavorite($this->user->user_id, $fav) > 0) {
                 URLProcessor::processPostURLs($fav['post_text'], $fav['post_id'], 'twitter', $this->logger);
                 $this->logger->logInfo("added fav " . $fav['post_id'], __METHOD__ . ',' . __LINE__);
                 $fcount++;
             } else {
                 $status_message = "have already stored fav " . $fav['post_id'];
                 $this->logger->logDebug($status_message, __METHOD__ . ',' . __LINE__);
             }
         }
         // now for each favorited tweet in the database within the fetched range, check whether it's still
         // favorited. This part of the method is currently disabled due to issues with the Twitter API, which
         // is not returning all of the favorited tweets any more.  So, the fact that a previously-archived
         // tweet is not returned, no longer indicates that it was un-fav'd.
         // The method still IDs the 'missing' tweets, but no longer deletes them.  We may want to get rid of
         //  this check altogether at some point.
         $fposts = $fpd->getAllFavoritePostsUpperBound($this->user->user_id, 'twitter', $pagesize, $max_tweet + 1);
         foreach ($fposts as $old_fav) {
             $old_fav_id = $old_fav->post_id;
             if ($old_fav_id < $min_tweet) {
                 $this->logger->logInfo("Old fav {$old_fav_id} out of range ", __METHOD__ . ',' . __LINE__);
                 // all the rest will be out of range also then
                 break;
             }
             // look for the old_fav_id in the array of fetched favs
             $found = false;
             foreach ($tweets as $tweet) {
                 if ($old_fav_id == $tweet['post_id']) {
                     $found = true;
                     break;
                 }
             }
             if (!$found) {
                 // if it's not there...
                 // 14/10 arghh -- Twitter is suddenly (temporarily?) not returning all fav'd tweets in a
                 // sequence.
                 // skipping the delete for now, keep tabs on it.  Can check before delete with extra API
                 // request, but the point of doing it this way was to avoid the additional API request.
                 $this->logger->logInfo("Twitter claims tweet not still favorited, but this is currently " . "broken, so not deleting: " . $old_fav_id, __METHOD__ . ',' . __LINE__);
                 // 'unfavorite' by removing from favorites table
                 // $fpd->unFavorite($old_fav_id, $this->user->user_id);
             }
         }
         // remember the page just processed, then move on to the next one
         $this->instance->last_unfav_page_checked = $page;
         $page++;
         if ($page > $last_page_of_favs) {
             // NOTE(review): the persisted pointer stays at the last page here, so the next run starts one page
             // past $last_page_of_favs and relies on the empty-page branch above to reset it. Confirm whether
             // last_unfav_page_checked should be reset to 0 at this point instead.
             break;
         }
         $count++;
     }
     $this->logger->logUserSuccess("Added {$fcount} older missed favorites", __METHOD__ . ',' . __LINE__);
     return true;
 }
 /**
  * Retrieve tweets in search results for a keyword/hashtag.
  *
  * Repeatedly calls the search endpoint, stores each returned tweet, links it (and any retweeted post it
  * carries) to the hashtag, updates the tweet authors, and advances the last/earliest post ID markers on
  * $instance_hashtag. Paging stops when a page holds fewer tweets than requested, when the API call limit is
  * hit, or when the API answers with a non-200 status. Does nothing when no instance is set.
  *
  * @param InstanceHashtag $instance_hashtag
  * @return void
  */
 public function fetchInstanceHashtagTweets($instance_hashtag)
 {
     if (!isset($this->instance)) {
         return;
     }
     $continue_fetching = true;
     $since_id = 0;
     $max_id = 0;
     $instance_hashtag_dao = DAOFactory::getDAO('InstanceHashtagDAO');
     $post_dao = DAOFactory::getDAO('PostDAO');
     $user_dao = DAOFactory::getDAO('UserDAO');
     $hashtagpost_dao = DAOFactory::getDAO('HashtagPostDAO');
     $hashtag_dao = DAOFactory::getDAO('HashtagDAO');
     //Get hashtag
     $hashtag = $hashtag_dao->getHashtagByID($instance_hashtag->hashtag_id);
     while ($continue_fetching) {
         $endpoint = $this->api->endpoints['search_tweets'];
         $args = array();
         $args["q"] = $hashtag->hashtag;
         $count_arg = isset($this->twitter_options['tweet_count_per_call']) ? $this->twitter_options['tweet_count_per_call']->option_value : 100;
         $args["count"] = $count_arg;
         $args["include_entities"] = "true";
         // NOTE(review): when the stored last_post_id starts at 0, $since_id is re-read on the second pass,
         // after the first page has already advanced last_post_id; $max_id can then never exceed $since_id,
         // so max_id paging is skipped. Confirm whether $since_id should be captured once before the loop.
         if ($since_id == 0) {
             $since_id = $instance_hashtag->last_post_id;
         }
         if ($since_id > 0) {
             $args["since_id"] = $since_id;
         }
         if ($max_id > $since_id) {
             $args["max_id"] = $max_id;
         }
         try {
             list($http_status, $payload) = $this->api->apiRequest($endpoint, $args);
         } catch (APICallLimitExceededException $e) {
             $this->logger->logInfo($e->getMessage(), __METHOD__ . ',' . __LINE__);
             break;
         }
         if ($http_status == 200) {
             $this->logger->logDebug('Search tweets 200 ' . $endpoint->getPath(), __METHOD__ . ',' . __LINE__);
             $count = 0;
             $user_count = 0;
             $tweets = $this->api->parseJSONTweetsFromSearch($payload);
             foreach ($tweets as $tweet) {
                 $this->logger->logDebug('Processing ' . $tweet['post_id'], __METHOD__ . ',' . __LINE__);
                 $this->logger->logDebug('Processing ' . Utils::varDumpToString($tweet), __METHOD__ . ',' . __LINE__);
                 // The return value is not needed: presence is re-checked via isPostInDB() just below.
                 $post_dao->addPost($tweet, $this->user, $this->logger);
                 //We need to check if post exists before add relationship between post and hashtag
                 if ($post_dao->isPostInDB($tweet['post_id'], 'twitter')) {
                     if (!$hashtagpost_dao->isHashtagPostInStorage($hashtag->id, $tweet['post_id'], 'twitter')) {
                         $count++;
                         $hashtagpost_dao->insertHashtagPost($hashtag->hashtag, $tweet['post_id'], 'twitter');
                         $user = new User($tweet);
                         $rows_updated = $user_dao->updateUser($user);
                         if ($rows_updated > 0) {
                             $user_count = $user_count + $rows_updated;
                         }
                         $this->logger->logDebug('User has been updated', __METHOD__ . ',' . __LINE__);
                         if (isset($tweet['retweeted_post']) && isset($tweet['retweeted_post']['content'])) {
                             $this->logger->logDebug('Retweeted post info set', __METHOD__ . ',' . __LINE__);
                             // link the original (retweeted) post to the hashtag as well
                             if (!$hashtagpost_dao->isHashtagPostInStorage($hashtag->id, $tweet['retweeted_post']['content']['post_id'], 'twitter')) {
                                 $this->logger->logDebug('Retweeted post not in storage', __METHOD__ . ',' . __LINE__);
                                 $count++;
                                 $hashtagpost_dao->insertHashtagPost($hashtag->hashtag, $tweet['retweeted_post']['content']['post_id'], 'twitter');
                                 $user_retweet = new User($tweet['retweeted_post']['content']);
                                 $rows_retweet_updated = $user_dao->updateUser($user_retweet);
                                 if ($rows_retweet_updated > 0) {
                                     $user_count = $user_count + $rows_retweet_updated;
                                 }
                             } else {
                                 $this->logger->logDebug('Retweeted post in storage', __METHOD__ . ',' . __LINE__);
                             }
                         } else {
                             $this->logger->logDebug('Retweeted post info not set', __METHOD__ . ',' . __LINE__);
                         }
                         $this->logger->logDebug('About to process URLs', __METHOD__ . ',' . __LINE__);
                         URLProcessor::processPostURLs($tweet['post_text'], $tweet['post_id'], 'twitter', $this->logger);
                         $this->logger->logDebug('URLs have been processed', __METHOD__ . ',' . __LINE__);
                     }
                 }
                 // track the newest and oldest post IDs seen, plus the lowest ID for max_id paging
                 if ($tweet['post_id'] > $instance_hashtag->last_post_id) {
                     $instance_hashtag->last_post_id = $tweet['post_id'];
                 }
                 if ($instance_hashtag->earliest_post_id == 0 || $tweet['post_id'] < $instance_hashtag->earliest_post_id) {
                     $instance_hashtag->earliest_post_id = $tweet['post_id'];
                 }
                 if ($max_id == 0 || $tweet['post_id'] < $max_id) {
                     $max_id = $tweet['post_id'];
                 }
                 $this->logger->logDebug('Instance hashtag markers updated', __METHOD__ . ',' . __LINE__);
             }
             //Status message for tweets and users
             $status_message = ' ' . count($tweets) . " tweet(s) found and {$count} saved";
             $this->logger->logUserSuccess($status_message, __METHOD__ . ',' . __LINE__);
             $status_message = ' ' . count($tweets) . " tweet(s) found and {$user_count} users saved";
             $this->logger->logUserSuccess($status_message, __METHOD__ . ',' . __LINE__);
             //Save instance_hashtag important values
             if ($instance_hashtag->last_post_id > 0) {
                 $instance_hashtag_dao->updateLastPostID($instance_hashtag->instance_id, $instance_hashtag->hashtag_id, $instance_hashtag->last_post_id);
             }
             if ($instance_hashtag->earliest_post_id > 0) {
                 $instance_hashtag_dao->updateEarliestPostID($instance_hashtag->instance_id, $instance_hashtag->hashtag_id, $instance_hashtag->earliest_post_id);
             }
             //Do not continue fetching if the search did not return the maximum number of tweets
             if (count($tweets) < $count_arg) {
                 $continue_fetching = false;
             }
         } else {
             // Report the status actually returned by apiRequest().
             // NOTE(review): this failure is reported through logUserSuccess; confirm whether an error-level
             // logger method should be used instead.
             $status_message = "Stop fetching tweets. cURL_status = " . $http_status;
             $this->logger->logUserSuccess($status_message, __METHOD__ . ',' . __LINE__);
             $continue_fetching = false;
         }
     }
 }
    /**
     * Exercises URLProcessor::processTweetURLs() on one good and one bad instagr.am link, then checks
     * URLProcessor::extractInstagramImageURL() directly against a saved response file.
     */
    public function testProcessTweetInstagramURLs() {
        $image_url =
        'http://distillery.s3.amazonaws.com/media/2010/12/20/f0f411210cc54353be07cf74ceb79f3b_7.jpg';

        // Each row: post ID, tweet text, stored URL, expected expanded URL, whether the link is an image.
        $samples = array(
            //instagr.am
            array(105, "This is an Instagram post:  http://instagr.am/p/oyQ6/ :)",
                'http://instagr.am/p/oyQ6/', $image_url, true),
            // bad instagr.am URL
            array(106, "This is an Instagram post with a bad URL:  http://instagr.am/p/oyQ5/ :(",
                'http://instagr.am/p/oyQ5/', '', false),
        );

        foreach ($samples as $sample) {
            list($post_id, $post_text, $url, $expanded_url, $expect_image) = $sample;
            $tweet = array('post_id' => $post_id, 'post_text' => $post_text);
            URLProcessor::processTweetURLs($this->logger, $tweet);

            $link_dao = new LinkMySQLDAO();
            $stored = $link_dao->getLinkByUrl($url);
            $this->assertIsA($stored, "Link");
            $this->assertEqual($stored->url, $url);
            $this->assertEqual($stored->expanded_url, $expanded_url);
            $this->assertEqual($stored->title, '');
            $this->assertEqual($stored->post_id, $post_id);
            $this->assertEqual($stored->network, 'twitter');
            if ($expect_image) {
                $this->assertTrue($stored->is_image);
            } else {
                $this->assertFalse($stored->is_image);
            }
        }

        // test regexp extraction of image link from html
        $html = file_get_contents($this->faux_data_path . "/instagr_am_p_oyQ6");
        list($extracted_url, $extracted_is_image) = URLProcessor::extractInstagramImageURL($this->logger, $html);
        $this->assertEqual($extracted_url, $image_url);
        $this->assertTrue($extracted_is_image);
    }
 /**
  * Save expanded version of all unexpanded URLs to data store, as well as intermediary short links.
  *
  * Each link is expanded hop by hop until the expander returns the same URL, an empty result, or a URL that
  * was already visited in this chain (a redirect loop). Every intermediate short link is recorded.
  *
  * @param string|null $flickr_api_key When set, links starting with http://flic.kr/ also get Flickr thumbnail
  *                                    processing and are not saved through the regular expanded-URL path.
  * @return void
  */
 public function expandOriginalURLs($flickr_api_key = null)
 {
     $links_to_expand = $this->link_dao->getLinksToExpand($this->link_limit);
     $total_links = count($links_to_expand);
     $this->logger->logUserInfo($total_links . " links to expand. Please wait. Working...", __METHOD__ . ',' . __LINE__);
     $total_expanded = 0;
     $total_errors = 0;
     $has_expanded_flickr_link = false;
     foreach ($links_to_expand as $index => $link) {
         if (Utils::validateURL($link->url)) {
             $this->logger->logInfo("Expanding " . ($total_expanded + 1) . " of " . $total_links . " (" . $link->url . ")", __METHOD__ . ',' . __LINE__);
             //make sure shortened short links--like t.co--get fully expanded
             $fully_expanded = false;
             $short_link = $link->url;
             // URLs already followed for this link, so a redirect cycle (A -> B -> A) cannot loop forever
             $visited_urls = array();
             while (!$fully_expanded) {
                 $visited_urls[$short_link] = true;
                 //begin Flickr thumbnail processing
                 if (isset($flickr_api_key) && substr($short_link, 0, strlen('http://flic.kr/')) == 'http://flic.kr/') {
                     self::expandFlickrThumbnail($flickr_api_key, $short_link, $link->url);
                     $has_expanded_flickr_link = true;
                     $fully_expanded = true;
                 }
                 //end Flickr thumbnail processing
                 $expanded_url = URLExpander::expandURL($short_link, $link->url, $index, $total_links, $this->link_dao, $this->logger);
                 if ($expanded_url == $short_link || $expanded_url == '') {
                     $fully_expanded = true;
                 } elseif (isset($visited_urls[$expanded_url])) {
                     // the chain has come back to a URL it already passed through; stop here
                     $this->logger->logInfo("Redirect loop detected while expanding " . $link->url, __METHOD__ . ',' . __LINE__);
                     $fully_expanded = true;
                 } else {
                     $this->short_link_dao->insert($link->id, $short_link);
                 }
                 $short_link = $expanded_url;
             }
             if (!$has_expanded_flickr_link) {
                 if ($expanded_url != '') {
                     $image_src = URLProcessor::getImageSource($expanded_url);
                     $this->link_dao->saveExpandedUrl($link->url, $expanded_url, '', $image_src);
                     $total_expanded = $total_expanded + 1;
                 } else {
                     $this->logger->logError($link->url . " not a valid URL - relocates to nowhere", __METHOD__ . ',' . __LINE__);
                     $this->link_dao->saveExpansionError($link->url, "Invalid URL - relocates to nowhere");
                     $total_errors = $total_errors + 1;
                 }
             }
         } else {
             $total_errors = $total_errors + 1;
             $this->logger->logError($link->url . " not a valid URL", __METHOD__ . ',' . __LINE__);
             $this->link_dao->saveExpansionError($link->url, "Invalid URL");
         }
         // the Flickr flag applies per link; clear it before the next one
         $has_expanded_flickr_link = false;
     }
     $this->logger->logUserSuccess($total_expanded . " URLs successfully expanded (" . $total_errors . " errors).", __METHOD__ . ',' . __LINE__);
 }