/**
  * If link is an image (Twitpic/Twitgoo/Yfrog/Flickr for now), insert direct path to thumb as expanded url.
  * @TODO Move image thumbnail processng to Expand URLs plugin.
  * @param Logger $logger
  * @param str $tweet
  * @param Array $urls
  */
 public static function processTweetURLs($logger, $tweet, $urls = null)
 {
     $link_dao = DAOFactory::getDAO('LinkDAO');
     if (!$urls) {
         $urls = Post::extractURLs($tweet['post_text']);
     }
     foreach ($urls as $u) {
         $logger->logInfo("processing url: {$u}", __METHOD__ . ',' . __LINE__);
         $is_image = 0;
         $title = '';
         $eurl = '';
         if (substr($u, 0, strlen('http://twitpic.com/')) == 'http://twitpic.com/') {
             $eurl = 'http://twitpic.com/show/thumb/' . substr($u, strlen('http://twitpic.com/'));
             $is_image = 1;
         } elseif (substr($u, 0, strlen('http://yfrog.com/')) == 'http://yfrog.com/') {
             $eurl = $u . '.th.jpg';
             $is_image = 1;
         } elseif (substr($u, 0, strlen('http://twitgoo.com/')) == 'http://twitgoo.com/') {
             $eurl = 'http://twitgoo.com/show/thumb/' . substr($u, strlen('http://twitgoo.com/'));
             $is_image = 1;
         } elseif (substr($u, 0, strlen('http://picplz.com/')) == 'http://picplz.com/') {
             $eurl = $u . '/thumb/';
             $is_image = 1;
         } elseif (substr($u, 0, strlen('http://flic.kr/')) == 'http://flic.kr/') {
             $is_image = 1;
         } elseif (substr($u, 0, strlen('http://instagr.am/')) == 'http://instagr.am/') {
             $is_image = 1;
         }
         if ($link_dao->insert($u, $eurl, $title, $tweet['post_id'], 'twitter', $is_image)) {
             $logger->logSuccess("Inserted " . $u . " (" . $eurl . ", " . $is_image . "), into links table", __METHOD__ . ',' . __LINE__);
         } else {
             $logger->logError("Did NOT insert " . $u . " (" . $eurl . ") into links table", __METHOD__ . ',' . __LINE__);
         }
     }
 }
Example #2
0
 /**
  * Store the URLs of a post as links attached to that post.
  *
  * The post is looked up through PostDAO::getPost(); nothing is stored unless that post has an id.
  * When getImageSource() yields a non-empty image source for a URL, the URL itself is stored as its
  * expanded URL; otherwise the expanded URL is left empty.
  * @param str $post_text Text to extract URLs from when $urls is empty
  * @param int $post_id
  * @param str $network
  * @param Logger $logger
  * @param arr $urls Array of URLs, optionally set, defaults to null
  */
 public static function processPostURLs($post_text, $post_id, $network, $logger, $urls = null)
 {
     if (!$urls) {
         $urls = Post::extractURLs($post_text);
     }
     if (!$urls) {
         return;
     }
     $link_dao = DAOFactory::getDAO('LinkDAO');
     $post_dao = DAOFactory::getDAO('PostDAO');
     $post = $post_dao->getPost($post_id, $network);
     if (!isset($post->id)) {
         return;
     }
     foreach ($urls as $url) {
         $logger->logInfo("Processing URL {$url}", __METHOD__ . ',' . __LINE__);
         $image_src = self::getImageSource($url);
         // A known image source needs no expansion, so the URL doubles as its own expanded URL
         if ($image_src !== '') {
             $expanded_url = $url;
         } else {
             $expanded_url = '';
         }
         $link = new Link(array(
             'url' => $url,
             'expanded_url' => $expanded_url,
             "image_src" => $image_src,
             'post_key' => $post->id,
         ));
         // Shared fragment of all three log messages; empty when there is no thumbnail
         $thumbnail_note = $image_src == '' ? '' : "(thumbnail " . $image_src . ") ";
         try {
             $link_dao->insert($link);
             $logger->logSuccess("Inserted " . $url . " " . $thumbnail_note . "into links table", __METHOD__ . ',' . __LINE__);
         } catch (DuplicateLinkException $e) {
             $logger->logInfo($url . " " . $thumbnail_note . " already exists in links table", __METHOD__ . ',' . __LINE__);
         } catch (DataExceedsColumnWidthException $e) {
             $logger->logInfo($url . " " . $thumbnail_note . " data exceeds table column width", __METHOD__ . ',' . __LINE__);
         }
     }
 }
Example #3
0
 /**
  * Post::extractURLs() returns every URL found in the text, in order of appearance.
  */
 public function testExtractURLs()
 {
     //@TODO Finesse the regex to NOT match URLs with triple slashes, http:///badurl.com
     // Until then the triple-slash URL is part of the expected result.
     $text = "blah blah blah http:///badurl.com d http://bit.ly and http://example.org";
     $found = Post::extractURLs($text);
     $this->assertIdentical(array('http:///badurl.com', 'http://bit.ly', 'http://example.org'), $found);
 }
Example #4
0
 /**
  * Post::extractURLs() finds exactly the expected URLs in each sample text, and every URL it returns
  * passes Utils::validateURL().
  *
  * Each key of $test_patterns is an input text; its value is the list of URLs expected back.
  */
 public function testExtractURLs()
 {
     $test_patterns = array(
         "Introducing the ThinkUp developers mailing list http://bit.ly/gXpdUZ" => array('http://bit.ly/gXpdUZ'),
         "http://j.mp/g2F037 good advice (Mad Men-illustrated) for women in tech" => array('http://j.mp/g2F037'),
         "blah blah blah http:///badurl.com d http://bit.ly and http://example.org" => array('http://bit.ly', 'http://example.org'),
         "blah blah blah http:///badurl.com d HTTP://yo.com/exi.xml?hi=yes and http://example.org/blah/yoiadsf/934324/" => array('HTTP://yo.com/exi.xml?hi=yes', 'http://example.org/blah/yoiadsf/934324/'),
         "I bought the book at http://amazon.com. You should read it, too" => array('http://amazon.com'),
         "So, Who's on first? check “http://culturalwormhole.blogspot.com/” for more." => array('http://culturalwormhole.blogspot.com/'),
         "We know all about that (http://friendoflou.com), but we're not impressed." => array('http://friendoflou.com'),
         "A more terse, yet still friendly introduction notme.com norme.com/ bit.ly/gXpdUZ and blah yo.com/exi" . ".xml?hi=yes blah" => array('http://bit.ly/gXpdUZ', 'http://yo.com/exi.xml?hi=yes'),
         "tersely www.google.com notme.google.com www.nytimes.com" => array('http://www.google.com', 'http://www.nytimes.com'),
         "would you believe this url?  http://foo.com/more_(than)_one_(parens)   " => array('http://foo.com/more_(than)_one_(parens)'),
         "detects embedding <http://foo.com/blah_blah/> nicely <tag>http://example.com</tag>" => array('http://foo.com/blah_blah/', 'http://example.com'),
         '"RT @someone doesnt screw up RTs with quotes that bookend a link like http://example.com"' => array('http://example.com'),
         // NOTE(review): the trailing character had been mangled into U+FFFD; restored as a closing
         // curly quote to match the case description - confirm against the upstream fixture.
         "This here's a t.co link enclosed by a curly brace http://t.co/2JVSpi5”" => array('http://t.co/2JVSpi5'),
     );
     foreach ($test_patterns as $test_text => $expected_urls) {
         $urls = Post::extractURLs($test_text);
         // The sample text is used as the failure message prefix so a failing case is identifiable
         $this->assertIdentical($expected_urls, $urls, $test_text . ' %s');
         $this->assertTrue(array_reduce(array_map('Utils::validateURL', $urls), 'TestOfPost::isAllTrue', true));
     }
 }
 /**
  * Store every URL of a tweet in the links table, flagging links to known image hosts.
  *
  * Twitpic, Twitgoo, Yfrog, Picplz and Instagram links get a derived image URL stored as their
  * expanded URL. flic.kr links are flagged as images but keep an empty expanded URL.
  * @TODO Move image thumbnail processing to Expand URLs plugin.
  * @param Logger $logger
  * @param array $tweet Tweet data; the 'post_text' and 'post_id' keys are read
  * @param Array $urls URLs to store; when empty, they are extracted from the tweet text
  */
 public static function processTweetURLs($logger, $tweet, $urls = null)
 {
     $link_dao = DAOFactory::getDAO('LinkDAO');
     if (!$urls) {
         $urls = Post::extractURLs($tweet['post_text']);
     }
     foreach ($urls as $short_url) {
         $logger->logInfo("processing url: {$short_url}", __METHOD__ . ',' . __LINE__);
         $title = '';
         $thumb_url = '';
         // Assume an image host; the final else resets the flag for every other URL
         $is_image = 1;
         if (strpos($short_url, 'http://twitpic.com/') === 0) {
             $thumb_url = 'http://twitpic.com/show/thumb/' . substr($short_url, strlen('http://twitpic.com/'));
         } elseif (strpos($short_url, 'http://yfrog.com/') === 0) {
             $thumb_url = $short_url . '.th.jpg';
         } elseif (strpos($short_url, 'http://twitgoo.com/') === 0) {
             $thumb_url = 'http://twitgoo.com/show/thumb/' . substr($short_url, strlen('http://twitgoo.com/'));
         } elseif (strpos($short_url, 'http://picplz.com/') === 0) {
             $thumb_url = $short_url . '/thumb/';
         } elseif (strpos($short_url, 'http://flic.kr/') === 0) {
             // Flagged as an image; no thumbnail URL is derived for Flickr short links here
         } elseif (strpos($short_url, 'http://instagr.am/') === 0) {
             // see: http://instagr.am/developer/embedding/ for reference
             // The media URL redirects to the actual jpg. Only add a slash before 'media/' when the
             // link does not already end in one, since a doubled slash breaks the expanded URL.
             $separator = substr($short_url, -1) == '/' ? '' : '/';
             $thumb_url = $short_url . $separator . 'media/';
             $logger->logDebug("expanded instagram URL to: " . $thumb_url, __METHOD__ . ',' . __LINE__);
         } else {
             $is_image = 0;
         }
         $was_inserted = $link_dao->insert($short_url, $thumb_url, $title, $tweet['post_id'], 'twitter', $is_image);
         if ($was_inserted) {
             $logger->logSuccess("Inserted {$short_url} ({$thumb_url}, {$is_image}), into links table", __METHOD__ . ',' . __LINE__);
         } else {
             $logger->logError("Did NOT insert {$short_url} ({$thumb_url}) into links table", __METHOD__ . ',' . __LINE__);
         }
     }
 }
Example #6
0
 /**
  * Post::extractURLs() returns the well-formed URLs of each sample text, in order of appearance.
  */
 public function testExtractURLs()
 {
     // Each case: array(input text, URLs expected back). Triple-slash URLs must not be returned.
     $cases = array(
         array(
             "Introducing the ThinkUp developers mailing list http://bit.ly/gXpdUZ",
             array('http://bit.ly/gXpdUZ'),
         ),
         array(
             "http://j.mp/g2F037 good advice (Mad Men-illustrated) for women in tech",
             array('http://j.mp/g2F037'),
         ),
         array(
             "blah blah blah http:///badurl.com d http://bit.ly and http://example.org",
             array('http://bit.ly', 'http://example.org'),
         ),
         array(
             "blah blah blah http:///badurl.com d http://yo.com/exi.xml?hi=yes and http://example.org/blah/yoiadsf/934324/",
             array('http://yo.com/exi.xml?hi=yes', 'http://example.org/blah/yoiadsf/934324/'),
         ),
     );
     foreach ($cases as $case) {
         list($text, $expected) = $case;
         $this->assertIdentical($expected, Post::extractURLs($text));
     }
 }
Example #7
0
 /**
  * For a given post, extract URLs and store them as links, including image_src when getImageSource()
  * recognises the URL.
  *
  * When an image source is found, the URL itself is stored as its expanded URL; otherwise the
  * expanded URL is left empty.
  * @param str $post_text Text to extract URLs from when $urls is empty
  * @param int $post_id
  * @param str $network
  * @param Logger $logger
  * @param arr $urls Array of URLs, optionally set, defaults to null
  */
 public static function processPostURLs($post_text, $post_id, $network, $logger, $urls = null)
 {
     if (!$urls) {
         $urls = Post::extractURLs($post_text);
     }
     if ($urls) {
         $link_dao = DAOFactory::getDAO('LinkDAO');
         foreach ($urls as $url) {
             $logger->logInfo("Processing URL: {$url}", __METHOD__ . ',' . __LINE__);
             $image_src = self::getImageSource($url);
             //if we have an image_src, the URL is a known image source not in need of expansion
             // isset() alone is true for an empty string, which would mark every URL as already
             // expanded; treat both null and '' as "no image source".
             $has_image_src = $image_src !== null && $image_src !== '';
             $expanded_url = $has_image_src ? $url : '';
             $link_array = array('url' => $url, 'expanded_url' => $expanded_url, "image_src" => $image_src, 'post_id' => $post_id, 'network' => $network);
             $link = new Link($link_array);
             if ($link_dao->insert($link)) {
                 $logger->logSuccess("Inserted " . $url . " (thumbnail " . $image_src . "), into links table", __METHOD__ . ',' . __LINE__);
             } else {
                 $logger->logError("Did NOT insert " . $url . " (thumbnail " . $image_src . ") into links table", __METHOD__ . ',' . __LINE__);
             }
         }
     }
 }
Example #8
0
 /**
  * Extract the URLs of a tweet and store each one in the links table.
  *
  * Twitpic, Twitgoo and Yfrog links get a derived thumbnail URL stored as their expanded URL.
  * flic.kr links are flagged as images but keep an empty expanded URL.
  * @param array $tweet Tweet data; the 'post_text' and 'post_id' keys are read
  */
 private function processTweetURLs($tweet)
 {
     $link_dao = DAOFactory::getDAO('LinkDAO');
     foreach (Post::extractURLs($tweet['post_text']) as $url) {
         $title = '';
         $thumb = '';
         // Assume an image host; the default case resets the flag for every other URL
         $is_image = 1;
         //TODO Abstract out this image thumbnail link expansion into an Image Thumbnail plugin
         //modeled after the Flickr Thumbnails plugin
         switch (true) {
             case strpos($url, 'http://twitpic.com/') === 0:
                 $thumb = 'http://twitpic.com/show/thumb/' . substr($url, strlen('http://twitpic.com/'));
                 break;
             case strpos($url, 'http://yfrog.com/') === 0:
                 $thumb = $url . '.th.jpg';
                 break;
             case strpos($url, 'http://twitgoo.com/') === 0:
                 $thumb = 'http://twitgoo.com/show/thumb/' . substr($url, strlen('http://twitgoo.com/'));
                 break;
             case strpos($url, 'http://flic.kr/') === 0:
                 // image link, but no thumbnail URL is derived here
                 break;
             default:
                 $is_image = 0;
         }
         $status = $link_dao->insert($url, $thumb, $title, $tweet['post_id'], 'twitter', $is_image)
             ? "Inserted {$url} ({$thumb}, {$is_image}), into links table"
             : "Did NOT insert {$url} ({$thumb}) into links table";
         $this->logger->logStatus($status, get_class($this));
     }
 }
 /**
  * Capture the current instance user's tweets and store them in the database.
  *
  * If the instance already holds at least as many posts as Twitter reports for the user, only
  * processDeletedTweets() runs. Otherwise the user_timeline endpoint is requested repeatedly: the
  * first request carries no max_id, later ones use the instance's last_post_id as max_id.
  *
  * For every tweet that addPost() stores, its links are stored too: URLs matching one of the tweet's
  * photos are inserted with the photo's media URL as image source, and all other URLs are handed to
  * URLProcessor::processPostURLs().
  *
  * Fetching stops when a response holds no tweets, the API's archive limit is reached, the stored
  * count equals Twitter's count, max_id no longer changes, the response is not HTTP 200, or the API
  * call limit is exceeded. Afterwards a summary is logged and, if the user's username differs from
  * the instance's, the instance and the stored posts are updated with the new username.
  */
 public function fetchInstanceUserTweets()
 {
     if (!isset($this->user)) {
         $this->fetchInstanceUserInfo();
     }
     if (isset($this->user)) {
         // check for deletes
         if ($this->instance->total_posts_in_system >= $this->user->post_count) {
             $this->processDeletedTweets();
             return;
         }
         $status_message = "";
         $continue_fetching = true;
         $this->logger->logInfo("Twitter user post count:  " . $this->user->post_count . " and ThinkUp post count: " . $this->instance->total_posts_in_system, __METHOD__ . ',' . __LINE__);
         // Set up endpoint and unchanging args
         $endpoint = $this->api->endpoints['user_timeline'];
         $args = array();
         $count_arg = isset($this->twitter_options['tweet_count_per_call']) ? $this->twitter_options['tweet_count_per_call']->option_value : 100;
         $args["count"] = $count_arg;
         $args["include_rts"] = "true";
         $args["screen_name"] = $this->user->username;
         $max_id = "";
         //have we fetched the latest tweets with no max_id once?
         $got_latest_tweets = false;
         //are we fetching the archive using the max_id?
         $fetching_archive = false;
         while ($this->user->post_count > $this->instance->total_posts_in_system && $continue_fetching) {
             if ($got_latest_tweets) {
                 // Page backwards from the last post recorded on the instance
                 $max_id = $this->instance->last_post_id;
                 if ($max_id !== "") {
                     $args["max_id"] = $max_id;
                     $fetching_archive = true;
                 }
             }
             try {
                 list($http_status, $payload) = $this->api->apiRequest($endpoint, $args);
                 if ($http_status == 200) {
                     $count = 0;
                     $tweets = $this->api->parseJSONTweets($payload);
                     $post_dao = DAOFactory::getDAO('PostDAO');
                     $link_dao = DAOFactory::getDAO('LinkDAO');
                     foreach ($tweets as $tweet) {
                         $tweet['network'] = 'twitter';
                         $inserted_post_key = $post_dao->addPost($tweet, $this->user, $this->logger);
                         if ($inserted_post_key !== false) {
                             $count = $count + 1;
                             $this->instance->total_posts_in_system = $this->instance->total_posts_in_system + 1;
                             // Expand and insert links contained in tweet
                             $extracted_urls = Post::extractURLs($tweet['post_text']);
                             $urls = array();
                             // Skip over URLs where we are extracting image media
                             foreach ($extracted_urls as $url) {
                                 $add_url = true;
                                 if (!empty($tweet['photos'])) {
                                     foreach ($tweet['photos'] as $media) {
                                         if ($media->display_url == $url || $media->url == $url) {
                                             $add_url = false;
                                             // One matching photo is enough; no need to check the rest
                                             break;
                                         }
                                     }
                                 }
                                 if ($add_url) {
                                     $urls[] = $url;
                                 }
                             }
                             if (count($urls)) {
                                 URLProcessor::processPostURLs($tweet['post_text'], $tweet['post_id'], 'twitter', $this->logger, $urls);
                             }
                             if (!empty($tweet['photos'])) {
                                 foreach ($tweet['photos'] as $photo) {
                                     $link = new Link(array('url' => $photo->url, 'expanded_url' => $photo->expanded_url, 'image_src' => $photo->media_url, 'post_key' => $inserted_post_key));
                                     try {
                                         $link_dao->insert($link);
                                         $this->logger->logSuccess("Inserted {$photo->url} into links table", __METHOD__ . ',' . __LINE__);
                                     } catch (DuplicateLinkException $e) {
                                         $this->logger->logInfo($photo->url . " already exists in links table", __METHOD__ . ',' . __LINE__);
                                     } catch (DataExceedsColumnWidthException $e) {
                                         $this->logger->logInfo($photo->url . " data exceeds table column width", __METHOD__ . ',' . __LINE__);
                                     }
                                 }
                             }
                         }
                         // Track the last tweet seen, whether or not it was newly stored, so the
                         // next request can use it as max_id
                         if ($this->instance->last_post_id == "" || $fetching_archive) {
                             $this->instance->last_post_id = $tweet['post_id'];
                         }
                     }
                     $got_latest_tweets = true;
                     if (count($tweets) > 0 || $count > 0) {
                         $status_message .= ' ' . count($tweets) . " tweet(s) found and {$count} saved";
                         $this->logger->logUserSuccess($status_message, __METHOD__ . ',' . __LINE__);
                         $status_message = "";
                     } else {
                         $continue_fetching = false;
                     }
                     //if you've got more than the Twitter API archive limit, stop looking for more tweets
                     if ($this->instance->total_posts_in_system >= $this->api->archive_limit) {
                         $continue_fetching = false;
                         $overage_info = "Twitter only makes " . number_format($this->api->archive_limit) . " tweets available, so some of the oldest ones may be missing.";
                     } else {
                         $overage_info = "";
                     }
                     if ($this->user->post_count == $this->instance->total_posts_in_system) {
                         $this->instance->is_archive_loaded_tweets = true;
                         $continue_fetching = false;
                     }
                     // The page did not move last_post_id, so another request would repeat this one
                     if ($max_id !== "" && $this->instance->last_post_id !== "" && $max_id == $this->instance->last_post_id) {
                         $continue_fetching = false;
                     }
                 } else {
                     $continue_fetching = false;
                 }
             } catch (APICallLimitExceededException $e) {
                 $this->logger->logInfo($e->getMessage(), __METHOD__ . ',' . __LINE__);
                 break;
             }
         }
         $status_message .= number_format($this->instance->total_posts_in_system) . " tweets are in ThinkUp; " . $this->user->username . " has " . number_format($this->user->post_count) . " tweets according to Twitter.";
         $this->logger->logUserInfo($status_message, __METHOD__ . ',' . __LINE__);
         // $overage_info is only set once a request has returned HTTP 200
         if (isset($overage_info) && $overage_info != '') {
             $this->logger->logUserError($overage_info, __METHOD__ . ',' . __LINE__);
         }
         if ($this->instance->total_posts_in_system >= $this->user->post_count) {
             $status_message = "All of " . $this->user->username . "'s tweets are in ThinkUp.";
             $this->logger->logUserSuccess($status_message, __METHOD__ . ',' . __LINE__);
         }
         if (isset($this->user->username) && $this->user->username != $this->instance->network_username) {
             // User has changed their username, so update instance and posts data
             $instance_dao = DAOFactory::getDAO('InstanceDAO');
             $instance_dao->updateUsername($this->instance->id, $this->user->username);
             $post_dao = DAOFactory::getDAO('PostDAO');
             $post_dao->updateAuthorUsername($this->instance->network_user_id, 'twitter', $this->user->username);
         }
     }
 }
 /**
  * Extract the URLs of a tweet, expand each one and store it in the links table.
  *
  * Twitpic, Twitgoo and Yfrog links get a derived thumbnail URL. flic.kr/p/ links are resolved
  * through $fa when it has an API key. Every other URL is expanded through $lurl.
  * @param array $tweet Tweet data; the 'post_text' and 'post_id' keys are read
  * @param mixed $lurl URL expander; its expandUrl() result is read as an array with
  *                    'response-code', 'long-url' and optionally 'title'
  * @param mixed $fa Flickr API accessor providing api_key and getFlickrPhotoSource()
  */
 private function processTweetURLs($tweet, $lurl, $fa)
 {
     $ld = new LinkDAO($this->db, $this->logger);
     $urls = Post::extractURLs($tweet['post_text']);
     foreach ($urls as $link) {
         // Defaults for a URL that is neither an image nor expandable
         $title = '';
         $long_url = '';
         $is_image = 0;
         if (strncmp($link, 'http://twitpic.com/', strlen('http://twitpic.com/')) === 0) {
             $long_url = 'http://twitpic.com/show/thumb/' . substr($link, strlen('http://twitpic.com/'));
             $is_image = 1;
         } elseif (strncmp($link, 'http://yfrog.com/', strlen('http://yfrog.com/')) === 0) {
             $long_url = $link . '.th.jpg';
             $is_image = 1;
         } elseif (strncmp($link, 'http://twitgoo.com/', strlen('http://twitgoo.com/')) === 0) {
             $long_url = 'http://twitgoo.com/show/thumb/' . substr($link, strlen('http://twitgoo.com/'));
             $is_image = 1;
         } elseif ($fa->api_key != null && strncmp($link, 'http://flic.kr/p/', strlen('http://flic.kr/p/')) === 0) {
             // Only counts as an image when a photo source actually came back
             $long_url = $fa->getFlickrPhotoSource($link);
             $is_image = $long_url != '' ? 1 : 0;
         } else {
             // Not a known image host: only a response code of 200 provides a long URL and title
             $expansion = $lurl->expandUrl($link);
             if (isset($expansion['response-code']) && $expansion['response-code'] == 200) {
                 $long_url = $expansion['long-url'];
                 if (isset($expansion['title'])) {
                     $title = $expansion['title'];
                 }
             }
         }
         $verb = $ld->insert($link, $long_url, $title, $tweet['post_id'], $is_image) ? "Inserted " : "Did NOT insert ";
         $this->logger->logStatus($verb . $link . " (" . $long_url . ") into links table", get_class($this));
     }
 }