/**
 * Store every URL of a tweet in the links table.
 *
 * Twitpic, Yfrog, Twitgoo and Picplz links get a thumbnail address as their expanded URL and are flagged as
 * images. Flickr (flic.kr) and Instagram (instagr.am) links are flagged as images with an empty expanded URL.
 * All other links are stored with an empty expanded URL and no image flag.
 * @TODO Move image thumbnail processing to Expand URLs plugin.
 * @param Logger $logger
 * @param arr $tweet Tweet values; post_text and post_id are read
 * @param Array $urls URLs to store; extracted from the tweet's post_text when empty
 */
public static function processTweetURLs($logger, $tweet, $urls = null) {
    $link_dao = DAOFactory::getDAO('LinkDAO');
    if (!$urls) {
        $urls = Post::extractURLs($tweet['post_text']);
    }

    foreach ($urls as $short_url) {
        $logger->logInfo("processing url: {$short_url}", __METHOD__ . ',' . __LINE__);

        $title = '';
        $thumb_url = '';
        // Assume a known image host; the final else clears the flag for everything else.
        $image_flag = 1;

        if (strpos($short_url, 'http://twitpic.com/') === 0) {
            $thumb_url = 'http://twitpic.com/show/thumb/' . substr($short_url, strlen('http://twitpic.com/'));
        } elseif (strpos($short_url, 'http://yfrog.com/') === 0) {
            $thumb_url = $short_url . '.th.jpg';
        } elseif (strpos($short_url, 'http://twitgoo.com/') === 0) {
            $thumb_url = 'http://twitgoo.com/show/thumb/' . substr($short_url, strlen('http://twitgoo.com/'));
        } elseif (strpos($short_url, 'http://picplz.com/') === 0) {
            $thumb_url = $short_url . '/thumb/';
        } elseif (strpos($short_url, 'http://flic.kr/') === 0 || strpos($short_url, 'http://instagr.am/') === 0) {
            // Flagged as an image, but no thumbnail address is derived for these hosts.
            $thumb_url = '';
        } else {
            $image_flag = 0;
        }

        $was_inserted = $link_dao->insert($short_url, $thumb_url, $title, $tweet['post_id'], 'twitter', $image_flag);
        if ($was_inserted) {
            $logger->logSuccess("Inserted {$short_url} ({$thumb_url}, {$image_flag}), into links table",
            __METHOD__ . ',' . __LINE__);
        } else {
            $logger->logError("Did NOT insert {$short_url} ({$thumb_url}) into links table",
            __METHOD__ . ',' . __LINE__);
        }
    }
}
/**
 * Store the links of a post in the links table, keyed to the post's internal id.
 *
 * Nothing is stored when there are no URLs or when the post cannot be loaded from the PostDAO. For each URL,
 * self::getImageSource() supplies the image_src; when it is not an empty string the URL itself is stored as the
 * expanded URL, since such a link does not need expansion.
 * @param str $post_text Text to extract URLs from when $urls is empty
 * @param int $post_id
 * @param str $network
 * @param Logger $logger
 * @param arr $urls Array of URLs, optionally set, defaults to null
 */
public static function processPostURLs($post_text, $post_id, $network, $logger, $urls = null) {
    if (!$urls) {
        $urls = Post::extractURLs($post_text);
    }
    if (!$urls) {
        return;
    }

    $link_dao = DAOFactory::getDAO('LinkDAO');
    $post_dao = DAOFactory::getDAO('PostDAO');
    $post = $post_dao->getPost($post_id, $network);
    if (!isset($post->id)) {
        return;
    }

    foreach ($urls as $url) {
        $logger->logInfo("Processing URL {$url}", __METHOD__ . ',' . __LINE__);

        $image_src = self::getImageSource($url);
        // A known image source is stored as its own expanded URL.
        $expanded_url = '';
        if ($image_src !== '') {
            $expanded_url = $url;
        }
        // Shared fragment of all three log messages below.
        $thumbnail_note = ($image_src == '') ? '' : "(thumbnail " . $image_src . ") ";

        $link = new Link(array(
            'url' => $url,
            'expanded_url' => $expanded_url,
            "image_src" => $image_src,
            'post_key' => $post->id
        ));

        try {
            $link_dao->insert($link);
            $logger->logSuccess("Inserted " . $url . " " . $thumbnail_note . "into links table",
            __METHOD__ . ',' . __LINE__);
        } catch (DuplicateLinkException $e) {
            $logger->logInfo($url . " " . $thumbnail_note . " already exists in links table",
            __METHOD__ . ',' . __LINE__);
        } catch (DataExceedsColumnWidthException $e) {
            $logger->logInfo($url . " " . $thumbnail_note . " data exceeds table column width",
            __METHOD__ . ',' . __LINE__);
        }
    }
}
/**
 * Post::extractURLs() is expected to return all three URLs of the sample text, in order, including the
 * malformed triple-slash one.
 */
public function testExtractURLs() {
    //@TODO Finesse the regex to NOT match URLs with triple slashes, http:///badurl.com
    $sample_text = "blah blah blah http:///badurl.com d http://bit.ly and http://example.org";
    $expected_urls = array('http:///badurl.com', 'http://bit.ly', 'http://example.org');

    $found_urls = Post::extractURLs($sample_text);

    $this->assertIdentical($expected_urls, $found_urls);
}
/**
 * Table-driven check of Post::extractURLs(): each key is a sample text, each value the exact list of URLs
 * expected from it. Every extracted URL must also pass Utils::validateURL.
 */
public function testExtractURLs() {
    $cases = array(
        "Introducing the ThinkUp developers mailing list http://bit.ly/gXpdUZ" =>
            array('http://bit.ly/gXpdUZ'),
        "http://j.mp/g2F037 good advice (Mad Men-illustrated) for women in tech" =>
            array('http://j.mp/g2F037'),
        "blah blah blah http:///badurl.com d http://bit.ly and http://example.org" =>
            array('http://bit.ly', 'http://example.org'),
        "blah blah blah http:///badurl.com d HTTP://yo.com/exi.xml?hi=yes and http://example.org/blah/yoiadsf/934324/" =>
            array('HTTP://yo.com/exi.xml?hi=yes', 'http://example.org/blah/yoiadsf/934324/'),
        "I bought the book at http://amazon.com. You should read it, too" =>
            array('http://amazon.com'),
        "So, Who's on first? check “http://culturalwormhole.blogspot.com/” for more." =>
            array('http://culturalwormhole.blogspot.com/'),
        "We know all about that (http://friendoflou.com), but we're not impressed." =>
            array('http://friendoflou.com'),
        "A more terse, yet still friendly introduction notme.com norme.com/ bit.ly/gXpdUZ and blah yo.com/exi" . ".xml?hi=yes blah" =>
            array('http://bit.ly/gXpdUZ', 'http://yo.com/exi.xml?hi=yes'),
        "tersely www.google.com notme.google.com www.nytimes.com" =>
            array('http://www.google.com', 'http://www.nytimes.com'),
        "would you believe this url? http://foo.com/more_(than)_one_(parens) " =>
            array('http://foo.com/more_(than)_one_(parens)'),
        "detects embedding <http://foo.com/blah_blah/> nicely <tag>http://example.com</tag>" =>
            array('http://foo.com/blah_blah/', 'http://example.com'),
        '"RT @someone doesnt screw up RTs with quotes that bookend a link like http://example.com"' =>
            array('http://example.com'),
        "This here's a t.co link enclosed by a curly brace http://t.co/2JVSpi5�" =>
            array('http://t.co/2JVSpi5')
    );

    foreach ($cases as $sample_text => $wanted_urls) {
        $found_urls = Post::extractURLs($sample_text);
        // The sample text is part of the failure message so a failing case can be identified.
        $this->assertIdentical($wanted_urls, $found_urls, $sample_text . ' %s');

        $validity_flags = array_map('Utils::validateURL', $found_urls);
        $all_valid = array_reduce($validity_flags, 'TestOfPost::isAllTrue', true);
        $this->assertTrue($all_valid);
    }
}
/**
 * Store every URL of a tweet in the links table, deriving a direct image address for known image hosts.
 *
 * Twitpic, Yfrog, Twitgoo, Picplz and Instagram links get an expanded URL pointing at the image and are flagged
 * as images. Flickr (flic.kr) links are flagged as images with an empty expanded URL. Any other link is stored
 * with an empty expanded URL and no image flag.
 * @TODO Move image thumbnail processing to Expand URLs plugin.
 * @param Logger $logger
 * @param arr $tweet Tweet values; post_text and post_id are read
 * @param Array $urls URLs to store; extracted from the tweet's post_text when empty
 */
public static function processTweetURLs($logger, $tweet, $urls = null) {
    $link_dao = DAOFactory::getDAO('LinkDAO');
    if (!$urls) {
        $urls = Post::extractURLs($tweet['post_text']);
    }

    // No entry is a prefix of another, so at most one can match a given URL.
    $image_hosts = array('http://twitpic.com/', 'http://yfrog.com/', 'http://twitgoo.com/', 'http://picplz.com/',
    'http://flic.kr/', 'http://instagr.am/');

    foreach ($urls as $u) {
        $logger->logInfo("processing url: {$u}", __METHOD__ . ',' . __LINE__);

        $matched_host = '';
        foreach ($image_hosts as $host) {
            if (strncmp($u, $host, strlen($host)) === 0) {
                $matched_host = $host;
                break;
            }
        }

        $title = '';
        $eurl = '';
        $is_image = ($matched_host !== '') ? 1 : 0;

        switch ($matched_host) {
            case 'http://twitpic.com/':
            case 'http://twitgoo.com/':
                // Both services serve thumbnails from <host>show/thumb/<id>
                $eurl = $matched_host . 'show/thumb/' . substr($u, strlen($matched_host));
                break;
            case 'http://yfrog.com/':
                $eurl = $u . '.th.jpg';
                break;
            case 'http://picplz.com/':
                $eurl = $u . '/thumb/';
                break;
            case 'http://instagr.am/':
                // see: http://instagr.am/developer/embedding/ for reference
                // the media path does a redirect to the actual jpg; a URL that already ends in a slash
                // must not get a second one before 'media', or the expanded url breaks
                $eurl = (substr($u, -1) == '/') ? $u . 'media/' : $u . '/media/';
                $logger->logDebug("expanded instagram URL to: " . $eurl, __METHOD__ . ',' . __LINE__);
                break;
            default:
                // flic.kr links and unknown hosts keep an empty expanded URL
                break;
        }

        if ($link_dao->insert($u, $eurl, $title, $tweet['post_id'], 'twitter', $is_image)) {
            $logger->logSuccess("Inserted {$u} ({$eurl}, {$is_image}), into links table",
            __METHOD__ . ',' . __LINE__);
        } else {
            $logger->logError("Did NOT insert {$u} ({$eurl}) into links table", __METHOD__ . ',' . __LINE__);
        }
    }
}
/**
 * Table-driven check of Post::extractURLs(): each sample text must yield exactly the listed URLs, in order.
 * The samples containing http:///badurl.com expect that triple-slash URL to be left out.
 */
public function testExtractURLs() {
    $cases = array(
        "Introducing the ThinkUp developers mailing list http://bit.ly/gXpdUZ" =>
            array('http://bit.ly/gXpdUZ'),
        "http://j.mp/g2F037 good advice (Mad Men-illustrated) for women in tech" =>
            array('http://j.mp/g2F037'),
        "blah blah blah http:///badurl.com d http://bit.ly and http://example.org" =>
            array('http://bit.ly', 'http://example.org'),
        "blah blah blah http:///badurl.com d http://yo.com/exi.xml?hi=yes and " .
        "http://example.org/blah/yoiadsf/934324/" =>
            array('http://yo.com/exi.xml?hi=yes', 'http://example.org/blah/yoiadsf/934324/')
    );

    foreach ($cases as $sample_text => $wanted_urls) {
        $found_urls = Post::extractURLs($sample_text);
        $this->assertIdentical($wanted_urls, $found_urls);
    }
}
/**
 * For a given post, extract URLs and store them, including image_src if that's from a known source like Twitpic,
 * Twitgoo, Yfrog, Instagr.am.
 *
 * The URL itself is stored as the expanded URL only when self::getImageSource() yields a non-empty image source;
 * every other link is stored with an empty expanded URL.
 * @param str $post_text Text to extract URLs from when $urls is empty
 * @param int $post_id
 * @param str $network
 * @param Logger $logger
 * @param arr $urls Array of URLs, optionally set, defaults to null
 */
public static function processPostURLs($post_text, $post_id, $network, $logger, $urls = null) {
    if (!$urls) {
        $urls = Post::extractURLs($post_text);
    }
    if ($urls) {
        $link_dao = DAOFactory::getDAO('LinkDAO');
        foreach ($urls as $url) {
            $logger->logInfo("Processing URL: {$url}", __METHOD__ . ',' . __LINE__);
            $image_src = self::getImageSource($url);
            // isset() alone is also true for an empty string, which would store every ordinary link with
            // expanded_url set to the URL itself. getImageSource() is not visible from here, so both null and ''
            // are treated as "no known image source".
            $has_image_src = isset($image_src) && $image_src !== '';
            //if we have an image_src, the URL is a known image source not in need of expansion
            $expanded_url = $has_image_src ? $url : '';
            $link_array = array('url' => $url, 'expanded_url' => $expanded_url, "image_src" => $image_src,
            'post_id' => $post_id, 'network' => $network);
            $link = new Link($link_array);
            if ($link_dao->insert($link)) {
                $logger->logSuccess("Inserted " . $url . " (thumbnail " . $image_src . "), into links table",
                __METHOD__ . ',' . __LINE__);
            } else {
                $logger->logError("Did NOT insert " . $url . " (thumbnail " . $image_src . ") into links table",
                __METHOD__ . ',' . __LINE__);
            }
        }
    }
}
/**
 * Extract the URLs of a tweet's text and store each one in the links table.
 *
 * Twitpic, Yfrog and Twitgoo links get a thumbnail address as their expanded URL and are flagged as images.
 * Flickr (flic.kr) links are flagged as images with an empty expanded URL. All other links are stored with an
 * empty expanded URL and no image flag.
 * @param arr $tweet Tweet values; post_text and post_id are read
 */
private function processTweetURLs($tweet) {
    $link_dao = DAOFactory::getDAO('LinkDAO');
    $found_urls = Post::extractURLs($tweet['post_text']);

    $twitpic = 'http://twitpic.com/';
    $yfrog = 'http://yfrog.com/';
    $twitgoo = 'http://twitgoo.com/';
    $flickr = 'http://flic.kr/';

    foreach ($found_urls as $link) {
        //set defaults
        $title = '';
        $thumb = '';
        $image_flag = 0;

        //TODO Abstract out this image thumbnail link expansion into an Image Thumbnail plugin
        //modeled after the Flickr Thumbnails plugin
        if (strncmp($link, $twitpic, strlen($twitpic)) === 0) {
            $thumb = 'http://twitpic.com/show/thumb/' . substr($link, strlen($twitpic));
            $image_flag = 1;
        } elseif (strncmp($link, $yfrog, strlen($yfrog)) === 0) {
            $thumb = $link . '.th.jpg';
            $image_flag = 1;
        } elseif (strncmp($link, $twitgoo, strlen($twitgoo)) === 0) {
            $thumb = 'http://twitgoo.com/show/thumb/' . substr($link, strlen($twitgoo));
            $image_flag = 1;
        } elseif (strncmp($link, $flickr, strlen($flickr)) === 0) {
            // Flagged as an image only; no thumbnail address is derived here.
            $image_flag = 1;
        }

        $stored = $link_dao->insert($link, $thumb, $title, $tweet['post_id'], 'twitter', $image_flag);
        if ($stored) {
            $this->logger->logStatus("Inserted {$link} ({$thumb}, {$image_flag}), into links table",
            get_class($this));
        } else {
            $this->logger->logStatus("Did NOT insert {$link} ({$thumb}) into links table", get_class($this));
        }
    }
}
/**
 * Capture the current instance user's tweets and store them in the database.
 *
 * If the system already holds at least as many posts as the user's post count, only deleted-tweet processing
 * runs. Otherwise the user_timeline endpoint is requested repeatedly, first without max_id and afterwards with
 * max_id set to the instance's last_post_id. Each newly inserted tweet has its links stored: URLs that belong to
 * attached photos are stored from the photo data, all others go through URLProcessor::processPostURLs().
 * Fetching stops when a request returns no tweets, the API archive limit is reached, the counts match, max_id
 * stops advancing, a non-200 status is returned, or the API call limit is exceeded.
 * Finally, a changed username is written back to the instance and the author's posts.
 */
public function fetchInstanceUserTweets() {
    if (!isset($this->user)) {
        $this->fetchInstanceUserInfo();
    }
    if (isset($this->user)) {
        // check for deletes
        if ($this->instance->total_posts_in_system >= $this->user->post_count) {
            $this->processDeletedTweets();
            return;
        }

        $status_message = "";
        $continue_fetching = true;
        $this->logger->logInfo("Twitter user post count: " . $this->user->post_count .
        " and ThinkUp post count: " . $this->instance->total_posts_in_system, __METHOD__ . ',' . __LINE__);

        // Set up endpoint and unchanging args
        $endpoint = $this->api->endpoints['user_timeline'];
        $args = array();
        $count_arg = isset($this->twitter_options['tweet_count_per_call'])
        ? $this->twitter_options['tweet_count_per_call']->option_value : 100;
        $args["count"] = $count_arg;
        $args["include_rts"] = "true";
        $args["screen_name"] = $this->user->username;
        $max_id = "";
        //have we fetched the latest tweets with no max_id once?
        $got_latest_tweets = false;
        //are we fetching the archive using the max_id?
        $fetching_archive = false;

        while ($this->user->post_count > $this->instance->total_posts_in_system && $continue_fetching) {
            if ($got_latest_tweets) {
                $max_id = $this->instance->last_post_id;
                if ($max_id !== "") {
                    $args["max_id"] = $max_id;
                    $fetching_archive = true;
                }
            }
            try {
                list($http_status, $payload) = $this->api->apiRequest($endpoint, $args);
                if ($http_status == 200) {
                    $count = 0;
                    $tweets = $this->api->parseJSONTweets($payload);
                    $post_dao = DAOFactory::getDAO('PostDAO');
                    $link_dao = DAOFactory::getDAO('LinkDAO');
                    foreach ($tweets as $tweet) {
                        $tweet['network'] = 'twitter';
                        $inserted_post_key = $post_dao->addPost($tweet, $this->user, $this->logger);
                        if ($inserted_post_key !== false) {
                            $count = $count + 1;
                            $this->instance->total_posts_in_system = $this->instance->total_posts_in_system + 1;

                            // Expand and insert links contained in tweet
                            $extracted_urls = Post::extractURLs($tweet['post_text']);
                            $urls = array();
                            // Skip over URLs where we are extracting image media
                            foreach ($extracted_urls as $url) {
                                $add_url = true;
                                if (!empty($tweet['photos'])) {
                                    foreach ($tweet['photos'] as $media) {
                                        if ($media->display_url == $url || $media->url == $url) {
                                            $add_url = false;
                                            // One matching photo is enough; no need to scan the rest
                                            break;
                                        }
                                    }
                                }
                                if ($add_url) {
                                    $urls[] = $url;
                                }
                            }
                            if (count($urls)) {
                                URLProcessor::processPostURLs($tweet['post_text'], $tweet['post_id'], 'twitter',
                                $this->logger, $urls);
                            }

                            // Photo links are stored directly from the photo data
                            if (!empty($tweet['photos'])) {
                                foreach ($tweet['photos'] as $photo) {
                                    $link = new Link(array('url' => $photo->url,
                                    'expanded_url' => $photo->expanded_url, 'image_src' => $photo->media_url,
                                    'post_key' => $inserted_post_key));
                                    try {
                                        $link_dao->insert($link);
                                        $this->logger->logSuccess("Inserted {$photo->url} into links table",
                                        __METHOD__ . ',' . __LINE__);
                                    } catch (DuplicateLinkException $e) {
                                        $this->logger->logInfo($photo->url . " already exists in links table",
                                        __METHOD__ . ',' . __LINE__);
                                    } catch (DataExceedsColumnWidthException $e) {
                                        $this->logger->logInfo($photo->url . " data exceeds table column width",
                                        __METHOD__ . ',' . __LINE__);
                                    }
                                }
                            }
                        }
                        // While paging through the archive, last_post_id follows each processed tweet, so it
                        // ends up as the last tweet of the page and becomes the next request's max_id
                        if ($this->instance->last_post_id == "" || $fetching_archive) {
                            $this->instance->last_post_id = $tweet['post_id'];
                        }
                    }
                    $got_latest_tweets = true;

                    if (count($tweets) > 0 || $count > 0) {
                        $status_message .= ' ' . count($tweets) . " tweet(s) found and {$count} saved";
                        $this->logger->logUserSuccess($status_message, __METHOD__ . ',' . __LINE__);
                        $status_message = "";
                    } else {
                        $continue_fetching = false;
                    }

                    //if you've got more than the Twitter API archive limit, stop looking for more tweets
                    if ($this->instance->total_posts_in_system >= $this->api->archive_limit) {
                        $continue_fetching = false;
                        $overage_info = "Twitter only makes " . number_format($this->api->archive_limit) .
                        " tweets available, so some of the oldest ones may be missing.";
                    } else {
                        $overage_info = "";
                    }

                    if ($this->user->post_count == $this->instance->total_posts_in_system) {
                        $this->instance->is_archive_loaded_tweets = true;
                        $continue_fetching = false;
                    }

                    // max_id did not move during this pass, so another request would repeat the same page
                    if ($max_id !== "" && $this->instance->last_post_id !== ""
                    && $max_id == $this->instance->last_post_id) {
                        $continue_fetching = false;
                    }
                } else {
                    $continue_fetching = false;
                }
            } catch (APICallLimitExceededException $e) {
                $this->logger->logInfo($e->getMessage(), __METHOD__ . ',' . __LINE__);
                break;
            }
        }

        $status_message .= number_format($this->instance->total_posts_in_system) . " tweets are in ThinkUp; " .
        $this->user->username . " has " . number_format($this->user->post_count) .
        " tweets according to Twitter.";
        $this->logger->logUserInfo($status_message, __METHOD__ . ',' . __LINE__);

        // $overage_info is only assigned after a successful request, hence the isset() check
        if (isset($overage_info) && $overage_info != '') {
            $this->logger->logUserError($overage_info, __METHOD__ . ',' . __LINE__);
        }

        if ($this->instance->total_posts_in_system >= $this->user->post_count) {
            $status_message = "All of " . $this->user->username . "'s tweets are in ThinkUp.";
            $this->logger->logUserSuccess($status_message, __METHOD__ . ',' . __LINE__);
        }

        if (isset($this->user->username) && $this->user->username != $this->instance->network_username) {
            // User has changed their username, so update instance and posts data
            $instance_dao = DAOFactory::getDAO('InstanceDAO');
            $instance_dao->updateUsername($this->instance->id, $this->user->username);
            $post_dao = DAOFactory::getDAO('PostDAO');
            $post_dao->updateAuthorUsername($this->instance->network_user_id, 'twitter',
            $this->user->username);
        }
    }
}
/**
 * Extract the URLs of a tweet's text, expand each one and store it in the links table.
 *
 * Twitpic, Yfrog and Twitgoo links get a thumbnail address as their expanded URL and are flagged as images.
 * flic.kr/p/ links are resolved through $fa->getFlickrPhotoSource() when $fa has an API key, and flagged as
 * images only if that yields a non-empty result. Every other link goes through $lurl->expandUrl(); its long-url
 * and title are used when the response code is 200.
 * @param arr $tweet Tweet values; post_text and post_id are read
 * @param obj $lurl URL expander; must provide expandUrl()
 * @param obj $fa Flickr API accessor; api_key and getFlickrPhotoSource() are used
 */
private function processTweetURLs($tweet, $lurl, $fa) {
    $link_dao = new LinkDAO($this->db, $this->logger);
    $found_urls = Post::extractURLs($tweet['post_text']);

    foreach ($found_urls as $short_url) {
        //set defaults
        $title = '';
        $expanded = '';
        $image_flag = 0;

        if (strpos($short_url, 'http://twitpic.com/') === 0) {
            $expanded = 'http://twitpic.com/show/thumb/' . substr($short_url, strlen('http://twitpic.com/'));
            $image_flag = 1;
        } elseif (strpos($short_url, 'http://yfrog.com/') === 0) {
            $expanded = $short_url . '.th.jpg';
            $image_flag = 1;
        } elseif (strpos($short_url, 'http://twitgoo.com/') === 0) {
            $expanded = 'http://twitgoo.com/show/thumb/' . substr($short_url, strlen('http://twitgoo.com/'));
            $image_flag = 1;
        } elseif ($fa->api_key != null && strpos($short_url, 'http://flic.kr/p/') === 0) {
            $expanded = $fa->getFlickrPhotoSource($short_url);
            // Only a resolved photo source counts as an image
            $image_flag = ($expanded != '') ? 1 : 0;
        } else {
            // Not a known image host (or no Flickr API key): fall back to generic URL expansion
            $expansion = $lurl->expandUrl($short_url);
            $expanded_ok = isset($expansion['response-code']) && $expansion['response-code'] == 200;
            if ($expanded_ok) {
                $expanded = $expansion['long-url'];
                if (isset($expansion['title'])) {
                    $title = $expansion['title'];
                }
            }
        }

        $stored = $link_dao->insert($short_url, $expanded, $title, $tweet['post_id'], $image_flag);
        $outcome = $stored ? "Inserted " : "Did NOT insert ";
        $this->logger->logStatus($outcome . $short_url . " (" . $expanded . ") into links table",
        get_class($this));
    }
}