/**
 * Fetch the instance user's friends' tweets and friends.
 *
 * While the API is available and the crawler still has API calls left, asks the follow DAO for the
 * stalest Twitter friend of the instance user, requests that friend's timeline, stores the returned
 * tweets, refreshes the friend's user record from the first tweet returned, and then fetches the
 * friend's own friends. Stops as soon as the follow DAO returns no stale friend.
 */
public function fetchFriendTweetsAndFriends() {
    if (!isset($this->user)) {
        $this->fetchInstanceUserInfo();
    }
    // Nothing to crawl if the instance user still could not be loaded
    if (!isset($this->user)) {
        return;
    }

    $follow_dao = DAOFactory::getDAO('FollowDAO');
    $post_dao = DAOFactory::getDAO('PostDAO');

    while ($this->api->available && $this->api->available_api_calls_for_crawler > 0) {
        $friend = $follow_dao->getStalestFriend($this->user->user_id, 'twitter');
        if ($friend == null) {
            $this->logger->logInfo('No friend staler than 1 day', __METHOD__ . ',' . __LINE__);
            break;
        }

        $this->logger->logInfo($friend->username . " is friend most need of update", __METHOD__ . ',' . __LINE__);
        $timeline_endpoint = str_replace("[id]", $friend->username, $this->api->cURL_source['user_timeline']);

        // Build the request arguments: page size (option or 100) and, when known, the last stored post
        $args = array();
        if (isset($this->twitter_options['tweet_count_per_call'])) {
            $args['count'] = $this->twitter_options['tweet_count_per_call']->option_value;
        } else {
            $args['count'] = 100;
        }
        if ($friend->last_post_id > 0) {
            $args['since_id'] = $friend->last_post_id;
        }

        list($http_status, $payload) = $this->api->apiRequest($timeline_endpoint, $args);

        if ($http_status == 200) {
            $saved = 0;
            $tweets = $this->api->parseXML($payload);
            if (count($tweets) > 0) {
                $friend_refreshed = false;
                foreach ($tweets as $tweet) {
                    if ($post_dao->addPost($tweet, $friend, $this->logger) > 0) {
                        $saved++;
                        //expand and insert links contained in tweet
                        URLProcessor::processTweetURLs($this->logger, $tweet);
                    }
                    if ($friend_refreshed) {
                        continue;
                    }
                    // Only the first tweet is used to refresh the friend's stored user details
                    $friend->full_name = $tweet['full_name'];
                    $friend->avatar = $tweet['avatar'];
                    $friend->location = $tweet['location'];
                    $friend->description = $tweet['description'];
                    $friend->url = $tweet['url'];
                    $friend->is_protected = $tweet['is_protected'];
                    $friend->follower_count = $tweet['follower_count'];
                    $friend->friend_count = $tweet['friend_count'];
                    $friend->post_count = $tweet['post_count'];
                    $friend->joined = date_format(date_create($tweet['joined']), "Y-m-d H:i:s");
                    if ($tweet['post_id'] > $friend->last_post_id) {
                        $friend->last_post_id = $tweet['post_id'];
                    }
                    $this->user_dao->updateUser($friend);
                    $friend_refreshed = true;
                }
            } else {
                // No tweets came back, so refresh the friend via a user lookup instead
                $this->fetchAndAddUser($friend->user_id, "Friends");
            }
            $this->logger->logInfo(count($tweets) . " tweet(s) found for " . $friend->username . " and " . $saved
            . " saved", __METHOD__ . ',' . __LINE__);
            $this->fetchUserFriendsByIDs($friend->user_id, $follow_dao);
        } elseif ($http_status == 401 || $http_status == 404) {
            // Record the failure against this friend
            $error = $this->api->parseError($payload);
            $user_error_dao = DAOFactory::getDAO('UserErrorDAO');
            $error_text = isset($error['error']) ? $error['error'] : $payload;
            $user_error_dao->insertError($friend->user_id, $http_status, $error_text, $this->user->user_id,
            'twitter');
            $this->logger->logInfo('User error saved', __METHOD__ . ',' . __LINE__);
        }
    }
}
/**
 * Check that URLProcessor::processTweetURLs stores a link record for each image-service URL in a tweet.
 *
 * Note that the two instagr.am tweet texts contain "http:/" (single slash) while the stored URL is
 * expected to start with "http://".
 */
public function testProcessTweetURLs() {
    // Each case: post ID, tweet text, URL to look up, expected expanded_url
    $cases = array(
        //Twitpic
        array(100, "This is a Twitpic post http://twitpic.com/blah Yay!",
        'http://twitpic.com/blah', 'http://twitpic.com/show/thumb/blah'),
        //Yfrog
        array(101, "This is a Yfrog post http://yfrog.com/blah Yay!",
        'http://yfrog.com/blah', 'http://yfrog.com/blah.th.jpg'),
        //Twitgoo
        array(102, "This is a Twitgoo post http://twitgoo.com/blah Yay!",
        'http://twitgoo.com/blah', 'http://twitgoo.com/show/thumb/blah'),
        //Picplz
        array(103, "This is a Picplz post http://picplz.com/blah Yay!",
        'http://picplz.com/blah', 'http://picplz.com/blah/thumb/'),
        // instagr.am with ending slash in URL (which the URLs 'should' include)
        array(104, "This is an instagram post http:/instagr.am/blah/ Yay!",
        'http://instagr.am/blah/', 'http://instagr.am/blah/media/'),
        // instagr.am w/out ending slash also just in case
        array(105, "This is an instagram post http:/instagr.am/blah Yay!",
        'http://instagr.am/blah', 'http://instagr.am/blah/media/'),
        //Flic.kr
        array(106, "This is a Flickr post http://flic.kr/blah Yay!",
        'http://flic.kr/blah', ''),
    );

    foreach ($cases as $case) {
        list($post_id, $post_text, $stored_url, $expected_expanded_url) = $case;
        $tweet = array('post_id' => $post_id, 'post_text' => $post_text);

        URLProcessor::processTweetURLs($this->logger, $tweet);

        $link_dao = new LinkMySQLDAO();
        $link = $link_dao->getLinkByUrl($stored_url);
        $this->assertIsA($link, "Link");
        $this->assertEqual($link->url, $stored_url);
        $this->assertEqual($link->expanded_url, $expected_expanded_url);
        $this->assertEqual($link->title, '');
        $this->assertEqual($link->post_id, $post_id);
        $this->assertEqual($link->network, 'twitter');
        $this->assertTrue($link->is_image);
    }
}
/**
 * Add a post and, when the post did not already exist, the information associated with it:
 * the author's user record, the post's links and photos, mentions, hashtags and place.
 *
 * @param array $vals Post values. Reads 'post_text', 'post_id', 'author_user_id' and 'network', plus an
 *                    optional 'photos' list whose items expose url, expanded_url and media_url.
 * @param array $entities Optional. The 'urls', 'mentions', 'hashtags' and 'place' keys are used when set.
 * @param array $user_array Optional. Values used to construct the User that gets updated.
 * @return mixed The value returned by addPost(); a falsy value means nothing else was stored.
 */
public function addPostAndAssociatedInfo(array $vals, $entities = null, $user_array = null) {
    $urls = null;
    // first add post
    $new_post_key = $this->addPost($vals);
    // if post already existed there is nothing more to store
    if (!$new_post_key) {
        return $new_post_key;
    }

    if ($user_array) {
        $u = new User($user_array);
        $user_dao = DAOFactory::getDAO('UserDAO');
        $user_dao->setLoggerInstance($this->logger);
        $user_dao->updateUser($u);
    }

    if ($entities && isset($entities['urls'])) {
        $urls = $entities['urls'];
    }
    URLProcessor::processPostURLs($vals['post_text'], $vals['post_id'], 'twitter', $this->logger, $urls);

    // 'photos' is optional: !empty() avoids an undefined-index notice when the key is absent
    if (!empty($vals['photos'])) {
        $link_dao = DAOFactory::getDAO('LinkDAO');
        foreach ($vals['photos'] as $photo) {
            // Photos may arrive as arrays; normalize to an object for property access
            $photo = (object) $photo;
            $link_dao->saveExpandedURL($photo->url, $photo->expanded_url, null, $photo->media_url, null);
        }
    }

    if (isset($entities)) {
        if (isset($entities['mentions'])) {
            $mention_dao = DAOFactory::getDAO('MentionDAO');
            $mention_dao->setLoggerInstance($this->logger);
            $mention_dao->insertMentions($entities['mentions'], $vals['post_id'], $vals['author_user_id'],
            $vals['network']);
        }
        if (isset($entities['hashtags'])) {
            $hashtagpost_dao = DAOFactory::getDAO('HashtagPostDAO');
            $hashtagpost_dao->setLoggerInstance($this->logger);
            $hashtagpost_dao->insertHashtagPosts($entities['hashtags'], $vals['post_id'], $vals['network']);
        }
        if (isset($entities['place'])) {
            $place = $entities['place'];
            if ($place) {
                $place_dao = DAOFactory::getDAO('PlaceDAO');
                $place_dao->setLoggerInstance($this->logger);
                $place_dao->insertPlace($place, $vals['post_id'], $vals['network']);
            }
        }
    }
    return $new_post_key;
}
/**
 * Check that URLProcessor::processPostURLs stores a link record with the expected expanded URL and
 * image source for each URL found in a post.
 *
 * Note that the two instagr.am post texts contain "http:/" (single slash) while the stored URL is
 * expected to start with "http://".
 */
public function testProcessPostURLs() {
    $builders = array();
    $network = 'twitter';

    // Each case: post ID, post text, URL to look up, expected expanded_url, expected image_src
    $cases = array(
        //Twitpic
        array(100, "This is a Twitpic post http://twitpic.com/blah Yay!",
        'http://twitpic.com/blah', 'http://twitpic.com/blah', 'http://twitpic.com/show/thumb/blah'),
        //Yfrog
        array(101, "This is a Yfrog post http://yfrog.com/blah Yay!",
        'http://yfrog.com/blah', 'http://yfrog.com/blah', 'http://yfrog.com/blah.th.jpg'),
        //Twitgoo
        array(102, "This is a Twitgoo post http://twitgoo.com/blah Yay!",
        'http://twitgoo.com/blah', 'http://twitgoo.com/blah', 'http://twitgoo.com/show/thumb/blah'),
        //Picplz
        array(103, "This is a Picplz post http://picplz.com/blah Yay!",
        'http://picplz.com/blah', 'http://picplz.com/blah', 'http://picplz.com/blah/thumb/'),
        // instagr.am with ending slash in URL (which the URLs 'should' include)
        array(104, "This is an instagram post http:/instagr.am/blah/ Yay!",
        'http://instagr.am/blah/', 'http://instagr.am/blah/', 'http://instagr.am/blah/media/'),
        // instagr.am w/out ending slash also just in case
        array(105, "This is an instagram post http:/instagr.am/blah Yay!",
        'http://instagr.am/blah', 'http://instagr.am/blah', 'http://instagr.am/blah/media/'),
        //Flic.kr -- still need to expand the flic.kr link
        array(106, "This is a Flickr post http://flic.kr/blah Yay!",
        'http://flic.kr/blah', '', ''),
        // post with a curly quote closing the link
        array(107, "This is a post with a curly quote closing the link http://t.co/2JVSpi5 yo",
        'http://t.co/2JVSpi5', '', ''),
        //Lockerz
        array(108, "This is a lockerz post http://lockerz.com/s/138376416 Yay!",
        'http://lockerz.com/s/138376416', 'http://lockerz.com/s/138376416',
        'http://api.plixi.com/api/tpapi.svc/imagefromurl?url=http://plixi.com/p/138376416&size=thumbnail'),
    );

    foreach ($cases as $case) {
        list($post_id, $post_text, $stored_url, $expected_expanded_url, $expected_image_src) = $case;

        // Keep every fixture referenced in $builders until the test method returns
        $builders[] = FixtureBuilder::build('posts', array('id' => $post_id, 'post_id' => $post_id,
        'network' => 'twitter', 'post_text' => $post_text));

        URLProcessor::processPostURLs($post_text, $post_id, $network, $this->logger);

        $link_dao = new LinkMySQLDAO();
        $link = $link_dao->getLinkByUrl($stored_url);
        $this->assertIsA($link, "Link");
        $this->assertEqual($link->url, $stored_url);
        $this->assertEqual($link->expanded_url, $expected_expanded_url);
        $this->assertEqual($link->image_src, $expected_image_src);
        $this->assertEqual($link->title, '');
        $this->assertEqual($link->post_key, $post_id);
    }

    //test facebook
    $network = 'facebook';
}
/**
 * Abstraction for pulling data from a file or url.
 *
 * A value that starts with "/" or "." is read as a local file path with file_get_contents(). Anything
 * else is treated as a URL: it is resolved with URLProcessor::getFinalURL() and fetched with
 * Utils::getURLContents().
 *
 * @throws exception NOTE(review): nothing is thrown in this method itself; presumably propagated from the
 *                   URL helpers -- confirm
 * @param str $url File path or URL
 * @return mixed Fetched contents; a null result is normalized to false
 */
private function fetchURLData($url) {
    // strpos() returns false when the needle is absent, and false == 0 is true in PHP, so a loose
    // comparison would misclassify any URL that contains no "." or no "/" as a file path.
    $is_file_path = (strpos($url, "/") === 0 || strpos($url, ".") === 0);

    if ($is_file_path) {
        // we are a file path, so use file_get_contents
        $contents = file_get_contents($url);
    } else {
        // else we are a url, so use our Util::getURLContents
        $contents = Utils::getURLContents(URLProcessor::getFinalURL($url));
    }

    if (is_null($contents)) {
        $contents = false;
    }
    return $contents;
}
/**
 * Add a post and, when the post did not already exist, process the URLs it contains.
 *
 * @param array $vals Post values, passed to addPost() and URLProcessor::processTweetURLs()
 * @param array $entities Entity information; only the 'urls' key is read
 * @return mixed The value returned by addPost()
 */
public function addPostAndEntities($vals, $entities) {
    // first add post
    $post_added = $this->addPost($vals);
    if (!$post_added) {
        // post already existed, so there are no entities to process
        return $post_added;
    }

    // Use the URL entities when supplied; if $urls is null, URLs will be extracted from tweet content.
    $urls = isset($entities['urls']) ? $entities['urls'] : null;
    URLProcessor::processTweetURLs($this->logger, $vals, $urls);

    return $post_added;
}
/**
 * Save expanded version of all unexpanded URLs to data store, as well as intermediary short links.
 *
 * Each link is expanded repeatedly until the expansion stops changing, comes back empty, grows to 256
 * characters or more, a short link cannot be stored, or the hop counter passes self::EXPANSION_CAP.
 *
 * @param str $flickr_api_key Optional. When set, links starting with http://flic.kr/ are handed to
 *                            expandFlickrThumbnail() and are not saved through the regular path.
 */
public function expandOriginalURLs($flickr_api_key = null) {
    $links_to_expand = $this->link_dao->getLinksToExpand($this->link_limit);
    $total_links = count($links_to_expand);

    $this->logger->logUserInfo($total_links . " links to expand. Please wait. Working...",
    __METHOD__ . ',' . __LINE__);

    $expanded_count = 0;
    $error_count = 0;
    $flickr_prefix = 'http://flic.kr/';

    foreach ($links_to_expand as $index => $link) {
        // Tracks whether the Flickr path handled this particular link
        $flickr_handled = false;

        if (!Utils::validateURL($link->url)) {
            $this->logger->logError($link->url . " not a valid URL", __METHOD__ . ',' . __LINE__);
            $this->link_dao->saveExpansionError($link->url, "Invalid URL");
            $error_count++;
            continue;
        }

        $hops = 0;
        $this->logger->logInfo("Expanding " . ($expanded_count + 1) . " of " . $total_links . " (" . $link->url
        . ")", __METHOD__ . ',' . __LINE__);

        //make sure shortened short links--like t.co--get fully expanded
        $done = false;
        $current_url = $link->url;
        while (!$done) {
            //begin Flickr thumbnail processing
            if (isset($flickr_api_key) && substr($current_url, 0, strlen($flickr_prefix)) == $flickr_prefix) {
                self::expandFlickrThumbnail($flickr_api_key, $current_url, $link->url);
                $flickr_handled = true;
                $done = true;
            }
            //end Flickr thumbnail processing

            $expanded_url = URLExpander::expandURL($current_url, $link->url, $index, $total_links,
            $this->link_dao, $this->logger);

            if ($expanded_url == $current_url || $expanded_url == '' || $hops > self::EXPANSION_CAP) {
                $done = true;
            } else {
                // The URL changed, so record the intermediary short link
                try {
                    $this->short_link_dao->insert($link->id, $current_url);
                } catch (DataExceedsColumnWidthException $e) {
                    $this->logger->logError($current_url . " short link record exceeds column width, cannot save",
                    __METHOD__ . ',' . __LINE__);
                    $done = true;
                }
            }

            // Only keep following expansions shorter than 256 characters
            if (strlen($expanded_url) < 256) {
                $current_url = $expanded_url;
            } else {
                $done = true;
            }
            $hops++;
        }

        if ($flickr_handled) {
            continue;
        }

        if ($expanded_url == '') {
            $this->logger->logError($link->url . " not a valid URL - relocates to nowhere",
            __METHOD__ . ',' . __LINE__);
            $this->link_dao->saveExpansionError($link->url, "Invalid URL - relocates to nowhere");
            $error_count++;
            continue;
        }

        $image_src = URLProcessor::getImageSource($expanded_url);
        $url_details = URLExpander::getWebPageDetails($expanded_url);
        try {
            $this->link_dao->saveExpandedUrl($link->url, $expanded_url, $url_details['title'], $image_src,
            $url_details['description']);
            $expanded_count++;
        } catch (DataExceedsColumnWidthException $e) {
            $this->logger->logError($link->url . " record exceeds column width, cannot save",
            __METHOD__ . ',' . __LINE__);
            $this->link_dao->saveExpansionError($link->url, "URL exceeds column width");
            $error_count++;
        }
    }

    $this->logger->logUserSuccess($expanded_count . " URLs successfully expanded (" . $error_count . " errors).",
    __METHOD__ . ',' . __LINE__);
}
/**
 * Check that URLProcessor::processPostURLs returns, and stores a link for, every URL in a post that
 * contains two URLs.
 */
public function testProcessPostMultipleURLs() {
    $post_id = 100;
    $network = 'twitter';
    $post_text = "http://awesome.com/ http://cool.com/";
    $expected_urls = array('http://awesome.com/', 'http://cool.com/');

    // Keep the fixture referenced until the test method returns
    $builders[] = FixtureBuilder::build('posts', array('id' => $post_id, 'post_id' => $post_id,
    'network' => 'twitter', 'post_text' => $post_text));

    // The call returns the URLs in the order they appear in the text
    $returned_urls = URLProcessor::processPostURLs($post_text, $post_id, $network, $this->logger);
    $this->assertEqual(count($returned_urls), 2);
    foreach ($expected_urls as $position => $expected_url) {
        $this->assertEqual($returned_urls[$position], $expected_url);
    }

    // Both links must also be retrievable for the post
    $link_dao = DAOFactory::getDAO('LinkDAO');
    $stored_links = $link_dao->getLinksForPost($post_id, 'twitter');
    $this->assertEqual(count($stored_links), 2);
    $this->assertIsA($stored_links[0], 'Link');
    $this->assertEqual($stored_links[0]->url, 'http://awesome.com/');
    $this->assertEqual($stored_links[1]->url, 'http://cool.com/');
}
/**
 * Fetch instance user's favorites since the last favorite stored.
 *
 * Starting from the instance's last_favorite_id, requests favorites via getFavorites() until a request
 * returns no tweets or a non-200 HTTP status. Each favorite that addFavorite() reports as newly stored
 * also has its URLs processed; the highest favorite ID seen is used for the next request.
 */
public function fetchInstanceUserFavorites() {
    if (!isset($this->user)) {
        $this->fetchInstanceUserInfo();
    }
    $this->logger->logUserInfo("Checking for new favorites.", __METHOD__ . ',' . __LINE__);

    $last_fav_id = $this->instance->last_favorite_id;
    $this->logger->logInfo("Owner favs: " . $this->user->favorites_count . ", instance owner favs in system: "
    . $this->instance->owner_favs_in_system, __METHOD__ . ',' . __LINE__);

    // Running count of favorites newly stored in this call; must start at 0 so the first increment
    // does not operate on an undefined variable
    $fcount = 0;
    $continue = true;
    while ($continue) {
        list($tweets, $http_status, $payload) = $this->getFavorites($last_fav_id);
        if ($http_status == 200) {
            if (sizeof($tweets) == 0) {
                // then done -- this should happen when we have run out of favs
                $this->logger->logInfo("It appears that we have run out of favorites to process",
                __METHOD__ . ',' . __LINE__);
                $continue = false;
            } else {
                $post_dao = DAOFactory::getDAO('FavoritePostDAO');
                foreach ($tweets as $tweet) {
                    $tweet['network'] = 'twitter';
                    if ($post_dao->addFavorite($this->user->user_id, $tweet) > 0) {
                        URLProcessor::processPostURLs($tweet['post_text'], $tweet['post_id'], 'twitter',
                        $this->logger);
                        $this->logger->logInfo("found new fav: " . $tweet['post_id'], __METHOD__ . ',' . __LINE__);
                        $fcount++;
                        $this->logger->logInfo("fcount: {$fcount}", __METHOD__ . ',' . __LINE__);
                        $this->logger->logInfo("added favorite: " . $tweet['post_id'], __METHOD__ . ',' . __LINE__);
                    } else {
                        // fav was already stored, so take no action. This could happen both because some
                        // of the favs on the given page were processed last time, or because a separate
                        // process, such as a UserStream process, is also watching for and storing favs.
                        $status_message = "have already stored fav " . $tweet['post_id'];
                        $this->logger->logDebug($status_message, __METHOD__ . ',' . __LINE__);
                    }
                    // keep track of the highest fav id we've encountered
                    if ($tweet['post_id'] > $last_fav_id) {
                        $last_fav_id = $tweet['post_id'];
                    }
                } // end foreach
            }
        } else {
            // Any non-200 response ends the crawl
            $continue = false;
        }
    }
}
/**
 * Save expanded version of all unexpanded URLs to data store.
 *
 * Each link returned by getLinksToExpand() is expanded with untinyurl() until the result stops
 * changing, comes back empty, or repeats a URL already visited for this link (a redirect cycle).
 * Non-empty results are saved together with their image source; invalid URLs are recorded as
 * expansion errors.
 */
public function expandRemainingURLs() {
    $logger = Logger::getInstance();
    $link_dao = DAOFactory::getDAO('LinkDAO');

    $links_to_expand = $link_dao->getLinksToExpand($this->link_limit);
    $total_links = count($links_to_expand);

    $logger->logUserInfo($total_links . " links to expand. Please wait. Working...", __METHOD__ . ',' . __LINE__);

    $total_expanded = 0;
    $total_errors = 0;
    foreach ($links_to_expand as $index => $link) {
        if (!Utils::validateURL($link)) {
            $total_errors = $total_errors + 1;
            $logger->logError($link . " not a valid URL", __METHOD__ . ',' . __LINE__);
            $link_dao->saveExpansionError($link, "Invalid URL");
            continue;
        }

        $logger->logInfo("Expanding " . ($total_expanded + 1) . " of " . $total_links . " (" . $link . ")",
        __METHOD__ . ',' . __LINE__);

        //make sure shortened short links--like t.co--get fully expanded
        $fully_expanded = false;
        $short_link = $link;
        // URLs already seen for this link; without this a redirect cycle (A -> B -> A) never terminates
        $visited = array($short_link => true);
        while (!$fully_expanded) {
            $expanded_url = self::untinyurl($short_link, $link_dao, $link, $index, $total_links);
            if ($expanded_url == $short_link || $expanded_url == '' || isset($visited[$expanded_url])) {
                $fully_expanded = true;
            } else {
                $visited[$expanded_url] = true;
            }
            $short_link = $expanded_url;
        }

        if ($expanded_url != '') {
            $image_src = URLProcessor::getImageSource($expanded_url);
            $link_dao->saveExpandedUrl($link, $expanded_url, '', $image_src);
            $total_expanded = $total_expanded + 1;
        } else {
            // Empty expansion: only counted here, no expansion error is saved by this method
            $total_errors = $total_errors + 1;
        }
    }

    $logger->logUserSuccess($total_expanded . " URLs successfully expanded (" . $total_errors . " errors).",
    __METHOD__ . ',' . __LINE__);
}
/**
 * Add a post and, when the post did not already exist, the information associated with it:
 * the author's user record, the post's links, mentions, hashtags and place.
 *
 * @param array $vals Post values. Reads 'post_text', 'post_id', 'author_user_id' and 'network'.
 * @param array $entities Optional. The 'urls', 'mentions', 'hashtags' and 'place' keys are used when set.
 * @param array $user_array Optional. Values used to construct the User that gets updated.
 * @return mixed The value returned by addPost()
 */
public function addPostAndAssociatedInfo(array $vals, $entities = null, $user_array = null) {
    // first add post
    $post_key = $this->addPost($vals);
    if (!$post_key) {
        // post already existed; nothing else to store
        return $post_key;
    }

    if ($user_array) {
        $author = new User($user_array);
        $user_dao = DAOFactory::getDAO('UserDAO');
        $user_dao->setLoggerInstance($this->logger);
        $user_dao->updateUser($author);
    }

    // Hand over the URL entities when present, otherwise null
    $urls = isset($entities['urls']) ? $entities['urls'] : null;
    URLProcessor::processPostURLs($vals['post_text'], $vals['post_id'], 'twitter', $this->logger, $urls);

    if (isset($entities['mentions'])) {
        $mention_dao = DAOFactory::getDAO('MentionDAO');
        $mention_dao->setLoggerInstance($this->logger);
        $mention_dao->insertMentions($entities['mentions'], $vals['post_id'], $vals['author_user_id'],
        $vals['network']);
    }

    if (isset($entities['hashtags'])) {
        $hashtag_dao = DAOFactory::getDAO('HashtagDAO');
        $hashtag_dao->setLoggerInstance($this->logger);
        $hashtag_dao->insertHashtags($entities['hashtags'], $vals['post_id'], $vals['network']);
    }

    // Only a set, non-empty place is stored
    if (!empty($entities['place'])) {
        $place_dao = DAOFactory::getDAO('PlaceDAO');
        $place_dao->setLoggerInstance($this->logger);
        $place_dao->insertPlace($entities['place'], $vals['post_id'], $vals['network']);
    }

    return $post_key;
}
/**
 * Check that URLProcessor::processPostURLs stores a link record with the expected expanded URL, image
 * source, post ID and network for each URL found in a post. The first three cases run against the
 * 'twitter' network, the rest against 'facebook'.
 *
 * Note that the two instagr.am post texts contain "http:/" (single slash) while the stored URL is
 * expected to start with "http://".
 */
public function testProcessPostURLs() {
    // Each case: network, post ID, post text, URL to look up, expected expanded_url, expected image_src
    $cases = array(
        //Twitpic
        array('twitter', 100, "This is a Twitpic post http://twitpic.com/blah Yay!",
        'http://twitpic.com/blah', 'http://twitpic.com/blah', 'http://twitpic.com/show/thumb/blah'),
        //Yfrog
        array('twitter', 101, "This is a Yfrog post http://yfrog.com/blah Yay!",
        'http://yfrog.com/blah', 'http://yfrog.com/blah', 'http://yfrog.com/blah.th.jpg'),
        //Twitgoo
        array('twitter', 102, "This is a Twitgoo post http://twitgoo.com/blah Yay!",
        'http://twitgoo.com/blah', 'http://twitgoo.com/blah', 'http://twitgoo.com/show/thumb/blah'),
        //test facebook
        //Picplz
        array('facebook', 103, "This is a Picplz post http://picplz.com/blah Yay!",
        'http://picplz.com/blah', 'http://picplz.com/blah', 'http://picplz.com/blah/thumb/'),
        // instagr.am with ending slash in URL (which the URLs 'should' include)
        array('facebook', 104, "This is an instagram post http:/instagr.am/blah/ Yay!",
        'http://instagr.am/blah/', 'http://instagr.am/blah/', 'http://instagr.am/blah/media/'),
        // instagr.am w/out ending slash also just in case
        array('facebook', 105, "This is an instagram post http:/instagr.am/blah Yay!",
        'http://instagr.am/blah', 'http://instagr.am/blah', 'http://instagr.am/blah/media/'),
        //Flic.kr -- still need to expand the flic.kr link
        array('facebook', 106, "This is a Flickr post http://flic.kr/blah Yay!",
        'http://flic.kr/blah', '', ''),
        // post with a curly quote closing the link
        array('facebook', 107, "This is a post with a curly quote closing the link http://t.co/2JVSpi5Ó yo",
        'http://t.co/2JVSpi5', '', ''),
    );

    foreach ($cases as $case) {
        list($network, $post_id, $post_text, $stored_url, $expected_expanded_url, $expected_image_src) = $case;

        URLProcessor::processPostURLs($post_text, $post_id, $network, $this->logger);

        $link_dao = new LinkMySQLDAO();
        $link = $link_dao->getLinkByUrl($stored_url);
        $this->assertIsA($link, "Link");
        $this->assertEqual($link->url, $stored_url);
        $this->assertEqual($link->expanded_url, $expected_expanded_url);
        $this->assertEqual($link->image_src, $expected_image_src);
        $this->assertEqual($link->title, '');
        $this->assertEqual($link->post_id, $post_id);
        $this->assertEqual($link->network, $network);
    }
}
/**
 * Run processTweetURLs over tweets that each contain one image-service short link and check
 * the Link row stored for that URL.
 */
public function testProcessTweetURLs() {
    // One fixture per image service: the tweet's post ID and text, the short URL embedded in the
    // text, and the expanded (thumbnail) URL the stored link is expected to carry.
    $fixtures = array(
        //Twitpic
        array(
            'post_id' => 100,
            'post_text' => "This is a Twitpic post http://twitpic.com/blah Yay!",
            'url' => 'http://twitpic.com/blah',
            'expanded_url' => 'http://twitpic.com/show/thumb/blah'
        ),
        //Yfrog
        array(
            'post_id' => 101,
            'post_text' => "This is a Yfrog post http://yfrog.com/blah Yay!",
            'url' => 'http://yfrog.com/blah',
            'expanded_url' => 'http://yfrog.com/blah.th.jpg'
        ),
        //Twitgoo
        array(
            'post_id' => 102,
            'post_text' => "This is a Twitgoo post http://twitgoo.com/blah Yay!",
            'url' => 'http://twitgoo.com/blah',
            'expanded_url' => 'http://twitgoo.com/show/thumb/blah'
        ),
        //Picplz
        array(
            'post_id' => 103,
            'post_text' => "This is a Picplz post http://picplz.com/blah Yay!",
            'url' => 'http://picplz.com/blah',
            'expanded_url' => 'http://picplz.com/blah/thumb/'
        ),
        //Flic.kr
        array(
            'post_id' => 104,
            'post_text' => "This is a Flickr post http://flic.kr/blah Yay!",
            'url' => 'http://flic.kr/blah',
            'expanded_url' => ''
        )
    );

    // The same $tweet array is reused across fixtures; only these two keys are overwritten.
    $tweet = array();
    foreach ($fixtures as $expected) {
        $tweet["post_id"] = $expected['post_id'];
        $tweet['post_text'] = $expected['post_text'];
        URLProcessor::processTweetURLs($this->logger, $tweet);

        $dao = new LinkMySQLDAO();
        $stored_link = $dao->getLinkByUrl($expected['url']);

        $this->assertIsA($stored_link, "Link");
        $this->assertEqual($stored_link->url, $expected['url']);
        $this->assertEqual($stored_link->expanded_url, $expected['expanded_url']);
        $this->assertEqual($stored_link->title, '');
        $this->assertEqual($stored_link->post_id, $expected['post_id']);
        $this->assertEqual($stored_link->network, 'twitter');
        $this->assertTrue($stored_link->is_image);
    }
}
/**
 * cleanUpMissedFavsUnFavs pages back through the older pages of favs, checking for favs that are not yet in
 * the database, as well as favs that were added to the db but are no longer returned by Twitter's API.
 * However, that latter calculation, for un-fav'd tweets, is currently not reliable due to a bug on Twitter's end,
 * and so such tweets are not currently removed from the database.
 * Due to the same issue with the API, it's not clear whether all favs of older tweets are going to be actually
 * returned from Twitter (that is, it is currently not returning some actually-favorited tweets in a given range).
 * So, we may miss some older tweets that were in fact favorited, until Twitter fixes this.
 * The number of pages to page back for each run of the crawler is set by favs_cleanup_pages option.
 *
 * The page pointer ($this->instance->last_unfav_page_checked) is advanced after each processed page and reset
 * to 0 once paging has run off the end of the favorites (empty page, or past the last archive page), so that
 * the next run starts again from the default start page.
 *
 * @return bool Always true (also when no API calls are available and nothing is done).
 * @throws Exception If a favorites page request does not return HTTP 200 or its result is -1.
 */
public function cleanUpMissedFavsUnFavs() {
    // first, check that we have the resources to do work
    if (!($this->api->available && $this->api->available_api_calls_for_crawler)) {
        $this->logger->logInfo("terminating cleanUpMissedFavsUnFavs-- no API calls available",
        __METHOD__ . ',' . __LINE__);
        return true;
    }
    $this->logger->logInfo("In cleanUpMissedFavsUnFavs", __METHOD__ . ',' . __LINE__);
    $this->logger->logInfo("User id: " . $this->user->user_id . "\n", __METHOD__ . ',' . __LINE__);

    $fcount = 0;
    $favs_cleanup_pages = 1; // default number of pages to process each time the crawler runs
    $topt = $this->twitter_options;

    // get plugin option value if it exists & is positive int, otherwise use default.
    // The value is checked with is_numeric() and cast to int: the previous is_integer((int) ...) test was
    // always true, which let non-numeric option strings through to the loop bound below.
    if (isset($topt['favs_cleanup_pages'])) {
        $conf_favs_cleanup_pages = $topt['favs_cleanup_pages']->option_value;
        $this->logger->logInfo("conf_favs_cleanup_pages: {$conf_favs_cleanup_pages} ",
        __METHOD__ . ',' . __LINE__);
        if (is_numeric($conf_favs_cleanup_pages) && (int) $conf_favs_cleanup_pages > 0) {
            $favs_cleanup_pages = (int) $conf_favs_cleanup_pages;
        }
    }
    $this->logger->logInfo("favs_cleanup_pages: {$favs_cleanup_pages} ", __METHOD__ . ',' . __LINE__);

    $fpd = DAOFactory::getDAO('FavoritePostDAO');
    $pagesize = 20; // number of favs per page retrieved from the API call... (tbd: any way to get
    //this from the API?)

    // get 'favs_older_pages' plugin option value if it exists & is pos. int. Use it to calculate default start
    // page if set, otherwise use default value.
    $default_start_page = 2;
    if (isset($topt['favs_older_pages'])) {
        $conf_older_favs_pages = $topt['favs_older_pages']->option_value;
        if (is_numeric($conf_older_favs_pages) && (int) $conf_older_favs_pages > 0) {
            $default_start_page = (int) $conf_older_favs_pages + 1;
        }
    }
    $this->logger->logInfo("default start page: {$default_start_page} ", __METHOD__ . ',' . __LINE__);

    $last_page_of_favs = round($this->api->archive_limit / $pagesize);

    // resume after the last page checked on a previous run, or start from the default page
    $last_unfav_page_checked = $this->instance->last_unfav_page_checked;
    $start_page = $last_unfav_page_checked > 0 ? $last_unfav_page_checked + 1 : $default_start_page;
    $this->logger->logInfo("start page: {$start_page}, with {$favs_cleanup_pages} cleanup pages",
    __METHOD__ . ',' . __LINE__);

    $count = 0;
    $page = $start_page;
    while ($count < $favs_cleanup_pages && $this->api->available
    && $this->api->available_api_calls_for_crawler) {
        // get the favs from that page
        try {
            list($tweets, $cURL_status, $twitter_data) = $this->getFavsPage($page);
        } catch (APICallLimitExceededException $e) {
            break;
        }
        if ($cURL_status != 200 || $tweets == -1) {
            // todo - handle more informatively
            $this->logger->logInfo("in cleanUpMissedFavsUnFavs, error with: {$twitter_data}",
            __METHOD__ . ',' . __LINE__);
            throw new Exception("in cleanUpUnFavs: error parsing favs");
        }
        if (sizeof($tweets) == 0) {
            // then done paging backwards through the favs.
            // reset pointer so that we start at the recent favs again next time through.
            $this->instance->last_unfav_page_checked = 0;
            break;
        }
        // bounds of the fetched page: last element is used as the minimum, first as the maximum post ID
        $min_tweet = $tweets[sizeof($tweets) - 1]['post_id'];
        $max_tweet = $tweets[0]['post_id'];
        $this->logger->logInfo("in cleanUpUnFavs, page {$page} min and max: {$min_tweet}, {$max_tweet}",
        __METHOD__ . ',' . __LINE__);

        foreach ($tweets as $fav) {
            $fav['network'] = 'twitter';
            // check whether the tweet is in the db-- if not, add it.
            if ($fpd->addFavorite($this->user->user_id, $fav) > 0) {
                URLProcessor::processPostURLs($fav['post_text'], $fav['post_id'], 'twitter', $this->logger);
                $this->logger->logInfo("added fav " . $fav['post_id'], __METHOD__ . ',' . __LINE__);
                $fcount++;
            } else {
                $status_message = "have already stored fav " . $fav['post_id'];
                $this->logger->logDebug($status_message, __METHOD__ . ',' . __LINE__);
            }
        }

        // now for each favorited tweet in the database within the fetched range, check whether it's still
        // favorited. This part of the method is currently disabled due to issues with the Twitter API, which
        // is not returning all of the favorited tweets any more. So, the fact that a previously-archived
        // tweet is not returned, no longer indicates that it was un-fav'd.
        // The method still IDs the 'missing' tweets, but no longer deletes them. We may want to get rid of
        // this check altogether at some point.
        $fposts = $fpd->getAllFavoritePostsUpperBound($this->user->user_id, 'twitter', $pagesize,
        $max_tweet + 1);
        foreach ($fposts as $old_fav) {
            $old_fav_id = $old_fav->post_id;
            if ($old_fav_id < $min_tweet) {
                $this->logger->logInfo("Old fav {$old_fav_id} out of range ", __METHOD__ . ',' . __LINE__);
                break; // all the rest will be out of range also then
            }
            // look for the old_fav_id in the array of fetched favs
            $found = false;
            foreach ($tweets as $tweet) {
                if ($old_fav_id == $tweet['post_id']) {
                    $found = true;
                    break;
                }
            }
            if (!$found) { // if it's not there...
                // 14/10 arghh -- Twitter is suddenly (temporarily?) not returning all fav'd tweets in a
                // sequence.
                // skipping the delete for now, keep tabs on it. Can check before delete with extra API
                // request, but the point of doing it this way was to avoid the additional API request.
                $this->logger->logInfo("Twitter claims tweet not still favorited, but this is currently " .
                "broken, so not deleting: " . $old_fav_id, __METHOD__ . ',' . __LINE__);
                // 'unfavorite' by removing from favorites table
                // $fpd->unFavorite($old_fav_id, $this->user->user_id);
            }
        }

        // remember the page just processed, then move on to the next one
        $this->instance->last_unfav_page_checked = $page++;
        if ($page > $last_page_of_favs) {
            // Past the last archive page: reset the stored pointer (not just the local page counter, which
            // had no effect) so the next run starts at the recent favs again.
            $this->instance->last_unfav_page_checked = 0;
            break;
        }
        $count++;
    }
    $this->logger->logUserSuccess("Added {$fcount} older missed favorites", __METHOD__ . ',' . __LINE__);
    return true;
}
/**
 * Retrieve tweets in search results for a keyword/hashtag.
 *
 * Repeatedly calls the search endpoint for the hashtag, stores each returned tweet, links stored tweets
 * (and any retweeted post they carry) to the hashtag, updates the authors, processes the tweets' URLs, and
 * persists the instance hashtag's last/earliest post ID markers after every page. Fetching stops when a page
 * returns fewer tweets than requested, on a non-200 response, or when the API call limit is exceeded.
 * Does nothing if $this->instance is not set.
 *
 * @param InstanceHashtag $instance_hashtag
 * @return void
 */
public function fetchInstanceHashtagTweets($instance_hashtag) {
    if (!isset($this->instance)) {
        return;
    }
    $continue_fetching = true;
    $since_id = 0;
    $max_id = 0;

    $instance_hashtag_dao = DAOFactory::getDAO('InstanceHashtagDAO');
    $post_dao = DAOFactory::getDAO('PostDAO');
    $user_dao = DAOFactory::getDAO('UserDAO');
    $hashtagpost_dao = DAOFactory::getDAO('HashtagPostDAO');
    $hashtag_dao = DAOFactory::getDAO('HashtagDAO');

    //Get hashtag
    $hashtag = $hashtag_dao->getHashtagByID($instance_hashtag->hashtag_id);

    // Page size is the same for every request, so resolve it once.
    $count_arg = isset($this->twitter_options['tweet_count_per_call']) ?
    $this->twitter_options['tweet_count_per_call']->option_value : 100;

    while ($continue_fetching) {
        $endpoint = $this->api->endpoints['search_tweets'];
        $args = array();
        $args["q"] = $hashtag->hashtag;
        $args["count"] = $count_arg;
        $args["include_entities"] = "true";
        // since_id is seeded once from the stored marker; max_id is lowered as older tweets are seen
        if ($since_id == 0) {
            $since_id = $instance_hashtag->last_post_id;
        }
        if ($since_id > 0) {
            $args["since_id"] = $since_id;
        }
        if ($max_id > $since_id) {
            $args["max_id"] = $max_id;
        }

        try {
            list($http_status, $payload) = $this->api->apiRequest($endpoint, $args);
        } catch (APICallLimitExceededException $e) {
            $this->logger->logInfo($e->getMessage(), __METHOD__ . ',' . __LINE__);
            break;
        }

        if ($http_status == 200) {
            $this->logger->logDebug('Search tweets 200 ' . $endpoint->getPath(), __METHOD__ . ',' . __LINE__);
            $count = 0;
            $user_count = 0;
            $tweets = $this->api->parseJSONTweetsFromSearch($payload);
            foreach ($tweets as $tweet) {
                $this->logger->logDebug('Processing ' . $tweet['post_id'], __METHOD__ . ',' . __LINE__);
                $this->logger->logDebug('Processing ' . Utils::varDumpToString($tweet),
                __METHOD__ . ',' . __LINE__);

                $post_dao->addPost($tweet, $this->user, $this->logger);

                //We need to check if post exists before add relationship between post and hashtag
                if ($post_dao->isPostInDB($tweet['post_id'], 'twitter')) {
                    if (!$hashtagpost_dao->isHashtagPostInStorage($hashtag->id, $tweet['post_id'], 'twitter')) {
                        $count = $count + 1;
                        $hashtagpost_dao->insertHashtagPost($hashtag->hashtag, $tweet['post_id'], 'twitter');
                        $user = new User($tweet);
                        $rows_updated = $user_dao->updateUser($user);
                        if ($rows_updated > 0) {
                            $user_count = $user_count + $rows_updated;
                        }
                        $this->logger->logDebug('User has been updated', __METHOD__ . ',' . __LINE__);

                        if (isset($tweet['retweeted_post']) && isset($tweet['retweeted_post']['content'])) {
                            $this->logger->logDebug('Retweeted post info set', __METHOD__ . ',' . __LINE__);
                            if (!$hashtagpost_dao->isHashtagPostInStorage($hashtag->id,
                            $tweet['retweeted_post']['content']['post_id'], 'twitter')) {
                                $this->logger->logDebug('Retweeted post not in storage',
                                __METHOD__ . ',' . __LINE__);
                                $count++;
                                $hashtagpost_dao->insertHashtagPost($hashtag->hashtag,
                                $tweet['retweeted_post']['content']['post_id'], 'twitter');
                                $user_retweet = new User($tweet['retweeted_post']['content']);
                                $rows_retweet_updated = $user_dao->updateUser($user_retweet);
                                if ($rows_retweet_updated > 0) {
                                    $user_count = $user_count + $rows_retweet_updated;
                                }
                            } else {
                                $this->logger->logDebug('Retweeted post in storage',
                                __METHOD__ . ',' . __LINE__);
                            }
                        } else {
                            $this->logger->logDebug('Retweeted post info not set', __METHOD__ . ',' . __LINE__);
                        }

                        $this->logger->logDebug('About to process URLs', __METHOD__ . ',' . __LINE__);
                        URLProcessor::processPostURLs($tweet['post_text'], $tweet['post_id'], 'twitter',
                        $this->logger);
                        $this->logger->logDebug('URLs have been processed', __METHOD__ . ',' . __LINE__);
                    }
                }

                // Track newest/oldest post IDs seen, and the lowest ID for the next page's max_id
                if ($tweet['post_id'] > $instance_hashtag->last_post_id) {
                    $instance_hashtag->last_post_id = $tweet['post_id'];
                }
                if ($instance_hashtag->earliest_post_id == 0
                || $tweet['post_id'] < $instance_hashtag->earliest_post_id) {
                    $instance_hashtag->earliest_post_id = $tweet['post_id'];
                }
                if ($max_id == 0 || $tweet['post_id'] < $max_id) {
                    $max_id = $tweet['post_id'];
                }
                $this->logger->logDebug('Instance hashtag markers updated', __METHOD__ . ',' . __LINE__);
            }

            //Status message for tweets and users
            $status_message = ' ' . count($tweets) . " tweet(s) found and {$count} saved";
            $this->logger->logUserSuccess($status_message, __METHOD__ . ',' . __LINE__);
            $status_message = ' ' . count($tweets) . " tweet(s) found and {$user_count} users saved";
            $this->logger->logUserSuccess($status_message, __METHOD__ . ',' . __LINE__);

            //Save instance_hashtag important values
            if ($instance_hashtag->last_post_id > 0) {
                $instance_hashtag_dao->updateLastPostID($instance_hashtag->instance_id,
                $instance_hashtag->hashtag_id, $instance_hashtag->last_post_id);
            }
            if ($instance_hashtag->earliest_post_id > 0) {
                $instance_hashtag_dao->updateEarliestPostID($instance_hashtag->instance_id,
                $instance_hashtag->hashtag_id, $instance_hashtag->earliest_post_id);
            }

            //Not to continue fetching if search not return the maxim number of tweets
            if (count($tweets) < $count_arg) {
                $continue_fetching = false;
            }
        } else {
            // Report the status actually returned by apiRequest (the variable in scope is $http_status;
            // $cURL_status is not defined in this method).
            $status_message = "Stop fetching tweets. cURL_status = " . $http_status;
            $this->logger->logUserSuccess($status_message, __METHOD__ . ',' . __LINE__);
            $continue_fetching = false;
        }
    }
}
/**
 * Check processTweetURLs for a good and a bad instagr.am link, then check
 * extractInstagramImageURL directly against a stored page fixture.
 */
public function testProcessTweetInstagramURLs() {
    $instagram_image_url =
    'http://distillery.s3.amazonaws.com/media/2010/12/20/f0f411210cc54353be07cf74ceb79f3b_7.jpg';

    // Each fixture: tweet fields, the short URL to look up, and what the stored link should hold.
    $fixtures = array(
        //instagr.am
        array(
            'post_id' => 105,
            'post_text' => "This is an Instagram post: http://instagr.am/p/oyQ6/ :)",
            'url' => 'http://instagr.am/p/oyQ6/',
            'expanded_url' => $instagram_image_url,
            'is_image' => true
        ),
        // bad instagr.am URL
        array(
            'post_id' => 106,
            'post_text' => "This is an Instagram post with a bad URL: http://instagr.am/p/oyQ5/ :(",
            'url' => 'http://instagr.am/p/oyQ5/',
            'expanded_url' => '',
            'is_image' => false
        )
    );

    // The same $tweet array is reused across fixtures; only these two keys are overwritten.
    $tweet = array();
    foreach ($fixtures as $expected) {
        $tweet["post_id"] = $expected['post_id'];
        $tweet['post_text'] = $expected['post_text'];
        URLProcessor::processTweetURLs($this->logger, $tweet);

        $dao = new LinkMySQLDAO();
        $stored_link = $dao->getLinkByUrl($expected['url']);

        $this->assertIsA($stored_link, "Link");
        $this->assertEqual($stored_link->url, $expected['url']);
        $this->assertEqual($stored_link->expanded_url, $expected['expanded_url']);
        $this->assertEqual($stored_link->title, '');
        $this->assertEqual($stored_link->post_id, $expected['post_id']);
        $this->assertEqual($stored_link->network, 'twitter');
        if ($expected['is_image']) {
            $this->assertTrue($stored_link->is_image);
        } else {
            $this->assertFalse($stored_link->is_image);
        }
    }

    // test regexp extraction of image link from html
    $page_html = file_get_contents($this->faux_data_path . "/instagr_am_p_oyQ6");
    list($extracted_url, $extracted_is_image) =
    URLProcessor::extractInstagramImageURL($this->logger, $page_html);
    $this->assertEqual($extracted_url, $instagram_image_url);
    $this->assertTrue($extracted_is_image);
}
/**
 * Save expanded version of all unexpanded URLs to data store, as well as intermediary short links.
 *
 * Each link is expanded one hop at a time until the expansion returns the same URL or an empty string;
 * every intermediate short URL is recorded through the short link DAO. Expansion also stops when a hop
 * returns a URL already seen for this link, so a redirect cycle cannot loop forever.
 * Links that expand to nothing, or that fail URL validation, are saved as expansion errors.
 *
 * @param string|null $flickr_api_key When set, http://flic.kr/ links are additionally passed to
 *                                    expandFlickrThumbnail and are not saved/counted by this method.
 * @return void
 */
public function expandOriginalURLs($flickr_api_key = null) {
    $links_to_expand = $this->link_dao->getLinksToExpand($this->link_limit);
    $total_links = count($links_to_expand);

    $this->logger->logUserInfo($total_links . " links to expand. Please wait. Working...",
    __METHOD__ . ',' . __LINE__);

    $total_expanded = 0;
    $total_errors = 0;
    $flickr_prefix = 'http://flic.kr/';

    foreach ($links_to_expand as $index => $link) {
        if (!Utils::validateURL($link->url)) {
            $total_errors = $total_errors + 1;
            $this->logger->logError($link->url . " not a valid URL", __METHOD__ . ',' . __LINE__);
            $this->link_dao->saveExpansionError($link->url, "Invalid URL");
            continue;
        }

        $this->logger->logInfo("Expanding " . ($total_expanded + 1) . " of " . $total_links . " (" .
        $link->url . ")", __METHOD__ . ',' . __LINE__);

        //make sure shortened short links--like t.co--get fully expanded
        $has_expanded_flickr_link = false;
        $fully_expanded = false;
        $short_link = $link->url;
        // URLs already seen in this redirect chain, used to break out of redirect cycles (A -> B -> A)
        $visited = array($short_link => true);

        while (!$fully_expanded) {
            //begin Flickr thumbnail processing
            if (isset($flickr_api_key) && substr($short_link, 0, strlen($flickr_prefix)) == $flickr_prefix) {
                self::expandFlickrThumbnail($flickr_api_key, $short_link, $link->url);
                $has_expanded_flickr_link = true;
                $fully_expanded = true;
            }
            //end Flickr thumbnail processing

            $expanded_url = URLExpander::expandURL($short_link, $link->url, $index, $total_links,
            $this->link_dao, $this->logger);

            if ($expanded_url == $short_link || $expanded_url == '') {
                $fully_expanded = true;
            } elseif (isset($visited[$expanded_url])) {
                // This hop leads back to a URL already expanded for this link: stop here instead of
                // following the cycle indefinitely.
                $this->logger->logInfo("Redirect loop detected at " . $expanded_url . " (" . $link->url .
                "), stopping expansion", __METHOD__ . ',' . __LINE__);
                $fully_expanded = true;
            } else {
                $this->short_link_dao->insert($link->id, $short_link);
                $visited[$expanded_url] = true;
            }
            $short_link = $expanded_url;
        }

        // Flickr links were handed to expandFlickrThumbnail above and are not saved or counted here
        if (!$has_expanded_flickr_link) {
            if ($expanded_url != '') {
                $image_src = URLProcessor::getImageSource($expanded_url);
                $this->link_dao->saveExpandedUrl($link->url, $expanded_url, '', $image_src);
                $total_expanded = $total_expanded + 1;
            } else {
                $this->logger->logError($link->url . " not a valid URL - relocates to nowhere",
                __METHOD__ . ',' . __LINE__);
                $this->link_dao->saveExpansionError($link->url, "Invalid URL - relocates to nowhere");
                $total_errors = $total_errors + 1;
            }
        }
    }

    $this->logger->logUserSuccess($total_expanded . " URLs successfully expanded (" . $total_errors .
    " errors).", __METHOD__ . ',' . __LINE__);
}