Parses tweets and extracts URLs, usernames, username/list pairs and
hashtags.
Originally written by {@link http://github.com/mikenz Mike Cochrane}, this
is based on code by {@link http://github.com/mzsanford Matt Sanford} and
heavily modified by {@link http://github.com/ngnpope Nick Pope}.
/**
 * Generates the "favorited_links" insight: the posts the user favorited that
 * were published today and whose text contains at least one URL.
 *
 * @param Instance $instance Instance to generate the insight for.
 * @param array $last_week_of_posts Passed through to the parent implementation.
 * @param int $number_days Passed through to the parent implementation.
 * @return void
 */
public function generateInsight(Instance $instance, $last_week_of_posts, $number_days) {
    parent::generateInsight($instance, $last_week_of_posts, $number_days);
    $this->logger->logInfo("Begin generating insight", __METHOD__ . ',' . __LINE__);

    // The inline assignments only label the positional arguments.
    if (self::shouldGenerateInsight('favorited_links', $instance, $insight_date = 'today',
        $regenerate_existing_insight = true)) {
        $fpost_dao = DAOFactory::getDAO('FavoritePostDAO');
        // NOTE(review): 40 is presumably a result limit -- confirm against FavoritePostDAO.
        $favorited_posts = $fpost_dao->getAllFavoritePosts($instance->network_user_id, $instance->network, 40);

        // Computed once so every post is compared against the same calendar day,
        // even if the loop happens to run across midnight.
        $today = date('Y-m-d');
        $todays_favorited_posts_with_links = array();
        foreach ($favorited_posts as $post) {
            if (date('Y-m-d', strtotime($post->pub_date)) != $today) {
                continue;
            }
            $text_parser = new Twitter_Extractor($post->post_text);
            $elements = $text_parser->extract();
            if (count($elements['urls'])) {
                $todays_favorited_posts_with_links[] = $post;
            }
        }

        $favorited_links_count = count($todays_favorited_posts_with_links);
        if ($favorited_links_count) {
            if ($favorited_links_count == 1) {
                $insight_text = $this->username . " " . $this->terms->getVerb('liked')
                    . " <strong>1 " . $this->terms->getNoun('post') . "</strong> with a link in it.";
            } else {
                $insight_text = $this->username . " " . $this->terms->getVerb('liked')
                    . " <strong>" . $favorited_links_count . " "
                    . $this->terms->getNoun('post', InsightTerms::PLURAL) . "</strong> with links in them:";
            }

            // The matching posts are stored serialized as the insight's related data.
            $this->insight_dao->insertInsightDeprecated("favorited_links", $instance->id, $this->insight_date,
                "Links you liked:", $insight_text, basename(__FILE__, ".php"), Insight::EMPHASIS_LOW,
                serialize($todays_favorited_posts_with_links));
        }
    }

    $this->logger->logInfo("Done generating insight", __METHOD__ . ',' . __LINE__);
}
/**
 * Generates the "link_prompt" insight: a nudge shown when the user has posted
 * recently, links made up more than 20% of their posts over the last 30 days,
 * yet none of their posts from the last two days contains a URL.
 *
 * @param Instance $instance Instance to generate the insight for.
 * @param array $last_week_of_posts Passed through to the parent implementation.
 * @param int $number_days Passed through to the parent implementation.
 * @return void
 */
public function generateInsight(Instance $instance, $last_week_of_posts, $number_days) {
    parent::generateInsight($instance, $last_week_of_posts, $number_days);
    $this->logger->logInfo("Begin generating insight", __METHOD__ . ',' . __LINE__);

    // The inline assignments only label the positional arguments.
    if (self::shouldGenerateInsight('link_prompt', $instance, $insight_date = 'today',
        $regenerate_existing_insight = false, $day_of_week = null, $count_last_week_of_posts = null,
        $excluded_networks = array('foursquare', 'youtube'), $alternate_day = (int) date('j') % 2)) {
        $post_dao = DAOFactory::getDAO('PostDAO');
        $link_dao = DAOFactory::getDAO('LinkDAO');

        // Check from midnight two days ago until an hour from now
        // (to avoid clock-sync issues)
        $recent_posts = $post_dao->getPostsByUserInRange($instance->network_user_id, $instance->network,
            date('Y-m-d H:i:s', strtotime('-2 days midnight')), date('Y-m-d H:i:s', strtotime('+1 hour')));

        // Only the existence of a link matters below, so stop at the first hit
        // instead of running the extractor over every remaining post.
        $has_recent_link = false;
        foreach ($recent_posts as $post) {
            $text_parser = new Twitter_Extractor($post->post_text);
            $elements = $text_parser->extract();
            if (count($elements['urls'])) {
                $has_recent_link = true;
                break;
            }
        }

        $num_posts = $post_dao->countAllPostsByUserSinceDaysAgo($instance->network_user_id,
            $instance->network, 30);
        $num_links = $link_dao->countLinksPostedByUserSinceDaysAgo($instance->network_user_id,
            $instance->network, 30);

        // $num_posts is tested first so the ratio is never computed with a zero divisor.
        if ($num_posts && $num_links / $num_posts > 0.2 && count($recent_posts) && !$has_recent_link) {
            $insight_text = $this->username . " hasn't " . $this->terms->getVerb('posted')
                . " a link in the last 2 days. It may be time to share an interesting link with "
                . $this->terms->getNoun('friend', InsightTerms::PLURAL) . ".";

            $this->insight_dao->insertInsightDeprecated('link_prompt', $instance->id, $this->insight_date,
                "Nudge:", $insight_text, basename(__FILE__, ".php"), Insight::EMPHASIS_LOW);
        }
    }

    $this->logger->logInfo("Done generating insight", __METHOD__ . ',' . __LINE__);
}
/**
 * Generates the "interactions" insight: counts how often each account is
 * mentioned in the supplied posts and reports the most-mentioned one.
 *
 * @param Instance $instance Instance to generate the insight for.
 * @param array $last_week_of_posts Posts whose text is scanned for mentions.
 * @param int $number_days Passed through to the parent implementation.
 * @return void
 */
public function generateInsight(Instance $instance, $last_week_of_posts, $number_days) {
    parent::generateInsight($instance, $last_week_of_posts, $number_days);
    $this->logger->logInfo("Begin generating insight", __METHOD__ . ',' . __LINE__);

    // The inline assignments only label the positional arguments.
    if (self::shouldGenerateInsight('interactions', $instance, $insight_date = 'today',
        $regenerate_existing_insight = false, $day_of_week = 3, count($last_week_of_posts),
        $excluded_networks = array('facebook', 'google+', 'foursquare', 'youtube'))) {
        $user_dao = DAOFactory::getDAO('UserDAO');

        $mentions_count = array(); // '@name' => number of mentions
        $mentions_info = array();  // '@name' => user object, only for users the DAO knows
        $insight_data = array();

        foreach ($last_week_of_posts as $post) {
            // Extract mentions from post text
            $text_parser = new Twitter_Extractor($post->post_text);
            $elements = $text_parser->extract();

            foreach ($elements['mentions'] as $mention_in_post) {
                if ($mention_in_post == $instance->network_username) {
                    // Don't count metweets
                    continue;
                }

                $mentioned_user = $user_dao->getUserByName($mention_in_post, $instance->network);
                if (isset($mentioned_user)) {
                    // Use the stored username so differently-cased mentions share one key.
                    $mention_in_post = '@' . $mentioned_user->username;
                    $mentions_info[$mention_in_post] = $mentioned_user;
                } else {
                    $mention_in_post = '@' . $mention_in_post;
                }

                // Update mention count
                if (array_key_exists($mention_in_post, $mentions_count)) {
                    $mentions_count[$mention_in_post]++;
                } else {
                    $mentions_count[$mention_in_post] = 1;
                }
            }
        }

        if (count($mentions_count)) {
            // Get most mentioned user. each() was removed in PHP 8, so read the
            // first element of the descending-sorted array explicitly.
            arsort($mentions_count);
            reset($mentions_count);
            $most_mentioned_user = array(
                'key' => key($mentions_count),
                'value' => current($mentions_count),
            );

            // Add mentions to dataset
            foreach ($mentions_count as $mention => $count) {
                $insight_data[] = array(
                    'mention' => $mention,
                    'count' => $count,
                    // Mentions with no stored user have no entry in $mentions_info.
                    'user' => isset($mentions_info[$mention]) ? $mentions_info[$mention] : null,
                );
            }
        }

        if (isset($most_mentioned_user)) {
            $insight_text = $this->username . " mentioned " . $most_mentioned_user['key'] . " <strong>"
                . $this->terms->getOccurrencesAdverb($most_mentioned_user['value'])
                . "</strong> last week.";

            $this->insight_dao->insertInsightDeprecated('interactions', $instance->id, $this->insight_date,
                "BFFs:", $insight_text, basename(__FILE__, ".php"), Insight::EMPHASIS_LOW,
                serialize($insight_data));
        }
    }

    $this->logger->logInfo("Done generating insight", __METHOD__ . ',' . __LINE__);
}
/**
 * Generates the "metweet" insight: counts the supplied posts that are retweets
 * and mention the instance's own username, stores that count as a baseline and
 * compares it with the baseline recorded seven days earlier.
 *
 * @param Instance $instance Instance to generate the insight for.
 * @param array $last_week_of_posts Posts to inspect.
 * @param int $number_days Passed through to the parent implementation.
 * @return void
 */
public function generateInsight(Instance $instance, $last_week_of_posts, $number_days) {
    parent::generateInsight($instance, $last_week_of_posts, $number_days);
    $this->logger->logInfo("Begin generating insight", __METHOD__ . ',' . __LINE__);

    // The inline assignments only label the positional arguments.
    $should_generate = self::shouldGenerateInsight('metweet', $instance, $insight_date = 'today',
        $regenerate_existing_insight = false, $day_of_week = 2, count($last_week_of_posts),
        $excluded_networks = array('facebook', 'google+', 'foursquare', 'youtube'));

    if ($should_generate) {
        $metweet_count = 0;
        foreach ($last_week_of_posts as $post) {
            // Only retweets can be metweets.
            if (!isset($post->in_retweet_of_post_id)) {
                continue;
            }
            $extractor = new Twitter_Extractor($post->post_text);
            $extracted = $extractor->extract();
            if (in_array($instance->network_username, $extracted['mentions'])) {
                $metweet_count++;
            }
        }

        if ($metweet_count > 1) {
            $insight_text = $this->username . " retweeted " . $this->username . " mentions " . "<strong>"
                . $this->terms->getOccurrencesAdverb($metweet_count) . "</strong> last week";

            $insight_baseline_dao = DAOFactory::getDAO('InsightBaselineDAO');
            $insight_baseline_dao->insertInsightBaseline("metweet_count", $instance->id, $metweet_count,
                $this->insight_date);

            $week_ago = date('Y-m-d', strtotime('-7 day'));
            $prior_baseline = $insight_baseline_dao->getInsightBaseline("metweet_count", $instance->id,
                $week_ago);

            // Sentence ending: a bare full stop unless the prior baseline differs.
            $ending = ".";
            if (isset($prior_baseline)) {
                $prior_count = $prior_baseline->value;
                if ($prior_count > $metweet_count) {
                    $difference = $prior_count - $metweet_count;
                    $ending = ", {$difference} fewer time" . ($difference > 1 ? "s" : "")
                        . " than the prior week.";
                } elseif ($prior_count < $metweet_count) {
                    $difference = $metweet_count - $prior_count;
                    $ending = ", {$difference} more time" . ($difference > 1 ? "s" : "")
                        . " than the prior week.";
                }
            }
            $insight_text .= $ending;

            $this->insight_dao->insertInsightDeprecated("metweet", $instance->id, $this->insight_date,
                "Metweets:", $insight_text, basename(__FILE__, ".php"), Insight::EMPHASIS_LOW);
        }
    }

    $this->logger->logInfo("Done generating insight", __METHOD__ . ',' . __LINE__);
}
/**
 * Computes the length of a tweet, counting every extracted URL at the
 * configured shortened-URL length instead of its literal length.
 *
 * @param string $tweet The tweet to measure; defaults to the instance's tweet.
 * @return int The adjusted length of the tweet.
 */
public function getTweetLength($tweet = null)
{
    if ($tweet === null) {
        $tweet = $this->tweet;
    }

    $length = mb_strlen($tweet);

    foreach ($this->extractor->extractURLsWithIndices($tweet) as $entity) {
        list($start, $end) = $entity['indices'];
        $is_https = stripos($entity['url'], 'https://') === 0;

        // Remove the URL's literal span, then add its shortened length.
        $length += $start - $end;
        $length += $is_https ? $this->short_url_length_https : $this->short_url_length;
    }

    return $length;
}
/**
 * Loads the extractor library and runs its full extraction on a string.
 *
 * @param string $string Text to extract entities from.
 * @return mixed Whatever Twitter_Extractor::extract() returns for $string.
 */
function extractIt($string)
{
    require_once 'library/extractor/Extractor.php';

    return Twitter_Extractor::create()->extract($string);
}
/**
 * Builds the row of action icons (reply, reply all, DM, favourite, retweet,
 * delete, map, search) for a status or direct message.
 *
 * @param object $status Status object; reads from->screen_name, id, text,
 *                       is_direct, favorited, retweeted_by and geo.
 * @return string The themed icons joined by single spaces.
 */
function theme_action_icons($status) {
    $from = $status->from->screen_name;

    // retweeted_by and geo are only read when present, so a status without
    // them no longer triggers property-on-null warnings.
    $retweeted_by = isset($status->retweeted_by->user->screen_name)
        ? $status->retweeted_by->user->screen_name
        : null;
    $retweeted_id = isset($status->retweeted_by->id) ? $status->retweeted_by->id : null;
    $geo = isset($status->geo) ? $status->geo : null;
    $is_direct = !empty($status->is_direct);

    $actions = array();

    if (!$is_direct) {
        $actions[] = theme('action_icon', "user/{$from}/reply/{$status->id}", 'images/reply.png', '@');
    }

    // Reply All functionality.
    if (substr_count($status->text, '@') >= 1) {
        // extractMentionedScreennames() is called on an instance, matching the
        // other extractor calls in this code base, rather than statically.
        $found = Twitter_Extractor::create($status->text)->extractMentionedScreennames();
        $to_users = array_unique($found);

        // Remove the username of the authenticated user. array_search() returns
        // false when absent and may return key 0, hence the strict test.
        $key = array_search(user_current_username(), $to_users);
        if ($key !== false) {
            unset($to_users[$key]);
        }

        if (count($to_users) >= 1) {
            $actions[] = theme('action_icon', "user/{$from}/replyall/{$status->id}", 'images/replyall.png',
                'REPLY ALL');
        }
    }

    if (!user_is_current_user($from)) {
        $actions[] = theme('action_icon', "directs/create/{$from}", 'images/dm.png', 'DM');
    }

    if (!$is_direct) {
        if ($status->favorited == '1') {
            $actions[] = theme('action_icon', "unfavourite/{$status->id}", 'images/star.png', 'UNFAV');
        } else {
            $actions[] = theme('action_icon', "favourite/{$status->id}", 'images/star_grey.png', 'FAV');
        }

        $actions[] = theme('action_icon', "retweet/{$status->id}", 'images/retweet.png', 'RT');

        if (user_is_current_user($from)) {
            $actions[] = theme('action_icon', "confirm/delete/{$status->id}", 'images/trash.gif', 'DEL');
        }

        // Deleting the current user's own retweet targets the retweet's id.
        if ($retweeted_by && user_is_current_user($retweeted_by)) {
            $actions[] = theme('action_icon', "confirm/delete/{$retweeted_id}", 'images/trash.gif', 'DEL');
        }
    } else {
        $actions[] = theme('action_icon', "directs/delete/{$status->id}", 'images/trash.gif', 'DEL');
    }

    if ($geo !== null) {
        $latlong = $geo->coordinates;
        $lat = $latlong[0];
        $long = $latlong[1];
        $actions[] = theme('action_icon', "http://maps.google.co.uk/m?q={$lat},{$long}", 'images/map.png',
            'MAP');
    }

    // Search for @ to a user
    $actions[] = theme('action_icon', "search?query=%40{$from}", 'images/q.png', '?');

    return implode(' ', $actions);
}
echo PHP_EOL; } $pass_total = 0; $fail_total = 0; $pass_group = 0; $fail_group = 0; output_preamble(); output_h1('Twitter Text (PHP Edition) Library » Conformance'); output_h2('Extraction Conformance'); # timer $timerStart = microtime(true); # Load the test data. $data = Yaml::parse($DATA . '/extract.yml'); # Define the functions to be tested. $functions = array('hashtags' => 'extractHashtags', 'cashtags' => 'extractCashtags', 'urls' => 'extractURLs', 'mentions' => 'extractMentionedScreennames', 'replies' => 'extractReplyScreenname', 'hashtags_with_indices' => 'extractHashtagsWithIndices', 'cashtags_with_indices' => 'extractCashtagsWithIndices', 'urls_with_indices' => 'extractURLsWithIndices', 'mentions_with_indices' => 'extractMentionedScreennamesWithIndices', 'mentions_or_lists_with_indices' => 'extractMentionsOrListsWithIndices'); $extractor = Twitter_Extractor::create(); # Perform testing. foreach ($data['tests'] as $group => $tests) { output_h3('Test Group - ' . ucfirst(str_replace('_', ' ', $group))); if (!array_key_exists($group, $functions)) { output_skip_test(); continue; } $function = $functions[$group]; $pass_group = 0; $fail_group = 0; if ($browser) { echo '<ul>', PHP_EOL; } foreach ($tests as $test) { echo $browser ? '<li>' : ' - ';
/**
 * Tags the model with the hashtags found in its title.
 *
 * @param object $model Model exposing a title property and a tag() method.
 * @return void
 */
public function saving($model)
{
    $extractor = Twitter_Extractor::create($model->title);

    $model->tag($extractor->extractHashtags());
}
/**
 * Auto-links the $cashtag references found in a tweet by extracting them with
 * their indices and passing them to autoLinkEntities().
 *
 * @param string $tweet The tweet text; defaults to the instance's tweet.
 * @return string The result of autoLinkEntities() for the cashtag entities.
 * @since 1.1.0
 */
public function autoLinkCashtags($tweet = null)
{
    if ($tweet === null) {
        $tweet = $this->tweet;
    }

    $cashtags = $this->extractor->extractCashtagsWithIndices($tweet);

    return $this->autoLinkEntities($tweet, $cashtags);
}
/**
 * Check whether the instance's text is exactly one valid hashtag.
 *
 * The text is valid when the extractor finds a single hashtag in it and that
 * hashtag equals the text with its leading marker character removed.
 *
 * @return boolean Whether the hashtag is valid.
 */
public function validateHashtag()
{
    // An empty string is falsy, so this also covers the zero-length case.
    if (!$this->tweet) {
        return false;
    }

    $extracted = Twitter_Extractor::create($this->tweet)->extractHashtags();

    // Strip the marker by character, not by byte: a multibyte marker such as
    // the full-width "＃" would otherwise leave stray bytes in the comparison.
    $without_marker = mb_substr($this->tweet, 1, null, 'UTF-8');

    return count($extracted) === 1 && $extracted[0] === $without_marker;
}
/**
 * With protocol-less URL extraction switched off, a bare domain in the text
 * must not be reported as a URL.
 */
public function testExtractURLsWithIndicesWithoutProtocol()
{
    $extractor = Twitter_Extractor::create('text: example.com');
    $extractor = $extractor->extractUrlWithoutProtocol(false);

    $this->assertSame(
        array(),
        $extractor->extractURLsWithIndices(),
        'Unextract url without protocol'
    );
}
/**
 * Tells whether a status mentions the currently authenticated user.
 *
 * Uses the status's user_mentions entities when entities are present and
 * otherwise extracts mentions from the status text.
 *
 * @param object $status Status object; reads entities->user_mentions and text.
 * @return bool True when the current user is mentioned; false otherwise or
 *              when nobody is authenticated.
 */
function twitter_is_reply($status) {
    if (!user_is_authenticated()) {
        return false;
    }

    $user = user_current_username();

    // Use Twitter Entities to see if this contains a mention of the user
    if (!empty($status->entities)) {
        if (!empty($status->entities->user_mentions)) {
            foreach ($status->entities->user_mentions as $mention) {
                // Case insensitive compare, consistent with the text fallback below
                if (strcasecmp($mention->screen_name, $user) == 0) {
                    return true;
                }
            }
        }
        // Entities are treated as authoritative: no fallback to text parsing.
        return false;
    }

    // If there are no entities (for example on a search) extract from the text
    $found = Twitter_Extractor::create($status->text)->extractMentionedUsernames();
    foreach ($found as $mention) {
        // Case insensitive compare
        if (strcasecmp($mention, $user) == 0) {
            return true;
        }
    }

    return false;
}
/**
 * Extracts every URL from the text parameter of a received event and passes
 * each one to handleUrl().
 *
 * @param UserEvent $event Received event; its 'text' parameter is scanned.
 * @param EventQueue $queue Queue forwarded to handleUrl().
 * @return void
 */
public function handleIrcReceived(UserEvent $event, EventQueue $queue)
{
    $params = $event->getParams();
    $extractor = new \Twitter_Extractor($params['text']);

    foreach ($extractor->extractURLs() as $found_url) {
        $this->handleUrl($found_url, $event, $queue);
    }
}
/**
 * Checks the mentions-with-indices extraction against each provided case.
 *
 * @dataProvider extractMentionedUsernamesWithIndicesProvider
 */
public function testExtractMentionedUsernamesWithIndices($description, $text, $expected)
{
    $extractor = Twitter_Extractor::create($text);
    $actual = $extractor->extractMentionedUsernamesWithIndices();

    $this->assertSame($expected, $actual, $description);
}
\$tweet = 'Tweet mentioning @mikenz and referring to his list @mikeNZ/sports and website http://mikenz.geek.nz #awesome'; \$data = Twitter_Extractor::create(\$tweet) ->extract(); print_r(\$data); EOPHP; if ($browser) { echo '<h3>Source</h3>', PHP_EOL; echo '<pre class="source">'; highlight_string($code); echo '</pre>', PHP_EOL; } else { echo 'Source:', PHP_EOL, PHP_EOL; echo $code; echo PHP_EOL, PHP_EOL; } $data = Twitter_Extractor::create($tweet)->extract(); if ($browser) { echo '<h3>Output</h3>', PHP_EOL; echo '<pre class="output">'; print_array($data); echo '</pre>', PHP_EOL; } else { echo 'Output:', PHP_EOL, PHP_EOL; print_array($data); echo PHP_EOL, PHP_EOL; } if ($browser) { echo '<h2>'; } echo 'Autolink Examples'; if ($browser) {
/**
 * Converts tweet text to HTML: applies the optional timeline filter, turns
 * line breaks into <br>, and hyperlinks URLs, usernames/lists and hashtags.
 *
 * @param string $input Raw tweet text.
 * @param object|false $entities Entities object; its urls are used when present.
 * @param mixed $id Status id; when truthy the timeline filter may apply.
 * @param string|null $source Client source, appended to the text for filtering.
 * @return string The HTML for the tweet, or a "Tweet Filtered" link.
 */
function twitter_parse_tags($input, $entities = false, $id = false, $source = NULL) {
    // Filter. The query parameter may be missing, so default it to ''.
    $current_query = isset($_GET["q"]) ? $_GET["q"] : '';
    if ($id && substr($current_query, 0, 6) !== "status" && setting_fetch('filtero', 'no') == 'yes'
        && twitter_timeline_filter($input . ' ' . $source)) {
        return "<a href='" . BASE_URL . "status/{$id}' style='text-decoration:none;'><small>["
            . __("Tweet Filtered") . "]</small></a>";
    }

    // Linebreaks. Some clients insert \n for formatting.
    $out = nl2br($input);

    // Use the Entities to replace hyperlink URLs
    if ($entities && !empty($entities->urls)) {
        foreach ($entities->urls as $urls) {
            if ($urls->expanded_url != "") {
                $display_url = $urls->expanded_url;
            } else {
                $display_url = $urls->url;
            }

            $url_detect = parse_url($display_url);
            if (isset($url_detect["scheme"])) {
                $link_html = theme('external_link', $display_url);
                $url = $urls->url;

                // Replace all URLs *UNLESS* they have already been linked (for example to an image)
                $pattern = '#((?<!href\\=(\'|\\"))' . preg_quote($url, '#') . ')#i';

                // A callback inserts the link HTML literally. As a plain
                // preg_replace() replacement, any "$1" or "\1" in the URL
                // would be expanded as a backreference.
                $out = preg_replace_callback($pattern, function () use ($link_html) {
                    return $link_html;
                }, $out);
            }
        }
    } else {
        // If Entities haven't been returned, use Autolink

        // Hyperlink the URLs
        $out = Twitter_Autolink::create($out)->addLinksToURLs();

        // Hyperlink the #
        // NOTE(review): hashtags are linked again after this branch; confirm
        // whether this extra pass is needed.
        $out = Twitter_Autolink::create($out)->setTarget('')->addLinksToHashtags();
    }

    // Hyperlink the @ and lists
    $out = Twitter_Autolink::create($out)->setTarget('')->addLinksToUsernamesAndLists();

    // Hyperlink the #
    $out = Twitter_Autolink::create($out)->setTarget('')->addLinksToHashtags();

    // Return the completed string
    return $out;
}
echo $browser ? '<p>' : " [1;35m"; echo 'Skipping Test...'; echo $browser ? '</p>' : "[0m" . PHP_EOL; echo PHP_EOL; continue; } $function = $functions[$group]; $pass_group = 0; $fail_group = 0; if ($browser) { echo '<ul>', PHP_EOL; } foreach ($tests as $test) { echo $browser ? '<li>' : ' - '; echo $test['description'], ' ... '; $extracted = Twitter_Extractor::create($test['text'])->{$function}(); if ($test['expected'] == $extracted) { $pass_group++; echo $browser ? '<span class="pass">PASS</span>' : "[1;32mPASS[0m"; } else { $fail_group++; echo $browser ? '<span class="fail">FAIL</span>' : "[1;31mFAIL[0m"; if ($browser) { echo '<pre>'; echo 'Original: ' . htmlspecialchars($test['text'], ENT_QUOTES, 'UTF-8', false), PHP_EOL; echo 'Expected: ' . pretty_format($test['expected']), PHP_EOL; echo 'Actual: ' . pretty_format($extracted); echo '</pre>'; } else { echo PHP_EOL, PHP_EOL; echo ' Original: ' . $test['text'], PHP_EOL;
/**
 * Turns the URLs, hashtags and usernames/lists in a text into links.
 *
 * @param string $value Text to linkify.
 * @param array $options Switches merged over the defaults (all true):
 *   username: linkify usernames and lists, e.g. @username
 *   hashtag : linkify hashtags, e.g. #hashtag
 *   url     : linkify URLs, e.g. http://example.com/
 * @return string The result of the linker's autoLinkEntities().
 */
public function linkify($value, $options = array())
{
    $options = am(array('url' => true, 'username' => true, 'hashtag' => true), $options);

    $extractor = Twitter_Extractor::create($value);
    $linker = TwimAutolink::create($value, $options);

    // Gather the entities of every enabled kind.
    $found = array();
    if ($options['url']) {
        $url_entities = $extractor->extractURLWithoutProtocol(false)->extractURLsWithIndices();
        $found = am($found, $url_entities);
    }
    if ($options['hashtag']) {
        $found = am($found, $extractor->extractHashtagsWithIndices());
    }
    if ($options['username']) {
        $found = am($found, $extractor->extractMentionsOrListsWithIndices());
    }

    $found = $extractor->removeOverlappingEntities($found);

    return $linker->autoLinkEntities($value, $found);
}
/**
 * Computes the length of the instance's tweet, counting every extracted URL
 * at the class's shortened-URL length instead of its literal length.
 *
 * @return int The adjusted length of the tweet.
 */
public function getLength()
{
    $length = mb_strlen($this->tweet);

    $url_entities = Twitter_Extractor::create($this->tweet)->extractURLsWithIndices();
    foreach ($url_entities as $entity) {
        list($start, $end) = $entity['indices'];

        // Remove the URL's literal span, then add its shortened length.
        $length += $start - $end;
        if (stripos($entity['url'], 'https://') === 0) {
            $length += self::SHORT_URL_LENGTH_HTTPS;
        } else {
            $length += self::SHORT_URL_LENGTH;
        }
    }

    return $length;
}
/**
 * Extracts the usernames mentioned in a post, each prefixed with "@".
 *
 * @param str $post_text The post text to search.
 * @return array The mentions found, keyed as the extractor returned them.
 */
public static function extractMentions($post_text) {
    // Register the extractor class with the loader when it is not yet defined.
    if (!class_exists('Twitter_Extractor')) {
        Loader::addSpecialClass('Twitter_Extractor',
            'plugins/twitter/extlib/twitter-text-php/lib/Twitter/Extractor.php');
    }

    $extractor = new Twitter_Extractor($post_text);

    $prefixed = array();
    foreach ($extractor->extractMentionedUsernames() as $index => $username) {
        $prefixed[$index] = '@' . $username;
    }

    return $prefixed;
}
/**
 * Extracts every URL from the text parameter of a received event and passes
 * each one to handleUrl(), skipping URLs for which a configured filter
 * returns anything other than false.
 *
 * @param UserEvent $event Received event; its 'text' parameter is scanned.
 * @param EventQueue $queue Queue forwarded to handleUrl().
 * @return void
 */
public function handleIrcReceived(UserEvent $event, EventQueue $queue)
{
    $params = $event->getParams();
    $extractor = new \Twitter_Extractor($params['text']);

    foreach ($extractor->extractURLs() as $found_url) {
        // The filter is only consulted when one is configured.
        if ($this->filter !== null) {
            $verdict = $this->filter->filter(new UrlEvent($found_url, $event));
            if ($verdict !== false) {
                continue;
            }
        }

        $this->handleUrl($found_url, $event, $queue);
    }
}