Used by subclasses that need to parse tweets.
Originally written by {@link http://github.com/mikenz Mike Cochrane}, this
is based on code by {@link http://github.com/mzsanford Matt Sanford} and
heavily modified by {@link http://github.com/ngnpope Nick Pope}.
/**
 * Sets up a validator for the given tweet.
 *
 * The tweet is handed to the parent constructor, the optional configuration
 * is applied, and an extractor instance is created for later use.
 *
 * @param  string  $tweet   The tweet to validate (optional).
 * @param  mixed   $config  Configuration forwarded to setConfiguration();
 *                          skipped when empty (null, empty array, ...).
 */
public function __construct($tweet = null, $config = null) {
  parent::__construct($tweet);

  // Only touch the configuration when the caller actually supplied one.
  if (empty($config) === false) {
    $this->setConfiguration($config);
  }

  $this->extractor = Twitter_Extractor::create();
}
/**
 * Prepares a tweet for hit highlighting, escaping it by default.
 *
 * The tweet is treated as user input: unless escaping is switched off it is
 * HTML-encoded before being passed to the parent constructor.  Both quote
 * styles are encoded (ENT_QUOTES) and existing entities are left alone
 * (double encoding is disabled).
 *
 * @see  htmlspecialchars()
 * @see  htmlentities()
 *
 * @param  string  $tweet        The tweet to be hit highlighted.
 * @param  bool    $escape       Whether to escape the tweet (default: true).
 * @param  bool    $full_encode  Use htmlentities() rather than
 *                               htmlspecialchars() (default: false).
 */
public function __construct($tweet, $escape = true, $full_encode = false) {
  // Caller opted out of escaping: pass the text through untouched.
  if (!$escape) {
    parent::__construct($tweet);
    return;
  }

  $encoded = $full_encode
    ? htmlentities($tweet, ENT_QUOTES, 'UTF-8', false)
    : htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false);

  parent::__construct($encoded);
}
/**
 * Builds the shared regular expressions on first use and stores the tweet.
 *
 * The two static patterns are assembled only while they are still null, so
 * the string concatenation happens at most once regardless of how many
 * instances are created.
 *
 * @param  string  $tweet  The tweet to parse.
 */
protected function __construct($tweet) {
  if (self::$REGEX_VALID_URL === null) {
    // Compiled with /iux: case-insensitive, UTF-8 aware, extended syntax.
    self::$REGEX_VALID_URL = '/(?:'
      . '(' . self::REGEX_URL_CHARS_BEFORE . ')'      // text preceding the URL
      . '('                                           // the URL itself
      . '((?:https?:\\/\\/|www\\.)?)'                 // optional scheme or "www."
      . '(' . self::REGEX_URL_DOMAIN . ')'            // domain
      . '(\\/' . self::REGEX_URL_CHARS_PATH . '*'
      . self::REGEX_URL_CHARS_PATH_END . '?)?'        // optional path
      . '(\\?' . self::REGEX_URL_CHARS_QUERY . '*'
      . self::REGEX_URL_CHARS_QUERY_END . ')?'        // optional query string
      . ')'
      . ')/iux';
  }

  if (self::$REGEX_REPLY_USERNAME === null) {
    // The pattern has to open with the same "/" delimiter that closes it and
    // with an opening parenthesis matching the ")*" that follows the
    // whitespace class; it is anchored so that only a mention at the very
    // start of the tweet (after optional whitespace) counts as a reply.
    // NOTE(review): the doubled "@" in the character class is redundant as
    // written - presumably one of them was a full-width at sign lost in
    // transcoding; confirm against the upstream pattern.
    self::$REGEX_REPLY_USERNAME = '/^(' . self::REGEX_WHITESPACE . ')*[@@]([a-zA-Z0-9_]{1,20})/';
  }

  $this->tweet = $tweet;
}
/**
 * Creates an extractor for the given tweet.
 *
 * The extractor pulls out the various parts of a tweet (URLs, usernames,
 * hashtags, ...).  Construction itself only delegates to the parent
 * constructor with the tweet text.
 *
 * @param  string  $tweet  The tweet to extract from.
 */
public function __construct($tweet) {
  parent::__construct($tweet);
}
#/iox # Unencoded internationalized domains - this doesn't check for invalid UTF-8 sequences $tmp['validate_url_unicode_subdomain_segment'] = '(?:(?:[a-z0-9]|[^\\x00-\\x7f])(?:(?:[a-z0-9_\\-]|[^\\x00-\\x7f])*(?:[a-z0-9]|[^\\x00-\\x7f]))?)'; #/ix $tmp['validate_url_unicode_domain_segment'] = '(?:(?:[a-z0-9]|[^\\x00-\\x7f])(?:(?:[a-z0-9\\-]|[^\\x00-\\x7f])*(?:[a-z0-9]|[^\\x00-\\x7f]))?)'; #/ix $tmp['validate_url_unicode_domain_tld'] = '(?:(?:[a-z]|[^\\x00-\\x7f])(?:(?:[a-z0-9\\-]|[^\\x00-\\x7f])*(?:[a-z0-9]|[^\\x00-\\x7f]))?)'; #/ix $tmp['validate_url_unicode_domain'] = '(?:(?:' . $tmp['validate_url_unicode_subdomain_segment'] . '\\.)*(?:' . $tmp['validate_url_unicode_domain_segment'] . '\\.)' . $tmp['validate_url_unicode_domain_tld'] . ')'; #/iox $tmp['validate_url_unicode_host'] = '(?:' . $tmp['validate_url_ip'] . '|' . $tmp['validate_url_unicode_domain'] . ')'; #/iox $tmp['validate_url_port'] = '[0-9]{1,5}'; $re['validate_url_unicode_authority'] = '/' . '(?:(' . $tmp['validate_url_userinfo'] . ')@)?' . '(' . $tmp['validate_url_unicode_host'] . ')' . '(?::(' . $tmp['validate_url_port'] . '))?' . '/iux'; $re['validate_url_authority'] = '/' . '(?:(' . $tmp['validate_url_userinfo'] . ')@)?' . '(' . $tmp['validate_url_host'] . ')' . '(?::(' . $tmp['validate_url_port'] . '))?' . '/ix'; $re['validate_url_scheme'] = '/(?:[a-z][a-z0-9+\\-.]*)/i'; $re['validate_url_path'] = '/(\\/' . $tmp['validate_url_pchar'] . '*)*/i'; $re['validate_url_query'] = '/(' . $tmp['validate_url_pchar'] . '|\\/|\\?)*/i'; $re['validate_url_fragment'] = '/(' . $tmp['validate_url_pchar'] . '|\\/|\\?)*/i'; # Modified version of RFC 3986 Appendix B $re['validate_url_unencoded'] = '/^' . '(?:' . '([^:\\/?#]+):\\/\\/' . ')?' . '([^\\/?#]*)' . '([^?#]*)' . '(?:' . '\\?([^#]*)' . ')?' . '(?:' . '\\#(.*)' . ')?$/iux'; $re['invalid_characters'] = '/[' . $tmp['invalid_characters'] . ']/u'; $re['rtl_chars'] = '/[' . $tmp['rtl_chars'] . 
']/iu'; # Flag that initialization is complete: $initialized = true; } } # Cause regular expressions to be initialized as soon as this file is loaded: Twitter_Regex::__static(); ################################################################################ # vim:et:ft=php:nowrap:sts=2:sw=2:ts=2
/**
 * Prepares a tweet for conversion into linked HTML, escaping it by default.
 *
 * Because the result is meant to be shown to the user, the tweet is treated
 * as untrusted input: a non-empty tweet is HTML-encoded unless escaping is
 * switched off.  Both quote styles are encoded (ENT_QUOTES) and existing
 * entities are not encoded a second time.  An extractor instance is created
 * once the parent constructor has run.
 *
 * @see  htmlspecialchars()
 * @see  htmlentities()
 *
 * @param  string  $tweet        The tweet to be converted (optional).
 * @param  bool    $escape       Whether to escape the tweet (default: true).
 * @param  bool    $full_encode  Use htmlentities() rather than
 *                               htmlspecialchars() (default: false).
 */
public function __construct($tweet = null, $escape = true, $full_encode = false) {
  // Start from the raw text; it is replaced only when escaping applies.
  $text = $tweet;

  if ($escape && !empty($tweet)) {
    $text = $full_encode
      ? htmlentities($tweet, ENT_QUOTES, 'UTF-8', false)
      : htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false);
  }

  parent::__construct($text);
  $this->extractor = Twitter_Extractor::create();
}