Used by subclasses that need to parse tweets. Originally written by {@link http://github.com/mikenz Mike Cochrane}, this is based on code by {@link http://github.com/mzsanford Matt Sanford} and heavily modified by {@link http://github.com/ngnpope Nick Pope}.
Author: Mike Cochrane (mikec@mikenz.geek.nz)
Author: Nick Pope (nick@nickpope.me.uk)
 /**
  * Stores the tweet, applies an optional configuration and prepares an extractor.
  *
  * The tweet is handed to the parent constructor unchanged. A non-empty
  * configuration is forwarded to setConfiguration(); an empty one (null, '',
  * array(), ...) is ignored.
  *
  * @param  string  $tweet   The tweet to validate (may be omitted).
  * @param  mixed   $config  Optional configuration, passed to setConfiguration()
  *                          only when it is non-empty.
  */
 public function __construct($tweet = null, $config = null)
 {
     parent::__construct($tweet);

     // For a declared parameter a plain truthiness test is the same check as !empty().
     if ($config) {
         $this->setConfiguration($config);
     }

     $this->extractor = Twitter_Extractor::create();
 }
Beispiel #2
0
 /**
  * Accepts the tweet that will be hit highlighted, escaping it by default.
  *
  * With escaping enabled the tweet is passed through htmlspecialchars(), or
  * through htmlentities() when full encoding is requested. Both calls use
  * ENT_QUOTES, UTF-8 and double_encode = false, so entities already present
  * in the tweet are not encoded a second time.
  *
  * @see  htmlspecialchars()
  * @see  htmlentities()
  *
  * @param  string  $tweet        The tweet to be hit highlighted.
  * @param  bool    $escape       Whether to escape the tweet (default: true).
  * @param  bool    $full_encode  Whether to encode all special characters
  *                               (only consulted when $escape is true).
  */
 public function __construct($tweet, $escape = true, $full_encode = false)
 {
     // Caller opted out of escaping: hand the raw tweet to the parent.
     if (!$escape) {
         parent::__construct($tweet);
         return;
     }

     $encoded = $full_encode
         ? htmlentities($tweet, ENT_QUOTES, 'UTF-8', false)
         : htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false);

     parent::__construct($encoded);
 }
Beispiel #3
0
 /**
  * Builds the shared regular expressions on first use and stores the tweet.
  *
  * The two static patterns are assembled from class constants only while
  * they are still null, so the work happens once per request rather than
  * once per instance.
  *
  * @param  string  $tweet  The tweet to parse.
  */
 protected function __construct($tweet)
 {
     if (self::$REGEX_VALID_URL === null) {
         // The pattern uses the "x" modifier; it is split across lines here
         // purely for readability - the resulting string is unchanged.
         self::$REGEX_VALID_URL = '/(?:'
             . '(' . self::REGEX_URL_CHARS_BEFORE . ')'   // text preceding the URL
             . '('                                         // the URL itself
             . '((?:https?:\\/\\/|www\\.)?)'               // optional protocol or "www."
             . '(' . self::REGEX_URL_DOMAIN . ')'          // domain
             . '(\\/' . self::REGEX_URL_CHARS_PATH . '*' . self::REGEX_URL_CHARS_PATH_END . '?)?'   // optional path
             . '(\\?' . self::REGEX_URL_CHARS_QUERY . '*' . self::REGEX_URL_CHARS_QUERY_END . ')?'  // optional query string
             . ')'
             . ')/iux';
     }

     if (self::$REGEX_REPLY_USERNAME === null) {
         // The pattern must open with the "/" delimiter that closes it, the
         // start-of-string anchor and the group that ")*" terminates. The
         // previous "******" prefix made PCRE treat "*" as the delimiter and
         // reject the pattern. The whitespace group stays capturing so the
         // username remains in group 2.
         // NOTE(review): "[@@]" lists "@" twice; the second one was presumably
         // a fullwidth at sign lost in transcoding - confirm against upstream.
         self::$REGEX_REPLY_USERNAME = '/^(' . self::REGEX_WHITESPACE . ')*[@@]([a-zA-Z0-9_]{1,20})/';
     }

     $this->tweet = $tweet;
 }
Beispiel #4
0
 /**
  * Reads in a tweet to be parsed and extracts elements from it.
  *
  * Extracts various parts of a tweet including URLs, usernames, hashtags...
  *
  * The constructor itself performs no extraction: it only forwards the tweet
  * to the parent constructor.
  *
  * @param  string  $tweet  The tweet to extract.
  */
 public function __construct($tweet)
 {
     // Delegate unchanged; no extractor-specific state is set up here.
     parent::__construct($tweet);
 }
        #/iox
        # Unencoded internationalized domains - this doesn't check for invalid UTF-8 sequences
        $tmp['validate_url_unicode_subdomain_segment'] = '(?:(?:[a-z0-9]|[^\\x00-\\x7f])(?:(?:[a-z0-9_\\-]|[^\\x00-\\x7f])*(?:[a-z0-9]|[^\\x00-\\x7f]))?)';
        #/ix
        $tmp['validate_url_unicode_domain_segment'] = '(?:(?:[a-z0-9]|[^\\x00-\\x7f])(?:(?:[a-z0-9\\-]|[^\\x00-\\x7f])*(?:[a-z0-9]|[^\\x00-\\x7f]))?)';
        #/ix
        $tmp['validate_url_unicode_domain_tld'] = '(?:(?:[a-z]|[^\\x00-\\x7f])(?:(?:[a-z0-9\\-]|[^\\x00-\\x7f])*(?:[a-z0-9]|[^\\x00-\\x7f]))?)';
        #/ix
        $tmp['validate_url_unicode_domain'] = '(?:(?:' . $tmp['validate_url_unicode_subdomain_segment'] . '\\.)*(?:' . $tmp['validate_url_unicode_domain_segment'] . '\\.)' . $tmp['validate_url_unicode_domain_tld'] . ')';
        #/iox
        $tmp['validate_url_unicode_host'] = '(?:' . $tmp['validate_url_ip'] . '|' . $tmp['validate_url_unicode_domain'] . ')';
        #/iox
        $tmp['validate_url_port'] = '[0-9]{1,5}';
        $re['validate_url_unicode_authority'] = '/' . '(?:(' . $tmp['validate_url_userinfo'] . ')@)?' . '(' . $tmp['validate_url_unicode_host'] . ')' . '(?::(' . $tmp['validate_url_port'] . '))?' . '/iux';
        $re['validate_url_authority'] = '/' . '(?:(' . $tmp['validate_url_userinfo'] . ')@)?' . '(' . $tmp['validate_url_host'] . ')' . '(?::(' . $tmp['validate_url_port'] . '))?' . '/ix';
        $re['validate_url_scheme'] = '/(?:[a-z][a-z0-9+\\-.]*)/i';
        $re['validate_url_path'] = '/(\\/' . $tmp['validate_url_pchar'] . '*)*/i';
        $re['validate_url_query'] = '/(' . $tmp['validate_url_pchar'] . '|\\/|\\?)*/i';
        $re['validate_url_fragment'] = '/(' . $tmp['validate_url_pchar'] . '|\\/|\\?)*/i';
        # Modified version of RFC 3986 Appendix B
        $re['validate_url_unencoded'] = '/^' . '(?:' . '([^:\\/?#]+):\\/\\/' . ')?' . '([^\\/?#]*)' . '([^?#]*)' . '(?:' . '\\?([^#]*)' . ')?' . '(?:' . '\\#(.*)' . ')?$/iux';
        $re['invalid_characters'] = '/[' . $tmp['invalid_characters'] . ']/u';
        $re['rtl_chars'] = '/[' . $tmp['rtl_chars'] . ']/iu';
        # Flag that initialization is complete:
        $initialized = true;
    }
}
# Run the static initializer as soon as this file is loaded, so the regular
# expressions it builds exist without any explicit setup call:
Twitter_Regex::__static();
################################################################################
# vim:et:ft=php:nowrap:sts=2:sw=2:ts=2
 /**
  * Accepts the tweet that will be converted to contain links.
  *
  * Because the result is meant to be output to the user, a non-empty tweet
  * is escaped by default: htmlspecialchars() normally, htmlentities() when
  * full encoding is requested. Both use ENT_QUOTES, UTF-8 and
  * double_encode = false, so existing entities are left as they are. An
  * empty tweet is passed to the parent untouched.
  *
  * @see  htmlspecialchars()
  * @see  htmlentities()
  *
  * @param  string  $tweet        The tweet to be converted (may be omitted).
  * @param  bool    $escape       Whether to escape the tweet (default: true).
  * @param  bool    $full_encode  Whether to encode all special characters
  *                               (only consulted when escaping takes place).
  */
 public function __construct($tweet = null, $escape = true, $full_encode = false)
 {
     $text = $tweet;

     // Only escape when asked to and when there is something to escape.
     if ($escape && !empty($tweet)) {
         $text = $full_encode
             ? htmlentities($tweet, ENT_QUOTES, 'UTF-8', false)
             : htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false);
     }

     parent::__construct($text);
     $this->extractor = Twitter_Extractor::create();
 }