示例#1
0
 /**
  * One-time setup: sorts the reserved word list and builds the regular-expression
  * fragments stored in the self::$regex_* static properties.
  *
  * Returns immediately when self::$init is already set, so repeated calls are cheap.
  * Each fragment is a parenthesised alternation "(a|b|c)" of the entries of the
  * corresponding word list after each entry has been passed through quote_regex().
  *
  * @return void
  */
 protected static function init()
 {
     if (self::$init) {
         return;
     }

     // Refer to the enclosing class via __CLASS__ instead of a hard-coded name so the
     // callbacks keep resolving if the class is renamed or placed in a namespace.
     $class = __CLASS__;

     // Sort reserved word list from longest word to shortest
     // (ordering is delegated to sortLength(), defined elsewhere in the class).
     usort(self::$reserved, array($class, 'sortLength'));

     // quote_regex() is defined elsewhere; presumably it escapes regex metacharacters.
     $quote = array($class, 'quote_regex');

     // Set up regular expressions
     self::$regex_boundaries = '(' . implode('|', array_map($quote, self::$boundaries)) . ')';
     self::$regex_reserved = '(' . implode('|', array_map($quote, self::$reserved)) . ')';
     // For the two lists below, every literal space in the pattern is replaced with \s+
     // so that a multi-word entry matches any run of whitespace between its words.
     self::$regex_reserved_toplevel = str_replace(' ', '\\s+', '(' . implode('|', array_map($quote, self::$reserved_toplevel)) . ')');
     self::$regex_reserved_newline = str_replace(' ', '\\s+', '(' . implode('|', array_map($quote, self::$reserved_newline)) . ')');
     self::$regex_function = '(' . implode('|', array_map($quote, self::$functions)) . ')';

     self::$init = true;
 }
 /**
  * Performs the one-off initialisation: orders the reserved words by length and
  * compiles the alternation patterns kept in the self::$regex_* static properties.
  *
  * A second call is a no-op because self::$init is checked first.
  *
  * @return void
  */
 protected static function init()
 {
     if (self::$init) {
         return;
     }

     // Order reserved words longest-first: map each word to its length, sort the map by
     // value in descending order, then keep only the keys. The original author notes
     // this is about 3x faster than usort.
     $lengthByWord = array_combine(self::$reserved, array_map('strlen', self::$reserved));
     arsort($lengthByWord);
     self::$reserved = array_keys($lengthByWord);

     // Callback applied to every word before it is placed in a pattern
     // (quote_regex() is defined elsewhere in the class).
     $escape = array(__CLASS__, 'quote_regex');

     // Each pattern has the form "(word1|word2|...)".
     $boundaryAlternatives = array_map($escape, self::$boundaries);
     self::$regex_boundaries = '(' . implode('|', $boundaryAlternatives) . ')';

     $reservedAlternatives = array_map($escape, self::$reserved);
     self::$regex_reserved = '(' . implode('|', $reservedAlternatives) . ')';

     // In the next two patterns each literal space becomes \s+.
     $toplevelAlternatives = array_map($escape, self::$reserved_toplevel);
     $toplevelPattern = '(' . implode('|', $toplevelAlternatives) . ')';
     self::$regex_reserved_toplevel = str_replace(' ', '\\s+', $toplevelPattern);

     $newlineAlternatives = array_map($escape, self::$reserved_newline);
     $newlinePattern = '(' . implode('|', $newlineAlternatives) . ')';
     self::$regex_reserved_newline = str_replace(' ', '\\s+', $newlinePattern);

     $functionAlternatives = array_map($escape, self::$functions);
     self::$regex_function = '(' . implode('|', $functionAlternatives) . ')';

     self::$init = true;
 }
示例#3
0
 /**
  * Return the next token and token type in a SQL string.
  * Quoted strings, comments, reserved words, whitespace, and punctuation are all their own tokens.
  *
  * The returned token is always a leading substring of $string. Possible types are:
  * 'comment', 'block comment', 'quote', 'backtick quote', 'word', 'number',
  * 'whitespace', 'boundary', 'reserved', 'special reserved', and the literal
  * characters '(', ')', '.' and ',' for those single-character tokens.
  *
  * An empty $string yields an empty token of type 'word'; this is what the
  * recursive look-ahead calls receive for a trailing boundary character.
  *
  * @param String $string The SQL string
  * @param array $previous The result of the previous getNextToken() call
  *
  * @return Array An associative array containing a 'token' and 'type' key.
  */
 protected static function getNextToken($string, $previous = null)
 {
     // substr() returns '' (or false before PHP 8) once the input is exhausted, which is
     // what the look-ahead calls below pass in for a trailing boundary character.
     // Normalise and return early instead of indexing into an empty string.
     $string = (string) $string;
     if ($string === '') {
         return array('token' => '', 'type' => 'word');
     }

     // If the next token is a comment
     $first_two = substr($string, 0, 2);
     if ($string[0] === '#' || $first_two === '--' || $first_two === '/*') {
         if ($string[0] === '-' || $string[0] === '#') {
             // Comment until end of line
             $last = strpos($string, "\n");
             $type = 'comment';
         } else {
             // Comment until closing comment tag. The "not found" check must happen
             // before the tag length is added: false + 2 evaluates to 2, which would
             // cut an unterminated block comment down to just "/*".
             $close = strpos($string, "*/", 2);
             $last = $close === false ? false : $close + 2;
             $type = 'block comment';
         }
         // Unterminated comment: it runs to the end of the input
         if ($last === false) {
             $last = strlen($string);
         }
         return array('token' => substr($string, 0, $last), 'type' => $type);
     }
     // If the next item is a string
     if (in_array($string[0], self::$quotes)) {
         $quote = $string[0];
         for ($i = 1, $length = strlen($string); $i < $length; $i++) {
             $next_char = isset($string[$i + 1]) ? $string[$i + 1] : null;
             // Escaped (either backslash or backtick escaped): skip the following character
             if (($quote !== '`' && $string[$i] === '\\') || ($quote === '`' && $string[$i] === '`' && $next_char === '`')) {
                 $i++;
             } elseif ($string[$i] === $quote) {
                 break;
             }
         }
         $type = $quote === '`' ? 'backtick quote' : 'quote';
         // If no closing quote was found, $i is past the end and the whole string is returned
         return array('token' => substr($string, 0, $i + 1), 'type' => $type);
     }
     // Separators
     if (in_array($string[0], self::$boundaries)) {
         // If it is a simple string or empty between the parentheses, just count as a word
         // this makes it so we don't split things like NOW() or COUNT(*) into separate lines
         if ($string[0] === '(') {
             // "()"
             if (isset($string[1]) && $string[1] === ')') {
                 return array('token' => '()', 'type' => 'word');
             }
             // "(word/whitespace/boundary)"
             $next_token = self::getNextToken(substr($string, 1));
             $length = strlen($next_token['token']);
             if (isset($string[$length + 1]) && $string[$length + 1] === ')') {
                 if ($next_token['type'] === 'word' || $next_token['type'] === 'whitespace' || $next_token['type'] === 'boundary') {
                     return array('token' => '(' . $next_token['token'] . ')', 'type' => 'word');
                 }
             }
         }
         // Return single parentheses as their own token
         if ($string[0] === '(' || $string[0] === ')') {
             return array('token' => $string[0], 'type' => $string[0]);
         }
         // If there are 1 or more boundary characters together, return as a single word
         $next_token = self::getNextToken(substr($string, 1));
         if ($next_token['type'] === 'boundary') {
             return array('token' => $string[0] . $next_token['token'], 'type' => 'boundary');
         }
         // Otherwise, just return the single boundary character;
         // '.' and ',' use the character itself as their type
         if ($string[0] === '.' || $string[0] === ',') {
             $type = $string[0];
         } else {
             $type = 'boundary';
         }
         return array('token' => $string[0], 'type' => $type);
     }
     // Whitespace: consume the whole run
     if (in_array($string[0], self::$whitespace)) {
         for ($i = 1, $length = strlen($string); $i < $length; $i++) {
             if (!in_array($string[$i], self::$whitespace)) {
                 break;
             }
         }
         return array('token' => substr($string, 0, $i), 'type' => 'whitespace');
     }
     // Lazy one-time setup, needed only by the reserved-word and word scans below
     if (!self::$init) {
         // Sort reserved word list from longest word to shortest.
         // __CLASS__ keeps the callback valid if the class is renamed or namespaced.
         usort(self::$reserved, array(__CLASS__, 'sortLength'));
         // Combine boundary characters and whitespace
         self::$all_boundaries = array_merge(self::$boundaries, self::$whitespace);
         self::$init = true;
     }
     // A reserved word cannot be preceded by a '.'
     // this makes it so in "mytable.from", "from" is not considered a reserved word
     if (!$previous || !isset($previous['token']) || $previous['token'] !== '.') {
         // Reserved word (compared against the upper-cased input)
         $test = strtoupper($string);
         foreach (self::$reserved as $word) {
             $length = strlen($word);
             if (substr($test, 0, $length) === $word) {
                 // Only a match if the word ends at a boundary/whitespace or at end of input
                 if (isset($string[$length]) && !in_array($string[$length], self::$all_boundaries)) {
                     continue;
                 }
                 if (in_array($word, self::$special_reserved)) {
                     $type = 'special reserved';
                 } else {
                     $type = 'reserved';
                 }
                 // Return the token with the input's original casing
                 return array('token' => substr($string, 0, $length), 'type' => $type);
             }
         }
     }
     // Look for first word separator
     for ($i = 1, $length = strlen($string); $i < $length; $i++) {
         if (in_array($string[$i], self::$all_boundaries)) {
             break;
         }
     }
     $ret = substr($string, 0, $i);
     $type = is_numeric($ret) ? 'number' : 'word';
     return array('token' => $ret, 'type' => $type);
 }