/**
 * One-time setup for the formatter.
 *
 * Orders the reserved word list with the sortLength comparator (longest word
 * first, per the original author's note) and then assembles the alternation
 * patterns stored in the self::$regex_* properties. Once self::$init has been
 * set, further calls do nothing.
 */
protected static function init()
{
    if (!self::$init) {
        $escape = array('SqlFormatter', 'quote_regex');

        // The sort has to run before $regex_reserved is assembled, because the
        // alternation lists the reserved words in their sorted order.
        usort(self::$reserved, array('SqlFormatter', 'sortLength'));

        // Each pattern is a single group: "(" + escaped entries joined by "|" + ")".
        $boundaryPattern = implode('|', array_map($escape, self::$boundaries));
        self::$regex_boundaries = '(' . $boundaryPattern . ')';

        $reservedPattern = implode('|', array_map($escape, self::$reserved));
        self::$regex_reserved = '(' . $reservedPattern . ')';

        // For the two multi-word keyword lists every literal space in the
        // finished pattern is swapped for \s+.
        $toplevelPattern = implode('|', array_map($escape, self::$reserved_toplevel));
        self::$regex_reserved_toplevel = str_replace(' ', '\\s+', '(' . $toplevelPattern . ')');

        $newlinePattern = implode('|', array_map($escape, self::$reserved_newline));
        self::$regex_reserved_newline = str_replace(' ', '\\s+', '(' . $newlinePattern . ')');

        $functionPattern = implode('|', array_map($escape, self::$functions));
        self::$regex_function = '(' . $functionPattern . ')';

        self::$init = true;
    }
}
/**
 * One-time setup for the formatter.
 *
 * Reorders the reserved word list by descending word length and then
 * assembles the alternation patterns stored in the self::$regex_* properties.
 * Once self::$init has been set, further calls do nothing.
 */
protected static function init()
{
    if (!self::$init) {
        $escape = array(__CLASS__, 'quote_regex');

        // Longest reserved words first. A word => length map ordered with
        // arsort() is used in place of a usort() comparator (the original
        // author reports this as roughly 3x faster; not verified here).
        $wordLengths = array_map('strlen', self::$reserved);
        $lengthByWord = array_combine(self::$reserved, $wordLengths);
        arsort($lengthByWord);
        self::$reserved = array_keys($lengthByWord);

        // Each pattern is a single group: "(" + escaped entries joined by "|" + ")".
        $boundaryPattern = implode('|', array_map($escape, self::$boundaries));
        self::$regex_boundaries = '(' . $boundaryPattern . ')';

        // Built after the reordering above so the alternation follows the new order.
        $reservedPattern = implode('|', array_map($escape, self::$reserved));
        self::$regex_reserved = '(' . $reservedPattern . ')';

        // For the two multi-word keyword lists every literal space in the
        // finished pattern is swapped for \s+.
        $toplevelPattern = implode('|', array_map($escape, self::$reserved_toplevel));
        self::$regex_reserved_toplevel = str_replace(' ', '\\s+', '(' . $toplevelPattern . ')');

        $newlinePattern = implode('|', array_map($escape, self::$reserved_newline));
        self::$regex_reserved_newline = str_replace(' ', '\\s+', '(' . $newlinePattern . ')');

        $functionPattern = implode('|', array_map($escape, self::$functions));
        self::$regex_function = '(' . $functionPattern . ')';

        self::$init = true;
    }
}
/**
 * Return the next token and token type in a SQL string.
 * Quoted strings, comments, reserved words, whitespace, and punctuation are all their own tokens.
 *
 * The 'type' value is one of: 'comment', 'block comment', 'quote', 'backtick quote',
 * 'word', '(', ')', '.', ',', 'boundary', 'whitespace', 'reserved', 'special reserved'
 * or 'number'.
 *
 * @param string     $string   The SQL string; the token is read from its start.
 *                             The first character is inspected unconditionally, so
 *                             callers are expected to pass a non-empty string.
 * @param array|null $previous The result of the previous getNextToken() call. Only its
 *                             'token' entry is consulted, to suppress reserved-word
 *                             matching directly after a '.'.
 *
 * @return array An associative array containing a 'token' and 'type' key.
 */
protected static function getNextToken($string, $previous = null)
{
    // If the next token is a comment
    if ($string[0] === '#' || substr($string, 0, 2) === '--' || substr($string, 0, 2) === '/*') {
        if ($string[0] === '-' || $string[0] === '#') {
            // Comment until end of line (the newline itself is not part of the token)
            $last = strpos($string, "\n");
            $type = 'comment';
        } else {
            // Comment until closing comment tag. The strpos() result must be checked
            // before adding the tag length: false + 2 evaluates to 2, which would
            // truncate an unterminated block comment to just its opening "/*".
            $close = strpos($string, "*/", 2);
            $last = ($close === false) ? false : $close + 2;
            $type = 'block comment';
        }

        // No terminator found: the comment runs to the end of the input
        if ($last === false) {
            $last = strlen($string);
        }

        return array('token' => substr($string, 0, $last), 'type' => $type);
    }

    // If the next item is a string
    if (in_array($string[0], self::$quotes)) {
        $quote = $string[0];
        for ($i = 1, $length = strlen($string); $i < $length; $i++) {
            $next_char = null;
            if (isset($string[$i + 1])) {
                $next_char = $string[$i + 1];
            }

            // Escaped (either backslash or backtick escaped): skip the following character
            if ($quote !== '`' && $string[$i] === '\\' || $quote === '`' && $string[$i] === '`' && $next_char === '`') {
                $i++;
            } elseif ($string[$i] === $quote) {
                break;
            }
        }

        if ($quote === '`') {
            $type = 'backtick quote';
        } else {
            $type = 'quote';
        }

        // $i is the index of the closing quote, or >= the string length when the
        // quote is unterminated, in which case substr() returns the whole rest.
        return array('token' => substr($string, 0, $i + 1), 'type' => $type);
    }

    // Separators
    if (in_array($string[0], self::$boundaries)) {
        // If it is a simple string or empty between the parentheses, just count as a word
        // this makes it so we don't split things like NOW() or COUNT(*) into separate lines
        if ($string[0] === '(') {
            // "()"
            if (isset($string[1]) && $string[1] === ')') {
                return array('token' => '()', 'type' => 'word');
            }

            // "(word/whitespace/boundary)"
            // Only look ahead when something follows the parenthesis; recursing on an
            // empty remainder would index offset 0 of an empty string.
            if (isset($string[1])) {
                $next_token = self::getNextToken(substr($string, 1));
                $length = strlen($next_token['token']);
                if (isset($string[$length + 1]) && $string[$length + 1] === ')') {
                    if ($next_token['type'] === 'word' || $next_token['type'] === 'whitespace' || $next_token['type'] === 'boundary') {
                        return array('token' => '(' . $next_token['token'] . ')', 'type' => 'word');
                    }
                }
            }
        }

        // Return single parentheses as their own token; the type is the character itself
        if ($string[0] === '(' || $string[0] === ')') {
            return array('token' => $string[0], 'type' => $string[0]);
        }

        // If there are 1 or more boundary characters together, return as a single word.
        // Skipped when this is the last character, for the same reason as above.
        if (isset($string[1])) {
            $next_token = self::getNextToken(substr($string, 1));
            if ($next_token['type'] === 'boundary') {
                return array('token' => $string[0] . $next_token['token'], 'type' => 'boundary');
            }
        }

        // Otherwise, just return the single boundary character
        if ($string[0] === '.' || $string[0] === ',') {
            $type = $string[0];
        } else {
            $type = 'boundary';
        }

        return array('token' => $string[0], 'type' => $type);
    }

    // Whitespace: consume the whole run as one token
    if (in_array($string[0], self::$whitespace)) {
        for ($i = 1, $length = strlen($string); $i < $length; $i++) {
            if (!in_array($string[$i], self::$whitespace)) {
                break;
            }
        }

        return array('token' => substr($string, 0, $i), 'type' => 'whitespace');
    }

    // Lazy one-time setup, needed only once a word-like token is reached
    if (!self::$init) {
        // Sort reserved word list from longest word to shortest
        usort(self::$reserved, array('SqlFormatter', 'sortLength'));

        // Combine boundary characters and whitespace
        self::$all_boundaries = array_merge(self::$boundaries, self::$whitespace);

        self::$init = true;
    }

    // A reserved word cannot be preceded by a '.'
    // this makes it so in "mytable.from", "from" is not considered a reserved word
    if (!$previous || !isset($previous['token']) || $previous['token'] !== '.') {
        // Reserved word: compared against an upper-cased copy of the input, while the
        // returned token keeps the input's original casing
        $test = strtoupper($string);
        foreach (self::$reserved as $word) {
            $length = strlen($word);
            if (substr($test, 0, $length) === $word) {
                // The match must end at a boundary or at the end of the input;
                // otherwise it is only the prefix of a longer word
                if (isset($string[$length]) && !in_array($string[$length], self::$all_boundaries)) {
                    continue;
                }

                if (in_array($word, self::$special_reserved)) {
                    $type = 'special reserved';
                } else {
                    $type = 'reserved';
                }

                return array('token' => substr($string, 0, $length), 'type' => $type);
            }
        }
    }

    // Look for first word separator
    for ($i = 1, $length = strlen($string); $i < $length; $i++) {
        if (in_array($string[$i], self::$all_boundaries)) {
            break;
        }
    }

    $ret = substr($string, 0, $i);
    if (is_numeric($ret)) {
        $type = 'number';
    } else {
        $type = 'word';
    }

    return array('token' => $ret, 'type' => $type);
}