/**
 * Return the next token and token type in a SQL string.
 * Quoted strings, comments, reserved words, whitespace, and punctuation are all their own tokens.
 *
 * The 'type' value is one of: 'comment', 'block comment', 'quote', 'backtick quote',
 * 'word', '(', ')', '.', ',', 'boundary', 'whitespace', 'reserved', 'special reserved'
 * or 'number'.
 *
 * An unterminated block comment or quoted string extends to the end of the input.
 * An empty input yields an empty 'word' token.
 *
 * @param String $string   The SQL string
 * @param array  $previous The result of the previous getNextToken() call
 *
 * @return Array An associative array containing a 'token' and 'type' key.
 */
protected static function getNextToken($string, $previous = null)
{
    // The recursive look-ahead calls below pass substr($string, 1), which is empty
    // (or false on older PHP versions) when a boundary character ends the input.
    // Return the same empty 'word' token the fall-through path would produce, without
    // reading an uninitialized string offset.
    if ((string) $string === '') {
        return array('token' => '', 'type' => 'word');
    }

    $first = $string[0];
    $first_two = substr($string, 0, 2);

    // If the next token is a comment
    if ($first_two === '--' || $first === '#' || $first_two === '/*') {
        if ($first === '-' || $first === '#') {
            // Comment until end of line
            $last = strpos($string, "\n");
            $type = 'comment';
        } else {
            // Block comment: include the closing "*/". Keep the "not found" result as
            // false instead of adding 2 to it, so the fallback below still applies.
            $close = strpos($string, '*/', 2);
            $last = ($close === false) ? false : $close + 2;
            $type = 'block comment';
        }

        // No terminator found: the comment runs to the end of the input
        if ($last === false) {
            $last = strlen($string);
        }

        return array('token' => substr($string, 0, $last), 'type' => $type);
    }

    // If the next item is a string
    if (in_array($first, self::$quotes)) {
        $quote = $first;
        $length = strlen($string);

        for ($i = 1; $i < $length; $i++) {
            $next_char = isset($string[$i + 1]) ? $string[$i + 1] : null;

            // Escaped (either backslash or backtick escaped): skip the following character
            if (($quote !== '`' && $string[$i] === '\\')
                || ($quote === '`' && $string[$i] === '`' && $next_char === '`')
            ) {
                $i++;
            } elseif ($string[$i] === $quote) {
                break;
            }
        }

        $type = ($quote === '`') ? 'backtick quote' : 'quote';

        // If no closing quote was found, $i is past the end and substr() simply
        // returns the rest of the string.
        return array('token' => substr($string, 0, $i + 1), 'type' => $type);
    }

    if (in_array($first, self::$boundaries)) {
        // If it is a simple string or empty between the parentheses, just count as a word
        // this makes it so we don't split things like NOW() or COUNT(*) into separate lines
        if ($first === '(') {
            // "()"
            if (isset($string[1]) && $string[1] === ')') {
                return array('token' => '()', 'type' => 'word');
            }

            // "(word/whitespace/boundary)"
            $next_token = self::getNextToken(substr($string, 1));
            $close_pos = strlen($next_token['token']) + 1;
            if (isset($string[$close_pos]) && $string[$close_pos] === ')') {
                if (in_array($next_token['type'], array('word', 'whitespace', 'boundary'), true)) {
                    return array('token' => '(' . $next_token['token'] . ')', 'type' => 'word');
                }
            }
        }

        // Return single parentheses as their own token
        if ($first === '(' || $first === ')') {
            return array('token' => $first, 'type' => $first);
        }

        // If there are 1 or more boundary characters together, return as a single word
        $next_token = self::getNextToken(substr($string, 1));
        if ($next_token['type'] === 'boundary') {
            return array('token' => $first . $next_token['token'], 'type' => 'boundary');
        }

        // Otherwise, just return the single boundary character;
        // '.' and ',' use the character itself as their type.
        if ($first === '.' || $first === ',') {
            $type = $first;
        } else {
            $type = 'boundary';
        }

        return array('token' => $first, 'type' => $type);
    }

    if (in_array($first, self::$whitespace)) {
        // Consume the whole run of consecutive whitespace characters
        $length = strlen($string);
        for ($i = 1; $i < $length; $i++) {
            if (!in_array($string[$i], self::$whitespace)) {
                break;
            }
        }

        return array('token' => substr($string, 0, $i), 'type' => 'whitespace');
    }

    // Sort reserved word list from longest word to shortest (done once, on first use)
    if (!self::$reserved_sorted) {
        usort(self::$reserved, array('SqlFormatter', 'sortLength'));
        self::$reserved_sorted = true;
    }

    $all_boundaries = array_merge(self::$boundaries, self::$whitespace);

    // A reserved word cannot be preceded by a '.'
    // this makes it so in "mytable.from", "from" is not considered a reserved word
    if (!$previous || !isset($previous['token']) || $previous['token'] !== '.') {
        // Reserved word (matched case-insensitively against the upper-cased input)
        $test = strtoupper($string);
        foreach (self::$reserved as $word) {
            $word_length = strlen($word);
            if (substr($test, 0, $word_length) !== $word) {
                continue;
            }

            // The match must end at a boundary/whitespace character or at end of input,
            // otherwise it is only the prefix of a longer identifier.
            if (isset($string[$word_length]) && !in_array($string[$word_length], $all_boundaries)) {
                continue;
            }

            $type = in_array($word, self::$special_reserved) ? 'special reserved' : 'reserved';

            // Return the token with the casing used in the input
            return array('token' => substr($string, 0, $word_length), 'type' => $type);
        }
    }

    // Look for first word separator
    $length = strlen($string);
    for ($i = 1; $i < $length; $i++) {
        if (in_array($string[$i], $all_boundaries)) {
            break;
        }
    }

    $ret = substr($string, 0, $i);
    $type = is_numeric($ret) ? 'number' : 'word';

    return array('token' => $ret, 'type' => $type);
}