Esempio n. 1
0
 /**
  * Return the next token and token type in a SQL string.
  * Quoted strings, comments, reserved words, whitespace, and punctuation are all their own tokens.
  *
  * Candidates are tried in this order: comment, quoted string, boundary
  * character(s), whitespace, reserved word, and finally a plain word or number.
  * The 'type' value is one of: 'comment', 'block comment', 'quote',
  * 'backtick quote', '(', ')', '.', ',', 'boundary', 'whitespace',
  * 'special reserved', 'reserved', 'number' or 'word'.
  *
  * An empty $string produces an empty token of type 'word'.
  *
  * @param String $string The SQL string
  * @param array $previous The result of the previous getNextToken() call.
  *                        Only its 'token' entry is inspected: when it is '.',
  *                        reserved word detection is skipped.
  *
  * @return Array An associative array containing a 'token' and 'type' key.
  */
 protected static function getNextToken($string, $previous = null)
 {
     // Nothing left to read: return an empty word instead of touching a missing string offset
     if (!isset($string[0])) {
         return array('token' => '', 'type' => 'word');
     }

     $length = strlen($string);
     $first = $string[0];
     $first_two = substr($string, 0, 2);

     // If the next token is a comment
     if ($first_two === '--' || $first === '#' || $first_two === '/*') {
         if ($first === '-' || $first === '#') {
             // Comment until end of line (the newline itself is not part of the token)
             $last = strpos($string, "\n");
             $type = 'comment';
         } else {
             // Block comment: the token includes the closing "*/" when there is one.
             // The offset is only advanced after a successful search, because
             // adding 2 to a failed strpos() (false) would silently yield 2.
             $last = strpos($string, "*/", 2);
             if ($last !== false) {
                 $last += 2;
             }
             $type = 'block comment';
         }

         // Unterminated comment: it extends to the end of the string
         if ($last === false) {
             $last = $length;
         }

         return array('token' => substr($string, 0, $last), 'type' => $type);
     }

     // If the next item is a string
     if (in_array($first, self::$quotes, true)) {
         $quote = $first;

         for ($i = 1; $i < $length; $i++) {
             $next_char = isset($string[$i + 1]) ? $string[$i + 1] : null;

             // Backslash escapes apply to every quote type except backticks;
             // inside backticks a doubled backtick is the escape sequence.
             $backslash_escape = ($quote !== '`' && $string[$i] === '\\');
             $doubled_backtick = ($quote === '`' && $string[$i] === '`' && $next_char === '`');

             if ($backslash_escape || $doubled_backtick) {
                 // Skip the escaped character
                 $i++;
             } elseif ($string[$i] === $quote) {
                 break;
             }
         }

         $type = ($quote === '`') ? 'backtick quote' : 'quote';

         // When no closing quote is found, $i + 1 exceeds the length and the
         // whole remaining string becomes the token.
         return array('token' => substr($string, 0, $i + 1), 'type' => $type);
     }

     if (in_array($first, self::$boundaries, true)) {
         // Only look ahead when something actually follows this character
         $has_more = $length > 1;

         // If it is a simple string or empty between the parentheses, just count as a word
         // this makes it so we don't split things like NOW() or COUNT(*) into separate lines
         if ($first === '(' && $has_more) {
             // "()"
             if ($string[1] === ')') {
                 return array('token' => '()', 'type' => 'word');
             }

             // "(word/whitespace/boundary)"
             $next_token = self::getNextToken(substr($string, 1));
             $close_at = strlen($next_token['token']) + 1;
             if (isset($string[$close_at]) && $string[$close_at] === ')') {
                 if (in_array($next_token['type'], array('word', 'whitespace', 'boundary'), true)) {
                     return array('token' => '(' . $next_token['token'] . ')', 'type' => 'word');
                 }
             }
         }

         // Return single parentheses as their own token
         if ($first === '(' || $first === ')') {
             return array('token' => $first, 'type' => $first);
         }

         // If there are 1 or more boundary characters together, return as a single word
         if ($has_more) {
             $next_token = self::getNextToken(substr($string, 1));
             if ($next_token['type'] === 'boundary') {
                 return array('token' => $first . $next_token['token'], 'type' => 'boundary');
             }
         }

         // Otherwise, just return the single boundary character;
         // '.' and ',' get a type of their own.
         $type = ($first === '.' || $first === ',') ? $first : 'boundary';

         return array('token' => $first, 'type' => $type);
     }

     if (in_array($first, self::$whitespace, true)) {
         // Consume the whole run of consecutive whitespace characters
         for ($i = 1; $i < $length; $i++) {
             if (!in_array($string[$i], self::$whitespace, true)) {
                 break;
             }
         }

         return array('token' => substr($string, 0, $i), 'type' => 'whitespace');
     }

     // Sort reserved word list from longest word to shortest (done once)
     if (!self::$reserved_sorted) {
         usort(self::$reserved, array('SqlFormatter', 'sortLength'));
         self::$reserved_sorted = true;
     }

     $all_boundaries = array_merge(self::$boundaries, self::$whitespace);

     // A reserved word cannot be preceded by a '.'
     // this makes it so in "mytable.from", "from" is not considered a reserved word
     if (!$previous || !isset($previous['token']) || $previous['token'] !== '.') {
         // Reserved words are matched case-insensitively against the upper-cased input
         $test = strtoupper($string);

         foreach (self::$reserved as $word) {
             $word_length = strlen($word);

             if (substr($test, 0, $word_length) !== $word) {
                 continue;
             }

             // The match must end at a boundary/whitespace character or at the end of the string
             if (isset($string[$word_length]) && !in_array($string[$word_length], $all_boundaries, true)) {
                 continue;
             }

             $type = in_array($word, self::$special_reserved, true) ? 'special reserved' : 'reserved';

             // Return the text as written in the input, not the upper-cased copy
             return array('token' => substr($string, 0, $word_length), 'type' => $type);
         }
     }

     // Look for first word separator
     for ($i = 1; $i < $length; $i++) {
         if (in_array($string[$i], $all_boundaries, true)) {
             break;
         }
     }

     $ret = substr($string, 0, $i);
     $type = is_numeric($ret) ? 'number' : 'word';

     return array('token' => $ret, 'type' => $type);
 }