示例#1
0
 /**
  * Splits PHP source into a flat list of token descriptors.
  *
  * The raw output of token_get_all() is passed through self::prepareTokens(),
  * single-character tokens are expanded by self::tokenizeChar(), trailing
  * whitespace is split off into its own token, and finally T_STRING entries
  * are given a chance to be re-typed by self::postTransformNativeStringTokens().
  *
  * Each returned entry is an array with the keys:
  *  - 'value'    the token text
  *  - 'type'     the token type (index 0 of the underlying token)
  *  - 'position' 1-based offset, advanced by the UTF-8 character length of every preceding value
  *  - 'line'     index 2 of the underlying token
  *  - 'literal'  only when $literals is true; result of Lexer::getLiteral() for the type
  *
  * @param string $input    source code handed to token_get_all()
  * @param bool   $literals whether to add the 'literal' key to every entry
  * @return array list of token descriptors as described above
  */
 public static function tokenize($input, $literals = false)
 {
     $original = self::prepareTokens(token_get_all($input));
     $tokens = [];
     $pos = 1;
     $previous = null;
     // The loop only rewrites existing elements of $original, never adds or
     // removes any, so the length can be computed once.
     $total = count($original);
     for ($i = 0; $i < $total; $i++) {
         $token = $original[$i];
         $next = isset($original[$i + 1]) ? $original[$i + 1] : null;
         $append = null;
         if (!is_array($token)) {
             // NOTE(review): tokenizeChar() is assumed to return a
             // [type, value, line] array, since indexes 0-2 are read below.
             $token = self::tokenizeChar($token, $previous);
         }
         // fix ending spaces; keys of $squashableSpaces are interpolated into
         // the patterns, values are the token type assigned to that space
         foreach (self::$squashableSpaces as $space => $spaceType) {
             // is value ending with space and begin with something else?
             if (preg_match('/^([^' . $space . ']+?)(' . $space . ')$/', $token[1], $match)) {
                 $token[1] = $match[1];
                 if (is_array($next) && preg_match('/^[' . $space . ']+$/', $next[1])) {
                     // move ending spaces to next spaces token, which also
                     // takes over this token's line value
                     $original[$i + 1][1] = $match[2] . $next[1];
                     $original[$i + 1][2] = $token[2];
                 } else {
                     // append new space token right after the current one
                     $append = [$spaceType, $match[2], $token[2]];
                 }
                 break;
             }
             // transform T_WHITESPACE token names
             if (preg_match('/^(' . $space . ')+$/', $token[1])) {
                 $token[0] = $spaceType;
             }
         }
         $current = ['value' => $token[1], 'type' => $token[0], 'position' => $pos, 'line' => $token[2]];
         if ($literals) {
             $current['literal'] = Lexer::getLiteral($current['type']);
         }
         $tokens[] = $previous = $current;
         $pos += mb_strlen($token[1], 'UTF-8');
         if ($append) {
             $current = ['value' => $append[1], 'type' => $append[0], 'position' => $pos, 'line' => $append[2]];
             if ($literals) {
                 $current['literal'] = Lexer::getLiteral($current['type']);
             }
             $tokens[] = $previous = $current;
             $pos += mb_strlen($append[1], 'UTF-8');
         }
     }
     // Second pass: let T_STRING entries be re-typed in place.
     foreach ($tokens as $i => &$token) {
         if ($token['type'] === Lexer::T_STRING && ($replace = self::postTransformNativeStringTokens($tokens, $token, $i))) {
             $token['type'] = $replace;
             if ($literals) {
                 $token['literal'] = Lexer::getLiteral($token['type']);
             }
         }
     }
     // Break the reference so the last element is not left aliased to $token.
     unset($token);
     return $tokens;
 }