/**
 * Split PHP source code into a flat list of token descriptors.
 *
 * The raw result of token_get_all() is passed through self::prepareTokens()
 * and every entry is then turned into an associative array with the keys:
 *  - 'value'    text of the token,
 *  - 'type'     token type (element 0 of the token, possibly replaced by a
 *               space type or by self::postTransformNativeStringTokens()),
 *  - 'position' 1-based offset of the token, counted in UTF-8 characters
 *               by summing mb_strlen() of all preceding values,
 *  - 'line'     element 2 of the token,
 *  - 'literal'  Lexer::getLiteral() of the type (only when $literals is true).
 *
 * A token whose value ends with a fragment listed in self::$squashableSpaces
 * has that trailing fragment stripped: it is prepended to the following token
 * when that token consists solely of such space characters, otherwise it is
 * emitted as an extra token of the mapped space type right after the current one.
 *
 * @param string $input    Source code handed to token_get_all().
 * @param bool   $literals Whether to add the 'literal' entry to every descriptor.
 * @return array List of token descriptors as described above.
 */
public static function tokenize($input, $literals = false)
{
    $original = token_get_all($input);
    $tokens = [];
    $original = self::prepareTokens($original);
    $pos = 1;
    $previous = null;

    // The loop only rewrites fields of existing entries, so the length is invariant.
    $total = count($original);

    for ($i = 0; $i < $total; $i++) {
        $token = $original[$i];
        $next = isset($original[$i + 1]) ? $original[$i + 1] : null;
        $append = null;

        // Non-array entries are converted to the array form by the helper.
        if (!is_array($token)) {
            $token = self::tokenizeChar($token, $previous);
        }

        // Fix ending spaces.
        // NOTE(review): $space is spliced into the patterns both as a group and
        // as character-class content, so it is presumably a single character or
        // escape sequence - confirm against self::$squashableSpaces.
        foreach (self::$squashableSpaces as $space => $spaceType) {
            // Does the value end with a space while starting with something else?
            if (preg_match('/^([^' . $space . ']+?)(' . $space . ')$/', $token[1], $match)) {
                $token[1] = $match[1];

                if (is_array($next) && preg_match('/^[' . $space . ']+$/', $next[1])) {
                    // Move the ending space to the following spaces token, which
                    // now starts on the current token's line.
                    $original[$i + 1][1] = $match[2] . $next[1];
                    $original[$i + 1][2] = $token[2];
                } else {
                    // No spaces token follows: emit a new one after the current token.
                    $append = [$spaceType, $match[2], $token[2]];
                }

                break;
            }

            // Transform the type of tokens made up solely of this space.
            if (preg_match('/^(' . $space . ')+$/', $token[1], $match)) {
                $token[0] = $spaceType;
            }
        }

        $current = [
            'value' => $token[1],
            'type' => $token[0],
            'position' => $pos,
            'line' => $token[2],
        ];
        if ($literals) {
            $current['literal'] = Lexer::getLiteral($current['type']);
        }
        $tokens[] = $previous = $current;
        $pos += mb_strlen($token[1], 'UTF-8');

        if ($append) {
            $current = [
                'value' => $append[1],
                'type' => $append[0],
                'position' => $pos,
                'line' => $append[2],
            ];
            if ($literals) {
                $current['literal'] = Lexer::getLiteral($current['type']);
            }
            $tokens[] = $previous = $current;
            $pos += mb_strlen($append[1], 'UTF-8');
        }
    }

    // Second pass: let the helper retype T_STRING tokens; a truthy result is the new type.
    foreach ($tokens as $i => &$token) {
        if ($token['type'] === Lexer::T_STRING
            && ($replace = self::postTransformNativeStringTokens($tokens, $token, $i))
        ) {
            $token['type'] = $replace;
            if ($literals) {
                $token['literal'] = Lexer::getLiteral($token['type']);
            }
        }
    }
    // Break the reference so the last element of the returned array is not left aliased.
    unset($token);

    return $tokens;
}