/**
 * Splits PHP source code into a flat list of token descriptors.
 *
 * The input is first tokenized by PHP's native token_get_all() and passed through
 * self::prepareTokens(). Every token is then normalised to the [type, text, line]
 * layout, trailing "squashable" whitespace is detached from the token text, and
 * whitespace-only tokens are re-typed according to self::$squashableSpaces.
 *
 * Each returned entry is an array with the keys:
 *  - 'value'    => token text
 *  - 'type'     => token type
 *  - 'position' => 1-based character offset of the token (counted with mb_strlen, UTF-8)
 *  - 'line'     => line number carried by the token
 *  - 'literal'  => Lexer::getLiteral() of the type (only when $literals is true)
 *
 * @param string $input
 * @param bool $literals
 * @return array
 */
public static function tokenize($input, $literals = false)
{
    $original = self::prepareTokens(token_get_all($input));
    $tokens = [];

    $pos = 1;
    $previous = null;

    // Only existing elements of $original are rewritten inside the loop, so its
    // length is constant and can be computed once.
    $total = count($original);

    for ($i = 0; $i < $total; $i++) {
        $token = $original[$i];
        $next = isset($original[$i + 1]) ? $original[$i + 1] : null;
        $append = null;

        // token_get_all() yields bare strings for single-character tokens;
        // tokenizeChar() is expected to turn them into the [type, text, line] layout
        // (NOTE(review): confirm against tokenizeChar, which is not visible here).
        if (!is_array($token)) {
            $token = self::tokenizeChar($token, $previous);
        }

        // fix ending spaces
        // keys of $squashableSpaces are used as regex fragments, values as token types
        foreach (self::$squashableSpaces as $space => $spaceType) {
            // is value ending with space and begin with something else?
            if (preg_match('/^([^' . $space . ']+?)(' . $space . ')$/', $token[1], $match)) {
                $token[1] = $match[1];

                if (is_array($next) && preg_match('/^[' . $space . ']+$/', $next[1])) {
                    // move ending spaces to next spaces token
                    $original[$i + 1][1] = $match[2] . $next[1];
                    $original[$i + 1][2] = $token[2];
                } else {
                    // append new space token
                    $append = [$spaceType, $match[2], $token[2]];
                }

                break;
            }

            // transform T_WHITESPACE token names
            if (preg_match('/^(' . $space . ')+$/', $token[1], $match)) {
                $token[0] = $spaceType;
            }
        }

        // The token itself and the optional detached whitespace token share the
        // same [type, text, line] layout, so both are emitted by the same code.
        foreach ($append ? [$token, $append] : [$token] as $piece) {
            $current = [
                'value' => $piece[1],
                'type' => $piece[0],
                'position' => $pos,
                'line' => $piece[2],
            ];

            if ($literals) {
                $current['literal'] = Lexer::getLiteral($current['type']);
            }

            $tokens[] = $previous = $current;
            $pos += mb_strlen($piece[1], 'UTF-8');
        }
    }

    // Second pass: let postTransformNativeStringTokens() re-type T_STRING tokens.
    foreach ($tokens as $i => &$token) {
        if ($token['type'] === Lexer::T_STRING && ($replace = self::postTransformNativeStringTokens($tokens, $token, $i))) {
            $token['type'] = $replace;

            if ($literals) {
                $token['literal'] = Lexer::getLiteral($token['type']);
            }
        }
    }
    // Break the reference so the last element of $tokens is not left aliased to $token.
    unset($token);

    return $tokens;
}
/**
 * Tries to match $select at the lexer's current position and advances the lexer
 * past whatever was matched.
 *
 * Supported selectors:
 *  - int: compared with the next token through Lexer::isNextToken(); on success the
 *    lookahead token is returned and the lexer moves one token forward.
 *  - Matcher: run against the tokens from the lexer's current position onwards; its
 *    result is returned wrapped in a one-element array.
 *  - AbstractModifier: run against the lexer itself, after which the peek is reset.
 *
 * Any other selector yields false.
 *
 * @param \Carrooi\Tokenizer\Parsing\Lexer $lexer
 * @param mixed $select
 * @return bool|array|null
 */
public function _matchToken(Lexer $lexer, $select)
{
    if (is_int($select)) {
        $result = $lexer->isNextToken($select);
    } elseif ($select instanceof Matcher) {
        $remaining = array_slice($lexer->tokens, $lexer->position);
        // NOTE(review): the wrapping array is never empty, so this result is truthy
        // even when the matcher itself returned an empty value - confirm intended.
        $result = [$select->match($remaining)];
    } elseif ($select instanceof AbstractModifier) {
        $result = $select->match($lexer);
        $lexer->resetPeek();
    } else {
        $result = false;
    }

    // Plain boolean hit: hand back the token that was matched and step over it.
    if ($result === true) {
        $matchedToken = $lexer->lookahead;
        $lexer->moveNext();

        return $matchedToken;
    }

    // Nothing matched: return the empty result untouched.
    if (!$result) {
        return $result;
    }

    // Structured match: advance the lexer behind the last token of the match.
    $tail = Helpers::getLastToken($result);
    if ($tail) {
        Helpers::moveLexerToToken($lexer, $tail);
    }

    return $result;
}
/**
 * Advances the lexer until it has stepped over the given token.
 *
 * Nothing happens when the lexer has no lookahead or when its lookahead is already
 * positioned after $token. Otherwise the lexer moves forward until the lookahead is
 * identical to $token, then moves once more to consume it. If the token is never
 * encountered, the lexer stops once its lookahead is exhausted.
 *
 * @param \Carrooi\Tokenizer\Parsing\Lexer $lexer
 * @param array $token
 */
public static function moveLexerToToken(Lexer $lexer, array $token)
{
    if (!$lexer->lookahead) {
        return;
    }

    if ($lexer->lookahead['position'] > $token['position']) {
        return;
    }

    for (;;) {
        $ahead = $lexer->lookahead;

        // Target reached: consume it and stop.
        if ($ahead === $token) {
            $lexer->moveNext();

            return;
        }

        // No more tokens to look at.
        if (!$ahead) {
            return;
        }

        $lexer->moveNext();
    }
}