Beispiel #1
0
/**
 * Parse a whole Javascript program from the current parser position.
 *
 * Top-level function declarations are collected separately from all other
 * top-level commands. Parsing stops when the parser has no more tokens to
 * peek at, or as soon as a sub-parser reports an error.
 *
 * @return ?map			Parse info, with keys 'functions' (list of parsed function declarations) and 'main' (merged list of parsed commands) (NULL: error)
 */
function _js_parse_js()
{
    // Grammar: Choice{"FUNCTION" "IDENTIFIER" "BRACKET_OPEN" comma_parameters "BRACKET_CLOSE" command | command}*
    $program = array(
        'functions' => array(),
        'main' => array(),
    );

    for ($token = parser_peek(); !is_null($token); $token = parser_peek()) {
        if ($token == 'FUNCTION') {
            $declaration = _js_parse_function_dec();
            if (is_null($declaration)) {
                return NULL;
            }

            // Warn (but carry on) for each earlier declaration that used the same name
            foreach ($program['functions'] as $earlier) {
                if ($earlier['name'] == $declaration['name']) {
                    js_log_warning('PARSER', 'Duplicated function \'' . $declaration['name'] . '\'');
                }
            }
            $program['functions'][] = $declaration;

            // Swallow a stray terminator after the declaration.
            // Sometimes happens when people get confused by =function() and function blah() {};
            if (parser_peek() == 'COMMAND_TERMINATE') {
                parser_next();
            }

            continue;
        }

        // Anything that is not a function declaration is parsed as a command of the main program
        $commands = _js_parse_command();
        if (is_null($commands)) {
            return NULL;
        }
        $program['main'] = array_merge($program['main'], $commands);
    }

    return $program;
}
Beispiel #2
0
/**
 * Do type checking for something specific.
 *
 * 'Undefined' and 'Null' are always accepted. An allowed type written with a
 * leading '!' also admits every key listed in element [2] of that type's entry
 * in the global $JS_PROTOTYPES (presumably its related/derived types - confirm
 * against where $JS_PROTOTYPES is built). A leading '!' on the actual type is
 * ignored for the comparison, except that '!Object' always passes unchecked.
 * On a mismatch a 'CHECKER' warning is logged via js_log_warning().
 *
 * @param  list				List of allowed types
 * @param  string				Actual type involved
 * @param  integer			Current parse position
 * @param  ?string			Specific error message to give (NULL: use default)
 * @return boolean			Whether it type-checks
 */
function js_ensure_type($_allowed_types, $actual_type, $pos, $alt_error = NULL)
{
    // We can't check it
    if ($actual_type == '!Object') {
        return true;
    }

    global $JS_PROTOTYPES;

    // Tidy up our allow list to be a nice map
    $allowed_types = array('Undefined' => 1, 'Null' => 1);
    foreach ($_allowed_types as $type) {
        // Weird, but skip blank entries
        if ($type == '') {
            continue;
        }

        if ($type[0] == '!') {
            $prototype_name = substr($type, 1);

            // Only merge when the prototype is actually known: "array + NULL" is a fatal error in PHP
            if (isset($JS_PROTOTYPES[$prototype_name][2]) && is_array($JS_PROTOTYPES[$prototype_name][2])) {
                $allowed_types += $JS_PROTOTYPES[$prototype_name][2];
            }
            $allowed_types[$prototype_name] = 1;
        } else {
            $allowed_types[$type] = 1;
        }
    }

    // The check
    if (substr($actual_type, 0, 1) == '!') {
        $actual_type = substr($actual_type, 1);
    }
    if (isset($allowed_types[$actual_type])) {
        return true;
    }

    js_log_warning('CHECKER', is_null($alt_error) ? 'Type mismatch' : $alt_error, $pos);
    return false;
}
Beispiel #3
0
/**
 * Lex some Javascript code.
 *
 * Runs a hand-written state machine over the text. In the free state it skips
 * white space and matches the longest applicable entry of $TOKENS, otherwise it
 * extracts an identifier or number. Comment, regular expression and string
 * literal tokens switch to dedicated states that accumulate the token value
 * character by character.
 *
 * Token shapes produced here:
 *  - array(token_name, position) for fixed tokens from $TOKENS
 *  - array('number_literal'|'IDENTIFIER'|'string_literal'|'comment', value, position) for valued tokens
 * A regular expression literal is emitted as the token sequence for: new RegExp(string_literal).
 *
 * Side effects: overwrites the globals $JS_TEXT and $JS_LEX_TOKENS, and appends
 * to $JS_TAG_RANGES and $JS_VALUE_RANGES. A comment, regular expression or
 * string literal still open when the text ends is dropped without a warning.
 *
 * @param  string			The code
 * @return list			List of lexed tokens (empty list if a bad token was found)
 */
function js_lex($text)
{
    global $CONTINUATIONS, $TOKENS, $JS_TAG_RANGES, $JS_VALUE_RANGES, $JS_TEXT, $JS_LEX_TOKENS;

    // Append a new line so that we don't have to consider end-of-file states as much.
    $JS_TEXT = $text . "\n";
    // We will be lexing into this list of tokens
    $JS_LEX_TOKENS = array();
    // This will be used during special lexing modes to build up the special token value being lexed
    $special_token_value = '';
    $lex_state = LEXER_FREE;
    // Used for string_literal escaping: true when the previous character was an unescaped backslash
    $escape_flag = false;

    // Lex the code. Hard coded state changes occur. Understanding of tokenisation implicit. Trying to match tokens to $TOKENS, otherwise an identifier.
    $char = '';
    $i = 0;
    while (true) {
        switch ($lex_state) {
            case LEXER_FREE:
                // Jump over any white space in our way ("break 3" leaves the do-loop, the switch and the main loop)
                do {
                    list($reached_end, $i, $char) = lex__get_next_char($i);
                    if ($reached_end) {
                        break 3;
                    }
                } while (trim($char) == '');

                // We need to know where our token is starting, so step back onto the character just read
                // (assumes lex__get_next_char advances the position by one - confirm against its definition)
                $i--;
                $i_current = $i;

                // Try and work out what token we're looking at next
                $maybe_applicable_tokens = $TOKENS;
                $applicable_tokens = array();
                $token_so_far = '';
                while (count($maybe_applicable_tokens) != 0) {
                    list($reached_end, $i, $char) = lex__get_next_char($i);
                    if ($reached_end) {
                        break 3;
                    }
                    $token_so_far .= $char;
                    // Value is unused; the original note says this read only exists for the sake of an optimiser
                    $_ = $token_so_far[0];

                    // Filter out any tokens that no longer match
                    $cnt = count($JS_LEX_TOKENS);
                    foreach ($maybe_applicable_tokens as $token_name => $token_value) {
                        if (substr($token_value, 0, strlen($token_so_far)) !== $token_so_far) {
                            // Hasn't matched
                            unset($maybe_applicable_tokens[$token_name]);
                        } else {
                            // Prefix matches, so it may still match later. Is it a perfect match already?
                            // A token starting with a continuation character only counts when the text does not carry on with another continuation character.
                            if (strlen($token_so_far) == strlen($token_value) && (!array_key_exists($token_so_far[0], $CONTINUATIONS) || !array_key_exists($JS_TEXT[$i], $CONTINUATIONS))) {
                                // The FUNCTION token is not accepted directly after a NEW token
                                if ($token_name != 'FUNCTION' || !isset($JS_LEX_TOKENS[$cnt - 1]) || $JS_LEX_TOKENS[$cnt - 1][0] != 'NEW') {
                                    $applicable_tokens[] = $token_name;
                                }
                                unset($maybe_applicable_tokens[$token_name]);
                            }
                        }
                    }
                }

                if (in_array('DIV_EQUAL', $applicable_tokens)) {
                    $previous = isset($JS_LEX_TOKENS[count($JS_LEX_TOKENS) - 1]) ? $JS_LEX_TOKENS[count($JS_LEX_TOKENS) - 1][0] : 'BRACKET_OPEN';
                    if ($previous == 'BRACKET_OPEN' || $previous == 'COMMA') {
                        // Actually, a regular expression (one whose pattern starts with "=")
                        $applicable_tokens = array('DIVIDE');
                    }
                }

                // If we have any applicable tokens, find the longest and move $i so it's as we just read it
                $i = $i_current;
                if (count($applicable_tokens) != 0) {
                    // The last element after sorting is taken as the longest (relies on _strlen_sort ordering by ascending length - confirm)
                    usort($applicable_tokens, '_strlen_sort');
                    $token_found = $applicable_tokens[count($applicable_tokens) - 1];
                    $i += strlen($TOKENS[$token_found]);

                    // Is it a special state jumping token? If so no token is emitted yet; the new state emits it when complete.
                    if ($token_found == 'START_ML_COMMENT') {
                        $lex_state = LEXER_ML_COMMENT;
                        break;
                    } elseif ($token_found == 'COMMENT') {
                        $lex_state = LEXER_COMMENT;
                        break;
                    } elseif ($token_found == 'DIVIDE' && !in_array(@$JS_LEX_TOKENS[count($JS_LEX_TOKENS) - 1][0], array('number_literal', 'IDENTIFIER', 'EXTRACT_CLOSE', 'BRACKET_CLOSE'))) {
                        // A slash that does not follow a value-like token starts a regular expression rather than a division
                        $lex_state = LEXER_REGEXP;
                        break;
                    } elseif ($token_found == 'DOUBLE_QUOTE') {
                        $lex_state = LEXER_DOUBLE_QUOTE_STRING_LITERAL;
                        break;
                    } elseif ($token_found == 'SINGLE_QUOTE') {
                        $lex_state = LEXER_SINGLE_QUOTE_STRING_LITERAL;
                        break;
                    }

                    $JS_LEX_TOKENS[] = array($token_found, $i);
                } else {
                    // Otherwise, we've found an identifier or numerical literal token, so extract it
                    $token_found = '';
                    $numeric = NULL;
                    do {
                        list($reached_end, $i, $char) = lex__get_next_char($i);
                        if ($reached_end) {
                            break 3;
                        }
                        // The first character decides whether this is a number or an identifier
                        if (is_null($numeric)) {
                            $numeric = array_key_exists($char, array('0' => 1, '1' => 1, '2' => 1, '3' => 1, '4' => 1, '5' => 1, '6' => 1, '7' => 1, '8' => 1, '9' => 1));
                        }
                        // Stop at the first non-continuation character, except for a "." inside a number that is followed by a digit
                        if (!array_key_exists($char, $CONTINUATIONS) && ($numeric === false || $char != '.' || !is_numeric($JS_TEXT[$i]))) {
                            break;
                        }
                        $token_found .= $char;
                    } while (true);
                    // Un-read the character that ended the token
                    $i--;

                    if ($numeric) {
                        if (strpos($token_found, '.') !== false) {
                            // Decimal fraction
                            $JS_LEX_TOKENS[] = array('number_literal', floatval($token_found), $i);
                        } elseif (strpos($token_found, 'x') !== false) {
                            // Hexadecimal
                            $JS_LEX_TOKENS[] = array('number_literal', intval(base_convert($token_found, 16, 10)), $i);
                        } elseif ($token_found[0] == '0') {
                            // Leading zero: treated as octal
                            $JS_LEX_TOKENS[] = array('number_literal', intval(base_convert($token_found, 8, 10)), $i);
                        } else {
                            $JS_LEX_TOKENS[] = array('number_literal', intval($token_found), $i);
                        }
                        $JS_VALUE_RANGES[] = array($i - strlen($token_found), $i);
                    } else {
                        if ($token_found == '') {
                            // Neither a known token nor an identifier character: give up on the whole text
                            js_log_warning('LEXER', 'Bad token found', $i, true);
                            return array();
                        }
                        $JS_LEX_TOKENS[] = array('IDENTIFIER', $token_found, $i);
                        $JS_TAG_RANGES[] = array($i - strlen($token_found), $i);
                    }
                }
                break;

            case LEXER_COMMENT:
                list($reached_end, $i, $char) = lex__get_next_char($i);
                if ($reached_end) {
                    break 2;
                }

                // Exit case: a single-line comment runs up to the new line
                if ($char == chr(10)) {
                    $lex_state = LEXER_FREE;
                    $JS_LEX_TOKENS[] = array('comment', $special_token_value, $i);
                    $special_token_value = '';
                    // Hand the new line back to the free state
                    $i--;
                    break;
                }

                // Normal case
                $special_token_value .= $char;
                break;

            case LEXER_ML_COMMENT:
                // Look at two characters at a time so the closing marker can be spotted
                list($reached_end, $i, $char) = lex__get_next_chars($i, 2);
                if ($reached_end) {
                    break 2;
                }

                // Exit case
                if ($char == '*/') {
                    $lex_state = LEXER_FREE;
                    $JS_LEX_TOKENS[] = array('comment', $special_token_value, $i);
                    $special_token_value = '';
                    break;
                }

                // Not the end marker: only consume the first of the two characters
                $i -= 1;
                if (!isset($char[0])) {
                    break 2;
                }
                $char = $char[0];

                // Normal case
                $special_token_value .= $char;
                break;

            case LEXER_REGEXP:
                list($reached_end, $i, $char) = lex__get_next_chars($i, 1);
                if ($reached_end) {
                    break 2;
                }

                // Exit case: a slash that is not escaped (or whose preceding backslash is itself escaped)
                if ($char == '/' && ($i < 2 || $JS_TEXT[$i - 2] != '\\' || $JS_TEXT[$i - 3] == '\\')) {
                    // Skip over any g/i/m flags, then un-read the first character that is not a flag
                    do {
                        list($reached_end, $i, $char) = lex__get_next_chars($i, 1);
                    } while ($char == 'g' || $char == 'i' || $char == 'm');
                    $i--;

                    $lex_state = LEXER_FREE;
                    // Emit the literal as if it had been written: new RegExp(string_literal)
                    $JS_LEX_TOKENS[] = array('NEW', $i);
                    $JS_LEX_TOKENS[] = array('IDENTIFIER', 'RegExp', $i);
                    $JS_LEX_TOKENS[] = array('BRACKET_OPEN', $i);
                    $JS_LEX_TOKENS[] = array('string_literal', $special_token_value, $i);
                    $JS_LEX_TOKENS[] = array('BRACKET_CLOSE', $i);
                    $JS_VALUE_RANGES[] = array($i - strlen($special_token_value), $i);
                    $special_token_value = '';
                    break;
                }

                // Normal case
                $special_token_value .= $char;
                break;

            case LEXER_DOUBLE_QUOTE_STRING_LITERAL:
                list($reached_end, $i, $char) = lex__get_next_char($i);
                if ($reached_end) {
                    break 2;
                }

                // A raw new line is only acceptable straight after an unescaped backslash, which is exactly what $escape_flag records.
                // (Inspecting the tail of $special_token_value cannot work, as the escaping backslash is never stored in it.)
                if ($char == "\n" && !$escape_flag) {
                    js_log_warning('LEXER', 'String literals may not contain explicit new lines without special escaping', $i, true);
                }

                // Exit case
                if ($char == '"' && !$escape_flag) {
                    $lex_state = LEXER_FREE;
                    $JS_LEX_TOKENS[] = array('string_literal', $special_token_value, $i);
                    $JS_VALUE_RANGES[] = array($i - strlen($special_token_value) - 1, $i - 1);
                    $special_token_value = '';
                    break;
                }

                // Escape flag based filtering
                $actual_char = $char;
                if ($escape_flag) {
                    // Translate the recognised escapes; any other escaped character stands for itself
                    if ($char == 'n') {
                        $actual_char = "\n";
                    } elseif ($char == 'r') {
                        $actual_char = "\r";
                    } elseif ($char == 't') {
                        $actual_char = "\t";
                    }
                } else {
                    // An unescaped backslash only starts an escape, it contributes nothing itself
                    if ($char == '\\') {
                        $actual_char = '';
                    }
                }

                // Normal case
                $special_token_value .= $actual_char;
                $escape_flag = !$escape_flag && $char == '\\';
                break;

            case LEXER_SINGLE_QUOTE_STRING_LITERAL:
                list($reached_end, $i, $char) = lex__get_next_char($i);
                if ($reached_end) {
                    break 2;
                }

                // Unlike the double-quoted state, any new line is reported here, escaped or not
                if ($char == "\n") {
                    js_log_warning('LEXER', 'String literals may not contain explicit new lines', $i, true);
                }

                // Exit case
                if ($char == "'" && !$escape_flag) {
                    $lex_state = LEXER_FREE;
                    $JS_LEX_TOKENS[] = array('string_literal', $special_token_value, $i);
                    $JS_VALUE_RANGES[] = array($i - strlen($special_token_value) - 1, $i - 1);
                    $special_token_value = '';
                    break;
                }

                // Escape flag based filtering
                $actual_char = $char;
                if ($escape_flag) {
                    // Only \' and \\ are unescaped; any other escape is kept verbatim, backslash included
                    // NOTE(review): Javascript itself treats \n etc. the same in both quote styles - confirm this difference is intended
                    if ($char == "'") {
                        $actual_char = "'";
                    } elseif ($char == '\\') {
                        $actual_char = '\\';
                    } else {
                        $actual_char = '\\' . $char;
                    }
                } elseif ($char == '\\') {
                    // An unescaped backslash only starts an escape, it contributes nothing itself
                    $actual_char = '';
                }

                // Normal case
                $special_token_value .= $actual_char;
                $escape_flag = !$escape_flag && $char == '\\';
                break;
        }
    }

    return $JS_LEX_TOKENS;
}