/**
 * Parse a whole program from the token stream: top-level function declarations are collected
 * separately from the top-level commands that make up the main body.
 *
 * @return ?map Parse info, with 'functions' and 'main' entries (NULL: error)
 */
function _js_parse_js()
{
    // Grammar: Choice{"FUNCTION" "IDENTIFIER" "BRACKET_OPEN" comma_parameters "BRACKET_CLOSE" command | command}*

    $program = array(
        'functions' => array(),
        'main' => array(),
    );

    $token = parser_peek();
    while ($token !== null) {
        if ($token == 'FUNCTION') {
            $declaration = _js_parse_function_dec();
            if ($declaration === null) {
                return null;
            }

            // Warn once for every earlier declaration that shares this name
            foreach ($program['functions'] as $earlier) {
                if ($earlier['name'] == $declaration['name']) {
                    js_log_warning('PARSER', 'Duplicated function \'' . $declaration['name'] . '\'');
                }
            }
            $program['functions'][] = $declaration;

            // Tolerate a stray terminator after the declaration; people write "function blah() {};"
            // when they confuse it with the "=function()" expression form
            if (parser_peek() == 'COMMAND_TERMINATE') {
                parser_next();
            }
        } else {
            $commands = _js_parse_command();
            if ($commands === null) {
                return null;
            }
            $program['main'] = array_merge($program['main'], $commands);
        }

        $token = parser_peek();
    }

    return $program;
}
/**
 * Do type checking for something specific.
 *
 * 'Undefined' and 'Null' are always accepted. An allowed type written with a leading '!' names a
 * prototype: that name is accepted, together with every key held in slot [2] of the prototype's
 * entry in $JS_PROTOTYPES (when such an entry exists). A leading '!' on the actual type is ignored
 * for the comparison, and the actual type '!Object' is accepted unconditionally.
 *
 * @param  list Allowed types
 * @param  string Actual type involved
 * @param  integer Current parse position
 * @param  ?string Specific error message to give (NULL: use default)
 * @return boolean Whether it type-checks
 */
function js_ensure_type($_allowed_types, $actual_type, $pos, $alt_error = NULL)
{
    global $JS_PROTOTYPES;

    // We can't check it
    if ($actual_type == '!Object') {
        return true;
    }

    // Tidy up our allow list to be a nice map
    $allowed_types = array('Undefined' => 1, 'Null' => 1);
    foreach ($_allowed_types as $type) {
        if ($type == '') {
            continue; // Weird
        }

        if ($type[0] != '!') {
            $allowed_types[$type] = 1;
            continue;
        }

        $prototype = substr($type, 1);

        // Only union in the related-type map when the prototype is actually registered; an unknown
        // name would otherwise yield "array + null", which is a fatal error rather than a mismatch
        if (isset($JS_PROTOTYPES[$prototype][2]) && is_array($JS_PROTOTYPES[$prototype][2])) {
            $allowed_types += $JS_PROTOTYPES[$prototype][2];
        }
        $allowed_types[$prototype] = 1;
    }

    // The check
    if (substr($actual_type, 0, 1) == '!') {
        $actual_type = substr($actual_type, 1);
    }
    if (isset($allowed_types[$actual_type])) {
        return true;
    }

    js_log_warning('CHECKER', is_null($alt_error) ? 'Type mismatch' : $alt_error, $pos);
    return false;
}
/**
 * Lex some Javascript code.
 *
 * A small state machine walks the text one character at a time. In the free state it matches the
 * longest entry of $TOKENS, falling back to an identifier or number literal; comments, regular
 * expressions and string literals each have their own state that accumulates a value until the
 * terminator is seen.
 *
 * Tokens with no value are array(type, position); identifiers, number literals, string literals and
 * comments are array(type, value, position). A regular expression literal is emitted as the token
 * sequence for new RegExp("...") (any g/i/m flags are consumed and dropped).
 *
 * Side effects: overwrites the globals $JS_TEXT and $JS_LEX_TOKENS, and appends to $JS_TAG_RANGES
 * and $JS_VALUE_RANGES. If the text ends inside a comment, regexp or string literal, the unfinished
 * token is dropped and the tokens lexed so far are returned.
 *
 * @param  string The code
 * @return list List of lexed tokens (empty list: an unrecognisable token was hit)
 */
function js_lex($text)
{
    global $CONTINUATIONS, $TOKENS, $JS_TAG_RANGES, $JS_VALUE_RANGES, $JS_TEXT, $JS_LEX_TOKENS;

    // So that we don't have to consider end-of-file states as much.
    $JS_TEXT = $text . "\n";

    $JS_LEX_TOKENS = array(); // We will be lexing into this list of tokens

    $special_token_value = ''; // Built up during the special lexing modes (comment / regexp / string literal)
    $lex_state = LEXER_FREE;
    $escape_flag = false; // String literal escaping: true when the previous character was an unescaped backslash

    // Token types after which a '/' is a division operator rather than the start of a regular expression
    $division_contexts = array('number_literal', 'IDENTIFIER', 'EXTRACT_CLOSE', 'BRACKET_CLOSE');

    // Lex the code. Hard coded state changes occur. Understanding of tokenisation implicit. Trying to match tokens to $TOKENS, otherwise an identifier.
    $char = '';
    $i = 0;
    while (true) {
        switch ($lex_state) {
            case LEXER_FREE:
                // Jump over any white space in our way
                do {
                    list($reached_end, $i, $char) = lex__get_next_char($i);
                    if ($reached_end) {
                        break 3;
                    }
                } while (trim($char) == '');

                // We need to know where our token is starting
                $i--;
                $i_current = $i;

                // Type of the most recently emitted token (NULL: nothing emitted yet); it cannot change while we match
                $cnt = count($JS_LEX_TOKENS);
                $previous_type = isset($JS_LEX_TOKENS[$cnt - 1]) ? $JS_LEX_TOKENS[$cnt - 1][0] : NULL;

                // Try and work out what token we're looking at next
                $maybe_applicable_tokens = $TOKENS;
                $applicable_tokens = array();
                $token_so_far = '';
                while (count($maybe_applicable_tokens) != 0) {
                    list($reached_end, $i, $char) = lex__get_next_char($i);
                    if ($reached_end) {
                        break 3;
                    }

                    $token_so_far .= $char;
                    $so_far_length = strlen($token_so_far);

                    // Filter out any tokens that no longer match
                    foreach ($maybe_applicable_tokens as $token_name => $token_value) {
                        // Hasn't matched
                        if (substr($token_value, 0, $so_far_length) !== $token_so_far) {
                            unset($maybe_applicable_tokens[$token_name]);
                            continue;
                        }

                        // Only a prefix so far; may still match with more characters
                        if ($so_far_length != strlen($token_value)) {
                            continue;
                        }

                        // A word-like token directly followed by another identifier character is really
                        // the start of a longer identifier; it drops out on the next character
                        if (array_key_exists($token_so_far[0], $CONTINUATIONS) && array_key_exists($JS_TEXT[$i], $CONTINUATIONS)) {
                            continue;
                        }

                        // A perfect match ("new function" is deliberately not given a FUNCTION token)
                        if ($token_name != 'FUNCTION' || $previous_type != 'NEW') {
                            $applicable_tokens[] = $token_name;
                        }
                        unset($maybe_applicable_tokens[$token_name]);
                    }
                }

                if (in_array('DIV_EQUAL', $applicable_tokens)) {
                    if ($previous_type === NULL || $previous_type == 'BRACKET_OPEN' || $previous_type == 'COMMA') {
                        $applicable_tokens = array('DIVIDE'); // Actually, a regular expression
                    }
                }

                // If we have any applicable tokens, find the longest and move $i so it's as we just read it
                $i = $i_current;
                if (count($applicable_tokens) != 0) {
                    usort($applicable_tokens, '_strlen_sort');
                    $token_found = $applicable_tokens[count($applicable_tokens) - 1];

                    $i += strlen($TOKENS[$token_found]);

                    // Is it a special state jumping token?
                    if ($token_found == 'START_ML_COMMENT') {
                        $lex_state = LEXER_ML_COMMENT;
                        break;
                    } elseif ($token_found == 'COMMENT') {
                        $lex_state = LEXER_COMMENT;
                        break;
                    } elseif ($token_found == 'DIVIDE' && !in_array($previous_type, $division_contexts, true)) {
                        $lex_state = LEXER_REGEXP;
                        break;
                    } elseif ($token_found == 'DOUBLE_QUOTE') {
                        $lex_state = LEXER_DOUBLE_QUOTE_STRING_LITERAL;
                        break;
                    } elseif ($token_found == 'SINGLE_QUOTE') {
                        $lex_state = LEXER_SINGLE_QUOTE_STRING_LITERAL;
                        break;
                    }

                    $JS_LEX_TOKENS[] = array($token_found, $i);
                } else {
                    // Otherwise, we've found an identifier or numerical literal token, so extract it
                    $token_found = '';
                    $numeric = NULL; // Decided by the first character: a leading digit means a number literal
                    do {
                        list($reached_end, $i, $char) = lex__get_next_char($i);
                        if ($reached_end) {
                            break 3;
                        }
                        if (is_null($numeric)) {
                            $numeric = array_key_exists($char, array('0' => 1, '1' => 1, '2' => 1, '3' => 1, '4' => 1, '5' => 1, '6' => 1, '7' => 1, '8' => 1, '9' => 1));
                        }

                        // Stop at the first non-identifier character, except a decimal point inside a number
                        if (!array_key_exists($char, $CONTINUATIONS) && ($numeric === false || $char != '.' || !is_numeric($JS_TEXT[$i]))) {
                            break;
                        }

                        $token_found .= $char;
                    } while (true);
                    $i--; // Un-read the character that ended the token

                    if ($numeric) {
                        if (strpos($token_found, '.') !== false) {
                            $number_value = floatval($token_found);
                        } elseif (strpos($token_found, 'x') !== false) {
                            $number_value = intval(base_convert($token_found, 16, 10));
                        } elseif ($token_found[0] == '0') {
                            $number_value = intval(base_convert($token_found, 8, 10));
                        } else {
                            $number_value = intval($token_found);
                        }
                        $JS_LEX_TOKENS[] = array('number_literal', $number_value, $i);
                        $JS_VALUE_RANGES[] = array($i - strlen($token_found), $i);
                    } else {
                        if ($token_found == '') {
                            js_log_warning('LEXER', 'Bad token found', $i, true);
                            return array();
                        }

                        $JS_LEX_TOKENS[] = array('IDENTIFIER', $token_found, $i);
                        $JS_TAG_RANGES[] = array($i - strlen($token_found), $i);
                    }
                }

                break;

            case LEXER_COMMENT:
                list($reached_end, $i, $char) = lex__get_next_char($i);
                if ($reached_end) {
                    break 2;
                }

                // Exit case
                if ($char == chr(10)) {
                    $lex_state = LEXER_FREE;
                    $JS_LEX_TOKENS[] = array('comment', $special_token_value, $i);
                    $special_token_value = '';
                    $i--; // Leave the new line for the free state to skip over
                    break;
                }

                // Normal case
                $special_token_value .= $char;

                break;

            case LEXER_ML_COMMENT:
                // Look at a two character window so that the terminator can be spotted
                list($reached_end, $i, $char) = lex__get_next_chars($i, 2);
                if ($reached_end) {
                    break 2;
                }

                // Exit case
                if ($char == '*/') {
                    $lex_state = LEXER_FREE;
                    $JS_LEX_TOKENS[] = array('comment', $special_token_value, $i);
                    $special_token_value = '';
                    break;
                }

                // Only the first character of the window is consumed
                $i -= 1;
                if (!isset($char[0])) {
                    break 2;
                }
                $char = $char[0];

                // Normal case
                $special_token_value .= $char;

                break;

            case LEXER_REGEXP:
                list($reached_end, $i, $char) = lex__get_next_chars($i, 1);
                if ($reached_end) {
                    break 2;
                }

                if ($char == '/') {
                    // The slash terminates the literal unless it is escaped, i.e. preceded by an odd
                    // number of backslashes ("\\/" ends the literal, "\/" and "\\\/" do not)
                    $backslashes = 0;
                    for ($j = $i - 2; $j >= 0 && $JS_TEXT[$j] == '\\'; $j--) {
                        $backslashes++;
                    }

                    // Exit case
                    if ($backslashes % 2 == 0) {
                        // Swallow any trailing flags
                        do {
                            list($reached_end, $i, $char) = lex__get_next_chars($i, 1);
                        } while ($char == 'g' || $char == 'i' || $char == 'm');
                        $i--;

                        $lex_state = LEXER_FREE;
                        $JS_LEX_TOKENS[] = array('NEW', $i);
                        $JS_LEX_TOKENS[] = array('IDENTIFIER', 'RegExp', $i);
                        $JS_LEX_TOKENS[] = array('BRACKET_OPEN', $i);
                        $JS_LEX_TOKENS[] = array('string_literal', $special_token_value, $i);
                        $JS_LEX_TOKENS[] = array('BRACKET_CLOSE', $i);
                        $JS_VALUE_RANGES[] = array($i - strlen($special_token_value), $i);
                        $special_token_value = '';
                        break;
                    }
                }

                // Normal case
                $special_token_value .= $char;

                break;

            case LEXER_DOUBLE_QUOTE_STRING_LITERAL:
                list($reached_end, $i, $char) = lex__get_next_char($i);
                if ($reached_end) {
                    break 2;
                }

                // A raw new line is only legal directly after an unescaped backslash. The escape flag
                // is the reliable record of that, as unescaped backslashes never reach the value buffer.
                if ($char == "\n" && !$escape_flag) {
                    js_log_warning('LEXER', 'String literals may not contain explicit new lines without special escaping', $i, true);
                }

                // Exit case
                if ($char == '"' && !$escape_flag) {
                    $lex_state = LEXER_FREE;
                    $JS_LEX_TOKENS[] = array('string_literal', $special_token_value, $i);
                    $JS_VALUE_RANGES[] = array($i - strlen($special_token_value) - 1, $i - 1);
                    $special_token_value = '';
                    break;
                }

                // Escape flag based filtering
                $actual_char = $char;
                if ($escape_flag) {
                    if ($char == 'n') {
                        $actual_char = "\n";
                    } elseif ($char == 'r') {
                        $actual_char = "\r";
                    } elseif ($char == 't') {
                        $actual_char = "\t";
                    }
                } elseif ($char == '\\') {
                    $actual_char = ''; // The escaping backslash itself is not part of the value
                }

                // Normal case
                $special_token_value .= $actual_char;

                $escape_flag = !$escape_flag && $char == '\\';

                break;

            case LEXER_SINGLE_QUOTE_STRING_LITERAL:
                list($reached_end, $i, $char) = lex__get_next_char($i);
                if ($reached_end) {
                    break 2;
                }

                if ($char == "\n") {
                    js_log_warning('LEXER', 'String literals may not contain explicit new lines', $i, true);
                }

                // Exit case
                if ($char == "'" && !$escape_flag) {
                    $lex_state = LEXER_FREE;
                    $JS_LEX_TOKENS[] = array('string_literal', $special_token_value, $i);
                    $JS_VALUE_RANGES[] = array($i - strlen($special_token_value) - 1, $i - 1);
                    $special_token_value = '';
                    break;
                }

                // Escape flag based filtering: only \' and \\ are collapsed, any other escape keeps its backslash
                $actual_char = $char;
                if ($escape_flag) {
                    if ($char == "'") {
                        $actual_char = "'";
                    } elseif ($char == '\\') {
                        $actual_char = '\\';
                    } else {
                        $actual_char = '\\' . $char;
                    }
                } elseif ($char == '\\') {
                    $actual_char = '';
                }

                // Normal case
                $special_token_value .= $actual_char;

                $escape_flag = !$escape_flag && $char == '\\';

                break;
        }
    }

    return $JS_LEX_TOKENS;
}