/**
 * Dumps abstract syntax tree
 *
 * Produces a textual dump of an AST value. Leaf values are rendered directly
 * (null as `null`, strings wrapped in double quotes without escaping, anything
 * else via a string cast); ast\Node instances are rendered as the kind name
 * followed by optional line numbers, flags, name, doc comment and one entry
 * per child, with each nesting level indented.
 *
 * @param mixed $ast     An ast\Node or a leaf value taken from a node's children.
 * @param int   $options Bit mask; when AST_DUMP_LINENOS is set the line number
 *                       (and end line, if present) is appended to the kind name.
 *
 * @return string The dump.
 */
function ast_dump($ast, int $options = 0) : string {
    // Leaf values: handled up front so the node rendering below is not nested.
    if ($ast === null) {
        return 'null';
    }
    if (is_string($ast)) {
        return '"' . $ast . '"';
    }
    if (!($ast instanceof ast\Node)) {
        return (string) $ast;
    }

    // Node: collect the output fragments in order, then join them.
    $pieces = [ast\get_kind_name($ast->kind)];

    if ($options & AST_DUMP_LINENOS) {
        $pieces[] = ' @ ' . $ast->lineno;
        if (isset($ast->endLineno)) {
            $pieces[] = '-' . $ast->endLineno;
        }
    }

    if (ast\kind_uses_flags($ast->kind)) {
        $pieces[] = "\n flags: " . format_flags($ast->kind, $ast->flags);
    }

    if (isset($ast->name)) {
        $pieces[] = "\n name: " . $ast->name;
    }

    if (isset($ast->docComment)) {
        $pieces[] = "\n docComment: " . $ast->docComment;
    }

    foreach ($ast->children as $key => $child) {
        // Indent every line of the nested dump by one extra level.
        $nested = str_replace("\n", "\n ", ast_dump($child, $options));
        $pieces[] = "\n " . $key . ': ' . $nested;
    }

    return implode('', $pieces);
}
/**
 * Renders an AST value as text.
 *
 * For an \ast\Node the output is the kind name and line number (plus end line
 * when set), followed by raw flags, name and doc comment when applicable, and
 * optionally one indented entry per child. Any other value is rendered as a
 * leaf: `null`, a double-quoted string (no escaping), or its string cast.
 *
 * @param mixed $ast      An \ast\Node or a leaf value.
 * @param bool  $children Whether to descend into the node's children. Nested
 *                        calls always use the default (true).
 *
 * @return string The dump.
 */
function ast_dump($ast, $children = true) {
    if (!($ast instanceof \ast\Node)) {
        if ($ast === null) {
            return 'null';
        }
        return is_string($ast) ? "\"{$ast}\"" : (string) $ast;
    }

    $out = \ast\get_kind_name($ast->kind) . " @ {$ast->lineno}";
    if (isset($ast->endLineno)) {
        $out .= "-{$ast->endLineno}";
    }

    if (\ast\kind_uses_flags($ast->kind)) {
        $out .= "\n flags: {$ast->flags}";
    }
    if (isset($ast->name)) {
        $out .= "\n name: {$ast->name}";
    }
    if (isset($ast->docComment)) {
        $out .= "\n docComment: {$ast->docComment}";
    }

    if (!$children) {
        return $out;
    }

    foreach ($ast->children as $index => $child_node) {
        // Push every line of the child's dump one level deeper.
        $child_dump = str_replace("\n", "\n ", ast_dump($child_node));
        $out .= "\n {$index}: " . $child_dump;
    }

    return $out;
}
/**
 * Converts a parameter-list node into an array of parameter descriptors and
 * counts required versus optional parameters.
 *
 * Each child of $node is passed to node_param(); a child whose third child
 * (index 2, the default value) is null counts as required, otherwise as
 * optional. A required parameter seen after any optional one is reported via
 * Log::err().
 *
 * @param mixed $file      File identifier passed through to node_param() and Log::err().
 * @param mixed $node      The parameter-list node; expected to be an \ast\Node.
 * @param int   $req       Incremented (by reference) once per required parameter.
 * @param int   $opt       Incremented (by reference) once per optional parameter.
 * @param mixed $dc        Passed through to node_param() unchanged.
 * @param mixed $namespace Passed through to node_param() unchanged.
 *
 * @return array|null The node_param() results in order, or null when $node is
 *                    not an \ast\Node (after the assertion has fired).
 */
function node_paramlist($file, $node, &$req, &$opt, $dc, $namespace) {
    if (!($node instanceof \ast\Node)) {
        assert(false, ast_dump($node) . " was not an \\ast\\Node");
        return null;
    }

    $params = [];
    $position = 0;

    foreach ($node->children as $param_node) {
        $params[] = node_param($file, $param_node, $dc, $position, $namespace);

        $has_default = $param_node->children[2] !== null;
        if ($has_default) {
            $opt++;
        } else {
            // A required parameter after an optional one is reported.
            if ($opt) {
                Log::err(Log::EPARAM, "required arg follows optional", $file, $node->lineno);
            }
            $req++;
        }

        $position++;
    }

    return $params;
}
/**
 * Processes an assignment node: works out the assigned type, records it on
 * the target variable (or on a known class property for `$var->prop = ...`),
 * and returns the type of the right-hand side.
 *
 * Chained assignments (`$a = $b = 1`) are handled by recursing into the
 * right-hand side first and reusing the type it returns.
 *
 * @param mixed $file          File identifier; used in debug output and as the taint origin.
 * @param mixed $namespace     Prefix prepended to not-fully-qualified class names in static property targets.
 * @param mixed $ast           The assignment node; children[0] is the target, children[1] the value.
 * @param mixed $current_scope Passed through to node_type().
 * @param mixed $current_class Array-like with a 'name' entry, read for `$this->prop` targets.
 * @param array $vars          Variable table (by reference); entries get 'type', 'tainted' and 'tainted_by'.
 *
 * @return mixed The type determined for the right-hand side (may be null when
 *               the function returns before it is computed).
 */
function var_assign($file, $namespace, $ast, $current_scope, $current_class, &$vars) {
    global $classes, $functions, $scope;

    $left = $ast->children[0];
    $right = $ast->children[1];
    $left_type = $right_type = null;
    $parent = $ast;
    $taint = false;

    // Deal with $a=$b=$c=1; and trickle the right-most value to the top through recursion
    if ($right instanceof \ast\Node && $right->kind == \ast\AST_ASSIGN) {
        $right_type = var_assign($file, $namespace, $right, $current_scope, $current_class, $vars);
    }

    if ($left instanceof \ast\Node && $left->kind != \ast\AST_VAR && $left->kind != \ast\AST_STATIC_PROP) {
        // Walk multi-level arrays and chained stuff
        // eg. $var->prop[1][2]->prop
        // $parent ends up as the node directly wrapping the innermost target.
        while ($left instanceof \ast\Node && $left->kind != \ast\AST_VAR) {
            $parent = $left;
            $left = $left->children[0];
        }
        // Disabled: inferring the left-side type from the parent kind.
        // If we see $var[..] then we know $var is an array
        # if($parent->kind == \ast\AST_DIM) $left_type = 'array'; // TODO: unless it was already a string, then it is a string offset
        # else if($parent->kind == \ast\AST_LIST) $left_type = ''; // TODO: Properly handle list($a) = [$b]
        # else if($parent->kind == \ast\AST_PROP) { $left_type = "object"; }
    }

    if ($left == null) {
        // No variable name for assignment??
        // This is generally for something like list(,$var) = [1,2]
        return $right_type;
    }

    if (!is_object($left)) {
        // TODO: "self" looks like a self::$var assignment - do something smart here
        // TODO: "static" is a static::$prop assignment
        // Every non-object target currently yields the same result.
        return $right_type;
    }

    // DEBUG
    if (!($left instanceof \ast\Node)) {
        echo "Check this {$file}\n" . ast_dump($left) . "\n" . ast_dump($parent) . "\n";
    }

    if ($left->kind == \ast\AST_STATIC_PROP && $left->children[0]->kind == \ast\AST_NAME) {
        if ($left->children[1] instanceof \ast\Node) {
            // This is some sort of self::${$key} thing, give up
            return $right_type;
        }
        $class_part = $left->children[0]->children[0];
        if ($left->children[0]->flags & \ast\flags\NAME_NOT_FQ) {
            $class_part = $namespace . $class_part;
        }
        $left_name = $class_part . '::' . $left->children[1];
    } else {
        $left_name = $left->children[0];
    }

    if ($right instanceof \ast\Node && $right->kind == \ast\AST_CLOSURE) {
        $right_type = 'callable:{closure ' . $right->id . '}';
        // $left_name is a node for $$var targets; a node cannot be used as an
        // array offset, so skip recording in that case (same rule as the
        // final block of this function).
        // NOTE(review): this branch does not look at $parent, so it also
        // records the closure type on the base variable for
        // `$obj->prop = function ...` / `$arr[..] = function ...` - confirm intended.
        if (!($left_name instanceof \ast\Node)) {
            $vars[$left_name]['type'] = $right_type;
            $vars[$left_name]['tainted'] = false;
            $vars[$left_name]['tainted_by'] = '';
        }
        return $right_type;
    }

    if (!$left_type && $right_type) {
        $left_type = $right_type;
    } elseif (!$left_type) {
        // We didn't figure out the type simply by looking at the left side of the assignment, check the right
        $right_type = node_type($file, $namespace, $right, $current_scope, $current_class, $taint);
        $left_type = $right_type;
    }

    // $var->prop = ...
    // Skipped for $$var-> targets (variable name is itself a node).
    if ($parent->kind == \ast\AST_PROP && $left->kind == \ast\AST_VAR && !($left->children[0] instanceof \ast\Node)) {
        $prop = $parent->children[1];
        // Skipped for $var->$... (dynamic property name).
        if (!($prop instanceof \ast\Node)) {
            // Lower-cased key into $classes, or null when the class is not known.
            $lclass = null;
            if ($left->children[0] == 'this') {
                // $this->prop =
                $lclass = strtolower($current_class['name']);
            } else {
                // $var->prop =
                $temp = node_type($file, $namespace, $left, $current_scope, $current_class, $taint);
                if (!is_native_type($temp)) {
                    $candidate = strtolower($temp);
                    if (!empty($classes[$candidate])) {
                        $lclass = $candidate;
                    }
                }
            }

            if ($lclass !== null) {
                if (empty($classes[$lclass]['properties'][$prop])) {
                    // First sighting of this property: register it as public.
                    $classes[$lclass]['properties'][$prop] = [
                        'flags' => \ast\flags\MODIFIER_PUBLIC,
                        'name' => $prop,
                        'lineno' => 0,
                        'value' => $right_type,
                    ];
                } else {
                    $classes[$lclass]['properties'][$prop]['value'] = merge_type(
                        $classes[$lclass]['properties'][$prop]['value'],
                        $right_type
                    );
                }
                return $right_type;
            }
        }
    }

    if ($left_name instanceof \ast\Node) {
        // TODO: Deal with $$var
    } else {
        $vars[$left_name]['type'] = $left_type ?? '';
        $vars[$left_name]['tainted'] = $taint;
        $vars[$left_name]['tainted_by'] = $taint ? "{$file}:{$left->lineno}" : '';
    }

    return $right_type;
}
/**
 * Processes an assignment node: works out the assigned type, records it on
 * the target variable (or, for `$var->prop = ...`, on the class property
 * located via find_property()), and returns the type of the right-hand side.
 *
 * Chained assignments (`$a = $b = 1`) are handled by recursing into the
 * right-hand side first and reusing the type it returns. Assignments through
 * an array dimension (`$var[..] = ...`) record the generic form of the type
 * as produced by mkgenerics().
 *
 * @param mixed $file          File identifier; used in log/debug output and as the taint origin.
 * @param mixed $namespace     Prefix prepended to not-fully-qualified class names in static property targets.
 * @param mixed $ast           The assignment node; children[0] is the target, children[1] the value.
 * @param mixed $current_scope Passed through to node_type() and find_class_name().
 * @param mixed $current_class Array-like with a 'name' entry, read for `$this->prop` targets.
 * @param array $vars          Variable table (by reference); entries get 'type', 'tainted' and 'tainted_by'.
 *
 * @return mixed The type determined for the right-hand side; '' for list
 *               targets and for property targets whose class is unknown;
 *               may be null when the function returns before a type is computed.
 */
function var_assign($file, $namespace, $ast, $current_scope, $current_class, &$vars) {
    global $classes, $functions, $scope;

    $left = $ast->children[0];
    $right = $ast->children[1];
    $left_type = $right_type = null;
    $parent = $ast;
    $taint = false;

    // Deal with $a=$b=$c=1; and trickle the right-most value to the top through recursion
    if ($right instanceof \ast\Node && $right->kind == \ast\AST_ASSIGN) {
        $right_type = var_assign($file, $namespace, $right, $current_scope, $current_class, $vars);
    }

    if ($left instanceof \ast\Node && $left->kind != \ast\AST_VAR && $left->kind != \ast\AST_STATIC_PROP) {
        // Walk multi-level arrays and chained stuff
        // eg. $var->prop[1][2]->prop
        // $parent ends up as the node directly wrapping the innermost target.
        while ($left instanceof \ast\Node && $left->kind != \ast\AST_VAR) {
            $parent = $left;
            $left = $left->children[0];
        }
    }

    if ($left == null) {
        // No variable name for assignment??
        // This is generally for something like list(,$var) = [1,2]
        return $right_type;
    }

    if (!is_object($left)) {
        // TODO: "self" looks like a self::$var assignment - do something smart here
        // TODO: "static" is a static::$prop assignment
        // Every non-object target currently yields the same result.
        return $right_type;
    }

    if ($left->kind == \ast\AST_LIST) {
        return '';
    }

    // DEBUG
    if (!($left instanceof \ast\Node)) {
        echo "Check this {$file}\n" . ast_dump($left) . "\n" . ast_dump($parent) . "\n";
    }

    if ($left->kind == \ast\AST_STATIC_PROP && $left->children[0]->kind == \ast\AST_NAME) {
        if ($left->children[1] instanceof \ast\Node) {
            // This is some sort of self::${$key} thing, give up
            return $right_type;
        }
        $class_part = $left->children[0]->children[0];
        if ($left->children[0]->flags & \ast\flags\NAME_NOT_FQ) {
            $class_part = $namespace . $class_part;
        }
        $left_name = $class_part . '::' . $left->children[1];
    } else {
        $left_name = $left->children[0];
    }

    if ($right instanceof \ast\Node && $right->kind == \ast\AST_CLOSURE) {
        $right_type = 'callable:{closure ' . $right->id . '}';
        // $left_name is a node for $$var targets; a node cannot be used as an
        // array offset, so skip recording in that case (same rule as the
        // final block of this function).
        // NOTE(review): this branch does not look at $parent, so it also
        // records the closure type on the base variable for
        // `$obj->prop = function ...` / `$arr[..] = function ...` - confirm intended.
        if (!($left_name instanceof \ast\Node)) {
            $vars[$left_name]['type'] = $right_type;
            $vars[$left_name]['tainted'] = false;
            $vars[$left_name]['tainted_by'] = '';
        }
        return $right_type;
    }

    if (!$left_type && $right_type) {
        $left_type = $right_type;
    } elseif (!$left_type) {
        // We didn't figure out the type simply by looking at the left side of the assignment, check the right
        $right_type = node_type($file, $namespace, $right, $current_scope, $current_class, $taint);
        $left_type = $right_type;
    }

    if ($parent->kind == \ast\AST_DIM && $left->kind == \ast\AST_VAR) {
        // Generics check - can't really do this without some special hint forcing a strict generics type
        /*
        if(!($left->children[0] instanceof \ast\Node)) {
            if($right_type === "NULL") return ''; // You can assign null to any generic
            $var_type = $scope[$current_scope]['vars'][$left->children[0]]['type'] ?? '';
            if(!empty($var_type) && !nongenerics($var_type) && strpos($var_type, '[]') !== false) {
                if(!type_check($right_type, generics($var_type))) {
                    Log::err(Log::ETYPE, "Assigning {$right_type} to \${$left->children[0]} which is {$var_type}", $file, $ast->lineno);
                    return '';
                }
            } else {
                $left_type = mkgenerics($right_type);
            }
        }
        */
        $left_type = mkgenerics($right_type);
    }

    // $var->prop = ...
    // Skipped for $$var-> targets (variable name is itself a node).
    if ($parent->kind == \ast\AST_PROP && $left->kind == \ast\AST_VAR && !($left->children[0] instanceof \ast\Node)) {
        $prop = $parent->children[1];
        // Skipped for $var->$... (dynamic property name).
        if (!($prop instanceof \ast\Node)) {
            if ($left->children[0] == 'this') {
                // $this->prop =
                $class_name = $current_class['name'];
            } else {
                $class_name = find_class_name($file, $parent, $namespace, $current_class, $current_scope);
            }

            $lclass = strtolower($class_name);
            if (empty($lclass) || empty($classes[$lclass])) {
                return '';
            }

            // false means give up; a non-empty result replaces the class key
            // under which the property is recorded.
            $ltemp = find_property($file, $ast, $class_name, $prop, $current_class);
            if ($ltemp === false) {
                return $right_type;
            }
            if (!empty($ltemp)) {
                $lclass = $ltemp;
            }

            if (empty($classes[$lclass]['properties'][$prop])) {
                // First sighting of this property: register it as public.
                $classes[$lclass]['properties'][$prop] = [
                    'flags' => \ast\flags\MODIFIER_PUBLIC,
                    'name' => $prop,
                    'lineno' => 0,
                    'type' => $right_type,
                ];
            } else {
                if (!empty($classes[$lclass]['properties'][$prop]['dtype'])) {
                    // The property has a 'dtype' entry; check the assigned type against it.
                    if ($ast->children[0]->kind == \ast\AST_DIM) {
                        $right_type = mkgenerics($right_type);
                    }
                    if (!type_check(all_types($right_type), all_types($classes[$lclass]['properties'][$prop]['dtype']))) {
                        Log::err(Log::ETYPE, "property is declared to be {$classes[$lclass]['properties'][$prop]['dtype']} but was assigned {$right_type}", $file, $ast->lineno);
                    }
                }
                $classes[$lclass]['properties'][$prop]['type'] = merge_type(
                    $classes[$lclass]['properties'][$prop]['type'],
                    $right_type
                );
            }
            return $right_type;
        }
    }

    if ($left_name instanceof \ast\Node) {
        // TODO: Deal with $$var
    } else {
        $vars[$left_name]['type'] = $left_type ?? '';
        $vars[$left_name]['tainted'] = $taint;
        $vars[$left_name]['tainted_by'] = $taint ? "{$file}:{$left->lineno}" : '';
    }

    return $right_type;
}
/**
 * Processes an assignment node: works out the assigned type, records it on
 * the target variable in $vars, and returns the type of the right-hand side.
 *
 * Chained assignments (`$a = $b = 1`) are handled by recursing into the
 * right-hand side first and reusing the type it returns. When the target is
 * reached through an array dimension, list or property access, the recorded
 * type of the base variable is 'array', 'mixed' or 'object' respectively and
 * the right-hand side is not inspected (unless it is a nested assignment or
 * a closure).
 *
 * @param mixed $file          File identifier; used in debug output and as the taint origin.
 * @param mixed $ast           The assignment node; children[0] is the target, children[1] the value.
 * @param mixed $current_scope Passed through to node_type().
 * @param array $vars          Variable table (by reference); entries get 'type', 'tainted' and 'tainted_by'.
 *
 * @return mixed The type determined for the right-hand side (may be null when
 *               it was never computed).
 */
function var_assign($file, $ast, $current_scope, &$vars) {
    global $classes, $functions, $namespace, $scope;

    $left = $ast->children[0];
    $right = $ast->children[1];
    $left_type = $right_type = null;
    $parent = $ast;
    $taint = false;

    // Deal with $a=$b=$c=1; and trickle the right-most value to the top through recursion
    if ($right instanceof \ast\Node && $right->kind == \ast\AST_ASSIGN) {
        $right_type = var_assign($file, $right, $current_scope, $vars);
    }

    if ($left instanceof \ast\Node && $left->kind != \ast\AST_VAR && $left->kind != \ast\AST_STATIC_PROP) {
        // Walk multi-level arrays and chained stuff
        // eg. $var->prop[1][2]->prop
        // $parent ends up as the node directly wrapping the innermost target.
        while ($left instanceof \ast\Node && $left->kind != \ast\AST_VAR) {
            $parent = $left;
            $left = $left->children[0];
        }
        // If we see $var[..] then we know $var is an array
        if ($parent->kind == \ast\AST_DIM) {
            $left_type = 'array';
        } elseif ($parent->kind == \ast\AST_LIST) {
            $left_type = 'mixed';
        } elseif ($parent->kind == \ast\AST_PROP) {
            $left_type = "object";
        }
    }

    if ($left == null) {
        // No variable name for assignment??
        // This is generally for something like list(,$var) = [1,2]
        return $right_type;
    }

    if (!is_object($left)) {
        // TODO: "self" looks like a self::$var assignment - do something smart here
        // TODO: "static" is a static::$prop assignment
        // Every non-object target currently yields the same result.
        return $right_type;
    }

    // DEBUG
    if (!($left instanceof \ast\Node)) {
        echo "Check this {$file}\n" . ast_dump($left) . "\n" . ast_dump($parent) . "\n";
    }

    if ($left->kind == \ast\AST_STATIC_PROP && $left->children[0]->kind == \ast\AST_NAME) {
        if ($left->children[1] instanceof \ast\Node) {
            // This is some sort of self::${$key} thing, give up
            return $right_type;
        }
        $left_name = $namespace . $left->children[0]->children[0] . '::' . $left->children[1];
    } else {
        $left_name = $left->children[0];
    }

    if ($right instanceof \ast\Node && $right->kind == \ast\AST_CLOSURE) {
        $right_type = 'callable:{closure ' . $right->id . '}';
        // $left_name is a node for $$var targets; a node cannot be used as an
        // array offset, so skip recording in that case (same rule as the
        // final block of this function).
        if (!($left_name instanceof \ast\Node)) {
            $vars[$left_name]['type'] = $right_type;
            $vars[$left_name]['tainted'] = false;
            $vars[$left_name]['tainted_by'] = '';
        }
        return $right_type;
    }

    if (!$left_type && $right_type) {
        $left_type = $right_type;
    } elseif (!$left_type) {
        // We didn't figure out the type simply by looking at the left side of the assignment, check the right
        $right_type = node_type($file, $right, $current_scope, $taint);
        $left_type = $right_type;
    }

    if ($left_name instanceof \ast\Node) {
        // TODO: Deal with $$var
    } else {
        $vars[$left_name]['type'] = $left_type ?? 'mixed';
        $vars[$left_name]['tainted'] = $taint;
        $vars[$left_name]['tainted_by'] = $taint ? "{$file}:{$left->lineno}" : '';
    }

    return $right_type;
}
/**
 * Converts a parameter-list node into an array of parameter descriptors and
 * counts required versus optional parameters.
 *
 * Each child of $node is passed to node_param(); a child whose third child
 * (index 2, the default value) is null counts as required, otherwise as
 * optional. A required parameter seen after an optional one is reported via
 * Log::err(), unless the most recent optional parameter's default was the
 * constant null (in any letter case, e.g. `null`, `NULL`, `Null`).
 *
 * @param mixed $file      File identifier passed through to node_param() and Log::err().
 * @param mixed $node      The parameter-list node; expected to be an \ast\Node.
 * @param int   $req       Incremented (by reference) once per required parameter.
 * @param int   $opt       Incremented (by reference) once per optional parameter.
 * @param mixed $dc        Passed through to node_param() unchanged.
 * @param mixed $namespace Passed through to node_param() unchanged.
 *
 * @return array|null The node_param() results in order, or null when $node is
 *                    not an \ast\Node (after the assertion has fired).
 */
function node_paramlist($file, $node, &$req, &$opt, $dc, $namespace) {
    if ($node instanceof \ast\Node) {
        $result = [];
        $i = 0;
        // True while the most recently seen optional parameter defaults to null.
        $null_opt = false;

        foreach ($node->children as $param_node) {
            $result[] = node_param($file, $param_node, $dc, $i, $namespace);

            $default = $param_node->children[2];
            if ($default === null) {
                if ($opt && !$null_opt) {
                    Log::err(Log::EPARAM, "required arg follows optional", $file, $node->lineno);
                }
                $req++;
            } else {
                // The constant's name is compared case-insensitively so that
                // `= NULL` is treated the same as `= null`.
                $null_opt = $default instanceof \ast\Node
                    && $default->kind == \ast\AST_CONST
                    && $default->children[0] instanceof \ast\Node
                    && $default->children[0]->kind == \ast\AST_NAME
                    && is_string($default->children[0]->children[0])
                    && strtolower($default->children[0]->children[0]) === 'null';
                $opt++;
            }

            $i++;
        }

        return $result;
    }

    assert(false, ast_dump($node) . " was not an \\ast\\Node");
}
/**
 * Parses a single file into an AST and exports it through the given exporter.
 *
 * On success a file node (named after the file's base name) is stored, the
 * AST is exported, the two are linked with a "FILE_OF" relation, and a dump
 * of the AST is echoed. Progress is echoed before parsing starts.
 *
 * @param $path        Path to the file
 * @param $csvexporter A CSV exporter instance to use for exporting
 *                     the AST of the parsed file.
 *
 * @return The node index of the exported file node, or -1 if a
 *         ParseError was raised (the error is written to the error log).
 */
function parse_file($path, $csvexporter) : int {
    $finfo = new SplFileInfo($path);
    echo "Parsing file " . $finfo->getPathname() . PHP_EOL;

    try {
        // May throw a ParseError; nothing is exported in that case.
        $ast = ast\parse_file($path, 10);

        $file_node = $csvexporter->store_filenode($finfo->getFilename());
        $ast_root = $csvexporter->export($ast);
        $csvexporter->store_rel($file_node, $ast_root, "FILE_OF");

        echo ast_dump($ast) . PHP_EOL;

        return $file_node;
    } catch (ParseError $e) {
        error_log("[ERROR] In {$path}: " . $e->getMessage());

        return -1;
    }
}