/**
 * Tokenizes the input and parses the resulting tokens.
 *
 * The outcome is stored in $this->result and returned as well.
 *
 * @param mixed $str Input handed unchanged to the Tokenizer constructor
 *                   (presumably a string — confirm against Tokenizer).
 *
 * @return mixed Whatever parseTokens() produces.
 */
public function parse($str)
{
    $source = new Tokenizer($str);

    $this->result = $this->parseTokens($source->getTokens(), $this->baseData);

    return $this->result;
}
/**
 * Two consecutive expect() calls must hand back the first and the second
 * token delivered by the tokenizer, in that order.
 *
 * The mocked tokenizer is required to be asked for a token exactly three
 * times: two identifiers followed by an EOF token.
 */
public function testExpectStepsToNextToken()
{
    $foo = new Token(Token::IDENTIFIER, 'foo');
    $bar = new Token(Token::IDENTIFIER, 'bar');
    $eof = new Token(Token::EOF);

    $this->mockTokenizer
        ->expects($this->exactly(3))
        ->method('nextToken')
        ->will($this->onConsecutiveCalls($foo, $bar, $eof));

    $stream = new Stream($this->mockTokenizer);

    $this->assertSame($foo, $stream->expect(Token::IDENTIFIER, 'foo'));
    $this->assertSame($bar, $stream->expect(Token::IDENTIFIER, 'bar'));
}
/**
 * Tokenizes a string and parses it as a selector group.
 *
 * An exception raised by parseSelectorGroup() is rethrown as a new instance
 * of the same class. Its message is extended with the tokens consumed so far
 * and the token the stream currently peeks at; the original exception is
 * attached as the "previous" exception. Note that tokenizing itself happens
 * outside the try block, so tokenizer errors propagate unchanged.
 *
 * @param string $string Input passed to the tokenizer and the token stream.
 *
 * @return mixed Whatever parseSelectorGroup() returns.
 *
 * @throws \Exception Rethrown (same class, enriched message) when parsing fails.
 */
public function parse($string)
{
    $tokenizer = new Tokenizer();
    $stream = new TokenStream($tokenizer->tokenize($string), $string);

    try {
        return $this->parseSelectorGroup($stream);
    } catch (\Exception $e) {
        $class = get_class($e);

        throw new $class(
            sprintf(
                '%s at %s -> %s',
                $e->getMessage(),
                // Separator first, pieces second: the reversed argument order
                // is no longer accepted as of PHP 8.0.
                implode('', $stream->getUsed()),
                $stream->peek()
            ),
            0,
            $e
        );
    }
}
/**
 * @since 0.1
 *
 * {@inheritDoc}
 */
public function tokenize($string)
{
    // An optional upstream tokenizer runs first; its tokens are re-joined
    // with single spaces before the n-grams are built.
    if ($this->tokenizer !== null) {
        $string = implode(" ", $this->tokenizer->tokenize($string));
    }

    $ngrams = $this->createNGrams($string, $this->ngramSize, $this->withMarker);

    // createNGrams() may signal failure with false; callers always get an array.
    return $ngrams === false ? array() : $ngrams;
}
/**
 * Splits the input on runs of the separator characters listed in the
 * pattern below, dropping empty pieces.
 *
 * Characters found in $this->patternExemption are removed from the separator
 * list before the pattern is assembled.
 *
 * @since 0.1
 *
 * @param string $string
 *
 * @return array The pieces; an empty array when preg_split() fails.
 */
public function tokenize($string)
{
    // An optional upstream tokenizer runs first; its tokens are re-joined
    // with single spaces before splitting.
    if ($this->tokenizer !== null) {
        $string = implode(" ", $this->tokenizer->tokenize($string));
    }

    // NOTE(review): as it appears here the literal contains an unescaped
    // single quote followed by a backtick, which terminates the string early.
    // This looks like an encoding/normalisation artifact of full-width
    // characters — verify against the canonical source before relying on it.
    // The literal is deliberately left untouched.
    $pattern = str_replace($this->patternExemption, '', '_-・,、;:!?.。…◆★◇□■()【】《》〈〉;:“”"〃'`[]{}「」@*\/&#%`^+<=>|~≪≫─$"_\\-・,、;:!?.。()[\\]{}「」@*\\/&#%`^+<=>|~«»$"\\s');

    // -1 means "no limit"; passing null for the integer limit is deprecated
    // since PHP 8.1.
    $result = preg_split('/[' . $pattern . ']+/u', $string, -1, PREG_SPLIT_NO_EMPTY);

    if ($result === false) {
        $result = array();
    }

    return $result;
}
/**
 * Runs the tokenizer over $this->source and echoes each token until a token
 * whose key is Tokenizer::EOF_TYPE is reached (that token is not echoed).
 *
 * A LexerError is not propagated; its message is echoed instead.
 */
private function token()
{
    try {
        $lexer = new Tokenizer($this->source);

        for (
            $current = $lexer->nextToken();
            $current->key !== Tokenizer::EOF_TYPE;
            $current = $lexer->nextToken()
        ) {
            echo $current;
        }
    } catch (LexerError $e) {
        echo $e->getMessage();
    }
}
/**
 * @since 0.1
 *
 * {@inheritDoc}
 */
public function tokenize($string)
{
    // An optional upstream tokenizer runs first; its tokens are re-joined
    // with single spaces before splitting.
    if ($this->tokenizer !== null) {
        $string = implode(" ", $this->tokenizer->tokenize($string));
    }

    // Filter is based on https://github.com/kitech/cms-drupal/blob/master/modules/csplitter/filter.txt
    //
    // NOTE(review): as it appears here the literal contains an unescaped
    // single quote followed by a backtick, which terminates the string early.
    // This looks like an encoding/normalisation artifact of full-width
    // characters — verify against the canonical source before relying on it.
    // The literal is deliberately left untouched.
    $pattern = str_replace($this->patternExemption, '', '([\\s\\、,,。/?《》〈〉;:“”"〃'`[]{}\|~!-=_+)(()*…—─%¥…◆★◇□■【】#·啊吧把并被才从的得当对但到地而该过个给还和叫将就可来了啦里没你您哪那呢去却让使是时省随他我为现县向像象要由矣已以也又与于在之这则最乃\\/\\(\\)\\[\\]{}<>\\r\\n"]|(?<!\\d)\\.(?!\\d))');

    // -1 means "no limit"; passing null for the integer limit is deprecated
    // since PHP 8.1.
    $result = preg_split('/' . $pattern . '/u', $string, -1, PREG_SPLIT_NO_EMPTY);

    // preg_split() reports failure with false; callers always get an array.
    if ($result !== false) {
        return $result;
    }

    return array();
}
/**
 * Splits the input on whitespace and common punctuation, on dots that are
 * not surrounded by digits, and at the boundary between a letter and a
 * following number. Empty pieces are dropped.
 *
 * Characters found in $this->patternExemption are removed from the pattern
 * source before it is used.
 *
 * @since 0.1
 *
 * @param string $string
 *
 * @return array The pieces; an empty array when preg_split() fails.
 */
public function tokenize($string)
{
    // An optional upstream tokenizer runs first; its tokens are re-joined
    // with single spaces before splitting.
    if ($this->tokenizer !== null) {
        $string = implode(" ", $this->tokenizer->tokenize($string));
    }

    // (?<=\p{L})(?=\p{N}) to split alphanumeric and numeric
    $pattern = str_replace(
        $this->patternExemption,
        '',
        '([\\s\\-_,:;?!%\'\\|\\/\\(\\)\\[\\]{}<>\\r\\n"]|(?<!\\d)\\.(?!\\d)|(?<=\\p{L})(?=\\p{N}))'
    );

    // -1 means "no limit"; passing null for the integer limit is deprecated
    // since PHP 8.1.
    $result = preg_split('/' . $pattern . '/u', $string, -1, PREG_SPLIT_NO_EMPTY);

    if ($result === false) {
        $result = array();
    }

    return $result;
}
/**
 * Runs the full pipeline over a piece of code: tokenize, filter and repair
 * the tokens, set up the token tracker, analyze, then reset the context.
 *
 * Nothing is returned; results live in the object's collaborators.
 *
 * @param mixed $code Input handed to the tokenizer (presumably source text — confirm).
 */
public function parse($code)
{
    // Tokenize, then let the lexer filter and repair the raw token list.
    $prepared = $this->lexer->repair(
        $this->lexer->filterTokens(
            $this->tokenizer->tokenize($code)
        )
    );

    // Helpers that later stages read from this object.
    $this->tracker = new TokenTracker($prepared, $this->context);
    $this->tokens = $prepared;

    // Analysis, followed by a context reset.
    $this->analyzer->analyze($prepared);
    $this->context->reset();
}
/**
 * Test the parsing of a valid "raw" block in the string
 */
public function testValidParsingRawBlock()
{
    $tokenizer = new Tokenizer();
    $tokens = $tokenizer->execute('this is a {% raw %} inside {% endraw %} here');

    // The result should be an array with a nested array holding the
    // "raw" start/end pair
    $this->assertTrue(is_array($tokens) && isset($tokens[0][0]));
    $this->assertCount(2, $tokens[0]);

    $opening = $tokens[0][0];
    $closing = $tokens[0][1];

    // Both ends of the pair are block tokens
    $this->assertEquals('block', $opening->getType());
    $this->assertEquals('block', $closing->getType());

    // ... carrying the tag names as their token values
    $this->assertEquals('raw', $opening->getToken());
    $this->assertEquals('endraw', $closing->getToken());
}
/**
 * Creates constraint object from tokens.
 * <p>
 * When $constraintName is empty (but not false) the current token is taken
 * as the constraint name and the tokenizer is advanced past it and any
 * following white space. Passing false creates a constraint with an empty
 * name without consuming a token.
 * <p>
 * The body is the concatenated text of the following tokens, up to (not
 * including) a ',' on the starting nesting level, a ')' on a shallower
 * level, or the end of input.
 *
 * @param Tokenizer    $tokenizer      Tokens collection.
 * @param string|false $constraintName Optional name of the constraint.
 *
 * @return Constraint
 */
public static function create(Tokenizer $tokenizer, $constraintName = '')
{
    if ($constraintName === false) {
        $constraintName = '';
    } elseif (!$constraintName) {
        // No name supplied: read it from the current token.
        $constraintName = $tokenizer->getCurrentToken()->text;
        $tokenizer->nextToken();
        $tokenizer->skipWhiteSpace();
    }

    $constraint = new self($constraintName);

    $current = $tokenizer->getCurrentToken();
    $startLevel = $current->level;
    $body = '';

    do {
        // Stop at a separator on our own level or at the parenthesis that
        // closes an enclosing level.
        if (
            ($current->level == $startLevel && $current->text == ',')
            || ($current->level < $startLevel && $current->text == ')')
        ) {
            break;
        }

        $body .= $current->text;
        $current = $tokenizer->nextToken();
    } while (!$tokenizer->endOfInput());

    $constraint->setBody($body);

    return $constraint;
}
/**
 * test
 *
 * Runs a fixed list of smoke checks against the tokenizer and echoes
 * "success"/"failed" for each one, then dumps the last tokenizer result.
 *
 * For every query two things are checked: (1) an expected value is present
 * in one of the result's property groups, (2) the remaining keywords equal
 * "dress".
 *
 * @param string $filename filename to load to tokenizer class for example "tokenizer-forever-21.json"
 */
function test($filename)
{
    $t = new Tokenizer($filename);

    // query => array(property group, value expected inside that group)
    $cases = array(
        // simple texts and color
        array("red dress", "color", "red"),
        array("brick red dress", "color", "brick red"),
        array("blueish dress", "color", "blue"),
        // gender
        array("woman dress", "gender", "woman"),
        array("men dress", "gender", "men"),
        // pattern
        array("woman rust red white polka dot dress", "pattern", "polka dot"),
        // brand (note: these still only verify the pattern group)
        array("gap woman rust red white polka dot dress", "pattern", "polka dot"),
        array("forever 21 woman rust red white polka dot dress", "pattern", "polka dot"),
    );

    foreach ($cases as $case) {
        $s = $case[0];
        $group = $case[1];
        $expected = $case[2];

        $re = $t->tokenize($s);

        echo "{$s}:";
        echo " 1" . (in_array($expected, $re["properties"][$group], true) ? "success" : "failed");
        echo " 2" . ($re["keywords"] == "dress" ? "success" : "failed");
        echo "\n\n";
    }

    // Dump the result of the last query for manual inspection.
    var_dump($re);
}
/**
 * Builds the "CssLike" suite from the suites of its four components,
 * registered in the order TokenFilter, Tokenizer, Parser, CssLike.
 */
public function __construct()
{
    parent::__construct();
    $this->setName('CssLike');

    $filterSuite = TokenFilter::suite();
    $this->addTest($filterSuite);

    $tokenizerSuite = Tokenizer::suite();
    $this->addTest($tokenizerSuite);

    $parserSuite = Parser::suite();
    $this->addTest($parserSuite);

    $cssLikeSuite = CssLike::suite();
    $this->addTest($cssLikeSuite);
}
/**
 * Tokenizes the comment and records the number of potential vulgarisms.
 * Returns an array of words, provided any were found.
 *
 * The last element delivered by the shared tokenizer is not a word: it is
 * removed and stored, converted to an integer, in $this->vulg_prop.
 *
 * @param string $comment Comment to process.
 * @return array|false Array of words, or false if no words were found.
 */
protected function tokenize($comment)
{
    $words = self::$tokenizer->tokenize($comment);

    // pop off the information about potential vulgarisms
    $this->vulg_prop = intval(array_pop($words));

    return count($words) == 0 ? false : $words;
}
/**
 * Filtering a small tokenized file for T_FUNCTION must yield exactly one
 * token, and that token must be named T_FUNCTION.
 *
 * @covers spriebsch\PHPca\TokenFilterIterator::accept
 */
public function testAccept()
{
    $source = "<?php \n\n function hello()\n{\n print 'hello world';\n} \n ?>";
    $file = Tokenizer::tokenize('filename', $source);

    $matches = array();
    foreach (new TokenFilterIterator($file, T_FUNCTION) as $match) {
        $matches[] = $match;
    }

    $this->assertEquals(1, count($matches));
    $this->assertEquals('T_FUNCTION', $matches[0]->getName());
}
/**
 * Check self::$var
 *
 * When the "useSelfToken" check is active and we are inside a non-static
 * function of a class, looks two tokens ahead; if that token's text starts
 * with '$', a "useSelfToken" error is written.
 *
 * @param TokenInfo $token Not read by this method.
 */
private function _checkSelfToken($token)
{
    if (!$this->_isActive('useSelfToken')) {
        return;
    }

    // Only relevant inside a non-static method of a class.
    if (true !== $this->_inClass || true !== $this->_inFunction || false !== $this->_functionStatic) {
        return;
    }

    $upcoming = $this->tokenizer->peekNextNextToken()->text;

    if ('$' == $upcoming[0]) {
        $this->_writeError(
            'useSelfToken',
            $this->_getMessage("USE_SELF_TOKEN", 'self::' . $upcoming)
        );
    }
}
/**
 * Reads a file below $this->rootPath and returns the result of running the
 * tokenizer's parser over its contents.
 *
 * @param string $file Path appended to $this->rootPath.
 * @return array
 * @throws \RuntimeException When the file cannot be read.
 * @todo Implements .proto file parsing into PHP array
 * @todo Caching: the original sketch returned `include $this->cachePath . $file . '.php'`
 *       whenever that cache file was readable.
 */
public function parse($file)
{
    $filename = $this->rootPath . $file;

    $source = file_get_contents($filename);
    if ($source === false) {
        // Do not hand the boolean failure marker to the tokenizer.
        throw new \RuntimeException(sprintf('Unable to read file "%s"', $filename));
    }

    return Tokenizer::parse($source);
}
/**
 * Process a scalar value.
 *
 * Consumes tokens from $this->tokens and converts them to a PHP value:
 * - T_STRING: null/true/false (case-insensitive), otherwise whatever
 *   constant_replace() substitutes for the name;
 * - T_CONSTANT_ENCAPSED_STRING: the text between the quotes, run through
 *   stripslashes();
 * - an optional '-' and/or '+' sign followed by T_LNUMBER (int) or
 *   T_DNUMBER (float).
 *
 * @return mixed
 * @throws Exception when there's an unexpected value
 */
protected function process_value()
{
    # String literals
    if ($this->tokens->matches(T_STRING)) {
        $t_token = $this->tokens->pop();
        $t_value = $t_token[1];

        # PHP Standard string literals
        switch (strtolower($t_value)) {
            case 'null':
                return null;
            case 'true':
                return true;
            case 'false':
                return false;
        }

        # Defined constants: a changed value means the name was resolved
        $t_value = $this->constant_replace($t_value);
        if ($t_value !== $t_token[1]) {
            return $t_value;
        }

        throw new Exception("Unknown string literal '{$t_value}'");
    }

    # Strings: drop the surrounding quotes
    if ($this->tokens->matches(T_CONSTANT_ENCAPSED_STRING)) {
        $t_value = $this->tokens->pop();
        return stripslashes(substr($t_value[1], 1, -1));
    }

    # Numbers: a leading '-' negates, a '+' is simply consumed
    $t_negate = 1;
    if ($this->tokens->matches('-')) {
        $this->tokens->pop();
        $t_negate = -1;
    }
    if ($this->tokens->matches('+')) {
        $this->tokens->pop();
    }

    # Integers
    if ($this->tokens->matches(T_LNUMBER)) {
        $t_value = $this->tokens->pop();
        return $t_negate * (int) $t_value[1];
    }

    # Floating point
    if ($this->tokens->matches(T_DNUMBER)) {
        $t_value = $this->tokens->pop();
        return $t_negate * (float) $t_value[1];
    }

    # Anything else
    throw new Exception("Unexpected value " . $this->tokens->value());
}
/**
 * Tokenizes an expression, feeds every token to _processToken(), then moves
 * whatever is left on the operator stack to the output queue.
 *
 * @param mixed $expression Input handed to Tokenizer::tokenizeExpression().
 * @return Queue The output queue.
 * @throws MismatchingParenthesisException When a parenthesis is still on the
 *                                         operator stack after all tokens were processed.
 */
public function parse($expression)
{
    $pending = Tokenizer::tokenizeExpression($expression);

    while ($pending->hasItems()) {
        $this->_processToken($pending->shift());
    }

    // Drain the operator stack; a parenthesis left at this point was never closed.
    while ($this->_operator_stack->hasOperators()) {
        $leftover = $this->_operator_stack->pop();

        if ($leftover instanceof Parenthesis) {
            throw new MismatchingParenthesisException();
        }

        $this->_output_queue->enqueue($leftover);
    }

    return $this->_output_queue;
}
/**
 * Creates sequence object from tokens.
 * <p>
 * Current position should point to the name of the sequence. After the name
 * is read the tokenizer state is reset, and the text of every token up to
 * the end of input is concatenated into the sequence body.
 *
 * @param Tokenizer $tokenizer Tokens collection.
 *
 * @return Sequence
 */
public static function create(Tokenizer $tokenizer)
{
    $sequence = new self($tokenizer->getCurrentToken()->text);

    $tokenizer->resetState();

    for ($body = ''; !$tokenizer->endOfInput(); $tokenizer->nextToken()) {
        $body .= $tokenizer->getCurrentToken()->text;
    }

    $sequence->setBody($body);

    return $sequence;
}
/**
 * Read (and remove) the UNA segment from the passed string.
 *
 * When the message starts with "UNA", the six characters that follow are
 * applied to the tokenizer as its special characters, and the segment plus
 * any line breaks directly after it are stripped from $message. Messages
 * without a leading "UNA" are left untouched.
 *
 * @param string    $message   The EDI message to extract the UNA from (modified in place)
 * @param Tokenizer $tokenizer The tokenizer to configure
 *
 * @return void
 */
protected function setupSpecialCharacters(&$message, Tokenizer $tokenizer)
{
    if (substr($message, 0, 3) !== "UNA") {
        return;
    }

    # Get the character definitions
    $chars = mb_substr($message, 3, 6);

    # Remove the UNA segment from the original message
    $message = ltrim(mb_substr($message, 9), "\r\n");

    $tokenizer->setComponentSeparator(mb_substr($chars, 0, 1));
    $tokenizer->setDataSeparator(mb_substr($chars, 1, 1));
    $tokenizer->setDecimalPoint(mb_substr($chars, 2, 1));
    $tokenizer->setEscapeCharacter(mb_substr($chars, 3, 1));
    # The character at offset 4 is skipped on purpose: nothing is configured from it
    $tokenizer->setSegmentTerminator(mb_substr($chars, 5, 1));
}
/**
 * Tokenizes the input and parses the resulting token stream.
 *
 * @param mixed $input Input handed to the tokenizer.
 *
 * @return mixed Whatever _parseTokenStream() returns.
 */
public function parse($input)
{
    return $this->_parseTokenStream(
        $this->_tokenizer->tokenize($input)
    );
}
/**
 * seekNamespace() must position the file on the opening curly brace of the
 * requested namespace: line 18 for B\C, then line 4 for A\B in the
 * blocks.php fixture.
 *
 * @covers spriebsch\PHPca\File::seekNamespace
 */
public function testSeekNamespace()
{
    $fixture = file_get_contents(__DIR__ . '/_testdata/File/blocks.php');
    $file = Tokenizer::tokenize('test.php', $fixture);
    $file->rewind();

    $file->seekNamespace('B\\C');
    $this->assertEquals('T_OPEN_CURLY', $file->current()->getName());
    $this->assertEquals(18, $file->current()->getLine());

    // Seeking a second namespace without rewinding in between.
    $file->seekNamespace('A\\B');
    $this->assertEquals('T_OPEN_CURLY', $file->current()->getName());
    $this->assertEquals(4, $file->current()->getLine());
}
/**
 * For the tokenizerClasses.php fixture, exactly Foo\class2 and Foo\class3
 * must be reported as instantiable class names, in that order.
 */
public function testGetClassNames()
{
    $fixture = file_get_contents(__DIR__ . '/resources/tokenizerClasses.php');
    $tokenizer = new Tokenizer($fixture);

    $expected = array('Foo\\class2', 'Foo\\class3');

    $this->assertSame($expected, $tokenizer->getInstantiableClassNames());
}
/**
 * Tokenizes the input with a tokenizer that is created on first use and
 * kept in the static $tokenizer property for later calls.
 *
 * The tokenizer is configured with one regular expression per token type
 * and the 'u' flag.
 *
 * @param mixed $s Input handed to the tokenizer.
 *
 * @return mixed Whatever the tokenizer returns.
 */
public function parse($s)
{
    if (!self::$tokenizer) {
        $patterns = array(
            self::T_WHITESPACE => '\\s+',
            self::T_COMMENT => '(?s)/\\*.*?\\*/',
            self::T_STRING => Parser::RE_STRING,
            self::T_KEYWORD => '(?:true|false|null|and|or|xor|clone|new|instanceof|return|continue|break|[A-Z_][A-Z0-9_]{2,})(?![\\w\\pL_])',
            self::T_CAST => '\\((?:expand|string|array|int|integer|float|bool|boolean|object)\\)',
            self::T_VARIABLE => '\\$[\\w\\pL_]+',
            self::T_NUMBER => '[+-]?[0-9]+(?:\\.[0-9]+)?(?:e[0-9]+)?',
            self::T_SYMBOL => '[\\w\\pL_]+(?:-[\\w\\pL_]+)*',
            self::T_CHAR => '::|=>|->|\\+\\+|--|<<|>>|<=|>=|===|!==|==|!=|<>|&&|\\|\\||\\?\\?|[^"\']',
        );

        self::$tokenizer = new Tokenizer($patterns, 'u');
    }

    return self::$tokenizer->tokenize($s);
}
/**
 * Reduces a token list to a single root token.
 *
 * A string is tokenized first; anything else is taken to be an already
 * formed token list. For every operator in Token::$tokenStickness (presumably
 * ordered by binding strength — confirm), tokens of that operator type that
 * have no children yet are reduced one at a time until none is left.
 * Operators flagged with reversedOrderParsing are searched in the reversed
 * list.
 *
 * @param mixed $expression Expression string or token list.
 *
 * @return mixed The token list, holding at most one root.
 *
 * @throws Exception When more than one root remains after reduction.
 */
static function analyse($expression)
{
    if (is_string($expression)) {
        $tokenizer = new Tokenizer();
        $tokens = $tokenizer->tokenize($expression);
    } else {
        // an already formed ListOfToken
        $tokens = $expression;
    }

    foreach (Token::$tokenStickness as $operator) {
        $tokensToParse = $operator->reversedOrderParsing ? $tokens->reverse() : $tokens;

        do {
            $operatorFound = false;

            foreach ($tokensToParse as $token) {
                if ($token->typeOf($operator) && !$token->hasChildren()) {
                    $operatorFound = true;
                    $token->reduce($tokens);
                    break; // redo the foreach because tokens changed
                }
            }
        } while ($operatorFound);
    }

    // TODO: catch the error when it occurs
    // e.g. "1+2)*3" gives two roots "+(1)(2) *())(3)"
    // in fact, the "times" should not have a parenthesis as a child
    if ($tokens->count() > 1) {
        throw new Exception("Analysis error '{$tokens}'\nhas more than one root");
    }

    return $tokens;
}
/**
 * Render the Mustache template
 *
 * Loads the parameters, tokenizes $this->template with the configured
 * delimiters, resets the line counter to 1 and parses the tokens against
 * the given scope.
 *
 * @param array $scope (optional) The scope to start working from on the parameters.
 * @return string Returns the parsed template
 * @since 1.0-sofia
 */
public function render($scope = array())
{
    $this->loadParameters();

    $lexer = new Tokenizer();
    $lexer->changeDelimiters($this->openDelimiter, $this->closeDelimiter);
    $templateTokens = $lexer->parse($this->template);

    $this->line = 1;

    return $this->parse($scope, $templateTokens);
}
function parse() { // scan all tokens for ($i = 0, $tokencount = count($this->tokens); $i < $tokencount; $i++, $this->tif++) { if (is_array($this->tokens[$i])) { $token_name = $this->tokens[$i][0]; $token_value = $this->tokens[$i][1]; $line_nr = $this->tokens[$i][2]; // add preloader info for big files if ($line_nr % PRELOAD_SHOW_LINE == 0) { echo $GLOBALS['fit'] . '|' . $GLOBALS['file_amount'] . '|' . $this->file_pointer . ' (line ' . $line_nr . ')|' . $GLOBALS['timeleft'] . '|' . "\n"; @ob_flush(); flush(); } # debug #echo "file:".$file_name.",line:".$line_nr.",token:".token_name($token_name).","; #echo "value:".htmlentities($token_value).","; #echo "in_function:".$in_function.",in_class:".$in_class."<br>"; /************************* T_VARIABLE *************************/ if ($token_name === T_VARIABLE) { // $var() if ($this->tokens[$i + 1][0] === '(') { $this->variable_scan($i, 0, 'eval', 'Userinput is used as dynamic function name. Arbitrary functions may be called.'); } else { if (($this->tokens[$i - 1] === '$' || $this->tokens[$i - 1] === '{' && $this->tokens[$i - 2] === '$') && ($this->tokens[$i + 1] === '=' || in_array($this->tokens[$i + 1][0], Tokens::$T_ASSIGNMENT))) { $this->variable_scan($i, $this->tokens[$i - 1] === '{' ? 2 : 1, 'extract', 'Userinput is used to build the variable name. 
Arbitrary variables may be overwritten/initialized which may lead to further vulnerabilities.'); } else { if ($this->tokens[$i - 1][0] === T_AS || $this->tokens[$i - 1][0] === T_DOUBLE_ARROW && $this->tokens[$i - 2][0] === T_VARIABLE && $this->tokens[$i - 3][0] === T_AS) { $c = 3; while ($this->tokens[$i - $c][0] !== T_FOREACH) { $c++; if ($i - $c < 0 || $this->tokens[$i - $c] === ';') { addError('Could not find FOREACH token before AS token', array_slice($this->tokens, $i - 5, 10), $this->tokens[$i - 1][2], $this->file_pointer); break; } } $this->variable_add($token_value, array_slice($this->tokens, $i - $c, $c + Analyzer::getBraceEnd($this->tokens, $i)), '', 0, 0, $line_nr, $i, isset($this->tokens[$i][3]) ? $this->tokens[$i][3] : array()); } else { if ($this->tokens[$i - 2][0] === T_FOR && ($this->tokens[$i + 1] === '=' || in_array($this->tokens[$i + 1][0], Tokens::$T_ASSIGNMENT))) { $c = 1; $newbraceopen = 1; $firstsemi = 0; // do not use getBraceEnd() here, because we dont want to stop at ';' in for(;;) while ($newbraceopen !== 0) { // watch function calls in function call if ($this->tokens[$i + $c] === '(') { $newbraceopen++; } else { if ($this->tokens[$i + $c] === ')') { $newbraceopen--; } else { if ($this->tokens[$i + $c] === ';' && $firstsemi < 1) { $firstsemi = $c; } } } $c++; if (!isset($this->tokens[$i + $c])) { addError('Could not find closing parenthesis of for-statement.', array_slice($this->tokens, $i - 2, 10), $this->tokens[$i - 2][2], $this->file_pointer); break; } } // overwrite value of first var because it is looped // this is an assumption, other vars could be declared for($var1=1;$var2=2;...) $this->tokens[$i + 2][0] = T_ENCAPSED_AND_WHITESPACE; $this->tokens[$i + 2][1] = '*'; $this->variable_add($token_value, array_slice($this->tokens, $i - 2, $c + 2), '', 1, 2 + $firstsemi, $line_nr, $i, isset($this->tokens[$i][3]) ? 
$this->tokens[$i][3] : array()); } else { if ($this->tokens[$i + 1] === '=' || in_array($this->tokens[$i + 1][0], Tokens::$T_ASSIGNMENT)) { $vardeclare = array(); // $var = array(1,2,3,4); if ($this->tokens[$i + 2][0] === T_ARRAY && $this->tokens[$i + 3] === '(' && $this->tokens[$i + 4] !== ')') { $d = 4; $keyindex = 0; $newbraceopen = 1; $keytokens = array(); $valuetokens = array(); while (!($newbraceopen === 0 || $this->tokens[$i + $d] === ';') && $keyindex < MAX_ARRAY_ELEMENTS) { // count parameters if ($newbraceopen === 1 && ($this->tokens[$i + $d] === ',' || $this->tokens[$i + $d] === ')')) { $newindexvar = $this->tokens[$i]; $newindexvar[3][] = empty($keytokens) ? $keyindex : $keytokens; $this->variable_add($token_value, array_merge(array($newindexvar, $this->tokens[$i + 1]), $valuetokens), ' array() ', in_array($this->tokens[$i + 1][0], Tokens::$T_ASSIGNMENT) ? 0 : 1, 0, $line_nr, $i, isset($this->tokens[$i][3]) ? $this->tokens[$i][3] : array(), empty($keytokens) ? $keyindex : $keytokens); $keyindex++; $keytokens = array(); $valuetokens = array(); } else { if ($this->tokens[$i + $d] === '(') { $newbraceopen++; } else { if ($this->tokens[$i + $d] === ')') { $newbraceopen--; } else { if ($this->tokens[$i + $d][0] === T_DOUBLE_ARROW) { $keytokens = $valuetokens; $valuetokens = array(); } else { $valuetokens[] = $this->tokens[$i + $d]; } } } } $d++; if (!isset($this->tokens[$i + $d])) { addError('Could not find closing parenthesis of array()-declaration.', array_slice($this->tokens, $i, 10), $this->tokens[$i + 2][2], $this->file_pointer); break; } } $vardeclare['end'] = Analyzer::getBraceEnd($this->tokens, $i) + 1; // $var = anything; } else { $this->variable_add($token_value, array_slice($this->tokens, $i, $vardeclare['end'] = Analyzer::getBraceEnd($this->tokens, $i) + 1), '', in_array($this->tokens[$i + 1][0], Tokens::$T_ASSIGNMENT) ? 0 : 1, 0, $line_nr, $i, isset($this->tokens[$i][3]) ? 
$this->tokens[$i][3] : array()); } // save var and var declare scope for data leak scan $vardeclare['start'] = $i; $vardeclare['name'] = $token_value; $vardeclare['linenr'] = $line_nr; $vardeclare['end'] += $i - 1; } } } } } // $class->var //else if ($token_name === T_STRING && $tokens[$i-1][0] === T_OBJECT_OPERATOR && $tokens[$i-2][0] === T_VARIABLE) // add user input variables to global finding list if (in_array($token_value, Sources::$V_USERINPUT)) { if (isset($this->tokens[$i][3])) { if (!is_array($this->tokens[$i][3][0])) { $GLOBALS['user_input'][$token_value . '[' . $this->tokens[$i][3][0] . ']'][$this->file_pointer][] = $line_nr; } else { $GLOBALS['user_input'][$token_value . '[' . Analyzer::get_tokens_value($this->file_pointer, $this->tokens[$i][3][0], $this->in_function ? $this->var_declares_local : $this->var_declares_global, $this->var_declares_global, $i) . ']'][$this->file_pointer][] = $line_nr; } } else { $GLOBALS['user_input'][$token_value][$this->file_pointer][] = $line_nr; } // count found userinput in function for graphs if ($this->in_function) { $GLOBALS['user_functions_offset'][$this->function_obj->name][5]++; } else { $GLOBALS['user_functions_offset']['__main__'][5]++; } } } else { if (in_array($token_name, Tokens::$T_FUNCTIONS) || in_array($token_name, Tokens::$T_XSS) && ($_POST['vector'] == 'client' || $_POST['vector'] == 'xss' || $_POST['vector'] == 'all')) { $class = ''; /************************* T_STRING *************************/ if ($token_name === T_STRING && $this->tokens[$i + 1] === '(') { // define("FOO", $_GET['asd']); if ($token_value === 'define') { $c = 1; while ($this->tokens[$i + $c] !== ',') { $c++; if ($this->tokens[$i + $c] === ';' || !isset($this->tokens[$i + $c])) { addError('Second parameter of define() is missing.', array_slice($this->tokens, $i, $c), $this->tokens[$i][2], $this->file_pointer); break; } } $this->variable_add(str_replace(array('"', "'"), '', $this->tokens[$i + 2][1]), array_slice($this->tokens, $i, 
Analyzer::getBraceEnd($this->tokens, $i) + 1), ' define() ', $c, 0, $line_nr, $i); } else { if ($token_value === 'ini_set') { $setting = str_replace(array("'", '"'), '', $this->tokens[$i + 2][1]); // ini_set('include_path', 'foo/bar') if ($setting === 'include_path') { $path = Analyzer::get_tokens_value($this->file_pointer, array_slice($this->tokens, $i + 4, Analyzer::getBraceEnd($this->tokens, $i + 4) + 1), $this->in_function ? $this->var_declares_local : $this->var_declares_global, $this->var_declares_global, $i); $this->include_paths = array_unique(array_merge($this->include_paths, Analyzer::get_ini_paths($path))); } } else { if ($token_value === 'set_include_path') { $path = Analyzer::get_tokens_value($this->file_pointer, array_slice($this->tokens, $i + 1, Analyzer::getBraceEnd($this->tokens, $i + 1) + 1), $this->in_function ? $this->var_declares_local : $this->var_declares_global, $this->var_declares_global, $i); $this->include_paths = array_unique(array_merge($this->include_paths, Analyzer::get_ini_paths($path))); } else { if ($token_value === 'set_error_handler') { $token_value = str_replace(array('"', "'"), '', $this->tokens[$i + 2][1]); } else { if ($token_value === 'compact' && $this->tokens[$i - 2][0] === T_VARIABLE) { $f = 2; while ($this->tokens[$i + $f] !== ')') { // for all array keys save new variable declarations if ($this->tokens[$i + $f][0] === T_CONSTANT_ENCAPSED_STRING) { $this->variable_add($this->tokens[$i - 2][1], array(array(T_VARIABLE, $this->tokens[$i - 2][1], $line_nr, array(str_replace(array('"', "'"), '', $this->tokens[$i + $f][1]))), '=', array(T_VARIABLE, '$' . 
str_replace(array('"', "'"), '', $this->tokens[$i + $f][1]), $line_nr), ';'), ' compact() ', 2, 0, $line_nr, $i, $tokens[$i - 2][3], str_replace(array('"', "'"), '', $this->tokens[$i + $f][1])); } $f++; if ($this->tokens[$i + $f] === ';' || !isset($this->tokens[$i + $f])) { addError('Closing parenthesis of compact() is missing.', array_slice($this->tokens, $i, $f), $this->tokens[$i][2], $this->file_pointer); break; } } } else { if ($token_value === 'preg_match' || $token_value === 'preg_match_all') { $c = 2; $parameter = 1; $newbraceopen = 1; while ($newbraceopen !== 0) { if (is_array($this->tokens[$i + $c]) && $this->tokens[$i + $c][0] === T_VARIABLE && $parameter == 3) { // add variable declaration to beginning of varlist // fake assignment parameter so it will not get traced $this->variable_add($this->tokens[$i + $c][1], array_slice($this->tokens, $i, Analyzer::getBraceEnd($this->tokens, $i + 2) + 3), ' preg_match() ', 0, $c - 1, $this->tokens[$i + $c][2], $i, isset($this->tokens[$i + $c][3]) ? $this->tokens[$i + $c][3] : array()); } else { if ($newbraceopen === 1 && $this->tokens[$i + $c] === ',') { $parameter++; } else { if ($this->tokens[$i + $c] === '(') { $newbraceopen++; } else { if ($this->tokens[$i + $c] === ')') { $newbraceopen--; } else { if ($this->tokens[$i + $c] === ';' || !isset($this->tokens[$i + $c])) { addError('Closing parenthesis of ' . $token_value . '() is missing.', array_slice($this->tokens, $i, $c), $this->tokens[$i][2], $this->file_pointer); break; } } } } } $c++; } } else { if ($token_value === 'import_request_variables') { // add register_globals implementation $this->variable_add('register_globals', array_slice($this->tokens, $i, Analyzer::getBraceEnd($this->tokens, $i + 1) + 1), 'register_globals implementation', 0, 0, $line_nr, $i, isset($this->tokens[$i][3]) ? 
$this->tokens[$i][3] : array()); } else { if ($token_value === 'parse_str') { $c = 2; $parameter = 1; $newbraceopen = 1; while ($newbraceopen !== 0) { if (is_array($this->tokens[$i + $c]) && $this->tokens[$i + $c][0] === T_VARIABLE && $parameter == 2) { // add variable declaration to beginning of varlist // fake assignment parameter so it will not get traced $this->variable_add($this->tokens[$i + $c][1], array_slice($this->tokens, $i, Analyzer::getBraceEnd($this->tokens, $i + 2) + 3), ' parse_str() ', 0, $c - 1, $this->tokens[$i + $c][2], $i, isset($this->tokens[$i + $c][3]) ? $this->tokens[$i + $c][3] : array()); } else { if ($newbraceopen === 1 && $this->tokens[$i + $c] === ',') { $parameter++; } else { if ($this->tokens[$i + $c] === '(') { $newbraceopen++; } else { if ($this->tokens[$i + $c] === ')') { $newbraceopen--; } else { if ($this->tokens[$i + $c] === ';' || !isset($this->tokens[$i + $c])) { addError('Closing parenthesis of ' . $token_value . '() is missing.', array_slice($this->tokens, $i, $c), $this->tokens[$i][2], $this->file_pointer); break; } } } } } $c++; } } } } } } } } } //add interesting function calls to info gathering if (isset($this->info_functions[$token_value])) { $GLOBALS['info'][] = $this->info_functions[$token_value]; } else { if ($this->tokens[$i - 1][0] !== T_NEW && isset($this->vuln_classes[$token_value])) { $this->class_vars[$this->tokens[$i - 2][1]] = $token_value; } else { // $classvar->bla() if ($this->tokens[$i - 1][0] === T_OBJECT_OPERATOR) { $classvar = $this->tokens[$i - 2][1]; if ($classvar[0] !== '$') { $classvar = '$' . $classvar; } $class = $classvar === '$this' || $classvar === '$self' ? $this->class_name : $this->class_vars[$classvar]; } else { if ($this->tokens[$i - 1][0] === T_DOUBLE_COLON) { $class = $this->tokens[$i - 2][1]; } } // save function call for graph if (isset($GLOBALS['user_functions_offset'][($class ? $class . '::' : '') . $token_value])) { $GLOBALS['user_functions_offset'][($class ? $class . '::' : '') . 
$token_value][3][] = array($this->file_pointer, $line_nr); if ($this->in_function) { $GLOBALS['user_functions_offset'][$this->function_obj->name][4][] = $token_value; } else { $GLOBALS['user_functions_offset']['__main__'][4][] = $token_value; } } // check if token is function call that affects variable scope (global) if (isset($this->globals_from_function[$token_value])) { // put all previously saved global var assignments to global scope foreach ($this->globals_from_function[$token_value] as $var_name => $new_vars) { foreach ($new_vars as $new_var) { $new_var->comment = $new_var->comment . " by {$token_value}()"; if (!isset($this->var_declares_global[$var_name])) { $this->var_declares_global[$var_name] = array($new_var); } else { array_unshift($this->var_declares_global[$var_name], $new_var); } } } } } } } else { if (in_array($token_name, Tokens::$T_INCLUDES) && !$this->in_function) { $GLOBALS['count_inc']++; // include('xxx') if ($this->tokens[$i + 1] === '(' && $this->tokens[$i + 2][0] === T_CONSTANT_ENCAPSED_STRING && $this->tokens[$i + 3] === ')' || is_array($this->tokens[$i + 1]) && $this->tokens[$i + 1][0] === T_CONSTANT_ENCAPSED_STRING && $this->tokens[$i + 2] === ';') { // include('file') if ($this->tokens[$i + 1] === '(') { $inc_file = substr($this->tokens[$i + 2][1], 1, -1); $skip = 5; } else { $inc_file = substr($this->tokens[$i + 1][1], 1, -1); $skip = 3; } } else { $inc_file = Analyzer::get_tokens_value($this->file_pointer, array_slice($this->tokens, $i + 1, $c = Analyzer::getBraceEnd($this->tokens, $i + 1) + 1), $this->in_function ? $this->var_declares_local : $this->var_declares_global, $this->var_declares_global, $i); // in case the get_var_value added several php files, take the first $several = explode('.php', $inc_file); if (count($several) > 1) { $try_file = $several[0] . 
'.php'; } $skip = $c + 1; // important to save $c+1 here } $try_file = $inc_file; // try absolute include path foreach ($this->include_paths as $include_path) { if (is_file("{$include_path}/{$try_file}")) { $try_file = "{$include_path}/{$try_file}"; break; } } // if dirname(__FILE__) appeared it was an absolute path if (!is_file($try_file)) { // check relativ path $try_file = dirname($this->file_name) . '/' . $inc_file; if (!is_file($try_file)) { $other_try_file = dirname($this->file_pointer) . '/' . $inc_file; // if file can not be found check include_path if set if (!is_file($other_try_file)) { if (isset($this->include_paths[0])) { foreach ($this->include_paths as $include_path) { if (is_file(dirname($this->file_name) . '/' . $include_path . '/' . $inc_file)) { $try_file = dirname($this->file_name) . '/' . $include_path . '/' . $inc_file; break; } else { if (is_file(dirname($this->file_pointer) . '/' . $include_path . '/' . $inc_file)) { $try_file = dirname($this->file_pointer) . '/' . $include_path . '/' . 
$inc_file; break; } } } } // if still not a valid file, look a directory above if (!is_file($try_file)) { $try_file = str_replace('\\', '/', $try_file); $pos = strlen($try_file); // replace each found / with /../, start from the end of file name for ($c = 1; $c < substr_count($try_file, '/'); $c++) { $pos = strripos(substr($try_file, 1, $pos), '/'); if (is_file(substr_replace($try_file, '/../', $pos + 1, 1))) { $try_file = substr_replace($try_file, '/../', $pos + 1, 1); break; } } if (!is_file($try_file)) { $try_file = str_replace('\\', '/', $other_try_file); $pos = strlen($try_file); // replace each found / with /../, start from the end of file name for ($c = 1; $c < substr_count($try_file, '/'); $c++) { $pos = strripos(substr($try_file, 1, $pos), '/'); if (is_file(substr_replace($try_file, '/../', $pos + 1, 1))) { $try_file = substr_replace($try_file, '/../', $pos + 1, 1); break; } } // if still not a valid file, guess it if (!is_file($try_file)) { $searchfile = basename($try_file); if (!strstr($searchfile, '$_USERINPUT')) { foreach ($GLOBALS['files'] as $cfile) { if (basename($cfile) == $searchfile) { $try_file = $cfile; break; } } } } } } } else { $try_file = $other_try_file; } } } $try_file_unreal = $try_file; $try_file = realpath($try_file); // file is valid if (!empty($try_file_unreal) && !empty($try_file) && ($inc_lines = @file($try_file_unreal))) { // file name has not been included if (!in_array($try_file, $this->inc_map)) { // Tokens $tokenizer = new Tokenizer($try_file); $inc_tokens = $tokenizer->tokenize(implode('', $inc_lines)); unset($tokenizer); // if(include('file')) { - include tokens after { and not into the condition :S if ($this->in_condition) { $this->tokens = array_merge(array_slice($this->tokens, 0, $this->in_condition + 1), $inc_tokens, array(array(T_INCLUDE_END, 0, 1)), array_slice($this->tokens, $this->in_condition + 1)); } else { // insert included tokens in current tokenlist and mark end $this->tokens = 
array_merge(array_slice($this->tokens, 0, $i + $skip), $inc_tokens, array(array(T_INCLUDE_END, 0, 1)), array_slice($this->tokens, $i + $skip)); } $tokencount = count($this->tokens); // set lines pointer to included lines, save last pointer // (the following tokens will be the included ones) $this->lines_stack[] = $inc_lines; $this->lines_pointer = end($this->lines_stack); // tokennr in file $this->tif_stack[] = $this->tif; $this->tif = -$skip; // set the current file pointer $this->file_pointer = $try_file; if (!isset($GLOBALS['file_sinks_count'][$this->file_pointer])) { $GLOBALS['file_sinks_count'][$this->file_pointer] = 0; } echo $GLOBALS['fit'] . '|' . $GLOBALS['file_amount'] . '|' . $this->file_pointer . '|' . $GLOBALS['timeleft'] . '|' . "\n"; @ob_flush(); flush(); $this->comment = basename($inc_file); $this->inc_file_stack[] = $try_file; // build include map for file list $this->inc_map[] = $try_file; // all basic includes } } else { $GLOBALS['count_inc_fail']++; // add information about include error in debug mode if ($GLOBALS['verbosity'] == 5) { // add include command to output $found_value = highlightline(array_slice($this->tokens, $i, $skip), $this->comment, $line_nr, $token_value); $new_find = new InfoTreeNode($found_value); $new_find->lines[] = $line_nr; $new_find->filename = $this->file_pointer; $new_find->title = "Include error: tried to include: " . $try_file_unreal; if (isset($GLOBALS['output'][$this->file_name]['inc'])) { $GLOBALS['output'][$this->file_name]['inc']->treenodes[] = $new_find; } else { $new_block = new VulnBlock($this->tif . '_' . $this->tokens[$i][2] . '_' . 
basename($this->file_pointer), 'Debug'); $new_block->treenodes[] = $new_find; $new_block->vuln = true; $GLOBALS['output'][$this->file_name]['inc'] = $new_block; } } } } } /************************* TAINT ANALYSIS *************************/ if (isset($this->scan_functions[$token_value]) && $GLOBALS['verbosity'] != 5 && (empty($class) || ($this->in_function && is_array($function_obj->parameters) && in_array($classvar, $function_obj->parameters) || @in_array($token_value, $this->vuln_classes[$class])))) { if (!$this->already_scanned($i)) { // build new find $new_find = new VulnTreeNode(); $new_find->name = $token_value; $new_find->lines[] = $line_nr; // add dependencies (already here, because checked during var trace foreach ($this->dependencies as $deplinenr => $dependency) { if (!empty($dependency)) { $new_find->dependencies[$deplinenr] = $dependency; } } // count sinks $GLOBALS['file_sinks_count'][$this->file_pointer]++; if ($this->in_function) { $GLOBALS['user_functions_offset'][$this->function_obj->name][6]++; } else { $GLOBALS['user_functions_offset']['__main__'][6]++; } $parameter = 1; $var_counter = 0; $vulnparams = array(0); $has_vuln_parameters = false; $parameter_has_userinput = false; $parameter_func_depend = false; $secured_by_start = false; // function calls without quotes (require $inc;) --> no brace count $parentheses_open = $this->tokens[$i + 1] === '(' ? 1 : -2; // -2: detection of braces doesnt matter $parentheses_save = -1; $in_securing = false; $ignore_securing = false; $c = $this->tokens[$i + 1] === '(' ? 2 : 1; // important $tainted_vars = array(); $reconstructstr = ''; $addtitle = ''; $this->securedby = array(); // get all variables in parameter list between (...) 
// not only until ';' because: system(get($a),$b,strstr($c)); while ($parentheses_open !== 0 && $this->tokens[$i + $c] !== ';') { $this_one_is_secure = false; if (is_array($this->tokens[$i + $c])) { // scan variables and constants if ($this->tokens[$i + $c][0] === T_VARIABLE && $this->tokens[$i + $c + 1][0] !== T_OBJECT_OPERATOR || $this->tokens[$i + $c][0] === T_STRING && $this->tokens[$i + $c + 1] !== '(') { $var_counter++; // scan only potential vulnerable parameters of function call if (in_array($parameter, $this->scan_functions[$token_value][0]) || isset($this->scan_functions[$token_value][0][0]) && $this->scan_functions[$token_value][0][0] === 0) { $has_vuln_parameters = true; if (is_array($this->tokens[$i + $c - 1]) && in_array($this->tokens[$i + $c - 1][0], Tokens::$T_CASTS) || is_array($this->tokens[$i + $c + 1]) && in_array($this->tokens[$i + $c + 1][0], Tokens::$T_ARITHMETIC) || $in_securing) { $secured_by_start = true; $this_one_is_secure = true; } if ($in_securing && !$ignore_securing) { $this->securedby[] = $securing_function; } // trace back parameters and look for userinput, trace constants globally $userinput = $this->scan_parameter($new_find, $new_find, $this->tokens[$i + $c], $this->tokens[$i + $c][3], $i + $c, $this->in_function && $this->tokens[$i + $c][1][0] === '$' ? $this->var_declares_local : $this->var_declares_global, $this->var_declares_global, false, $this->scan_functions[$token_value][1], false, $ignore_securing, $this_one_is_secure || $in_securing); $reconstructstr .= Analyzer::get_var_value($this->file_pointer, $this->tokens[$i + $c], $this->in_function && $this->tokens[$i + $c][1][0] === '$' ? 
$this->var_declares_local : $this->var_declares_global, $this->var_declares_global, $i + $c, $this->source_functions); if ($userinput) { $vulnparams[] = $parameter; if ($userinput == 1) { $parameter_has_userinput = true; } else { if ($userinput == 2) { $parameter_func_depend = true; } } $tainted_vars[] = $var_counter; } } // mark userinput for quote analysis if (in_array($this->tokens[$i + $c][1], Sources::$V_USERINPUT)) { $reconstructstr .= '$_USERINPUT'; } } else { if ($this->tokens[$i + $c][0] === T_STRING && in_array($this->tokens[$i + $c][1], $this->source_functions) && (in_array($parameter, $this->scan_functions[$token_value][0]) || isset($this->scan_functions[$token_value][0][0]) && $this->scan_functions[$token_value][0][0] === 0)) { $has_vuln_parameters = true; $parameter_has_userinput = true; $new_find->marker = 1; $reconstructstr .= '$_USERINPUT'; $new_find->title = 'Userinput returned by function <i>' . $this->tokens[$i + $c][1] . '</i> reaches sensitive sink'; $this->addtriggerfunction($new_find); } else { if ($this->tokens[$i + $c][0] === T_STRING && isset($this->tokens[$i + $c][1]) && in_array($this->tokens[$i + $c][1], $GLOBALS['F_INSECURING_STRING']) && $parentheses_save == -1) { $parentheses_save = $parentheses_open; $ignore_securing = true; } else { if (!$ignore_securing && ($this->tokens[$i + $c][0] === T_STRING && (is_array($this->scan_functions[$token_value][1]) && in_array($this->tokens[$i + $c][1], $this->scan_functions[$token_value][1]) || in_array($this->tokens[$i + $c][1], $GLOBALS['F_SECURING_STRING']))) || in_array($this->tokens[$i + $c][0], Tokens::$T_CASTS) && $this->tokens[$i + $c + 1] === '(') { $securing_function = $this->tokens[$i + $c][1]; $parentheses_save = $parentheses_open; $in_securing = true; $secured_by_start = true; } else { if ($this->tokens[$i + $c][0] === T_CONSTANT_ENCAPSED_STRING) { $reconstructstr .= substr($this->tokens[$i + $c][1], 1, -1); } else { if ($this->tokens[$i + $c][0] === T_ENCAPSED_AND_WHITESPACE) { 
$reconstructstr .= $this->tokens[$i + $c][1]; } } } } } } } else { if ($parentheses_open === 1 && $this->tokens[$i + $c] === ',') { $parameter++; } else { if ($this->tokens[$i + $c] === '(') { $parentheses_open++; } else { if ($this->tokens[$i + $c] === ')') { $parentheses_open--; if ($parentheses_open === $parentheses_save) { $parentheses_save = -1; $in_securing = false; $securing_function = ''; $ignore_securing = false; } } else { if (!isset($this->tokens[$i + $c])) { addError('Closing parenthesis of ' . $token_value . '() is missing.', array_slice($this->tokens, $i, 10), $this->tokens[$i][2], $this->file_pointer); break; } } } } } $c++; } // quote analysis for securing functions F_QUOTE_ANALYSIS // they only protect when return value is embedded into quotes if ($this->quote_analysis_needed() && substr_count($reconstructstr, '$_USERINPUT') > 0) { // idea: explode on $_USERINPUT and count quotes in SQL query before // if not even, then the $_USERINPUT is in an open quote $parts = explode('$_USERINPUT', $reconstructstr); foreach ($this->securedby as $var => $securefunction) { if (in_array($securefunction, $GLOBALS['F_QUOTE_ANALYSIS'])) { // extract the string before the userinput $checkstring = ''; $d = 1; foreach ($parts as $part) { $checkstring .= $part; if ($d >= $var) { break; } $d++; } // even amount of quotes (or none) in string // --> no quotes around userinput // --> securing function is useless if (substr_count($checkstring, "'") % 2 === 0 && substr_count($checkstring, '"') % 2 === 0) { $has_vuln_parameters = true; $parameter_has_userinput = true; $new_find->title .= "Userinput reaches sensitive sink due to insecure usage of {$securefunction}() without quotes"; } } } } // add find to output if function call has variable parameters (With userinput) if ($has_vuln_parameters && ($parameter_has_userinput || $parameter_func_depend) || $GLOBALS['verbosity'] == 4 || isset($this->scan_functions[$token_value][3])) { $vulnstart = $i; $vulnadd = 1; // prepend $var 
assignment if (isset($vardeclare)) { $vulnstart = $vardeclare['start']; $vulnadd = $vardeclare['end'] - $vardeclare['start'] - $c + 1; //3; } else { if (isset($GLOBALS['F_XSS'][$this->tokens[$i - 1][1]])) { $vulnstart = $i - 1; $vulnadd = 2; } else { if ($this->tokens[$i - 1][0] === T_DOUBLE_COLON || $this->tokens[$i - 1][0] === T_OBJECT_OPERATOR) { $vulnstart = $i - 2; $vulnadd = 2; } } } if (isset($GLOBALS['user_functions'][$this->file_name][$token_value])) { $found_line = '<A NAME="' . $token_value . '_call" class="jumplink"></A>'; $found_line .= highlightline(array_slice($this->tokens, $vulnstart, $c + $vulnadd), $this->comment, $line_nr, false, $token_value); } else { $found_line = highlightline(array_slice($this->tokens, $vulnstart, $c + $vulnadd), $this->comment, $line_nr, $token_value, false, $tainted_vars); } $new_find->value = $found_line; $new_find->filename = $this->file_pointer; if ($secured_by_start) { $new_find->marker = 2; } // only show vuln user defined functions // if call with userinput has been found if (isset($GLOBALS['user_functions'][$this->file_name][$token_value])) { $GLOBALS['user_functions'][$this->file_name][$token_value]['called'] = true; } if ($this->in_function) { $this->ignore_securing_function = true; // mark function in class as vuln if ($this->in_class) { $this->vuln_classes[$this->class_name][] = $this->function_obj->name; } } // putenv with userinput --> getenv is treated as userinput if ($token_value === 'putenv') { $this->source_functions[] = 'getenv'; $GLOBALS['source_functions'][] = 'getenv'; $new_find->title = 'User can set PHP enviroment variables. Adding getenv() to tainting functions'; } else { if ($token_value === 'apache_setenv') { $this->source_functions[] = 'apache_getenv'; $GLOBALS['source_functions'][] = 'apache_getenv'; $new_find->title = 'User can set Apache enviroment variables. 
Adding apache_getenv() to tainting functions'; } else { if ($token_value === 'extract' || $token_value === 'parse_str' || $token_value === 'mb_parse_str') { // add register_globals implementation $this->variable_add('register_globals', array_slice($this->tokens, $vulnstart, $c + $vulnadd), 'register_globals implementation', 0, 0, $line_nr, $i, isset($this->tokens[$i][3]) ? $this->tokens[$i][3] : array()); } } } // add to output if (isset($GLOBALS['user_functions'][$this->file_name][$token_value])) { if (!empty($GLOBALS['output'][$this->file_name])) { foreach ($GLOBALS['output'][$this->file_name] as $block) { $calleesadded = array(); foreach ($block->treenodes as $tree) { if ($tree->funcdepend === $token_value && (array_intersect($tree->funcparamdepend, $vulnparams) || isset($this->scan_functions[$token_value][3]))) { // if funcdependend already found and added, just add foundcallee=true and continue // dont add tree again, it is already added to the vulnblock if (in_array($tree->funcdepend, $calleesadded)) { $tree->foundcallee = true; continue; } if (isset($this->scan_functions[$token_value][3])) { $new_find->title = 'Call triggers vulnerability in function <i>' . $token_value . '()</i>'; } else { if (empty($new_find->title)) { $new_find->title = 'Userinput is passed through function parameters.'; } } $block->treenodes[] = $new_find; if (!$block->vuln && ($parameter_has_userinput || isset($this->scan_functions[$token_value][3]) || $GLOBALS['verbosity'] == 4)) { $block->vuln = true; increaseVulnCounter($block->sink); } $tree->foundcallee = true; $calleesadded[] = $token_value; } } } // else: dont use the result } } else { if (empty($new_find->title)) { $new_find->title = 'Userinput reaches sensitive sink. For more information, press the help icon on the left side.'; } $block = new VulnBlock($this->tif . '_' . $this->tokens[$i][2] . '_' . 
basename($this->file_pointer), getVulnNodeTitle($token_value), $token_value); $block->treenodes[] = $new_find; if ($parameter_has_userinput || $GLOBALS['verbosity'] == 4) { $block->vuln = true; increaseVulnCounter($token_value); } // if sink in var declare, offer a data leak scan - save infos for that if (isset($vardeclare)) { $block->dataleakvar = array($vardeclare['linenr'], $vardeclare['name']); } $GLOBALS['output'][$this->file_name][] = $block; } } // if classvar depends on function parameter, add this parameter to list if (isset($this->classvar) && $this->in_function && in_array($this->classvar, $this->function_obj->parameters)) { $param = array_search($this->classvar, $this->function_obj->parameters); $GLOBALS['user_functions'][$this->file_name][$this->function_obj->name][0][$param] = $param + 1; } } } // taint analysis } else { if (in_array($token_name, Tokens::$T_LOOP_CONTROL)) { // ignore in requirements output: while, for, foreach // DO..WHILE was rewritten to WHILE in tokenizer $this->ignore_requirement = true; $c = 1; // get variables in loop condition while ($this->tokens[$i + $c] !== '{') { if ($this->tokens[$i + $c][0] === T_VARIABLE) { $this->tokens[$i + $c][3][] = '*'; } else { if (!isset($this->tokens[$i + $c])) { addError('Could not find opening brace after ' . $token_value . '-statement.', array_slice($this->tokens, $i, 10), $this->tokens[$i][2], $this->file_pointer); break; } } $c++; } } else { if (in_array($token_name, Tokens::$T_FLOW_CONTROL)) { $c = 1; while ($this->tokens[$i + $c] !== '{') { $c++; if (!isset($this->tokens[$i + $c])) { addError('Could not find opening brace after ' . $token_value . 
'-statement.', array_slice($this->tokens, $i, 10), $this->tokens[$i][2], $this->file_pointer); break; } } $this->in_condition = $i + $c; $this->dependencytokens = array_slice($this->tokens, $i, $c); } else { if ($token_name === T_FUNCTION) { if ($this->in_function) { #addError('New function declaration in function declaration of '.$this->function_obj->name.'() found. This is valid PHP syntax but not supported by RIPS now.', array_slice($this->tokens, $i, 10), $this->tokens[$i][2], $this->file_pointer); } else { $this->in_function++; // the next token is the "function name()" $i++; $function_name = isset($this->tokens[$i][1]) ? $this->tokens[$i][1] : $this->tokens[$i + 1][1]; $ref_name = ($this->in_class ? $this->class_name . '::' : '') . $function_name; // add POP gadgets to info if (isset($this->info_functions[$function_name])) { $GLOBALS['info'][] = $ref_name; // add gadget to output $found_line = highlightline(array_slice($this->tokens, $i - 1, 4), $this->comment, $line_nr, $function_name, false, $function_name); $new_find = new InfoTreeNode($found_line); $new_find->title = "POP gadget {$ref_name}"; $new_find->lines[] = $line_nr; $new_find->filename = $this->file_pointer; if (isset($GLOBALS['output'][$this->file_name]['gadgets'])) { $GLOBALS['output'][$this->file_name]['gadgets']->treenodes[] = $new_find; } else { $block = new VulnBlock($this->tif . '_' . $this->tokens[$i][2] . '_' . 
basename($this->file_pointer), 'POP gadgets'); $block->vuln = true; $block->treenodes[] = $new_find; $GLOBALS['output'][$this->file_name]['gadgets'] = $block; } } $c = 3; while ($this->tokens[$i + $c] !== '{' && $this->tokens[$i + $c] !== ';') { $c++; } // abstract functions ended if ($this->tokens[$i + $c] === ';') { $this->in_function--; } // write to user_functions offset list for referencing in output $GLOBALS['user_functions_offset'][$ref_name][0] = $this->file_pointer; $GLOBALS['user_functions_offset'][$ref_name][1] = $line_nr - 1; // save function as object $this->function_obj = new FunctionDeclare($this->dependencytokens = array_slice($this->tokens, $i - 1, $c + 1)); $this->function_obj->lines[] = $line_nr; $this->function_obj->name = $function_name; // save all function parameters $this->function_obj->parameters = array(); $e = 1; // until function test(...) { // OR // interface test { public function test(...); } while ($this->tokens[$i + $e] !== '{' && $this->tokens[$i + $e] !== ';') { if (is_array($this->tokens[$i + $e]) && $this->tokens[$i + $e][0] === T_VARIABLE) { $this->function_obj->parameters[] = $this->tokens[$i + $e][1]; } $e++; } // now skip the params from rest of scan, // or function test($a=false, $b=false) will be detected as var declaration $i += $e - 1; // -1, because '{' must be evaluated again } } else { if ($token_name === T_GLOBAL && $this->in_function) { $this->globals_from_function[$this->function_obj->name] = array(); // get all globaled variables $b = 1; while ($this->tokens[$i + $b] !== ';') { if ($this->tokens[$i + $b][0] === T_VARIABLE) { // mark variable as global scope affecting $this->put_in_global_scope[] = $this->tokens[$i + $b][1]; // add variable declaration to beginning of varlist $new_var = new VarDeclare(array(array(T_GLOBAL, 'global', $line_nr), array(T_VARIABLE, $this->tokens[$i + $b][1], $line_nr), ';'), $this->comment); $new_var->line = $line_nr; $new_var->id = $i; // overwrite old local vars 
$this->var_declares_local[$this->tokens[$i + $b][1]] = array($new_var); } $b++; } } else { if ($token_name === T_RETURN && $this->in_function == 1) { $GLOBALS['userfunction_taints'] = false; $GLOBALS['userfunction_secures'] = false; $c = 1; // get all variables in parameter list while ($this->tokens[$i + $c] !== ';') { if (is_array($this->tokens[$i + $c])) { if ($this->tokens[$i + $c][0] === T_VARIABLE) { // check if returned var is secured --> securing function $new_find = new VulnTreeNode(); $userinput = $this->scan_parameter($new_find, $new_find, $this->tokens[$i + $c], $this->tokens[$i + $c][3], $i + $c, $this->var_declares_local, $this->var_declares_global, false, $GLOBALS['F_SECURES_ALL'], TRUE); // add function to securing functions // if it returns no userinput/function param if ((!$userinput || $GLOBALS['userfunction_secures']) && !$this->ignore_securing_function) { $GLOBALS['F_SECURING_STRING'][] = $this->function_obj->name; } // add function to userinput functions if userinput // is fetched in the function and then returned (userinput == 1) if ($userinput == 1 || $GLOBALS['userfunction_taints']) { $this->source_functions[] = $this->function_obj->name; } } else { if (in_array($this->tokens[$i + $c][1], $GLOBALS['F_SECURES_ALL']) || in_array($this->tokens[$i + $c][0], Tokens::$T_CASTS)) { $GLOBALS['F_SECURING_STRING'][] = $this->function_obj->name; break; } } } $c++; } } else { if ($token_name === T_CLASS) { $i++; $this->class_name = $this->tokens[$i][1]; $this->vuln_classes[$this->class_name] = array(); $this->in_class = true; $GLOBALS['info'][] = '<font color="red">Code is object-oriented. 
This is not supported yet and can lead to false negatives.</font>'; } else { if ($token_name === T_NEW && $this->tokens[$i - 2][0] === T_VARIABLE) { $this->class_vars[$this->tokens[$i - 2][1]] = $this->tokens[$i + 1][1]; } else { if ($token_name === T_EXTENDS && $this->in_class) { $this->vuln_classes[$this->class_name] = $this->vuln_classes[$this->tokens[$i + 1][1]]; } else { if ($token_name === T_LIST) { $d = 2; while ($this->tokens[$i + $d] !== ')' && $this->tokens[$i + $d] !== ';') { $d++; if ($this->tokens[$i + $d] === ';' || !isset($this->tokens[$i + $d])) { addError('Closing parenthesis of list() is missing.', array_slice($this->tokens, $i, 10), $this->tokens[$i][2], $this->file_pointer); break; } } $tokenscanstart = 0; if ($this->tokens[$i + $d + 1] === '=' || in_array($this->tokens[$i + $d + 1][0], Tokens::$T_ASSIGNMENT)) { $tokenscanstart = $d + 1; } $c = 2; for ($c = 2; $c < $d; $c++) { if (is_array($this->tokens[$i + $c]) && $this->tokens[$i + $c][0] === T_VARIABLE) { $this->variable_add($this->tokens[$i + $c][1], array_slice($this->tokens, $i, Analyzer::getBraceEnd($this->tokens, $i) + 1), ' list() ', $tokenscanstart, 0, $this->tokens[$i + $c][2], $i, isset($this->tokens[$i + $c][3]) ? $this->tokens[$i + $c][3] : array()); } } $i = $i + $c + 2; } else { if ($token_name === T_INCLUDE_END) { array_pop($this->lines_stack); $this->lines_pointer = end($this->lines_stack); array_pop($this->inc_file_stack); $this->file_pointer = end($this->inc_file_stack); $this->comment = basename($this->file_pointer) == basename($this->file_name) ? 
'' : basename($this->file_pointer); $this->tif = array_pop($this->tif_stack); } } } } } } } } } } } } } else { /************************* BRACES *************************/ // keep track of { program blocks } // get current dependencies in program flow if ($this->tokens[$i] === '{' && ($this->tokens[$i - 1] === ')' || $this->tokens[$i - 1] === ':' || $this->tokens[$i - 1] === ';' || is_array($this->tokens[$i - 1]) && ($this->tokens[$i - 1][0] === T_DO || $this->tokens[$i - 1][0] === T_ELSE || $this->tokens[$i - 1][0] === T_STRING || $this->tokens[$i - 1][0] === T_TRY || $this->tokens[$i - 1][0] === T_CATCH))) { // save brace amount at start of function if ($this->in_function && $this->brace_save_func < 0) { $this->brace_save_func = $this->braces_open; } // save brace amount at start of class if ($this->in_class && $this->brace_save_class < 0) { $this->brace_save_class = $this->braces_open; } $this->in_condition = 0; if (empty($e)) { if (!$this->ignore_requirement) { if (!empty($this->dependencytokens) && $this->dependencytokens[0][0] === T_ELSE && $this->dependencytokens[1][0] !== T_IF) { $this->dependencytokens = $this->last_dependency; $this->dependencytokens[] = array(T_ELSE, 'else', $this->dependencytokens[0][2]); } } else { $this->ignore_requirement = false; } // add dependency (even push empty dependency on stack, it will get poped again) $this->dependencies[$line_nr] = $this->dependencytokens; $this->dependencytokens = array(); } else { unset($e); } $this->braces_open++; } else { if ($this->tokens[$i] === '}' && ($this->tokens[$i - 1] === ';' || $this->tokens[$i - 1] === '}' || $this->tokens[$i - 1] === '{')) { $this->braces_open--; // delete current dependency $this->last_dependency = array_pop($this->dependencies); $this->dependencytokens = array(); // end of function found if brace amount = amount before function start if ($this->in_function && $this->brace_save_func === $this->braces_open) { $ref_name = ($this->in_class ? $this->class_name . '::' : '') . 
$this->function_obj->name; // write ending to user_function list for referencing functions in output $GLOBALS['user_functions_offset'][$ref_name][2] = $line_nr; // reset vars for next function declaration $this->brace_save_func = -1; $this->ignore_securing_function = false; $this->in_function--; $this->function_obj = null; $this->var_declares_local = array(); $this->put_in_global_scope = array(); // load new found vulnerable user functions to current scanlist if (isset($GLOBALS['user_functions'][$this->file_name])) { $this->scan_functions = array_merge($this->scan_functions, $GLOBALS['user_functions'][$this->file_name]); } } // end of class found if ($this->in_class && $this->brace_save_class === $this->braces_open) { $this->brace_save_class = -1; $this->in_class = false; } } } } // token scanned // detect if still in a vardeclare, otherwise delete saved infos if (isset($vardeclare) && $vardeclare['end'] === $i) { unset($vardeclare); } } // all tokens scanned. return $this->inc_map; }
/**
 * Return the location of the previous meaningful token.
 *
 * Scans backwards starting at the token directly before $pos and stops at
 * index 0, so the token list is never read at a negative offset.
 *
 * @param int $pos Index in $this->tokens to search before.
 * @return int|null Index of the closest earlier token accepted by
 *                  Tokenizer::meaningfull(), or null when none precedes $pos.
 */
protected function prev($pos)
{
    // Decrement before the bounds check: the original decremented inside the
    // loop body and therefore touched $this->tokens[-1] on its last pass.
    for ($i = $pos - 1; $i >= 0; $i--) {
        if (Tokenizer::meaningfull($this->tokens[$i][0])) {
            return $i;
        }
    }

    // No meaningful token precedes $pos.
    return null;
}
/**
 * Hand the given code to a fresh Tokenizer and return the result of its
 * getInstantiableClassNames() call.
 *
 * @param string $code
 * @return string[]
 */
private function getClassNames($code)
{
    return (new Tokenizer())->getInstantiableClassNames($code);
}