Beispiel #1
0
 /**
  * Creates an array of tokens when given some PHP code.
  *
  * Starts by using token_get_all() and then post-processes the raw tokens:
  *  - "\r\n" line endings that were split over two tokens are re-joined;
  *  - double quoted strings are merged and re-split into one
  *    T_DOUBLE_QUOTED_STRING token per line;
  *  - heredoc/nowdoc bodies are merged and re-split into one
  *    T_HEREDOC/T_NOWDOC token per line;
  *  - any other token containing $eolChar is split into one token per line;
  *  - the ":" of an inline IF is retagged as T_INLINE_ELSE;
  *  - "array" used as a type hint is retagged as T_ARRAY_HINT.
  *
  * NOTE(review): the default value of $eolChar is the two-character
  * sequence backslash + "n" (single quoted), not a newline character.
  * Callers presumably always pass the real EOL character - confirm.
  *
  * @param string $string  The string to tokenize.
  * @param string $eolChar The EOL character to use for splitting strings.
  *
  * @return array Zero-indexed list of tokens. Tokens built directly in this
  *               method carry 'content', 'code' and 'type' entries.
  */
 public function tokenizeString($string, $eolChar = '\\n')
 {
     // Errors raised by token_get_all() are deliberately suppressed.
     $tokens = @token_get_all($string);
     $finalTokens = array();
     $newStackPtr = 0;
     $numTokens = count($tokens);
     // Number of inline IF "?" tokens still waiting for their ":".
     // A counter (rather than a flag) keeps nested ternaries correct.
     $inlineIfDepth = 0;
     for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
         $token = $tokens[$stackPtr];
         $tokenIsArray = is_array($token);
         /*
             If we are using \r\n newline characters, the \r and \n are sometimes
             split over two tokens. This normally occurs after comments. We need
             to merge these two characters together so that our line endings are
             consistent for all lines.
         */
         if ($tokenIsArray === true && substr($token[1], -1) === "\r") {
             if (isset($tokens[$stackPtr + 1]) === true && is_array($tokens[$stackPtr + 1]) === true && $tokens[$stackPtr + 1][1][0] === "\n") {
                 $token[1] .= "\n";
                 if ($tokens[$stackPtr + 1][1] === "\n") {
                     // The next token's content has been merged into this token,
                     // so we can skip it.
                     $stackPtr++;
                 } else {
                     // Only the leading "\n" was taken; keep the rest of the
                     // next token for its own iteration.
                     $tokens[$stackPtr + 1][1] = substr($tokens[$stackPtr + 1][1], 1);
                 }
             }
         }
         //end if
         /*
             If this is a double quoted string, PHP will tokenise the whole
             thing which causes problems with the scope map when braces are
             within the string. So we need to merge the tokens together to
             provide a single string.
         */
         if ($tokenIsArray === false && $token === '"') {
             $tokenContent = '"';
             // Stack of open "{" tokens belonging to embedded expressions; a
             // quote only closes the string when this stack is empty.
             $nestedVars = array();
             for ($i = $stackPtr + 1; $i < $numTokens; $i++) {
                 $subTokenIsArray = is_array($tokens[$i]);
                 if ($subTokenIsArray === true) {
                     $tokenContent .= $tokens[$i][1];
                     if ($tokens[$i][1] === '{' && $tokens[$i][0] !== T_ENCAPSED_AND_WHITESPACE) {
                         $nestedVars[] = $i;
                     }
                 } else {
                     $tokenContent .= $tokens[$i];
                     if ($tokens[$i] === '}') {
                         array_pop($nestedVars);
                     }
                 }
                 if ($subTokenIsArray === false && $tokens[$i] === '"' && empty($nestedVars) === true) {
                     // We found the other end of the double quoted string.
                     break;
                 }
             }
             // Resume the outer loop after the last token consumed here.
             $stackPtr = $i;
             // Convert each line within the double quoted string to a
             // new token, so it conforms with other multiple line tokens.
             $tokenLines = explode($eolChar, $tokenContent);
             $numLines = count($tokenLines);
             $newToken = array();
             for ($j = 0; $j < $numLines; $j++) {
                 $newToken['content'] = $tokenLines[$j];
                 if ($j === $numLines - 1) {
                     // An empty last segment means the content ended with
                     // the EOL character; there is nothing left to emit.
                     if ($tokenLines[$j] === '') {
                         break;
                     }
                 } else {
                     $newToken['content'] .= $eolChar;
                 }
                 $newToken['code'] = T_DOUBLE_QUOTED_STRING;
                 $newToken['type'] = 'T_DOUBLE_QUOTED_STRING';
                 $finalTokens[$newStackPtr] = $newToken;
                 $newStackPtr++;
             }
             // Continue, as we're done with this token.
             continue;
         }
         //end if
         /*
             If this is a heredoc, PHP will tokenise the whole
             thing which causes problems when heredocs don't
             contain real PHP code, which is almost never.
             We want to leave the start and end heredoc tokens
             alone though.
         */
         if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) {
             // Add the start heredoc token to the final array.
             $finalTokens[$newStackPtr] = PHP_CodeSniffer::standardiseToken($token);
             // Check if this is actually a nowdoc and use a different token
             // to help the sniffs. The character right after "<<<" is a
             // single quote for a nowdoc.
             $nowdoc = false;
             if ($token[1][3] === "'") {
                 $finalTokens[$newStackPtr]['code'] = T_START_NOWDOC;
                 $finalTokens[$newStackPtr]['type'] = 'T_START_NOWDOC';
                 $nowdoc = true;
             }
             $newStackPtr++;
             $tokenContent = '';
             for ($i = $stackPtr + 1; $i < $numTokens; $i++) {
                 $subTokenIsArray = is_array($tokens[$i]);
                 if ($subTokenIsArray === true && $tokens[$i][0] === T_END_HEREDOC) {
                     // We found the other end of the heredoc.
                     break;
                 }
                 if ($subTokenIsArray === true) {
                     $tokenContent .= $tokens[$i][1];
                 } else {
                     $tokenContent .= $tokens[$i];
                 }
             }
             // Points at the end heredoc token, or past the last token when
             // the heredoc is never closed.
             $stackPtr = $i;
             // Convert each line within the heredoc to a
             // new token, so it conforms with other multiple line tokens.
             $tokenLines = explode($eolChar, $tokenContent);
             $numLines = count($tokenLines);
             $newToken = array();
             for ($j = 0; $j < $numLines; $j++) {
                 $newToken['content'] = $tokenLines[$j];
                 if ($j === $numLines - 1) {
                     if ($tokenLines[$j] === '') {
                         break;
                     }
                 } else {
                     $newToken['content'] .= $eolChar;
                 }
                 if ($nowdoc === true) {
                     $newToken['code'] = T_NOWDOC;
                     $newToken['type'] = 'T_NOWDOC';
                 } else {
                     $newToken['code'] = T_HEREDOC;
                     $newToken['type'] = 'T_HEREDOC';
                 }
                 $finalTokens[$newStackPtr] = $newToken;
                 $newStackPtr++;
             }
             // Add the end heredoc token to the final array. An unterminated
             // heredoc has no end token, so there is nothing to add then.
             if (isset($tokens[$stackPtr]) === true) {
                 $finalTokens[$newStackPtr] = PHP_CodeSniffer::standardiseToken($tokens[$stackPtr]);
                 if ($nowdoc === true) {
                     $finalTokens[$newStackPtr]['code'] = T_END_NOWDOC;
                     $finalTokens[$newStackPtr]['type'] = 'T_END_NOWDOC';
                 }
                 $newStackPtr++;
             }
             // Continue, as we're done with this token.
             continue;
         }
         //end if
         /*
             If this token has newlines in its content, split each line up
             and create a new token for each line. We do this so it's easier
             to ascertain where errors occur on a line.
             Note that $token[1] is the token's content.
         */
         if ($tokenIsArray === true && strpos($token[1], $eolChar) !== false) {
             $tokenLines = explode($eolChar, $token[1]);
             $numLines = count($tokenLines);
             $tokenName = token_name($token[0]);
             for ($i = 0; $i < $numLines; $i++) {
                 $newToken['content'] = $tokenLines[$i];
                 if ($i === $numLines - 1) {
                     if ($tokenLines[$i] === '') {
                         break;
                     }
                 } else {
                     $newToken['content'] .= $eolChar;
                 }
                 $newToken['type'] = $tokenName;
                 $newToken['code'] = $token[0];
                 $finalTokens[$newStackPtr] = $newToken;
                 $newStackPtr++;
             }
         } else {
             $newToken = PHP_CodeSniffer::standardiseToken($token);
             // Convert colons that are actually the ELSE component of an
             // inline IF statement. Every "?" opens one level and the next
             // unclaimed ":" closes it, so nested inline IFs are handled.
             if ($newToken['code'] === T_INLINE_THEN) {
                 $inlineIfDepth++;
             } else {
                 if ($inlineIfDepth > 0 && $newToken['code'] === T_COLON) {
                     $inlineIfDepth--;
                     $newToken['code'] = T_INLINE_ELSE;
                     $newToken['type'] = 'T_INLINE_ELSE';
                 }
             }
             // This is a special condition for T_ARRAY tokens used for
             // type hinting function arguments as being arrays. We want to keep
             // the parenthesis map clean, so let's tag these tokens as
             // T_ARRAY_HINT.
             if ($newToken['code'] === T_ARRAY) {
                 // Look ahead: a variable before any "(" means a type hint,
                 // a "(" first means a real array() construct.
                 for ($i = $stackPtr; $i < $numTokens; $i++) {
                     if (is_array($tokens[$i]) === false) {
                         if ($tokens[$i] === '(') {
                             break;
                         }
                     } else {
                         if ($tokens[$i][0] === T_VARIABLE) {
                             $newToken['code'] = T_ARRAY_HINT;
                             $newToken['type'] = 'T_ARRAY_HINT';
                             break;
                         }
                     }
                 }
             }
             $finalTokens[$newStackPtr] = $newToken;
             $newStackPtr++;
         }
         //end if
     }
     //end for
     return $finalTokens;
 }
Beispiel #2
0
 /**
  * Creates an array of tokens when given some PHP code.
  *
  * Starts by using token_get_all() and then post-processes the raw tokens:
  *  - "\r\n" line endings that were split over two tokens are re-joined;
  *  - double quoted strings are merged and re-split into one
  *    T_DOUBLE_QUOTED_STRING token per line;
  *  - heredoc/nowdoc bodies are merged and re-split into one
  *    T_HEREDOC/T_NOWDOC token per line;
  *  - "label:" sequences are merged into a single T_GOTO_LABEL token;
  *  - any other token containing $eolChar is split into one token per line;
  *  - T_STRING tokens following certain keywords/operators are kept as
  *    plain T_STRING;
  *  - the ":" of an inline IF is retagged as T_INLINE_ELSE;
  *  - "array" used as a type hint is retagged as T_ARRAY_HINT;
  *  - "class" in classname::class is retagged as T_STRING.
  *
  * NOTE(review): the default value of $eolChar is the two-character
  * sequence backslash + "n" (single quoted), not a newline character.
  * Callers presumably always pass the real EOL character - confirm.
  *
  * @param string $string  The string to tokenize.
  * @param string $eolChar The EOL character to use for splitting strings.
  *
  * @return array Zero-indexed list of tokens. Tokens built directly in this
  *               method carry 'content', 'code' and 'type' entries.
  */
 public function tokenizeString($string, $eolChar = '\\n')
 {
     // Errors raised by token_get_all() are deliberately suppressed.
     $tokens = @token_get_all($string);
     $finalTokens = array();
     $newStackPtr = 0;
     $numTokens = count($tokens);
     // Index in $finalTokens of the most recent non-whitespace token.
     $lastNotEmptyToken = 0;
     // Number of inline IF "?" tokens still waiting for their ":".
     // A counter (rather than a flag) keeps nested ternaries correct.
     $inlineIfDepth = 0;
     // Tokens at which the backwards search for a CASE/inline IF stops
     // when deciding whether "name:" is a goto label.
     $stopTokens = array(T_CASE, T_SEMICOLON, T_OPEN_CURLY_BRACKET, T_INLINE_THEN);
     // A T_STRING that directly follows one of these tokens
     // (ignoring whitespace) must remain a T_STRING.
     $context = array(T_OBJECT_OPERATOR => true, T_FUNCTION => true, T_CLASS => true, T_EXTENDS => true, T_IMPLEMENTS => true, T_NEW => true, T_CONST => true, T_NS_SEPARATOR => true, T_USE => true, T_NAMESPACE => true, T_PAAMAYIM_NEKUDOTAYIM => true);
     for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
         $token = $tokens[$stackPtr];
         $tokenIsArray = is_array($token);
         if ($newStackPtr > 0 && $finalTokens[$newStackPtr - 1]['code'] !== T_WHITESPACE) {
             $lastNotEmptyToken = $newStackPtr - 1;
         }
         /*
             If we are using \r\n newline characters, the \r and \n are sometimes
             split over two tokens. This normally occurs after comments. We need
             to merge these two characters together so that our line endings are
             consistent for all lines.
         */
         if ($tokenIsArray === true && substr($token[1], -1) === "\r") {
             if (isset($tokens[$stackPtr + 1]) === true && is_array($tokens[$stackPtr + 1]) === true && $tokens[$stackPtr + 1][1][0] === "\n") {
                 $token[1] .= "\n";
                 if ($tokens[$stackPtr + 1][1] === "\n") {
                     // The next token's content has been merged into this token,
                     // so we can skip it.
                     $stackPtr++;
                 } else {
                     // Only the leading "\n" was taken; keep the rest of the
                     // next token for its own iteration.
                     $tokens[$stackPtr + 1][1] = substr($tokens[$stackPtr + 1][1], 1);
                 }
             }
         }
         //end if
         /*
             If this is a double quoted string, PHP will tokenise the whole
             thing which causes problems with the scope map when braces are
             within the string. So we need to merge the tokens together to
             provide a single string.
         */
         if ($tokenIsArray === false && $token === '"') {
             $tokenContent = '"';
             // Stack of open "{" tokens belonging to embedded expressions; a
             // quote only closes the string when this stack is empty.
             $nestedVars = array();
             for ($i = $stackPtr + 1; $i < $numTokens; $i++) {
                 $subTokenIsArray = is_array($tokens[$i]);
                 if ($subTokenIsArray === true) {
                     $tokenContent .= $tokens[$i][1];
                     if ($tokens[$i][1] === '{' && $tokens[$i][0] !== T_ENCAPSED_AND_WHITESPACE) {
                         $nestedVars[] = $i;
                     }
                 } else {
                     $tokenContent .= $tokens[$i];
                     if ($tokens[$i] === '}') {
                         array_pop($nestedVars);
                     }
                 }
                 if ($subTokenIsArray === false && $tokens[$i] === '"' && empty($nestedVars) === true) {
                     // We found the other end of the double quoted string.
                     break;
                 }
             }
             // Resume the outer loop after the last token consumed here.
             $stackPtr = $i;
             // Convert each line within the double quoted string to a
             // new token, so it conforms with other multiple line tokens.
             $tokenLines = explode($eolChar, $tokenContent);
             $numLines = count($tokenLines);
             $newToken = array();
             for ($j = 0; $j < $numLines; $j++) {
                 $newToken['content'] = $tokenLines[$j];
                 if ($j === $numLines - 1) {
                     // An empty last segment means the content ended with
                     // the EOL character; there is nothing left to emit.
                     if ($tokenLines[$j] === '') {
                         break;
                     }
                 } else {
                     $newToken['content'] .= $eolChar;
                 }
                 $newToken['code'] = T_DOUBLE_QUOTED_STRING;
                 $newToken['type'] = 'T_DOUBLE_QUOTED_STRING';
                 $finalTokens[$newStackPtr] = $newToken;
                 $newStackPtr++;
             }
             // Continue, as we're done with this token.
             continue;
         }
         //end if
         /*
             If this is a heredoc, PHP will tokenise the whole
             thing which causes problems when heredocs don't
             contain real PHP code, which is almost never.
             We want to leave the start and end heredoc tokens
             alone though.
         */
         if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) {
             // Add the start heredoc token to the final array.
             $finalTokens[$newStackPtr] = PHP_CodeSniffer::standardiseToken($token);
             // Check if this is actually a nowdoc and use a different token
             // to help the sniffs. The character right after "<<<" is a
             // single quote for a nowdoc.
             $nowdoc = false;
             if ($token[1][3] === "'") {
                 $finalTokens[$newStackPtr]['code'] = T_START_NOWDOC;
                 $finalTokens[$newStackPtr]['type'] = 'T_START_NOWDOC';
                 $nowdoc = true;
             }
             $newStackPtr++;
             $tokenContent = '';
             for ($i = $stackPtr + 1; $i < $numTokens; $i++) {
                 $subTokenIsArray = is_array($tokens[$i]);
                 if ($subTokenIsArray === true && $tokens[$i][0] === T_END_HEREDOC) {
                     // We found the other end of the heredoc.
                     break;
                 }
                 if ($subTokenIsArray === true) {
                     $tokenContent .= $tokens[$i][1];
                 } else {
                     $tokenContent .= $tokens[$i];
                 }
             }
             // Points at the end heredoc token, or past the last token when
             // the heredoc is never closed.
             $stackPtr = $i;
             // Convert each line within the heredoc to a
             // new token, so it conforms with other multiple line tokens.
             $tokenLines = explode($eolChar, $tokenContent);
             $numLines = count($tokenLines);
             $newToken = array();
             for ($j = 0; $j < $numLines; $j++) {
                 $newToken['content'] = $tokenLines[$j];
                 if ($j === $numLines - 1) {
                     if ($tokenLines[$j] === '') {
                         break;
                     }
                 } else {
                     $newToken['content'] .= $eolChar;
                 }
                 if ($nowdoc === true) {
                     $newToken['code'] = T_NOWDOC;
                     $newToken['type'] = 'T_NOWDOC';
                 } else {
                     $newToken['code'] = T_HEREDOC;
                     $newToken['type'] = 'T_HEREDOC';
                 }
                 $finalTokens[$newStackPtr] = $newToken;
                 $newStackPtr++;
             }
             // Add the end heredoc token to the final array. An unterminated
             // heredoc has no end token, so there is nothing to add then.
             if (isset($tokens[$stackPtr]) === true) {
                 $finalTokens[$newStackPtr] = PHP_CodeSniffer::standardiseToken($tokens[$stackPtr]);
                 if ($nowdoc === true) {
                     $finalTokens[$newStackPtr]['code'] = T_END_NOWDOC;
                     $finalTokens[$newStackPtr]['type'] = 'T_END_NOWDOC';
                 }
                 $newStackPtr++;
             }
             // Continue, as we're done with this token.
             continue;
         }
         //end if
         /*
             PHP doesn't assign a token to goto labels, so we have to.
             These are just string tokens with a single colon after them. Double
             colons are already tokenized and so don't interfere with this check.
             But we do have to account for CASE statements, that look just like
             goto labels.
             The neighbouring tokens are checked for existence first so that a
             T_STRING at either end of the token list is not treated as a label.
         */
         if ($tokenIsArray === true && $token[0] === T_STRING && $stackPtr > 0 && isset($tokens[$stackPtr + 1]) === true && $tokens[$stackPtr + 1] === ':' && $tokens[$stackPtr - 1][0] !== T_PAAMAYIM_NEKUDOTAYIM) {
             // Walk backwards to the nearest statement boundary, CASE or
             // inline IF; index 0 is used when none is found.
             for ($x = $newStackPtr - 1; $x > 0; $x--) {
                 if (in_array($finalTokens[$x]['code'], $stopTokens, true) === true) {
                     break;
                 }
             }
             if ($finalTokens[$x]['code'] !== T_CASE && $finalTokens[$x]['code'] !== T_INLINE_THEN) {
                 $finalTokens[$newStackPtr] = array('content' => $token[1] . ':', 'code' => T_GOTO_LABEL, 'type' => 'T_GOTO_LABEL');
                 $newStackPtr++;
                 // Skip the colon, which is now part of the label token.
                 $stackPtr++;
                 continue;
             }
         }
         //end if
         /*
             If this token has newlines in its content, split each line up
             and create a new token for each line. We do this so it's easier
             to ascertain where errors occur on a line.
             Note that $token[1] is the token's content.
         */
         if ($tokenIsArray === true && strpos($token[1], $eolChar) !== false) {
             $tokenLines = explode($eolChar, $token[1]);
             $numLines = count($tokenLines);
             $tokenName = token_name($token[0]);
             for ($i = 0; $i < $numLines; $i++) {
                 $newToken['content'] = $tokenLines[$i];
                 if ($i === $numLines - 1) {
                     if ($tokenLines[$i] === '') {
                         break;
                     }
                 } else {
                     $newToken['content'] .= $eolChar;
                 }
                 $newToken['type'] = $tokenName;
                 $newToken['code'] = $token[0];
                 $finalTokens[$newStackPtr] = $newToken;
                 $newStackPtr++;
             }
         } else {
             if ($tokenIsArray === true && $token[0] === T_STRING) {
                 // Some T_STRING tokens should remain that way
                 // due to their context.
                 if (isset($finalTokens[$lastNotEmptyToken]) === true && isset($context[$finalTokens[$lastNotEmptyToken]['code']]) === true) {
                     $finalTokens[$newStackPtr] = array('content' => $token[1], 'code' => T_STRING, 'type' => 'T_STRING');
                     $newStackPtr++;
                     continue;
                 }
             }
             //end if
             $newToken = PHP_CodeSniffer::standardiseToken($token);
             // Convert colons that are actually the ELSE component of an
             // inline IF statement. Every "?" opens one level and the next
             // unclaimed ":" closes it, so nested inline IFs are handled.
             if ($newToken['code'] === T_INLINE_THEN) {
                 $inlineIfDepth++;
             } else {
                 if ($inlineIfDepth > 0 && $newToken['code'] === T_COLON) {
                     $inlineIfDepth--;
                     $newToken['code'] = T_INLINE_ELSE;
                     $newToken['type'] = 'T_INLINE_ELSE';
                 }
             }
             // This is a special condition for T_ARRAY tokens used for
             // type hinting function arguments as being arrays. We want to keep
             // the parenthesis map clean, so let's tag these tokens as
             // T_ARRAY_HINT.
             if ($newToken['code'] === T_ARRAY) {
                 // Look ahead: a variable before any "(" means a type hint,
                 // a "(" first means a real array() construct.
                 for ($i = $stackPtr; $i < $numTokens; $i++) {
                     if (is_array($tokens[$i]) === false) {
                         if ($tokens[$i] === '(') {
                             break;
                         }
                     } else {
                         if ($tokens[$i][0] === T_VARIABLE) {
                             $newToken['code'] = T_ARRAY_HINT;
                             $newToken['type'] = 'T_ARRAY_HINT';
                             break;
                         }
                     }
                 }
             }
             // This is a special case for the PHP 5.5 classname::class syntax
             // where "class" should be T_STRING instead of T_CLASS.
             if ($newToken['code'] === T_CLASS && $newStackPtr > 0 && $finalTokens[$newStackPtr - 1]['code'] === T_DOUBLE_COLON) {
                 $newToken['code'] = T_STRING;
                 $newToken['type'] = 'T_STRING';
             }
             $finalTokens[$newStackPtr] = $newToken;
             $newStackPtr++;
         }
         //end if
     }
     //end for
     return $finalTokens;
 }
 /**
  * Creates a token pattern.
  *
  * The string is wrapped in PHP open/close tags, tokenized with
  * token_get_all(), and every token between the two tags is converted
  * into one pattern step.
  *
  * @param string $str The tokens string that the pattern should match.
  *
  * @return array The pattern steps, one per token, each an array with
  *               'type' (always 'token'), 'token' (the token code) and
  *               'value' (the token content) entries.
  * @see _createSkipPattern()
  * @see _parse()
  */
 private function _createTokenPattern($str)
 {
     // Don't add a space after the closing php tag as it will add a new
     // whitespace token.
     $tokens = token_get_all('<?php ' . $str . '?>');
     // Remove the <?php tag from the front and the end php tag from the back.
     $tokens = array_slice($tokens, 1, count($tokens) - 2);
     // Standardise and convert in a single by-value pass; iterating by
     // reference would leave a dangling reference to the last element.
     $patterns = array();
     foreach ($tokens as $rawToken) {
         $patternInfo = PHP_CodeSniffer::standardiseToken($rawToken);
         $patterns[] = array('type' => 'token', 'token' => $patternInfo['code'], 'value' => $patternInfo['content']);
     }
     return $patterns;
 }
Beispiel #4
0
 /**
  * Processes the file and runs the PHP_CodeSniffer sniffs to verify that it
  * conforms with the tests.
  *
  * Reads the file named by $this->_file, tokenizes it with token_get_all()
  * and stores the processed tokens in $this->_tokens: double quoted strings
  * are merged and re-split per line, other multi-line tokens are split per
  * line, and "array" used as a type hint is retagged as T_ARRAY_HINT. The
  * line, parenthesis, scope, column and level maps are then built.
  *
  * @return void
  * @throws PHP_CodeSniffer_Exception If the file contents could not be read.
  */
 private function _parse()
 {
     $contents = file_get_contents($this->_file);
     if ($contents === false) {
         // Tokenizing a failed read would silently yield an empty token list.
         throw new PHP_CodeSniffer_Exception('Could not read file "' . $this->_file . '"');
     }
     $tokens = token_get_all($contents);
     $newStackPtr = 0;
     $numTokens = count($tokens);
     for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
         $token = $tokens[$stackPtr];
         // If this is a double quoted string, PHP will tokenise the whole
         // thing which causes problems with the scope map when braces are
         // within the string. So we need to merge the tokens together to
         // provide a single string.
         if (is_array($token) === false && $token === '"') {
             $tokenContent = '"';
             for ($i = $stackPtr + 1; $i < $numTokens; $i++) {
                 if (is_array($tokens[$i]) === true) {
                     $tokenContent .= $tokens[$i][1];
                 } else {
                     $tokenContent .= $tokens[$i];
                 }
                 if (is_array($tokens[$i]) === false && $tokens[$i] === '"') {
                     // We found the other end of the double quoted string.
                     break;
                 }
             }
             // Resume the outer loop after the last token consumed here.
             $stackPtr = $i;
             // Convert each line within the double quoted string to a
             // new token, so it conforms with other multiple line tokens.
             $tokenLines = explode("\n", $tokenContent);
             $numLines = count($tokenLines);
             $newToken = array();
             for ($j = 0; $j < $numLines; $j++) {
                 $newToken['content'] = $tokenLines[$j];
                 if ($j === $numLines - 1) {
                     // An empty last segment means the content ended with
                     // a newline; there is nothing left to emit.
                     if ($tokenLines[$j] === '') {
                         break;
                     }
                 } else {
                     $newToken['content'] .= "\n";
                 }
                 $newToken['code'] = T_DOUBLE_QUOTED_STRING;
                 $newToken['type'] = 'T_DOUBLE_QUOTED_STRING';
                 $this->_tokens[$newStackPtr] = $newToken;
                 $newStackPtr++;
             }
             // Continue, as we're done with this token.
             continue;
         }
         //end if
         // If this token has newlines in its content, split each line up
         // and create a new token for each line. We do this so it's easier
         // to ascertain where errors occur on a line.
         if (is_array($token) === true && strpos($token[1], "\n") !== false) {
             $tokenLines = explode("\n", $token[1]);
             $numLines = count($tokenLines);
             for ($i = 0; $i < $numLines; $i++) {
                 $newToken['content'] = $tokenLines[$i];
                 if ($i === $numLines - 1) {
                     if ($tokenLines[$i] === '') {
                         break;
                     }
                 } else {
                     $newToken['content'] .= "\n";
                 }
                 $newToken['type'] = token_name($token[0]);
                 $newToken['code'] = $token[0];
                 $this->_tokens[$newStackPtr] = $newToken;
                 $newStackPtr++;
             }
         } else {
             $newToken = PHP_CodeSniffer::standardiseToken($token);
             // This is a special condition for T_ARRAY tokens used to
             // type hint function arguments as being arrays. We want to keep
             // the parenthesis map clean, so let's tag these tokens as
             // T_ARRAY_HINT.
             if ($newToken['code'] === T_ARRAY) {
                 // Look ahead: a variable before any "(" means a type hint,
                 // a "(" first means a real array() construct.
                 for ($i = $stackPtr; $i < $numTokens; $i++) {
                     if (is_array($tokens[$i]) === false) {
                         if ($tokens[$i] === '(') {
                             break;
                         }
                     } else {
                         if ($tokens[$i][0] === T_VARIABLE) {
                             $newToken['code'] = T_ARRAY_HINT;
                             $newToken['type'] = 'T_ARRAY_HINT';
                             break;
                         }
                     }
                 }
             }
             $this->_tokens[$newStackPtr] = $newToken;
             $newStackPtr++;
         }
         //end if
     }
     //end for
     $this->_createLineMap();
     $this->_createParenthesisMap();
     $this->_createParenthesisNestingMap();
     $this->_createScopeMap();
     // Column map requires the line map to be complete.
     $this->_createColumnMap();
     $this->_createLevelMap();
     if (PHP_CODESNIFFER_VERBOSITY > 0) {
         $numTokens = count($this->_tokens);
         // An empty file produces no tokens, so there is no last token to
         // read the line number from ('line' is presumably populated by
         // _createLineMap() - confirm).
         $numLines = 0;
         if ($numTokens > 0) {
             $numLines = $this->_tokens[$numTokens - 1]['line'];
         }
         echo "[{$numTokens} tokens in {$numLines} lines]... ";
         if (PHP_CODESNIFFER_VERBOSITY > 1) {
             echo PHP_EOL;
         }
     }
 }