PHP PHP_CodeSniffer_Tokenizers_Comment, PHP_CodeSniffer Examples

Programming Language: PHP

Class/Type: PHP_CodeSniffer_Tokenizers_Comment

Examples at hotexamples.com: 3

PHP PHP_CodeSniffer_Tokenizers_Comment - 3 examples found. These are the top rated real world PHP examples of PHP_CodeSniffer_Tokenizers_Comment from package PHP_CodeSniffer extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

tokenizeString(3)

See link: http://pear.php.net/package/PHP_CodeSniffer

Author: Greg Sherwood (gsherwood@squiz.net)

PHP_CodeSniffer_Tokenizers_Comment Class Documentation

Example #1

Show file

File: JS.php Project: itliuchang/test

 /**
  * Creates an array of tokens when given some JavaScript code.
  *
  * The string is walked one character at a time. Characters are collected
  * in a buffer and turned into tokens for whitespace, quoted strings,
  * regular expressions, comments and any value found in $this->tokenValues;
  * everything else becomes T_STRING. A second pass then joins comment
  * tokens together, passes doc comments to the comment tokenizer, splits
  * multi-line tokens into one token per line and converts numeric content
  * into T_LNUMBER / T_DNUMBER tokens.
  *
  * @param string $string  The string to tokenize.
  * @param string $eolChar The EOL character to use for splitting strings.
  *
  * @return array List of tokens, each an array with 'code', 'type' and
  *               'content' keys (doc comment tokens are whatever the
  *               comment tokenizer returns).
  */
 public function tokenizeString($string, $eolChar = '\\n')
 {
     if (PHP_CODESNIFFER_VERBOSITY > 1) {
         echo "\t*** START JS TOKENIZING ***" . PHP_EOL;
     }
     // The longest known token value bounds how far we ever need to look ahead.
     $maxTokenLength = 0;
     foreach ($this->tokenValues as $token => $values) {
         if (strlen($token) > $maxTokenLength) {
             $maxTokenLength = strlen($token);
         }
     }
     $tokens = array();
     $inString = '';
     $stringChar = null;
     $inComment = '';
     $buffer = '';
     $preStringBuffer = '';
     $cleanBuffer = false;
     $commentTokenizer = new PHP_CodeSniffer_Tokenizers_Comment();
     $tokens[] = array('code' => T_OPEN_TAG, 'type' => 'T_OPEN_TAG', 'content' => '');
     // Convert newlines to single characters for ease of
     // processing. We will change them back later.
     $string = str_replace($eolChar, "\n", $string);
     $chars = str_split($string);
     $numChars = count($chars);
     for ($i = 0; $i < $numChars; $i++) {
         $char = $chars[$i];
         if (PHP_CODESNIFFER_VERBOSITY > 1) {
             $content = PHP_CodeSniffer::prepareForOutput($char);
             $bufferContent = PHP_CodeSniffer::prepareForOutput($buffer);
             if ($inString !== '') {
                 echo "\t";
             }
             if ($inComment !== '') {
                 echo "\t";
             }
             echo "\tProcess char {$i} => {$content} (buffer: {$bufferContent})" . PHP_EOL;
         }
         //end if
         if ($inString === '' && $inComment === '' && $buffer !== '') {
             // If the buffer only has whitespace and we are about to
             // add a character, store the whitespace first.
             if (trim($char) !== '' && trim($buffer) === '') {
                 $tokens[] = array('code' => T_WHITESPACE, 'type' => 'T_WHITESPACE', 'content' => str_replace("\n", $eolChar, $buffer));
                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
                     $content = PHP_CodeSniffer::prepareForOutput($buffer);
                     echo "\t=> Added token T_WHITESPACE ({$content})" . PHP_EOL;
                 }
                 $buffer = '';
             }
             // If the buffer is not whitespace and we are about to
             // add a whitespace character, store the content first.
             if ($inString === '' && $inComment === '' && trim($char) === '' && trim($buffer) !== '') {
                 $tokens[] = array('code' => T_STRING, 'type' => 'T_STRING', 'content' => str_replace("\n", $eolChar, $buffer));
                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
                     $content = PHP_CodeSniffer::prepareForOutput($buffer);
                     echo "\t=> Added token T_STRING ({$content})" . PHP_EOL;
                 }
                 $buffer = '';
             }
         }
         //end if
         // Process strings.
         if ($inComment === '' && isset($this->stringTokens[$char]) === true) {
             if ($inString === $char) {
                 // This could be the end of the string, but make sure it
                 // is not escaped first.
                 $escapes = 0;
                 for ($x = $i - 1; $x >= 0; $x--) {
                     if ($chars[$x] !== '\\') {
                         break;
                     }
                     $escapes++;
                 }
                 if ($escapes === 0 || $escapes % 2 === 0) {
                     // There is an even number escape chars,
                     // so this is not escaped, it is the end of the string.
                     $tokens[] = array('code' => T_CONSTANT_ENCAPSED_STRING, 'type' => 'T_CONSTANT_ENCAPSED_STRING', 'content' => str_replace("\n", $eolChar, $buffer) . $char);
                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
                         echo "\t\t* found end of string *" . PHP_EOL;
                         $content = PHP_CodeSniffer::prepareForOutput($buffer . $char);
                         echo "\t=> Added token T_CONSTANT_ENCAPSED_STRING ({$content})" . PHP_EOL;
                     }
                     $buffer = '';
                     $preStringBuffer = '';
                     $inString = '';
                     $stringChar = null;
                     continue;
                 }
                 //end if
             } else {
                 if ($inString === '') {
                     // Remember where the string started and what was buffered
                     // before it, so we can rewind if it turns out not to be
                     // a string after all.
                     $inString = $char;
                     $stringChar = $i;
                     $preStringBuffer = $buffer;
                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
                         echo "\t\t* looking for string closer *" . PHP_EOL;
                     }
                 }
             }
             //end if
         }
         //end if
         if ($inString !== '' && $char === "\n") {
             // Unless this newline character is escaped, the string did not
             // end before the end of the line, which means it probably
             // wasn't a string at all (maybe a regex).
             if ($chars[$i - 1] !== '\\') {
                 // Rewind to the opening quote and carry on as if we had
                 // never entered a string.
                 $i = $stringChar;
                 $buffer = $preStringBuffer;
                 $preStringBuffer = '';
                 $inString = '';
                 $stringChar = null;
                 $char = $chars[$i];
                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
                     echo "\t\t* found newline before end of string, bailing *" . PHP_EOL;
                 }
             }
         }
         $buffer .= $char;
         // We don't look for special tokens inside strings,
         // so if we are in a string, we can continue here now
         // that the current char is in the buffer.
         if ($inString !== '') {
             continue;
         }
         // Special case for T_DIVIDE which can actually be
         // the start of a regular expression.
         if ($buffer === $char && $char === '/') {
             $regex = $this->getRegexToken($i, $string, $chars, $tokens, $eolChar);
             if ($regex !== null) {
                 $tokens[] = array('code' => T_REGULAR_EXPRESSION, 'type' => 'T_REGULAR_EXPRESSION', 'content' => $regex['content']);
                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
                     $content = PHP_CodeSniffer::prepareForOutput($regex['content']);
                     echo "\t=> Added token T_REGULAR_EXPRESSION ({$content})" . PHP_EOL;
                 }
                 $i = $regex['end'];
                 $buffer = '';
                 $cleanBuffer = false;
                 continue;
             }
             //end if
         }
         //end if
         // Check for known tokens, but ignore tokens found that are not at
         // the end of a string, like FOR and this.FORmat.
         // NOTE(review): the "A-z" range below also matches [ \ ] ^ _ and `;
         // it looks like a typo for "A-Z" but is kept as-is because changing
         // it alters which branch handles those characters.
         if (isset($this->tokenValues[strtolower($buffer)]) === true && (preg_match('|[a-zA-z0-9_]|', $char) === 0 || isset($chars[$i + 1]) === false || preg_match('|[a-zA-z0-9_]|', $chars[$i + 1]) === 0)) {
             $matchedToken = false;
             $lookAheadLength = $maxTokenLength - strlen($buffer);
             if ($lookAheadLength > 0) {
                 // The buffer contains a token type, but we need
                 // to look ahead at the next chars to see if this is
                 // actually part of a larger token. For example,
                 // FOR and FOREACH.
                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
                     echo "\t\t* buffer possibly contains token, looking ahead {$lookAheadLength} chars *" . PHP_EOL;
                 }
                 $charBuffer = $buffer;
                 for ($x = 1; $x <= $lookAheadLength; $x++) {
                     if (isset($chars[$i + $x]) === false) {
                         break;
                     }
                     $charBuffer .= $chars[$i + $x];
                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
                         $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
                         echo "\t\t=> Looking ahead {$x} chars => {$content}" . PHP_EOL;
                     }
                     if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                         // We've found something larger that matches
                         // so we can ignore this char. Except for 1 very specific
                         // case where a comment like /**/ needs to tokenize as
                         // T_COMMENT and not T_DOC_COMMENT.
                         $oldType = $this->tokenValues[strtolower($buffer)];
                         $newType = $this->tokenValues[strtolower($charBuffer)];
                         // Guard the extra look-ahead char: the doc comment
                         // opener may be the last thing in the input.
                         if ($oldType === 'T_COMMENT' && $newType === 'T_DOC_COMMENT' && isset($chars[$i + $x + 1]) === true && $chars[$i + $x + 1] === '/') {
                             if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                 echo "\t\t* look ahead ignored T_DOC_COMMENT, continuing *" . PHP_EOL;
                             }
                         } else {
                             if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                 echo "\t\t* look ahead found more specific token ({$newType}), ignoring {$i} *" . PHP_EOL;
                             }
                             $matchedToken = true;
                             break;
                         }
                     }
                     //end if
                 }
                 //end for
             }
             //end if
             if ($matchedToken === false) {
                 if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
                     echo "\t\t* look ahead found nothing *" . PHP_EOL;
                 }
                 $value = $this->tokenValues[strtolower($buffer)];
                 $tokens[] = array('code' => constant($value), 'type' => $value, 'content' => $buffer);
                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
                     $content = PHP_CodeSniffer::prepareForOutput($buffer);
                     echo "\t=> Added token {$value} ({$content})" . PHP_EOL;
                 }
                 $cleanBuffer = true;
             }
             //end if
         } else {
             if (isset($this->tokenValues[strtolower($char)]) === true) {
                 // No matter what token we end up using, we don't
                 // need the content in the buffer any more because we have
                 // found a valid token.
                 $newContent = substr(str_replace("\n", $eolChar, $buffer), 0, -1);
                 if ($newContent !== '') {
                     $tokens[] = array('code' => T_STRING, 'type' => 'T_STRING', 'content' => $newContent);
                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
                         $content = PHP_CodeSniffer::prepareForOutput(substr($buffer, 0, -1));
                         echo "\t=> Added token T_STRING ({$content})" . PHP_EOL;
                     }
                 }
                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
                     echo "\t\t* char is token, looking ahead " . ($maxTokenLength - 1) . ' chars *' . PHP_EOL;
                 }
                 // The char is a token type, but we need to look ahead at the
                 // next chars to see if this is actually part of a larger token.
                 // For example, = and ===.
                 $charBuffer = $char;
                 $matchedToken = false;
                 for ($x = 1; $x <= $maxTokenLength; $x++) {
                     if (isset($chars[$i + $x]) === false) {
                         break;
                     }
                     $charBuffer .= $chars[$i + $x];
                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
                         $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
                         echo "\t\t=> Looking ahead {$x} chars => {$content}" . PHP_EOL;
                     }
                     if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                         // We've found something larger that matches
                         // so we can ignore this char.
                         if (PHP_CODESNIFFER_VERBOSITY > 1) {
                             $type = $this->tokenValues[strtolower($charBuffer)];
                             echo "\t\t* look ahead found more specific token ({$type}), ignoring {$i} *" . PHP_EOL;
                         }
                         $matchedToken = true;
                         break;
                     }
                 }
                 //end for
                 if ($matchedToken === false) {
                     $value = $this->tokenValues[strtolower($char)];
                     $tokens[] = array('code' => constant($value), 'type' => $value, 'content' => $char);
                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
                         echo "\t\t* look ahead found nothing *" . PHP_EOL;
                         $content = PHP_CodeSniffer::prepareForOutput($char);
                         echo "\t=> Added token {$value} ({$content})" . PHP_EOL;
                     }
                     $cleanBuffer = true;
                 } else {
                     // Restart the buffer with this char so the larger token
                     // can be matched as more characters arrive.
                     $buffer = $char;
                 }
                 //end if
             }
         }
         //end if
         // Keep track of content inside comments.
         if ($inComment === '' && array_key_exists($buffer, $this->commentTokens) === true) {
             // This is not really a comment if the content
             // looks like \// (i.e., it is escaped).
             if (isset($chars[$i - 2]) === true && $chars[$i - 2] === '\\') {
                 // Replace the comment opener token we just added with
                 // one token per character.
                 $lastToken = array_pop($tokens);
                 $lastContent = $lastToken['content'];
                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
                     $value = $this->tokenValues[strtolower($lastContent)];
                     $content = PHP_CodeSniffer::prepareForOutput($lastContent);
                     echo "\t=> Removed token {$value} ({$content})" . PHP_EOL;
                 }
                 $lastChars = str_split($lastContent);
                 $lastNumChars = count($lastChars);
                 for ($x = 0; $x < $lastNumChars; $x++) {
                     $lastChar = $lastChars[$x];
                     $value = $this->tokenValues[strtolower($lastChar)];
                     $tokens[] = array('code' => constant($value), 'type' => $value, 'content' => $lastChar);
                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
                         $content = PHP_CodeSniffer::prepareForOutput($lastChar);
                         echo "\t=> Added token {$value} ({$content})" . PHP_EOL;
                     }
                 }
             } else {
                 // We have started a comment.
                 $inComment = $buffer;
                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
                     echo "\t\t* looking for end of comment *" . PHP_EOL;
                 }
             }
             //end if
         } else {
             if ($inComment !== '') {
                 if ($this->commentTokens[$inComment] === null) {
                     // Comment ends at the next newline.
                     if (strpos($buffer, "\n") !== false) {
                         $inComment = '';
                     }
                 } else {
                     // Comment ends when the buffer is exactly the closer.
                     if ($this->commentTokens[$inComment] === $buffer) {
                         $inComment = '';
                     }
                 }
                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
                     if ($inComment === '') {
                         echo "\t\t* found end of comment *" . PHP_EOL;
                     }
                 }
                 if ($inComment === '' && $cleanBuffer === false) {
                     $tokens[] = array('code' => T_STRING, 'type' => 'T_STRING', 'content' => str_replace("\n", $eolChar, $buffer));
                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
                         $content = PHP_CodeSniffer::prepareForOutput($buffer);
                         echo "\t=> Added token T_STRING ({$content})" . PHP_EOL;
                     }
                     $buffer = '';
                 }
             }
         }
         //end if
         if ($cleanBuffer === true) {
             $buffer = '';
             $cleanBuffer = false;
         }
     }
     //end for
     // NOTE(review): empty() also treats a buffer of "0" as empty, so a
     // trailing "0" would be dropped here; left unchanged pending confirmation.
     if (empty($buffer) === false) {
         // Buffer contains whitespace from the end of the file.
         $tokens[] = array('code' => T_WHITESPACE, 'type' => 'T_WHITESPACE', 'content' => str_replace("\n", $eolChar, $buffer));
         if (PHP_CODESNIFFER_VERBOSITY > 1) {
             $content = PHP_CodeSniffer::prepareForOutput($buffer);
             echo "\t=> Added token T_WHITESPACE ({$content})" . PHP_EOL;
         }
     }
     $tokens[] = array('code' => T_CLOSE_TAG, 'type' => 'T_CLOSE_TAG', 'content' => '');
     /*
         Now that we have done some basic tokenizing, we need to
         modify the tokens to join some together and split some apart
         so they match what the PHP tokenizer does.
     */
     $finalTokens = array();
     $newStackPtr = 0;
     $numTokens = count($tokens);
     for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
         $token = $tokens[$stackPtr];
         /*
             Look for comments and join the tokens together.
         */
         if ($token['code'] === T_COMMENT || $token['code'] === T_DOC_COMMENT) {
             $newContent = '';
             $tokenContent = $token['content'];
             $endContent = $this->commentTokens[$tokenContent];
             while ($tokenContent !== $endContent) {
                 if ($endContent === null && strpos($tokenContent, $eolChar) !== false) {
                     // A null end token means the comment ends at the end of
                     // the line so we look for newlines and split the token.
                     $tokens[$stackPtr]['content'] = substr($tokenContent, strpos($tokenContent, $eolChar) + strlen($eolChar));
                     $tokenContent = substr($tokenContent, 0, strpos($tokenContent, $eolChar) + strlen($eolChar));
                     // If nothing is left after the newline, skip the token
                     // as its content is now blank. substr() reports that as
                     // false on old PHP versions and as '' on newer ones, so
                     // both must be checked before rewinding to re-process
                     // the remainder.
                     if ($tokens[$stackPtr]['content'] !== false && $tokens[$stackPtr]['content'] !== '') {
                         $stackPtr--;
                     }
                     break;
                 }
                 //end if
                 $stackPtr++;
                 $newContent .= $tokenContent;
                 if (isset($tokens[$stackPtr]) === false) {
                     break;
                 }
                 $tokenContent = $tokens[$stackPtr]['content'];
             }
             //end while
             if ($token['code'] === T_DOC_COMMENT) {
                 // Doc comments are tokenized separately and spliced in.
                 $commentTokens = $commentTokenizer->tokenizeString($newContent . $tokenContent, $eolChar, $newStackPtr);
                 foreach ($commentTokens as $commentToken) {
                     $finalTokens[$newStackPtr] = $commentToken;
                     $newStackPtr++;
                 }
                 continue;
             } else {
                 // Save the new content in the current token so
                 // the code below can chop it up on newlines.
                 $token['content'] = $newContent . $tokenContent;
             }
         }
         //end if
         /*
             If this token has newlines in its content, split each line up
             and create a new token for each line. We do this so it's easier
             to ascertain where errors occur on a line.
         */
         if (strpos($token['content'], $eolChar) !== false) {
             $tokenLines = explode($eolChar, $token['content']);
             $numLines = count($tokenLines);
             $newToken = array();
             for ($i = 0; $i < $numLines; $i++) {
                 $newToken['content'] = $tokenLines[$i];
                 if ($i === $numLines - 1) {
                     // A trailing empty piece means the content ended with
                     // an EOL; there is no extra line to add.
                     if ($tokenLines[$i] === '') {
                         break;
                     }
                 } else {
                     $newToken['content'] .= $eolChar;
                 }
                 $newToken['type'] = $token['type'];
                 $newToken['code'] = $token['code'];
                 $finalTokens[$newStackPtr] = $newToken;
                 $newStackPtr++;
             }
         } else {
             $finalTokens[$newStackPtr] = $token;
             $newStackPtr++;
         }
         //end if
         // Convert numbers, including decimals.
         if ($token['code'] === T_STRING || $token['code'] === T_OBJECT_OPERATOR) {
             $newContent = '';
             $oldStackPtr = $stackPtr;
             // Gather consecutive tokens made up only of digits and dots.
             // The final T_CLOSE_TAG token has empty content, which never
             // matches, so this cannot run off the end of $tokens.
             while (preg_match('|^[0-9\\.]+$|', $tokens[$stackPtr]['content']) !== 0) {
                 $newContent .= $tokens[$stackPtr]['content'];
                 $stackPtr++;
             }
             if ($newContent !== '' && $newContent !== '.') {
                 // Rewrite the token we just added as a single number token.
                 $finalTokens[$newStackPtr - 1]['content'] = $newContent;
                 if (ctype_digit($newContent) === true) {
                     $finalTokens[$newStackPtr - 1]['code'] = constant('T_LNUMBER');
                     $finalTokens[$newStackPtr - 1]['type'] = 'T_LNUMBER';
                 } else {
                     $finalTokens[$newStackPtr - 1]['code'] = constant('T_DNUMBER');
                     $finalTokens[$newStackPtr - 1]['type'] = 'T_DNUMBER';
                 }
                 $stackPtr--;
             } else {
                 $stackPtr = $oldStackPtr;
             }
         }
         //end if
     }
     //end for
     if (PHP_CODESNIFFER_VERBOSITY > 1) {
         echo "\t*** END TOKENIZING ***" . PHP_EOL;
     }
     return $finalTokens;
 }

Example #2

Show file

File: PHP.php Project: itliuchang/test

 /**
  * Creates an array of tokens when given some PHP code.
  *
  * Starts by using token_get_all() but does a lot of extra processing
  * to insert information about the context of the token.
  *
  * @param string $string  The string to tokenize.
  * @param string $eolChar The EOL character to use for splitting strings.
  *
  * @return array
  */
 public function tokenizeString($string, $eolChar = '\\n')
 {
     if (PHP_CODESNIFFER_VERBOSITY > 1) {
         echo "\t*** START PHP TOKENIZING ***" . PHP_EOL;
         $isWin = false;
         if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
             $isWin = true;
         }
     }
     $tokens = @token_get_all($string);
     $finalTokens = array();
     $newStackPtr = 0;
     $numTokens = count($tokens);
     $lastNotEmptyToken = 0;
     $insideInlineIf = array();
     $commentTokenizer = new PHP_CodeSniffer_Tokenizers_Comment();
     for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
         $token = (array) $tokens[$stackPtr];
         $tokenIsArray = isset($token[1]);
         if (PHP_CODESNIFFER_VERBOSITY > 1) {
             if ($tokenIsArray === true) {
                 $type = token_name($token[0]);
                 $content = PHP_CodeSniffer::prepareForOutput($token[1]);
             } else {
                 $newToken = self::resolveSimpleToken($token[0]);
                 $type = $newToken['type'];
                 $content = PHP_CodeSniffer::prepareForOutput($token[0]);
             }
             echo "\tProcess token ";
             if ($tokenIsArray === true) {
                 echo "[{$stackPtr}]";
             } else {
                 echo " {$stackPtr} ";
             }
             echo ": {$type} => {$content}";
         }
         //end if
         if ($newStackPtr > 0 && $finalTokens[$newStackPtr - 1]['code'] !== T_WHITESPACE) {
             $lastNotEmptyToken = $newStackPtr - 1;
         }
         /*
             If we are using \r\n newline characters, the \r and \n are sometimes
             split over two tokens. This normally occurs after comments. We need
             to merge these two characters together so that our line endings are
             consistent for all lines.
         */
         if ($tokenIsArray === true && substr($token[1], -1) === "\r") {
             if (isset($tokens[$stackPtr + 1]) === true && is_array($tokens[$stackPtr + 1]) === true && $tokens[$stackPtr + 1][1][0] === "\n") {
                 $token[1] .= "\n";
                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
                     if ($isWin === true) {
                         echo '\\n';
                     } else {
                         echo "[30;1m\\n[0m";
                     }
                 }
                 if ($tokens[$stackPtr + 1][1] === "\n") {
                     // This token's content has been merged into the previous,
                     // so we can skip it.
                     $tokens[$stackPtr + 1] = '';
                 } else {
                     $tokens[$stackPtr + 1][1] = substr($tokens[$stackPtr + 1][1], 1);
                 }
             }
         }
         //end if
         if (PHP_CODESNIFFER_VERBOSITY > 1) {
             echo PHP_EOL;
         }
         /*
             Parse doc blocks into something that can be easily iterated over.
         */
         if ($tokenIsArray === true && $token[0] === T_DOC_COMMENT) {
             $commentTokens = $commentTokenizer->tokenizeString($token[1], $eolChar, $newStackPtr);
             foreach ($commentTokens as $commentToken) {
                 $finalTokens[$newStackPtr] = $commentToken;
                 $newStackPtr++;
             }
             continue;
         }
         /*
             If this is a double quoted string, PHP will tokenize the whole
             thing which causes problems with the scope map when braces are
             within the string. So we need to merge the tokens together to
             provide a single string.
         */
         if ($tokenIsArray === false && ($token[0] === '"' || $token[0] === 'b"')) {
             // Binary casts need a special token.
             if ($token[0] === 'b"') {
                 $finalTokens[$newStackPtr] = array('code' => T_BINARY_CAST, 'type' => 'T_BINARY_CAST', 'content' => 'b');
                 $newStackPtr++;
             }
             $tokenContent = '"';
             $nestedVars = array();
             for ($i = $stackPtr + 1; $i < $numTokens; $i++) {
                 $subToken = (array) $tokens[$i];
                 $subTokenIsArray = isset($subToken[1]);
                 if ($subTokenIsArray === true) {
                     $tokenContent .= $subToken[1];
                     if ($subToken[1] === '{' && $subToken[0] !== T_ENCAPSED_AND_WHITESPACE) {
                         $nestedVars[] = $i;
                     }
                 } else {
                     $tokenContent .= $subToken[0];
                     if ($subToken[0] === '}') {
                         array_pop($nestedVars);
                     }
                 }
                 if ($subTokenIsArray === false && $subToken[0] === '"' && empty($nestedVars) === true) {
                     // We found the other end of the double quoted string.
                     break;
                 }
             }
             //end for
             $stackPtr = $i;
             // Convert each line within the double quoted string to a
             // new token, so it conforms with other multiple line tokens.
             $tokenLines = explode($eolChar, $tokenContent);
             $numLines = count($tokenLines);
             $newToken = array();
             for ($j = 0; $j < $numLines; $j++) {
                 $newToken['content'] = $tokenLines[$j];
                 if ($j === $numLines - 1) {
                     if ($tokenLines[$j] === '') {
                         break;
                     }
                 } else {
                     $newToken['content'] .= $eolChar;
                 }
                 $newToken['code'] = T_DOUBLE_QUOTED_STRING;
                 $newToken['type'] = 'T_DOUBLE_QUOTED_STRING';
                 $finalTokens[$newStackPtr] = $newToken;
                 $newStackPtr++;
             }
             // Continue, as we're done with this token.
             continue;
         }
         //end if
         /*
             If this is a heredoc, PHP will tokenize the whole
             thing which causes problems when heredocs don't
             contain real PHP code, which is almost never.
             We want to leave the start and end heredoc tokens
             alone though.
         */
         if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) {
             // Add the start heredoc token to the final array.
             $finalTokens[$newStackPtr] = self::standardiseToken($token);
             // Check if this is actually a nowdoc and use a different token
             // to help the sniffs.
             $nowdoc = false;
             if ($token[1][3] === "'") {
                 $finalTokens[$newStackPtr]['code'] = T_START_NOWDOC;
                 $finalTokens[$newStackPtr]['type'] = 'T_START_NOWDOC';
                 $nowdoc = true;
             }
             $newStackPtr++;
             $tokenContent = '';
             for ($i = $stackPtr + 1; $i < $numTokens; $i++) {
                 $subTokenIsArray = is_array($tokens[$i]);
                 if ($subTokenIsArray === true && $tokens[$i][0] === T_END_HEREDOC) {
                     // We found the other end of the heredoc.
                     break;
                 }
                 if ($subTokenIsArray === true) {
                     $tokenContent .= $tokens[$i][1];
                 } else {
                     $tokenContent .= $tokens[$i];
                 }
             }
             $stackPtr = $i;
             // Convert each line within the heredoc to a
             // new token, so it conforms with other multiple line tokens.
             $tokenLines = explode($eolChar, $tokenContent);
             $numLines = count($tokenLines);
             $newToken = array();
             for ($j = 0; $j < $numLines; $j++) {
                 $newToken['content'] = $tokenLines[$j];
                 if ($j === $numLines - 1) {
                     if ($tokenLines[$j] === '') {
                         break;
                     }
                 } else {
                     $newToken['content'] .= $eolChar;
                 }
                 if ($nowdoc === true) {
                     $newToken['code'] = T_NOWDOC;
                     $newToken['type'] = 'T_NOWDOC';
                 } else {
                     $newToken['code'] = T_HEREDOC;
                     $newToken['type'] = 'T_HEREDOC';
                 }
                 $finalTokens[$newStackPtr] = $newToken;
                 $newStackPtr++;
             }
             //end for
             // Add the end heredoc token to the final array.
             $finalTokens[$newStackPtr] = self::standardiseToken($tokens[$stackPtr]);
             if ($nowdoc === true) {
                 $finalTokens[$newStackPtr]['code'] = T_END_NOWDOC;
                 $finalTokens[$newStackPtr]['type'] = 'T_END_NOWDOC';
                 $nowdoc = true;
             }
             $newStackPtr++;
             // Continue, as we're done with this token.
             continue;
         }
         //end if
         /*
             Before PHP 5.6, the ... operator was tokenized as three
             T_STRING_CONCAT tokens in a row. So look for and combine
             these tokens in earlier versions.
         */
         if ($tokenIsArray === false && $token[0] === '.' && isset($tokens[$stackPtr + 1]) === true && isset($tokens[$stackPtr + 2]) === true && $tokens[$stackPtr + 1] === '.' && $tokens[$stackPtr + 2] === '.') {
             $newToken = array();
             $newToken['code'] = T_ELLIPSIS;
             $newToken['type'] = 'T_ELLIPSIS';
             $newToken['content'] = '...';
             $finalTokens[$newStackPtr] = $newToken;
             $newStackPtr++;
             $stackPtr += 2;
             continue;
         }
         /*
             PHP doesn't assign a token to goto labels, so we have to.
             These are just string tokens with a single colon after them. Double
             colons are already tokenized and so don't interfere with this check.
             But we do have to account for CASE statements, that look just like
             goto labels.
         */
         if ($tokenIsArray === true && $token[0] === T_STRING && $tokens[$stackPtr + 1] === ':' && $tokens[$stackPtr - 1][0] !== T_PAAMAYIM_NEKUDOTAYIM) {
             $stopTokens = array(T_CASE => true, T_SEMICOLON => true, T_OPEN_CURLY_BRACKET => true, T_INLINE_THEN => true);
             for ($x = $newStackPtr - 1; $x > 0; $x--) {
                 if (isset($stopTokens[$finalTokens[$x]['code']]) === true) {
                     break;
                 }
             }
             if ($finalTokens[$x]['code'] !== T_CASE && $finalTokens[$x]['code'] !== T_INLINE_THEN) {
                 $finalTokens[$newStackPtr] = array('content' => $token[1] . ':', 'code' => T_GOTO_LABEL, 'type' => 'T_GOTO_LABEL');
                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
                     echo "\t\t* token {$stackPtr} changed from T_STRING to T_GOTO_LABEL" . PHP_EOL;
                     echo "\t\t* skipping T_COLON token " . ($stackPtr + 1) . PHP_EOL;
                 }
                 $newStackPtr++;
                 $stackPtr++;
                 continue;
             }
         }
         //end if
         /*
             HHVM 3.5 tokenizes "else[\s]+if" as a T_ELSEIF token while PHP
             proper only tokenizes "elseif" as a T_ELSEIF token. So split
             up the HHVM token to make it looks like proper PHP.
         */
         if ($tokenIsArray === true && $token[0] === T_ELSEIF && strtolower($token[1]) !== 'elseif') {
             $finalTokens[$newStackPtr] = array('content' => substr($token[1], 0, 4), 'code' => T_ELSE, 'type' => 'T_ELSE');
             $newStackPtr++;
             $finalTokens[$newStackPtr] = array('content' => substr($token[1], 4, -2), 'code' => T_WHITESPACE, 'type' => 'T_WHITESPACE');
             $newStackPtr++;
             $finalTokens[$newStackPtr] = array('content' => substr($token[1], -2), 'code' => T_IF, 'type' => 'T_IF');
             if (PHP_CODESNIFFER_VERBOSITY > 1) {
                 echo "\t\t* token {$stackPtr} changed from T_ELSEIF to T_ELSE/T_WHITESPACE/T_IF" . PHP_EOL;
             }
             $newStackPtr++;
             continue;
         }
         //end if
         /*
             If this token has newlines in its content, split each line up
             and create a new token for each line. We do this so it's easier
             to ascertain where errors occur on a line.
             Note that $token[1] is the token's content.
         */
         if ($tokenIsArray === true && strpos($token[1], $eolChar) !== false) {
             $tokenLines = explode($eolChar, $token[1]);
             $numLines = count($tokenLines);
             $newToken = array('type' => token_name($token[0]), 'code' => $token[0], 'content' => '');
             for ($i = 0; $i < $numLines; $i++) {
                 $newToken['content'] = $tokenLines[$i];
                 if ($i === $numLines - 1) {
                     if ($tokenLines[$i] === '') {
                         break;
                     }
                 } else {
                     $newToken['content'] .= $eolChar;
                 }
                 $finalTokens[$newStackPtr] = $newToken;
                 $newStackPtr++;
             }
         } else {
             if ($tokenIsArray === true && $token[0] === T_STRING) {
                 // Some T_STRING tokens should remain that way
                 // due to their context.
                 $context = array(T_OBJECT_OPERATOR => true, T_FUNCTION => true, T_CLASS => true, T_EXTENDS => true, T_IMPLEMENTS => true, T_NEW => true, T_CONST => true, T_NS_SEPARATOR => true, T_USE => true, T_NAMESPACE => true, T_PAAMAYIM_NEKUDOTAYIM => true);
                 if (isset($context[$finalTokens[$lastNotEmptyToken]['code']]) === true) {
                     $finalTokens[$newStackPtr] = array('content' => $token[1], 'code' => T_STRING, 'type' => 'T_STRING');
                     $newStackPtr++;
                     continue;
                 }
             }
             //end if
             $newToken = null;
             if ($tokenIsArray === false) {
                 if (isset(self::$_resolveTokenCache[$token[0]]) === true) {
                     $newToken = self::$_resolveTokenCache[$token[0]];
                 }
             } else {
                 $cacheKey = null;
                 if ($token[0] === T_STRING) {
                     $cacheKey = strtolower($token[1]);
                 } else {
                     if ($token[0] !== T_CURLY_OPEN) {
                         $cacheKey = $token[0];
                     }
                 }
                 if ($cacheKey !== null && isset(self::$_resolveTokenCache[$cacheKey]) === true) {
                     $newToken = self::$_resolveTokenCache[$cacheKey];
                     $newToken['content'] = $token[1];
                 }
             }
             if ($newToken === null) {
                 $newToken = self::standardiseToken($token);
             }
             // Convert colons that are actually the ELSE component of an
             // inline IF statement.
             if ($newToken['code'] === T_INLINE_THEN) {
                 $insideInlineIf[] = $stackPtr;
             } else {
                 if (empty($insideInlineIf) === false && $newToken['code'] === T_COLON) {
                     array_pop($insideInlineIf);
                     $newToken['code'] = T_INLINE_ELSE;
                     $newToken['type'] = 'T_INLINE_ELSE';
                 }
             }
             // This is a special condition for T_ARRAY tokens used for
             // type hinting function arguments as being arrays. We want to keep
             // the parenthesis map clean, so let's tag these tokens as
             // T_ARRAY_HINT.
             if ($newToken['code'] === T_ARRAY) {
                 // Recalculate number of tokens.
                 for ($i = $stackPtr; $i < $numTokens; $i++) {
                     if ($tokens[$i] === '(') {
                         break;
                     } else {
                         if ($tokens[$i][0] === T_VARIABLE) {
                             $newToken['code'] = T_ARRAY_HINT;
                             $newToken['type'] = 'T_ARRAY_HINT';
                             break;
                         }
                     }
                 }
             }
             // This is a special case for the PHP 5.5 classname::class syntax
             // where "class" should be T_STRING instead of T_CLASS.
             if ($newToken['code'] === T_CLASS && $finalTokens[$newStackPtr - 1]['code'] === T_DOUBLE_COLON) {
                 $newToken['code'] = T_STRING;
                 $newToken['type'] = 'T_STRING';
             }
             // This is a special case for PHP 5.6 use function and use const
             // where "function" and "const" should be T_STRING instead of T_FUNCTION
             // and T_CONST.
             if (($newToken['code'] === T_FUNCTION || $newToken['code'] === T_CONST) && $finalTokens[$lastNotEmptyToken]['code'] === T_USE) {
                 $newToken['code'] = T_STRING;
                 $newToken['type'] = 'T_STRING';
             }
             $finalTokens[$newStackPtr] = $newToken;
             $newStackPtr++;
         }
         //end if
     }
     //end for
     if (PHP_CODESNIFFER_VERBOSITY > 1) {
         echo "\t*** END PHP TOKENIZING ***" . PHP_EOL;
     }
     return $finalTokens;
 }

Example #3

Show file

File: PHP.php Project: eduardobenito10/jenkins-php-quickstart

 /**
  * Creates an array of tokens when given some PHP code.
  *
  * Starts by using token_get_all() but does a lot of extra processing
  * to insert information about the context of the token:
  *
  * - doc comments are handed to the comment tokenizer and its tokens
  *   are inserted in place of the single T_DOC_COMMENT token;
  * - double quoted strings and heredoc/nowdoc bodies are merged and then
  *   re-split into one token per line;
  * - "label:" is converted into a single T_GOTO_LABEL token;
  * - any other token containing the EOL character is split per line;
  * - the ":" of an inline IF becomes T_INLINE_ELSE, "array" used as a
  *   type hint becomes T_ARRAY_HINT and "class" after a double colon
  *   becomes T_STRING.
  *
  * Debug output is echoed when PHP_CODESNIFFER_VERBOSITY is greater than 1.
  *
  * @param string $string  The string to tokenize.
  * @param string $eolChar The EOL character to use for splitting strings.
  *
  * @return array The processed tokens, indexed sequentially from 0. Tokens
  *               built in this method carry 'code', 'type' and 'content' keys.
  */
 public function tokenizeString($string, $eolChar = '\\n')
 {
     if (PHP_CODESNIFFER_VERBOSITY > 1) {
         echo "\t*** START PHP TOKENIZING ***" . PHP_EOL;
         // Colour codes are only used for the debug output on non-Windows systems.
         $isWin = false;
         if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
             $isWin = true;
         }
     }
     $tokens = @token_get_all($string);
     $finalTokens = array();
     $newStackPtr = 0;
     $numTokens = count($tokens);
     // Stack of open inline IF "?" tokens, so that nested ternaries each
     // get their own matching T_INLINE_ELSE.
     $insideInlineIf = array();
     $commentTokenizer = new PHP_CodeSniffer_Tokenizers_Comment();
     for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
         // Simple tokens are plain strings; casting makes $token[0] valid
         // for both forms, and only complex tokens have a [1] (content) index.
         $token = (array) $tokens[$stackPtr];
         $tokenIsArray = isset($token[1]);
         if (PHP_CODESNIFFER_VERBOSITY > 1) {
             if ($tokenIsArray === true) {
                 $type = token_name($token[0]);
                 if ($isWin === true) {
                     $content = str_replace($eolChar, '\\n', $token[1]);
                 } else {
                     // \033 is the ESC byte that starts an ANSI colour sequence.
                     $content = str_replace($eolChar, "\033[30;1m\\n\033[0m", $token[1]);
                 }
             } else {
                 $newToken = self::resolveSimpleToken($token[0]);
                 $type = $newToken['type'];
                 $content = $token[0];
             }
             if ($isWin === false) {
                 $content = str_replace(' ', "\033[30;1m·\033[0m", $content);
             }
             echo "\tProcess token {$stackPtr}: {$type} => {$content}" . PHP_EOL;
         }
         /*
             Parse doc blocks into something that can be easily iterated over.
         */
         if ($tokenIsArray === true && $token[0] === T_DOC_COMMENT) {
             $commentTokens = $commentTokenizer->tokenizeString($token[1], $eolChar, $newStackPtr);
             foreach ($commentTokens as $commentToken) {
                 $finalTokens[$newStackPtr] = $commentToken;
                 $newStackPtr++;
             }
             continue;
         }
         /*
             If this is a double quoted string, PHP will tokenise the whole
             thing which causes problems with the scope map when braces are
             within the string. So we need to merge the tokens together to
             provide a single string.
         */
         if ($tokenIsArray === false && $token[0] === '"') {
             $tokenContent = '"';
             // Open "{$" style braces; a quote only ends the string when
             // no such brace is still open.
             $nestedVars = array();
             for ($i = $stackPtr + 1; $i < $numTokens; $i++) {
                 $subToken = (array) $tokens[$i];
                 $subTokenIsArray = isset($subToken[1]);
                 if ($subTokenIsArray === true) {
                     $tokenContent .= $subToken[1];
                     if ($subToken[1] === '{' && $subToken[0] !== T_ENCAPSED_AND_WHITESPACE) {
                         $nestedVars[] = $i;
                     }
                 } else {
                     $tokenContent .= $subToken[0];
                     if ($subToken[0] === '}') {
                         array_pop($nestedVars);
                     }
                 }
                 if ($subTokenIsArray === false && $subToken[0] === '"' && empty($nestedVars) === true) {
                     // We found the other end of the double quoted string.
                     break;
                 }
             }
             //end for
             $stackPtr = $i;
             // Convert each line within the double quoted string to a
             // new token, so it conforms with other multiple line tokens.
             $tokenLines = explode($eolChar, $tokenContent);
             $numLines = count($tokenLines);
             $newToken = array();
             for ($j = 0; $j < $numLines; $j++) {
                 $newToken['content'] = $tokenLines[$j];
                 if ($j === $numLines - 1) {
                     // An empty last segment just means the content ended
                     // with an EOL; there is no extra line to add.
                     if ($tokenLines[$j] === '') {
                         break;
                     }
                 } else {
                     $newToken['content'] .= $eolChar;
                 }
                 $newToken['code'] = T_DOUBLE_QUOTED_STRING;
                 $newToken['type'] = 'T_DOUBLE_QUOTED_STRING';
                 $finalTokens[$newStackPtr] = $newToken;
                 $newStackPtr++;
             }
             // Continue, as we're done with this token.
             continue;
         }
         //end if
         /*
             If this is a heredoc, PHP will tokenise the whole
             thing which causes problems when heredocs don't
             contain real PHP code, which is almost never.
             We want to leave the start and end heredoc tokens
             alone though.
         */
         if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) {
             // Add the start heredoc token to the final array.
             $finalTokens[$newStackPtr] = self::standardiseToken($token);
             // Check if this is actually a nowdoc and use a different token
             // to help the sniffs. Only a nowdoc opener contains a single
             // quote, and it may be preceded by whitespace after "<<<", so
             // search for the quote rather than testing a fixed offset.
             $nowdoc = false;
             if (strpos($token[1], "'") !== false) {
                 $finalTokens[$newStackPtr]['code'] = T_START_NOWDOC;
                 $finalTokens[$newStackPtr]['type'] = 'T_START_NOWDOC';
                 $nowdoc = true;
             }
             $newStackPtr++;
             $tokenContent = '';
             for ($i = $stackPtr + 1; $i < $numTokens; $i++) {
                 $subTokenIsArray = is_array($tokens[$i]);
                 if ($subTokenIsArray === true && $tokens[$i][0] === T_END_HEREDOC) {
                     // We found the other end of the heredoc.
                     break;
                 }
                 if ($subTokenIsArray === true) {
                     $tokenContent .= $tokens[$i][1];
                 } else {
                     $tokenContent .= $tokens[$i];
                 }
             }
             $stackPtr = $i;
             // Convert each line within the heredoc to a
             // new token, so it conforms with other multiple line tokens.
             $tokenLines = explode($eolChar, $tokenContent);
             $numLines = count($tokenLines);
             $newToken = array();
             for ($j = 0; $j < $numLines; $j++) {
                 $newToken['content'] = $tokenLines[$j];
                 if ($j === $numLines - 1) {
                     if ($tokenLines[$j] === '') {
                         break;
                     }
                 } else {
                     $newToken['content'] .= $eolChar;
                 }
                 if ($nowdoc === true) {
                     $newToken['code'] = T_NOWDOC;
                     $newToken['type'] = 'T_NOWDOC';
                 } else {
                     $newToken['code'] = T_HEREDOC;
                     $newToken['type'] = 'T_HEREDOC';
                 }
                 $finalTokens[$newStackPtr] = $newToken;
                 $newStackPtr++;
             }
             //end for
             // Add the end heredoc token to the final array. An unterminated
             // heredoc runs to the end of the token list and has no closing
             // token, so there is nothing to add in that case.
             if (isset($tokens[$stackPtr]) === true) {
                 $finalTokens[$newStackPtr] = self::standardiseToken($tokens[$stackPtr]);
                 if ($nowdoc === true) {
                     $finalTokens[$newStackPtr]['code'] = T_END_NOWDOC;
                     $finalTokens[$newStackPtr]['type'] = 'T_END_NOWDOC';
                 }
                 $newStackPtr++;
             }
             // Continue, as we're done with this token.
             continue;
         }
         //end if
         /*
             PHP doesn't assign a token to goto labels, so we have to.
             These are just string tokens with a single colon after them. Double
             colons are already tokenized and so don't interfere with this check.
             But we do have to account for CASE statements, that look just like
             goto labels.
         */
         if ($tokenIsArray === true && $token[0] === T_STRING && isset($tokens[$stackPtr + 1]) === true && $tokens[$stackPtr + 1] === ':' && $tokens[$stackPtr - 1][0] !== T_PAAMAYIM_NEKUDOTAYIM) {
             // Walk back to the start of the statement to see whether the
             // string belongs to a CASE or an inline IF rather than a label.
             $stopTokens = array(T_CASE => true, T_SEMICOLON => true, T_OPEN_CURLY_BRACKET => true, T_INLINE_THEN => true);
             for ($x = $newStackPtr - 1; $x > 0; $x--) {
                 if (isset($stopTokens[$finalTokens[$x]['code']]) === true) {
                     break;
                 }
             }
             if ($finalTokens[$x]['code'] !== T_CASE && $finalTokens[$x]['code'] !== T_INLINE_THEN) {
                 $finalTokens[$newStackPtr] = array('content' => $token[1] . ':', 'code' => T_GOTO_LABEL, 'type' => 'T_GOTO_LABEL');
                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
                     echo "\t\t* token {$stackPtr} changed from T_STRING to T_GOTO_LABEL" . PHP_EOL;
                     echo "\t\t* skipping T_COLON token " . ($stackPtr + 1) . PHP_EOL;
                 }
                 $newStackPtr++;
                 // The colon has been merged into the label token.
                 $stackPtr++;
                 continue;
             }
         }
         //end if
         /*
             If this token has newlines in its content, split each line up
             and create a new token for each line. We do this so it's easier
             to ascertain where errors occur on a line.
             Note that $token[1] is the token's content.
         */
         if ($tokenIsArray === true && strpos($token[1], $eolChar) !== false) {
             $tokenLines = explode($eolChar, $token[1]);
             $numLines = count($tokenLines);
             $newToken = array('type' => token_name($token[0]), 'code' => $token[0], 'content' => '');
             for ($i = 0; $i < $numLines; $i++) {
                 $newToken['content'] = $tokenLines[$i];
                 if ($i === $numLines - 1) {
                     if ($tokenLines[$i] === '') {
                         break;
                     }
                 } else {
                     $newToken['content'] .= $eolChar;
                 }
                 $finalTokens[$newStackPtr] = $newToken;
                 $newStackPtr++;
             }
         } else {
             // Use the pre-resolved token from the cache when there is one,
             // otherwise fall back to standardising the raw token.
             $newToken = null;
             if ($tokenIsArray === false) {
                 if (isset(self::$_resolveTokenCache[$token[0]]) === true) {
                     $newToken = self::$_resolveTokenCache[$token[0]];
                 }
             } else {
                 $cacheKey = null;
                 if ($token[0] === T_STRING) {
                     $cacheKey = strtolower($token[1]);
                 } else {
                     if ($token[0] !== T_CURLY_OPEN) {
                         $cacheKey = $token[0];
                     }
                 }
                 if ($cacheKey !== null && isset(self::$_resolveTokenCache[$cacheKey]) === true) {
                     $newToken = self::$_resolveTokenCache[$cacheKey];
                     $newToken['content'] = $token[1];
                 }
             }
             if ($newToken === null) {
                 $newToken = self::standardiseToken($token);
             }
             // Convert colons that are actually the ELSE component of an
             // inline IF statement. Each "?" is pushed onto a stack and each
             // converted ":" pops one entry, so nested inline IFs are all
             // matched instead of only the innermost one.
             if ($newToken['code'] === T_INLINE_THEN) {
                 $insideInlineIf[] = $stackPtr;
             } else {
                 if (empty($insideInlineIf) === false && $newToken['code'] === T_COLON) {
                     array_pop($insideInlineIf);
                     $newToken['code'] = T_INLINE_ELSE;
                     $newToken['type'] = 'T_INLINE_ELSE';
                 }
             }
             // This is a special condition for T_ARRAY tokens used for
             // type hinting function arguments as being arrays. We want to keep
             // the parenthesis map clean, so let's tag these tokens as
             // T_ARRAY_HINT.
             if ($newToken['code'] === T_ARRAY) {
                 // Look ahead: an opening parenthesis means a real array
                 // declaration, a variable first means a type hint.
                 for ($i = $stackPtr; $i < $numTokens; $i++) {
                     if ($tokens[$i] === '(') {
                         break;
                     } else {
                         if ($tokens[$i][0] === T_VARIABLE) {
                             $newToken['code'] = T_ARRAY_HINT;
                             $newToken['type'] = 'T_ARRAY_HINT';
                             break;
                         }
                     }
                 }
             }
             // This is a special case for the PHP 5.5 classname::class syntax
             // where "class" should be T_STRING instead of T_CLASS.
             if ($newToken['code'] === T_CLASS && $finalTokens[$newStackPtr - 1]['code'] === T_DOUBLE_COLON) {
                 $newToken['code'] = T_STRING;
                 $newToken['type'] = 'T_STRING';
             }
             $finalTokens[$newStackPtr] = $newToken;
             $newStackPtr++;
         }
         //end if
     }
     //end for
     if (PHP_CODESNIFFER_VERBOSITY > 1) {
         echo "\t*** END PHP TOKENIZING ***" . PHP_EOL;
     }
     return $finalTokens;
 }