tokenizeString() public method

Starts by using token_get_all() but does a lot of extra processing to insert information about the context of the token.
public tokenizeString ( string $string, string $eolChar = '\n' ) : array
$string string The string to tokenize.
$eolChar string The EOL character to use for splitting strings.
return array
示例#1
0
文件: CSS.php 项目: 453111208/bbc
 /**
  * Creates an array of tokens when given some CSS code.
  *
  * The CSS is wrapped in PHP open/close tags and handed to the parent
  * tokenizer. The PHP tokens that come back are then rewritten in two passes:
  *  - the first pass restores embedded PHP blocks, splits goto labels back
  *    into a string and a colon, converts PHP-only keywords to strings and
  *    re-tokenizes "//" and "#" comments, which are not comments in CSS
  *    (producing T_COLOUR or T_HASH tokens for "#");
  *  - the second pass joins minus signs with the neighbouring string or
  *    number, marks the token before a colon inside a style definition as
  *    T_STYLE and merges the content of url() into a single T_URL token.
  *
  * @param string $string  The string to tokenize.
  * @param string $eolChar The EOL character to use for splitting strings.
  *
  * @return array
  */
 public function tokenizeString($string, $eolChar = '\\n')
 {
     if (PHP_CODESNIFFER_VERBOSITY > 1) {
         echo "\t*** START CSS TOKENIZING ***" . PHP_EOL;
     }
     // If the content doesn't have an EOL char on the end, add one so
     // the open and close tags we add are parsed correctly.
     $eolAdded = false;
     if (substr($string, strlen($eolChar) * -1) !== $eolChar) {
         $string .= $eolChar;
         $eolAdded = true;
     }
     // Hide real PHP tags behind placeholders so the wrapping tags added
     // below are the only ones the parent tokenizer sees.
     $string = str_replace('<?php', '^PHPCS_CSS_T_OPEN_TAG^', $string);
     $string = str_replace('?>', '^PHPCS_CSS_T_CLOSE_TAG^', $string);
     $tokens = parent::tokenizeString('<?php ' . $string . '?>', $eolChar);
     $finalTokens = array();
     // The open tag we added is replaced by an empty-content open tag token.
     $finalTokens[0] = array('code' => T_OPEN_TAG, 'type' => 'T_OPEN_TAG', 'content' => '');
     $newStackPtr = 1;
     $numTokens = count($tokens);
     $multiLineComment = false;
     for ($stackPtr = 1; $stackPtr < $numTokens; $stackPtr++) {
         $token = $tokens[$stackPtr];
         // CSS files don't have lists, breaks etc, so convert these to
         // standard strings early so they can be converted into T_STYLE
         // tokens and joined with other strings if needed.
         if ($token['code'] === T_BREAK || $token['code'] === T_LIST || $token['code'] === T_DEFAULT) {
             $token['type'] = 'T_STRING';
             $token['code'] = T_STRING;
         }
         if (PHP_CODESNIFFER_VERBOSITY > 1) {
             $type = $token['type'];
             $content = PHP_CodeSniffer::prepareForOutput($token['content']);
             echo "\tProcess token {$stackPtr}: {$type} => {$content}" . PHP_EOL;
         }
         // A ^ followed by the open tag placeholder marks embedded PHP code.
         if ($token['code'] === T_POWER && $tokens[$stackPtr + 1]['content'] === 'PHPCS_CSS_T_OPEN_TAG') {
             $content = '<?php';
             // Start three tokens on: past the ^, the placeholder name and
             // the closing ^ of the open tag placeholder.
             for ($stackPtr = $stackPtr + 3; $stackPtr < $numTokens; $stackPtr++) {
                 if ($tokens[$stackPtr]['code'] === T_POWER && $tokens[$stackPtr + 1]['content'] === 'PHPCS_CSS_T_CLOSE_TAG') {
                     // Add the end tag and skip the placeholder name and
                     // the ^ marker that follows it.
                     $content .= '?>';
                     $stackPtr += 2;
                     break;
                 } else {
                     $content .= $tokens[$stackPtr]['content'];
                 }
             }
             if (PHP_CODESNIFFER_VERBOSITY > 1) {
                 echo "\t\t=> Found embedded PHP code: ";
                 $cleanContent = PHP_CodeSniffer::prepareForOutput($content);
                 echo $cleanContent . PHP_EOL;
             }
             $finalTokens[$newStackPtr] = array('type' => 'T_EMBEDDED_PHP', 'code' => T_EMBEDDED_PHP, 'content' => $content);
             $newStackPtr++;
             continue;
         }
         //end if
         if ($token['code'] === T_GOTO_LABEL) {
             // Convert these back to T_STRING followed by T_COLON so we can
             // more easily process style definitions.
             $finalTokens[$newStackPtr] = array('type' => 'T_STRING', 'code' => T_STRING, 'content' => substr($token['content'], 0, -1));
             $newStackPtr++;
             $finalTokens[$newStackPtr] = array('type' => 'T_COLON', 'code' => T_COLON, 'content' => ':');
             $newStackPtr++;
             continue;
         }
         if ($token['code'] === T_FUNCTION) {
             // There are no functions in CSS, so convert this to a string.
             $finalTokens[$newStackPtr] = array('type' => 'T_STRING', 'code' => T_STRING, 'content' => $token['content']);
             $newStackPtr++;
             continue;
         }
         if ($token['code'] === T_COMMENT && substr($token['content'], 0, 2) === '/*') {
             // Multi-line comment. Record it so we can ignore other
             // comment tags until we get out of this one.
             $multiLineComment = true;
         }
         if ($token['code'] === T_COMMENT && $multiLineComment === false && (substr($token['content'], 0, 2) === '//' || $token['content'][0] === '#')) {
             $content = ltrim($token['content'], '#/');
             // Guard against PHP7+ syntax errors by temporarily prefixing
             // content that starts with a zero with a "1", so it doesn't
             // look like an invalid int. The prefix is removed again once
             // the content has been tokenized.
             $leadingZero = false;
             if ($content[0] === '0') {
                 $content = '1' . $content;
                 $leadingZero = true;
             }
             $commentTokens = parent::tokenizeString('<?php ' . $content . '?>', $eolChar);
             // The first and last tokens are the open/close tags.
             array_shift($commentTokens);
             array_pop($commentTokens);
             if ($leadingZero === true) {
                 // Drop the "1" we added from both the first token and
                 // the raw content, so lengths computed below are right.
                 $commentTokens[0]['content'] = substr($commentTokens[0]['content'], 1);
                 $content = substr($content, 1);
             }
             if ($token['content'][0] === '#') {
                 // The # character is not a comment in CSS files, so
                 // determine what it means in this context.
                 $firstContent = $commentTokens[0]['content'];
                 // If the first content is just a number, it is probably a
                 // colour like 8FB7DB, which PHP splits into 8 and FB7DB.
                 if (($commentTokens[0]['code'] === T_LNUMBER || $commentTokens[0]['code'] === T_DNUMBER) && $commentTokens[1]['code'] === T_STRING) {
                     $firstContent .= $commentTokens[1]['content'];
                     array_shift($commentTokens);
                 }
                 // If the first content looks like a colour and not a class
                 // definition, join the tokens together.
                 if (preg_match('/^[ABCDEF0-9]+$/i', $firstContent) === 1 && $commentTokens[1]['content'] !== '-') {
                     array_shift($commentTokens);
                     // Work out what we trimmed off above and remember to re-add it.
                     $trimmed = substr($token['content'], 0, strlen($token['content']) - strlen($content));
                     $finalTokens[$newStackPtr] = array('type' => 'T_COLOUR', 'code' => T_COLOUR, 'content' => $trimmed . $firstContent);
                 } else {
                     $finalTokens[$newStackPtr] = array('type' => 'T_HASH', 'code' => T_HASH, 'content' => '#');
                 }
             } else {
                 $finalTokens[$newStackPtr] = array('type' => 'T_STRING', 'code' => T_STRING, 'content' => '//');
             }
             //end if
             $newStackPtr++;
             foreach ($commentTokens as $tokenData) {
                 if ($tokenData['code'] === T_COMMENT && (substr($tokenData['content'], 0, 2) === '//' || $tokenData['content'][0] === '#')) {
                     // This is a comment in a comment, so it needs
                     // to go through the whole process again: overwrite the
                     // current token and step back so the loop revisits it.
                     $tokens[$stackPtr]['content'] = $tokenData['content'];
                     $stackPtr--;
                     break;
                 }
                 $finalTokens[$newStackPtr] = $tokenData;
                 $newStackPtr++;
             }
             continue;
         }
         //end if
         if ($token['code'] === T_COMMENT && substr($token['content'], -2) === '*/') {
             // Multi-line comment is done.
             $multiLineComment = false;
         }
         $finalTokens[$newStackPtr] = $token;
         $newStackPtr++;
     }
     //end for
     // A flag to indicate if we are inside a style definition,
     // which is defined using curly braces.
     $inStyleDef = false;
     // A flag to indicate if an At-rule like "@media" is used, which will result
     // in nested curly brackets.
     $asperandStart = false;
     $numTokens = count($finalTokens);
     for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
         $token = $finalTokens[$stackPtr];
         switch ($token['code']) {
             case T_OPEN_CURLY_BRACKET:
                 // Opening curly brackets for an At-rule do not start a style
                 // definition. We also reset the asperand flag here because the next
                 // opening curly bracket could be indeed the start of a style
                 // definition.
                 if ($asperandStart === true) {
                     $inStyleDef = false;
                     $asperandStart = false;
                 } else {
                     $inStyleDef = true;
                 }
                 break;
             case T_CLOSE_CURLY_BRACKET:
                 $inStyleDef = false;
                 $asperandStart = false;
                 break;
             case T_MINUS:
                 // Minus signs are often used instead of spaces inside
                 // class names, IDs and styles.
                 if ($finalTokens[$stackPtr + 1]['code'] === T_STRING) {
                     if ($finalTokens[$stackPtr - 1]['code'] === T_STRING) {
                         // string-string: fold both earlier tokens into the
                         // following one so the loop still visits the result.
                         $newContent = $finalTokens[$stackPtr - 1]['content'] . '-' . $finalTokens[$stackPtr + 1]['content'];
                         $finalTokens[$stackPtr + 1]['content'] = $newContent;
                         unset($finalTokens[$stackPtr]);
                         unset($finalTokens[$stackPtr - 1]);
                     } else {
                         $newContent = '-' . $finalTokens[$stackPtr + 1]['content'];
                         $finalTokens[$stackPtr + 1]['content'] = $newContent;
                         unset($finalTokens[$stackPtr]);
                     }
                 } else {
                     if ($finalTokens[$stackPtr + 1]['code'] === T_LNUMBER) {
                         // They can also be used to provide negative numbers.
                         $finalTokens[$stackPtr + 1]['content'] = '-' . $finalTokens[$stackPtr + 1]['content'];
                         unset($finalTokens[$stackPtr]);
                     }
                 }
                 //end if
                 break;
             case T_COLON:
                 // Only interested in colons that are defining styles.
                 if ($inStyleDef === false) {
                     break;
                 }
                 // Walk back to the previous non-empty token; that is the
                 // style name.
                 for ($x = $stackPtr - 1; $x >= 0; $x--) {
                     if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$finalTokens[$x]['code']]) === false) {
                         break;
                     }
                 }
                 $finalTokens[$x]['type'] = 'T_STYLE';
                 $finalTokens[$x]['code'] = T_STYLE;
                 break;
             case T_STRING:
                 if (strtolower($token['content']) === 'url') {
                     // Find the next content.
                     for ($x = $stackPtr + 1; $x < $numTokens; $x++) {
                         if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$finalTokens[$x]['code']]) === false) {
                             break;
                         }
                     }
                     // Needs to be in the format "url(" for it to be a URL.
                     // "continue 2" moves on to the next token; a bare
                     // "continue" inside a switch only acts like "break"
                     // and raises a warning on PHP 7.3+.
                     if ($finalTokens[$x]['code'] !== T_OPEN_PARENTHESIS) {
                         continue 2;
                     }
                     // Make sure the content isn't empty.
                     for ($y = $x + 1; $y < $numTokens; $y++) {
                         if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$finalTokens[$y]['code']]) === false) {
                             break;
                         }
                     }
                     if ($finalTokens[$y]['code'] === T_CLOSE_PARENTHESIS) {
                         continue 2;
                     }
                     // Join all the content together inside the url() statement.
                     $newContent = '';
                     for ($i = $x + 2; $i < $numTokens; $i++) {
                         if ($finalTokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
                             break;
                         }
                         $newContent .= $finalTokens[$i]['content'];
                         unset($finalTokens[$i]);
                     }
                     // If the content inside the "url()" is in double quotes
                     // there will only be one token and so we don't have to do
                     // anything except change its type. If it is not empty,
                     // we need to do some token merging.
                     $finalTokens[$x + 1]['type'] = 'T_URL';
                     $finalTokens[$x + 1]['code'] = T_URL;
                     if ($newContent !== '') {
                         $finalTokens[$x + 1]['content'] .= $newContent;
                         $finalTokens = array_values($finalTokens);
                         $numTokens = count($finalTokens);
                     }
                 }
                 //end if
                 break;
             case T_ASPERAND:
                 $asperandStart = true;
                 break;
             default:
                 // Nothing special to be done with this token.
                 break;
         }
         //end switch
     }
     //end for
     // Reset the array keys to avoid gaps.
     $finalTokens = array_values($finalTokens);
     $numTokens = count($finalTokens);
     // Blank out the content of the end tag.
     $finalTokens[$numTokens - 1]['content'] = '';
     if ($eolAdded === true) {
         // Strip off the extra EOL char we added for tokenizing.
         $finalTokens[$numTokens - 2]['content'] = substr($finalTokens[$numTokens - 2]['content'], 0, strlen($eolChar) * -1);
         if ($finalTokens[$numTokens - 2]['content'] === '') {
             // The token held nothing but the EOL we added, so drop it.
             unset($finalTokens[$numTokens - 2]);
             $finalTokens = array_values($finalTokens);
             $numTokens = count($finalTokens);
         }
     }
     if (PHP_CODESNIFFER_VERBOSITY > 1) {
         echo "\t*** END CSS TOKENIZING ***" . PHP_EOL;
     }
     return $finalTokens;
 }
示例#2
0
 /**
  * Builds the pattern steps that match the tokens found in a string.
  *
  * The string is wrapped in PHP open and close tags, passed to the PHP
  * tokenizer, and every token between those two tags becomes one step
  * holding the token's code and content.
  *
  * @param string $str The tokens string that the pattern should match.
  *
  * @return array The pattern step.
  * @see    _createSkipPattern()
  * @see    _parse()
  */
 private function _createTokenPattern($str)
 {
     // No space goes before the closing php tag, because it would come
     // back from the tokenizer as an extra whitespace token.
     $wrappedCode  = '<?php ' . $str . '?>';
     $phpTokenizer = new PHP_CodeSniffer_Tokenizers_PHP();
     $allTokens    = $phpTokenizer->tokenizeString($wrappedCode);
     // Keep everything between the first token (the open tag) and the
     // last token (the close tag).
     $innerTokens = array_slice($allTokens, 1, -1);
     $steps = array();
     foreach ($innerTokens as $innerToken) {
         $step = array(
             'type'  => 'token',
             'token' => $innerToken['code'],
             'value' => $innerToken['content'],
         );
         $steps[] = $step;
     }
     return $steps;
 }
示例#3
0
文件: CSS.php 项目: josephj/modev
 /**
  * Creates an array of tokens when given some CSS code.
  *
  * The CSS is wrapped in PHP open/close tags and handed to the parent
  * tokenizer. The PHP tokens that come back are then rewritten in two passes:
  *  - the first pass converts T_LIST to a string and re-tokenizes "//" and
  *    "#" comments, which are not comments in CSS (producing T_COLOUR or
  *    T_HASH tokens for "#");
  *  - the second pass joins minus signs with the neighbouring string or
  *    number, marks the token before a colon inside a style definition as
  *    T_STYLE and merges multi-token url() content into a T_URL token.
  *
  * @param string $string  The string to tokenize.
  * @param string $eolChar The EOL character to use for splitting strings.
  *
  * @return array
  */
 public function tokenizeString($string, $eolChar = '\\n')
 {
     $tokens = parent::tokenizeString('<?php ' . $string . ' ?>', $eolChar);
     $finalTokens = array();
     $newStackPtr = 0;
     $numTokens = count($tokens);
     $multiLineComment = false;
     for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
         $token = $tokens[$stackPtr];
         // Styles like list-style are tokenized as T_LIST-T_STRING
         // so convert the T_LIST to a string.
         if ($token['code'] === T_LIST) {
             $token['code'] = T_STRING;
             $token['type'] = 'T_STRING';
         }
         if ($token['code'] === T_COMMENT && substr($token['content'], 0, 2) === '/*') {
             // Multi-line comment. Record it so we can ignore other
             // comment tags until we get out of this one.
             $multiLineComment = true;
         }
         if ($token['code'] === T_COMMENT && $multiLineComment === false && (substr($token['content'], 0, 2) === '//' || $token['content'][0] === '#')) {
             // Re-tokenize what follows the comment marker as code.
             $content = ltrim($token['content'], '#/');
             $commentTokens = parent::tokenizeString('<?php ' . $content . '?>', $eolChar);
             // The first and last tokens are the open/close tags.
             array_shift($commentTokens);
             array_pop($commentTokens);
             if ($token['content'][0] === '#') {
                 // The # character is not a comment in CSS files, so
                 // determine what it means in this context.
                 $firstContent = $commentTokens[0]['content'];
                 // If the first content is just a number, it is probably a
                 // colour like 8FB7DB, which PHP splits into 8 and FB7DB.
                 if (($commentTokens[0]['code'] === T_LNUMBER || $commentTokens[0]['code'] === T_DNUMBER) && $commentTokens[1]['code'] === T_STRING) {
                     $firstContent .= $commentTokens[1]['content'];
                     array_shift($commentTokens);
                 }
                 // If the first content looks like a colour and not a class
                 // definition, join the tokens together.
                 if (preg_match('/^[ABCDEF0-9]+$/i', $firstContent) === 1) {
                     array_shift($commentTokens);
                     $finalTokens[$newStackPtr] = array('type' => 'T_COLOUR', 'code' => T_COLOUR, 'content' => '#' . $firstContent);
                 } else {
                     $finalTokens[$newStackPtr] = array('type' => 'T_HASH', 'code' => T_HASH, 'content' => '#');
                 }
             } else {
                 $finalTokens[$newStackPtr] = array('type' => 'T_STRING', 'code' => T_STRING, 'content' => '//');
             }
             //end if
             $newStackPtr++;
             foreach ($commentTokens as $tokenData) {
                 if ($tokenData['code'] === T_COMMENT && (substr($tokenData['content'], 0, 2) === '//' || $tokenData['content'][0] === '#')) {
                     // This is a comment in a comment, so it needs
                     // to go through the whole process again: overwrite the
                     // current token and step back so the loop revisits it.
                     $tokens[$stackPtr]['content'] = $tokenData['content'];
                     $stackPtr--;
                     break;
                 }
                 $finalTokens[$newStackPtr] = $tokenData;
                 $newStackPtr++;
             }
             continue;
         }
         //end if
         if ($token['code'] === T_COMMENT && substr($token['content'], -2) === '*/') {
             // Multi-line comment is done.
             $multiLineComment = false;
         }
         $finalTokens[$newStackPtr] = $token;
         $newStackPtr++;
     }
     //end for
     // A flag to indicate if we are inside a style definition,
     // which is defined using curly braces. I'm assuming you can't
     // have nested curly brackets.
     $inStyleDef = false;
     $numTokens = count($finalTokens);
     for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
         $token = $finalTokens[$stackPtr];
         switch ($token['code']) {
             case T_OPEN_CURLY_BRACKET:
                 $inStyleDef = true;
                 break;
             case T_CLOSE_CURLY_BRACKET:
                 $inStyleDef = false;
                 break;
             case T_MINUS:
                 // Minus signs are often used instead of spaces inside
                 // class names, IDs and styles.
                 if ($finalTokens[$stackPtr + 1]['code'] === T_STRING) {
                     if ($finalTokens[$stackPtr - 1]['code'] === T_STRING) {
                         // string-string: fold everything into the earlier
                         // token, then step back so the loop revisits it
                         // after the array has been reindexed.
                         $newContent = $finalTokens[$stackPtr - 1]['content'] . '-' . $finalTokens[$stackPtr + 1]['content'];
                         $finalTokens[$stackPtr - 1]['content'] = $newContent;
                         unset($finalTokens[$stackPtr]);
                         unset($finalTokens[$stackPtr + 1]);
                         $stackPtr -= 2;
                     } else {
                         $newContent = '-' . $finalTokens[$stackPtr + 1]['content'];
                         $finalTokens[$stackPtr + 1]['content'] = $newContent;
                         unset($finalTokens[$stackPtr]);
                         $stackPtr--;
                     }
                     $finalTokens = array_values($finalTokens);
                     $numTokens = count($finalTokens);
                 } else {
                     if ($finalTokens[$stackPtr + 1]['code'] === T_LNUMBER) {
                         // They can also be used to provide negative numbers.
                         $finalTokens[$stackPtr + 1]['content'] = '-' . $finalTokens[$stackPtr + 1]['content'];
                         unset($finalTokens[$stackPtr]);
                         $finalTokens = array_values($finalTokens);
                         $numTokens = count($finalTokens);
                     }
                 }
                 break;
             case T_COLON:
                 // Only interested in colons that are defining styles.
                 if ($inStyleDef === false) {
                     break;
                 }
                 // Walk back to the previous non-empty token; that is the
                 // style name.
                 for ($x = $stackPtr - 1; $x >= 0; $x--) {
                     if (in_array($finalTokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                         break;
                     }
                 }
                 $finalTokens[$x]['type'] = 'T_STYLE';
                 $finalTokens[$x]['code'] = T_STYLE;
                 break;
             case T_STRING:
                 if (strtolower($token['content']) === 'url') {
                     // Find the next content.
                     for ($x = $stackPtr + 1; $x < $numTokens; $x++) {
                         if (in_array($finalTokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                             break;
                         }
                     }
                     // Needs to be in the format url( for it to be a URL.
                     // "continue 2" moves on to the next token; a bare
                     // "continue" inside a switch only acts like "break"
                     // and raises a warning on PHP 7.3+.
                     if ($finalTokens[$x]['code'] !== T_OPEN_PARENTHESIS) {
                         continue 2;
                     }
                     // Make sure the content isn't empty.
                     for ($y = $x + 1; $y < $numTokens; $y++) {
                         if (in_array($finalTokens[$y]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                             break;
                         }
                     }
                     if ($finalTokens[$y]['code'] === T_CLOSE_PARENTHESIS) {
                         continue 2;
                     }
                     // Join all the content together inside the url() statement.
                     $newContent = '';
                     for ($i = $x + 2; $i < $numTokens; $i++) {
                         if ($finalTokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
                             break;
                         }
                         $newContent .= $finalTokens[$i]['content'];
                         unset($finalTokens[$i]);
                     }
                     // Only retype and merge when more than one token sat
                     // inside the parentheses.
                     if ($newContent !== '') {
                         $finalTokens[$x + 1]['type'] = 'T_URL';
                         $finalTokens[$x + 1]['code'] = T_URL;
                         $finalTokens[$x + 1]['content'] .= $newContent;
                         $finalTokens = array_values($finalTokens);
                         $numTokens = count($finalTokens);
                     }
                 }
                 //end if
                 break;
             default:
                 // Nothing special to be done with this token.
                 break;
         }
         //end switch
     }
     //end for
     return $finalTokens;
 }
示例#4
0
 /**
  * Creates an array of tokens when given some CSS code.
  *
  * Uses the PHP tokenizer to do all the tricky work
  *
  * @param string $string  The string to tokenize.
  * @param string $eolChar The EOL character to use for splitting strings.
  *
  * @return array
  */
 public function tokenizeString($string, $eolChar = '\\n')
 {
     if (PHP_CODESNIFFER_VERBOSITY > 1) {
         echo "\t*** START CSS TOKENIZING ***" . PHP_EOL;
     }
     // If the content doesn't have an EOl char on the end, add one so
     // the open and close tags we add are parsed correctly.
     if (substr($string, 0, strlen($eolChar) * -1) !== $eolChar) {
         $string .= $eolChar;
     }
     $tokens = parent::tokenizeString('<?php ' . $string . '?>', $eolChar);
     $finalTokens = array();
     $newStackPtr = 0;
     $numTokens = count($tokens);
     $multiLineComment = false;
     for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
         $token = $tokens[$stackPtr];
         if (PHP_CODESNIFFER_VERBOSITY > 1) {
             $type = $token['type'];
             $content = str_replace($eolChar, '\\n', $token['content']);
             echo "\tProcess token {$stackPtr}: {$type} => {$content}" . PHP_EOL;
         }
         // Sometimes, there are PHP tags embedded in the code, which causes issues
         // with how PHP tokenizeses the string. After the first closing tag is found,
         // everything outside PHP tags is set as inline HTML tokens (1 for each line).
         // So we need to go through and find these tokens so we can re-tokenize them.
         if ($token['code'] === T_CLOSE_TAG && $stackPtr !== $numTokens - 1) {
             $content = '<?php ';
             for ($x = $stackPtr + 1; $x < $numTokens; $x++) {
                 if ($tokens[$x]['code'] === T_INLINE_HTML) {
                     $content .= $tokens[$x]['content'];
                 } else {
                     $x--;
                     break;
                 }
             }
             if ($x < $numTokens - 1) {
                 // This is not the last closing tag in the file, so we
                 // have to add another closing tag here. If it is the last closing
                 // tag, this additional one would have been added during the
                 // original tokenize call.
                 $content .= ' ?>';
             }
             if (PHP_CODESNIFFER_VERBOSITY > 1) {
                 echo "\t\t=> Found premature closing tag at {$stackPtr}" . PHP_EOL;
                 $cleanContent = str_replace($eolChar, '\\n', $content);
                 echo "\t\tcontent: {$cleanContent}" . PHP_EOL;
                 $oldNumTokens = $numTokens;
             }
             // Tokenize the string and remove the extra PHP tags we don't need.
             $moreTokens = parent::tokenizeString($content, $eolChar);
             array_shift($moreTokens);
             array_pop($moreTokens);
             array_pop($moreTokens);
             // Rebuild the tokens array.
             array_splice($tokens, $stackPtr + 1, $x - $stackPtr, $moreTokens);
             $numTokens = count($tokens);
             if (PHP_CODESNIFFER_VERBOSITY > 1) {
                 $count = count($moreTokens);
                 $diff = $x - $stackPtr;
                 echo "\t\t* added {$count} tokens, replaced {$diff}; size changed from {$oldNumTokens} to {$numTokens} *" . PHP_EOL;
             }
         }
         //end if
         if ($token['code'] === T_FUNCTION) {
             // There are no functions in CSS, so convert this to a string.
             $finalTokens[$newStackPtr] = array('type' => 'T_STRING', 'code' => T_STRING, 'content' => $token['content']);
             $newStackPtr++;
             continue;
         }
         if ($token['code'] === T_COMMENT && substr($token['content'], 0, 2) === '/*') {
             // Multi-line comment. Record it so we can ignore other
             // comment tags until we get out of this one.
             $multiLineComment = true;
         }
         if ($token['code'] === T_COMMENT && $multiLineComment === false && (substr($token['content'], 0, 2) === '//' || $token['content'][0] === '#')) {
             $content = ltrim($token['content'], '#/');
             $commentTokens = parent::tokenizeString('<?php ' . $content . '?>', $eolChar);
             // The first and last tokens are the open/close tags.
             array_shift($commentTokens);
             array_pop($commentTokens);
             if ($token['content'][0] === '#') {
                 // The # character is not a comment in CSS files, so
                 // determine what it means in this context.
                 $firstContent = $commentTokens[0]['content'];
                 // If the first content is just a number, it is probably a
                 // colour like 8FB7DB, which PHP splits into 8 and FB7DB.
                 if (($commentTokens[0]['code'] === T_LNUMBER || $commentTokens[0]['code'] === T_DNUMBER) && $commentTokens[1]['code'] === T_STRING) {
                     $firstContent .= $commentTokens[1]['content'];
                     array_shift($commentTokens);
                 }
                 // If the first content looks like a colour and not a class
                 // definition, join the tokens together.
                 if (preg_match('/^[ABCDEF0-9]+$/i', $firstContent) === 1) {
                     array_shift($commentTokens);
                     // Work out what we trimmed off above and remember to re-add it.
                     $trimmed = substr($token['content'], 0, strlen($token['content']) - strlen($content));
                     $finalTokens[$newStackPtr] = array('type' => 'T_COLOUR', 'code' => T_COLOUR, 'content' => $trimmed . $firstContent);
                 } else {
                     $finalTokens[$newStackPtr] = array('type' => 'T_HASH', 'code' => T_HASH, 'content' => '#');
                 }
             } else {
                 $finalTokens[$newStackPtr] = array('type' => 'T_STRING', 'code' => T_STRING, 'content' => '//');
             }
             //end if
             $newStackPtr++;
             foreach ($commentTokens as $tokenData) {
                 if ($tokenData['code'] === T_COMMENT && (substr($tokenData['content'], 0, 2) === '//' || $tokenData['content'][0] === '#')) {
                     // This is a comment in a comment, so it needs
                     // to go through the whole process again.
                     $tokens[$stackPtr]['content'] = $tokenData['content'];
                     $stackPtr--;
                     break;
                 }
                 $finalTokens[$newStackPtr] = $tokenData;
                 $newStackPtr++;
             }
             continue;
         }
         //end if
         if ($token['code'] === T_COMMENT && substr($token['content'], -2) === '*/') {
             // Multi-line comment is done.
             $multiLineComment = false;
         }
         $finalTokens[$newStackPtr] = $token;
         $newStackPtr++;
     }
     //end for
     // A flag to indicate if we are inside a style definition,
     // which is defined using curly braces. I'm assuming you can't
     // have nested curly brackets.
     $inStyleDef = false;
     $numTokens = count($finalTokens);
     for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
         $token = $finalTokens[$stackPtr];
         switch ($token['code']) {
             case T_OPEN_CURLY_BRACKET:
                 $inStyleDef = true;
                 break;
             case T_CLOSE_CURLY_BRACKET:
                 $inStyleDef = false;
                 break;
             case T_MINUS:
                 // Minus signs are often used instead of spaces inside
                 // class names, IDs and styles.
                 if ($finalTokens[$stackPtr + 1]['code'] === T_STRING) {
                     if ($finalTokens[$stackPtr - 1]['code'] === T_STRING) {
                         $newContent = $finalTokens[$stackPtr - 1]['content'] . '-' . $finalTokens[$stackPtr + 1]['content'];
                         $finalTokens[$stackPtr - 1]['content'] = $newContent;
                         unset($finalTokens[$stackPtr]);
                         unset($finalTokens[$stackPtr + 1]);
                         $stackPtr -= 2;
                     } else {
                         $newContent = '-' . $finalTokens[$stackPtr + 1]['content'];
                         $finalTokens[$stackPtr + 1]['content'] = $newContent;
                         unset($finalTokens[$stackPtr]);
                         $stackPtr--;
                     }
                     $finalTokens = array_values($finalTokens);
                     $numTokens = count($finalTokens);
                 } else {
                     if ($finalTokens[$stackPtr + 1]['code'] === T_LNUMBER) {
                         // They can also be used to provide negative numbers.
                         $finalTokens[$stackPtr + 1]['content'] = '-' . $finalTokens[$stackPtr + 1]['content'];
                         unset($finalTokens[$stackPtr]);
                         $finalTokens = array_values($finalTokens);
                         $numTokens = count($finalTokens);
                     }
                 }
                 break;
             case T_COLON:
                 // Only interested in colons that are defining styles.
                 if ($inStyleDef === false) {
                     break;
                 }
                 for ($x = $stackPtr - 1; $x >= 0; $x--) {
                     if (in_array($finalTokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                         break;
                     }
                 }
                 $finalTokens[$x]['type'] = 'T_STYLE';
                 $finalTokens[$x]['code'] = T_STYLE;
                 break;
             case T_STRING:
                 if (strtolower($token['content']) === 'url') {
                     // Find the next content.
                     for ($x = $stackPtr + 1; $x < $numTokens; $x++) {
                         if (in_array($finalTokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                             break;
                         }
                     }
                     // Needs to be in the format "url(" for it to be a URL.
                     if ($finalTokens[$x]['code'] !== T_OPEN_PARENTHESIS) {
                         continue;
                     }
                     // Make sure the content isn't empty.
                     for ($y = $x + 1; $y < $numTokens; $y++) {
                         if (in_array($finalTokens[$y]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                             break;
                         }
                     }
                     if ($finalTokens[$y]['code'] === T_CLOSE_PARENTHESIS) {
                         continue;
                     }
                     // Join all the content together inside the url() statement.
                     $newContent = '';
                     for ($i = $x + 2; $i < $numTokens; $i++) {
                         if ($finalTokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
                             break;
                         }
                         $newContent .= $finalTokens[$i]['content'];
                         unset($finalTokens[$i]);
                     }
                     // If the content inside the "url()" is in double quotes
                     // there will only be one token and so we don't have to do
                     // anything except change its type. If it is not empty,
                     // we need to do some token merging.
                     $finalTokens[$x + 1]['type'] = 'T_URL';
                     $finalTokens[$x + 1]['code'] = T_URL;
                     if ($newContent !== '') {
                         $finalTokens[$x + 1]['content'] .= $newContent;
                         $finalTokens = array_values($finalTokens);
                         $numTokens = count($finalTokens);
                     }
                 }
                 //end if
                 break;
             default:
                 // Nothing special to be done with this token.
                 break;
         }
         //end switch
     }
     //end for
     if (PHP_CODESNIFFER_VERBOSITY > 1) {
         echo "\t*** END CSS TOKENIZING ***" . PHP_EOL;
     }
     return $finalTokens;
 }
示例#5
0
 /**
  * Creates an array of tokens when given some CSS code.
  *
  * The string is wrapped in PHP open/close tags and handed to the parent
  * tokenizer. The resulting tokens are then adjusted in two passes:
  *
  *  1. T_LIST tokens become T_STRING, and T_COMMENT tokens that do not
  *     start with a slash-star are re-tokenized: a leading "#" becomes
  *     either a T_COLOUR (when followed by hex digits) or a T_HASH token,
  *     and a leading "//" becomes a T_STRING token.
  *  2. Minus signs are merged into adjacent strings and numbers, the
  *     first non-empty token before a colon is marked as T_STYLE, and the
  *     content of a non-empty url(...) is merged into one T_URL token.
  *
  * @param string $string  The string to tokenize.
  * @param string $eolChar The EOL character to use for splitting strings.
  *
  * @return array
  */
 public function tokenizeString($string, $eolChar = '\\n')
 {
     $tokens = parent::tokenizeString('<?php ' . $string . ' ?>', $eolChar);
     $finalTokens = array();
     $numTokens = count($tokens);
     for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
         $token = $tokens[$stackPtr];
         // Styles like list-style are tokenized as T_LIST-T_STRING
         // so convert the T_LIST to a string.
         if ($token['code'] === T_LIST) {
             $token['code'] = T_STRING;
             $token['type'] = 'T_STRING';
         }
         if ($token['code'] !== T_COMMENT || substr($token['content'], 0, 2) === '/*') {
             // Not a "#" or "//" comment, so the token is kept untouched.
             $finalTokens[] = $token;
             continue;
         }
         // Strip the comment marker and tokenize what is left as code.
         $content = ltrim($token['content'], '#/');
         $commentTokens = parent::tokenizeString('<?php ' . $content . '?>', $eolChar);
         // The first and last tokens are the open/close tags.
         array_shift($commentTokens);
         array_pop($commentTokens);
         if ($token['content'][0] === '#') {
             // The # character is not a comment in CSS files, so determine
             // what it means in this context. The lookups are guarded because
             // the re-tokenized comment may yield fewer than two tokens.
             $firstContent = '';
             if (isset($commentTokens[0]) === true) {
                 $firstContent = $commentTokens[0]['content'];
             }
             // If the first content is just a number, it is probably a
             // colour like 8FB7DB, which PHP splits into 8 and FB7DB.
             if (isset($commentTokens[0], $commentTokens[1]) === true
                 && $commentTokens[0]['code'] === T_LNUMBER
                 && $commentTokens[1]['code'] === T_STRING
             ) {
                 $firstContent .= $commentTokens[1]['content'];
                 array_shift($commentTokens);
             }
             // If the first content looks like a colour and not a class
             // definition, join the tokens together.
             if (preg_match('/^[ABCDEF0-9]+$/i', $firstContent) === 1) {
                 array_shift($commentTokens);
                 $finalTokens[] = array('type' => 'T_COLOUR', 'code' => T_COLOUR, 'content' => '#' . $firstContent);
             } else {
                 $finalTokens[] = array('type' => 'T_HASH', 'code' => T_HASH, 'content' => '#');
             }
         } else {
             $finalTokens[] = array('type' => 'T_STRING', 'code' => T_STRING, 'content' => '//');
         }
         //end if
         foreach ($commentTokens as $tokenData) {
             $finalTokens[] = $tokenData;
         }
     }
     //end for
     $numTokens = count($finalTokens);
     for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
         $token = $finalTokens[$stackPtr];
         switch ($token['code']) {
             case T_MINUS:
                 // A trailing minus sign has nothing to be merged with.
                 if (isset($finalTokens[$stackPtr + 1]) === false) {
                     break;
                 }
                 $nextCode = $finalTokens[$stackPtr + 1]['code'];
                 // Minus signs are often used instead of spaces inside
                 // class names, IDs and styles.
                 if ($nextCode === T_STRING) {
                     if ($stackPtr > 0 && $finalTokens[$stackPtr - 1]['code'] === T_STRING) {
                         // string-string: fold both into the previous token and
                         // step back so the merged token is examined again.
                         $newContent = $finalTokens[$stackPtr - 1]['content'] . '-' . $finalTokens[$stackPtr + 1]['content'];
                         $finalTokens[$stackPtr - 1]['content'] = $newContent;
                         unset($finalTokens[$stackPtr]);
                         unset($finalTokens[$stackPtr + 1]);
                         $stackPtr -= 2;
                     } else {
                         // -string: prefix the next token with the minus sign.
                         $newContent = '-' . $finalTokens[$stackPtr + 1]['content'];
                         $finalTokens[$stackPtr + 1]['content'] = $newContent;
                         unset($finalTokens[$stackPtr]);
                         $stackPtr--;
                     }
                     $finalTokens = array_values($finalTokens);
                     $numTokens = count($finalTokens);
                 } elseif ($nextCode === T_LNUMBER) {
                     // They can also be used to provide negative numbers.
                     $finalTokens[$stackPtr + 1]['content'] = '-' . $finalTokens[$stackPtr + 1]['content'];
                     unset($finalTokens[$stackPtr]);
                     $finalTokens = array_values($finalTokens);
                     $numTokens = count($finalTokens);
                 }
                 break;
             case T_COLON:
                 // Find the previous content.
                 for ($x = $stackPtr - 1; $x >= 0; $x--) {
                     if (in_array($finalTokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens, true) === false) {
                         break;
                     }
                 }
                 // Only retype a token that exists; without this guard a colon
                 // with no preceding content would create a bogus index -1.
                 if ($x >= 0) {
                     $finalTokens[$x]['type'] = 'T_STYLE';
                     $finalTokens[$x]['code'] = T_STYLE;
                 }
                 break;
             case T_STRING:
                 if (strtolower($token['content']) !== 'url') {
                     break;
                 }
                 // Find the next content.
                 for ($x = $stackPtr + 1; $x < $numTokens; $x++) {
                     if (in_array($finalTokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens, true) === false) {
                         break;
                     }
                 }
                 // Needs to be in the format url( for it to be a URL.
                 // Note: "break" is used rather than "continue" because a
                 // "continue" that targets a switch behaves like "break" and
                 // triggers a warning on PHP 7.3 and later.
                 if (isset($finalTokens[$x]) === false || $finalTokens[$x]['code'] !== T_OPEN_PARENTHESIS) {
                     break;
                 }
                 // Make sure the content isn't empty; for "url()" there is
                 // nothing to merge and the parenthesis must stay as it is.
                 for ($y = $x + 1; $y < $numTokens; $y++) {
                     if (in_array($finalTokens[$y]['code'], PHP_CodeSniffer_Tokens::$emptyTokens, true) === false) {
                         break;
                     }
                 }
                 if (isset($finalTokens[$y]) === false || $finalTokens[$y]['code'] === T_CLOSE_PARENTHESIS) {
                     break;
                 }
                 // Join all the content together inside the url() statement.
                 // The token right after "(" becomes the T_URL token and
                 // absorbs everything up to the closing parenthesis.
                 $newContent = '';
                 for ($i = $x + 2; $i < $numTokens; $i++) {
                     if ($finalTokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
                         break;
                     }
                     $newContent .= $finalTokens[$i]['content'];
                     unset($finalTokens[$i]);
                 }
                 $finalTokens[$x + 1]['type'] = 'T_URL';
                 $finalTokens[$x + 1]['code'] = T_URL;
                 $finalTokens[$x + 1]['content'] .= $newContent;
                 $finalTokens = array_values($finalTokens);
                 $numTokens = count($finalTokens);
                 break;
             default:
                 // Nothing special to be done with this token.
                 break;
         }
         //end switch
     }
     //end for
     return $finalTokens;
 }