/**
 * Creates an array of tokens when given some CSS code.
 *
 * The CSS is wrapped in PHP open/close tags and handed to the parent
 * tokenizer. The resulting PHP tokens are then post-processed in two passes:
 *
 *  1. Placeholders for embedded PHP are collapsed into T_EMBEDDED_PHP tokens,
 *     PHP keywords/labels that cannot occur in CSS are turned back into
 *     strings, and PHP-style single-line comments (// and #) are re-tokenized
 *     into T_COLOUR, T_HASH or T_STRING tokens.
 *  2. Hyphenated names and negative numbers are merged, style names inside
 *     curly brackets are marked as T_STYLE and the contents of url() are
 *     marked as T_URL.
 *
 * @param string $string  The string to tokenize.
 * @param string $eolChar The EOL character to use for splitting strings.
 *
 * @return array
 */
public function tokenizeString($string, $eolChar = '\\n')
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** START CSS TOKENIZING ***" . PHP_EOL;
    }

    // If the content doesn't have an EOL char on the end, add one so
    // the open and close tags we add are parsed correctly.
    $eolAdded = false;
    if (substr($string, strlen($eolChar) * -1) !== $eolChar) {
        $string  .= $eolChar;
        $eolAdded = true;
    }

    // Hide any PHP tags in the CSS behind placeholders so they do not
    // interfere with the tags added around the whole string below.
    $string = str_replace('<?php', '^PHPCS_CSS_T_OPEN_TAG^', $string);
    $string = str_replace('?>', '^PHPCS_CSS_T_CLOSE_TAG^', $string);
    $tokens = parent::tokenizeString('<?php ' . $string . '?>', $eolChar);

    $finalTokens    = array();
    $finalTokens[0] = array(
        'code'    => T_OPEN_TAG,
        'type'    => 'T_OPEN_TAG',
        'content' => '',
    );

    $newStackPtr      = 1;
    $numTokens        = count($tokens);
    $multiLineComment = false;
    for ($stackPtr = 1; $stackPtr < $numTokens; $stackPtr++) {
        $token = $tokens[$stackPtr];

        // CSS files don't have lists, breaks etc, so convert these to
        // standard strings early so they can be converted into T_STYLE
        // tokens and joined with other strings if needed.
        if ($token['code'] === T_BREAK
            || $token['code'] === T_LIST
            || $token['code'] === T_DEFAULT
        ) {
            $token['type'] = 'T_STRING';
            $token['code'] = T_STRING;
        }

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $type    = $token['type'];
            $content = PHP_CodeSniffer::prepareForOutput($token['content']);
            echo "\tProcess token {$stackPtr}: {$type} => {$content}" . PHP_EOL;
        }

        if ($token['code'] === T_POWER
            && $tokens[$stackPtr + 1]['content'] === 'PHPCS_CSS_T_OPEN_TAG'
        ) {
            // Skip the three tokens of the open placeholder (^ NAME ^) and
            // gather everything up to the close placeholder into one token.
            $content = '<?php';
            for ($stackPtr = $stackPtr + 3; $stackPtr < $numTokens; $stackPtr++) {
                if ($tokens[$stackPtr]['code'] === T_POWER
                    && $tokens[$stackPtr + 1]['content'] === 'PHPCS_CSS_T_CLOSE_TAG'
                ) {
                    // Add the end tag and ignore the ^ we put at the end.
                    $content  .= '?>';
                    $stackPtr += 2;
                    break;
                } else {
                    $content .= $tokens[$stackPtr]['content'];
                }
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t\t=> Found embedded PHP code: ";
                $cleanContent = PHP_CodeSniffer::prepareForOutput($content);
                echo $cleanContent . PHP_EOL;
            }

            $finalTokens[$newStackPtr] = array(
                'type'    => 'T_EMBEDDED_PHP',
                'code'    => T_EMBEDDED_PHP,
                'content' => $content,
            );

            $newStackPtr++;
            continue;
        } //end if

        if ($token['code'] === T_GOTO_LABEL) {
            // Convert these back to T_STRING followed by T_COLON so we can
            // more easily process style definitions.
            $finalTokens[$newStackPtr] = array(
                'type'    => 'T_STRING',
                'code'    => T_STRING,
                'content' => substr($token['content'], 0, -1),
            );
            $newStackPtr++;
            $finalTokens[$newStackPtr] = array(
                'type'    => 'T_COLON',
                'code'    => T_COLON,
                'content' => ':',
            );
            $newStackPtr++;
            continue;
        }

        if ($token['code'] === T_FUNCTION) {
            // There are no functions in CSS, so convert this to a string.
            $finalTokens[$newStackPtr] = array(
                'type'    => 'T_STRING',
                'code'    => T_STRING,
                'content' => $token['content'],
            );

            $newStackPtr++;
            continue;
        }

        if ($token['code'] === T_COMMENT
            && substr($token['content'], 0, 2) === '/*'
        ) {
            // Multi-line comment. Record it so we can ignore other
            // comment tags until we get out of this one.
            $multiLineComment = true;
        }

        if ($token['code'] === T_COMMENT
            && $multiLineComment === false
            && (substr($token['content'], 0, 2) === '//'
            || $token['content'][0] === '#')
        ) {
            $content = ltrim($token['content'], '#/');

            // Guard against PHP7+ syntax errors by hiding leading zeros
            // behind a sentinel digit so the content doesn't look like an
            // invalid int. The flag records that the sentinel has to be
            // stripped again once the content has been tokenized.
            $leadingZero = false;
            if ($content[0] === '0') {
                $content     = '1' . $content;
                $leadingZero = true;
            }

            $commentTokens = parent::tokenizeString('<?php ' . $content . '?>', $eolChar);

            // The first and last tokens are the open/close tags.
            array_shift($commentTokens);
            array_pop($commentTokens);

            if ($leadingZero === true) {
                // Remove the sentinel digit from both the first token and
                // the content so the original text is restored.
                $commentTokens[0]['content'] = substr($commentTokens[0]['content'], 1);
                $content = substr($content, 1);
            }

            if ($token['content'][0] === '#') {
                // The # character is not a comment in CSS files, so
                // determine what it means in this context.
                $firstContent = $commentTokens[0]['content'];

                // If the first content is just a number, it is probably a
                // colour like 8FB7DB, which PHP splits into 8 and FB7DB.
                if (($commentTokens[0]['code'] === T_LNUMBER
                    || $commentTokens[0]['code'] === T_DNUMBER)
                    && $commentTokens[1]['code'] === T_STRING
                ) {
                    $firstContent .= $commentTokens[1]['content'];
                    array_shift($commentTokens);
                }

                // If the first content looks like a colour and not a class
                // definition, join the tokens together.
                if (preg_match('/^[ABCDEF0-9]+$/i', $firstContent) === 1
                    && $commentTokens[1]['content'] !== '-'
                ) {
                    array_shift($commentTokens);

                    // Work out what we trimmed off above and remember to re-add it.
                    $trimmed = substr(
                        $token['content'],
                        0,
                        strlen($token['content']) - strlen($content)
                    );

                    $finalTokens[$newStackPtr] = array(
                        'type'    => 'T_COLOUR',
                        'code'    => T_COLOUR,
                        'content' => $trimmed . $firstContent,
                    );
                } else {
                    $finalTokens[$newStackPtr] = array(
                        'type'    => 'T_HASH',
                        'code'    => T_HASH,
                        'content' => '#',
                    );
                }
            } else {
                $finalTokens[$newStackPtr] = array(
                    'type'    => 'T_STRING',
                    'code'    => T_STRING,
                    'content' => '//',
                );
            } //end if

            $newStackPtr++;

            foreach ($commentTokens as $tokenData) {
                if ($tokenData['code'] === T_COMMENT
                    && (substr($tokenData['content'], 0, 2) === '//'
                    || $tokenData['content'][0] === '#')
                ) {
                    // This is a comment in a comment, so it needs
                    // to go through the whole process again.
                    $tokens[$stackPtr]['content'] = $tokenData['content'];
                    $stackPtr--;
                    break;
                }

                $finalTokens[$newStackPtr] = $tokenData;
                $newStackPtr++;
            }

            continue;
        } //end if

        if ($token['code'] === T_COMMENT
            && substr($token['content'], -2) === '*/'
        ) {
            // Multi-line comment is done.
            $multiLineComment = false;
        }

        $finalTokens[$newStackPtr] = $token;
        $newStackPtr++;
    } //end for

    // A flag to indicate if we are inside a style definition,
    // which is defined using curly braces.
    $inStyleDef = false;

    // A flag to indicate if an At-rule like "@media" is used, which will result
    // in nested curly brackets.
    $asperandStart = false;

    $numTokens = count($finalTokens);
    for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
        $token = $finalTokens[$stackPtr];

        switch ($token['code']) {
        case T_OPEN_CURLY_BRACKET:
            // Opening curly brackets for an At-rule do not start a style
            // definition. We also reset the asperand flag here because the next
            // opening curly bracket could be indeed the start of a style
            // definition.
            if ($asperandStart === true) {
                $inStyleDef    = false;
                $asperandStart = false;
            } else {
                $inStyleDef = true;
            }
            break;
        case T_CLOSE_CURLY_BRACKET:
            $inStyleDef    = false;
            $asperandStart = false;
            break;
        case T_MINUS:
            // Minus signs are often used instead of spaces inside
            // class names, IDs and styles.
            if ($finalTokens[$stackPtr + 1]['code'] === T_STRING) {
                if ($finalTokens[$stackPtr - 1]['code'] === T_STRING) {
                    // Merge "a", "-" and "b" into the token that follows
                    // the minus and drop the two tokens before it.
                    $newContent = $finalTokens[$stackPtr - 1]['content']
                        . '-'
                        . $finalTokens[$stackPtr + 1]['content'];

                    $finalTokens[$stackPtr + 1]['content'] = $newContent;
                    unset($finalTokens[$stackPtr]);
                    unset($finalTokens[$stackPtr - 1]);
                } else {
                    $newContent = '-' . $finalTokens[$stackPtr + 1]['content'];

                    $finalTokens[$stackPtr + 1]['content'] = $newContent;
                    unset($finalTokens[$stackPtr]);
                }
            } else {
                if ($finalTokens[$stackPtr + 1]['code'] === T_LNUMBER) {
                    // They can also be used to provide negative numbers.
                    $finalTokens[$stackPtr + 1]['content'] = '-' . $finalTokens[$stackPtr + 1]['content'];
                    unset($finalTokens[$stackPtr]);
                }
            } //end if
            break;
        case T_COLON:
            // Only interested in colons that are defining styles.
            if ($inStyleDef === false) {
                break;
            }

            // Walk back to the previous non-empty token; that is the style name.
            for ($x = $stackPtr - 1; $x >= 0; $x--) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$finalTokens[$x]['code']]) === false) {
                    break;
                }
            }

            $finalTokens[$x]['type'] = 'T_STYLE';
            $finalTokens[$x]['code'] = T_STYLE;
            break;
        case T_STRING:
            if (strtolower($token['content']) === 'url') {
                // Find the next content.
                for ($x = $stackPtr + 1; $x < $numTokens; $x++) {
                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$finalTokens[$x]['code']]) === false) {
                        break;
                    }
                }

                // Needs to be in the format "url(" for it to be a URL.
                // "continue 2" moves on to the next token; a bare "continue"
                // would target the switch and triggers a warning on PHP 7.3+.
                if ($finalTokens[$x]['code'] !== T_OPEN_PARENTHESIS) {
                    continue 2;
                }

                // Make sure the content isn't empty.
                for ($y = $x + 1; $y < $numTokens; $y++) {
                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$finalTokens[$y]['code']]) === false) {
                        break;
                    }
                }

                if ($finalTokens[$y]['code'] === T_CLOSE_PARENTHESIS) {
                    continue 2;
                }

                // Join all the content together inside the url() statement.
                $newContent = '';
                for ($i = $x + 2; $i < $numTokens; $i++) {
                    if ($finalTokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
                        break;
                    }

                    $newContent .= $finalTokens[$i]['content'];
                    unset($finalTokens[$i]);
                }

                // If the content inside the "url()" is in double quotes
                // there will only be one token and so we don't have to do
                // anything except change its type. If it is not empty,
                // we need to do some token merging.
                $finalTokens[$x + 1]['type'] = 'T_URL';
                $finalTokens[$x + 1]['code'] = T_URL;

                if ($newContent !== '') {
                    $finalTokens[$x + 1]['content'] .= $newContent;

                    $finalTokens = array_values($finalTokens);
                    $numTokens   = count($finalTokens);
                }
            } //end if
            break;
        case T_ASPERAND:
            $asperandStart = true;
            break;
        default:
            // Nothing special to be done with this token.
            break;
        } //end switch
    } //end for

    // Reset the array keys to avoid gaps.
    $finalTokens = array_values($finalTokens);
    $numTokens   = count($finalTokens);

    // Blank out the content of the end tag.
    $finalTokens[$numTokens - 1]['content'] = '';

    if ($eolAdded === true) {
        // Strip off the extra EOL char we added for tokenizing.
        $finalTokens[$numTokens - 2]['content'] = substr(
            $finalTokens[$numTokens - 2]['content'],
            0,
            strlen($eolChar) * -1
        );

        // If nothing is left of that token, remove it entirely.
        if ($finalTokens[$numTokens - 2]['content'] === '') {
            unset($finalTokens[$numTokens - 2]);
            $finalTokens = array_values($finalTokens);
            $numTokens   = count($finalTokens);
        }
    }

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** END CSS TOKENIZING ***" . PHP_EOL;
    }

    return $finalTokens;
}
/**
 * Builds the pattern steps that match a string of tokens.
 *
 * The string is tokenized as PHP code and every resulting token becomes one
 * step of type "token" carrying the token's code and content.
 *
 * @param string $str The tokens string that the pattern should match.
 *
 * @return array The pattern steps, one entry per token.
 * @see    _createSkipPattern()
 * @see    _parse()
 */
private function _createTokenPattern($str)
{
    // The closing php tag is appended without a leading space, as a space
    // there would add a new whitespace token.
    $phpTokenizer = new PHP_CodeSniffer_Tokenizers_PHP();
    $allTokens    = $phpTokenizer->tokenizeString('<?php ' . $str . '?>');

    // Drop the opening tag from the front and the closing tag from the back.
    array_shift($allTokens);
    array_pop($allTokens);

    $steps = array();
    foreach ($allTokens as $tokenInfo) {
        $step          = array();
        $step['type']  = 'token';
        $step['token'] = $tokenInfo['code'];
        $step['value'] = $tokenInfo['content'];

        $steps[] = $step;
    }

    return $steps;
}
/**
 * Creates an array of tokens when given some CSS code.
 *
 * The CSS is wrapped in PHP open/close tags and handed to the parent
 * tokenizer. The first pass converts T_LIST tokens to strings and
 * re-tokenizes PHP-style single-line comments (// and #) into T_COLOUR,
 * T_HASH or T_STRING tokens. The second pass merges hyphenated names and
 * negative numbers, marks style names inside curly brackets as T_STYLE and
 * merges the contents of url() into a T_URL token.
 *
 * @param string $string  The string to tokenize.
 * @param string $eolChar The EOL character to use for splitting strings.
 *
 * @return array
 */
public function tokenizeString($string, $eolChar = '\\n')
{
    $tokens = parent::tokenizeString('<?php ' . $string . ' ?>', $eolChar);

    $finalTokens      = array();
    $newStackPtr      = 0;
    $numTokens        = count($tokens);
    $multiLineComment = false;
    for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
        $token = $tokens[$stackPtr];

        // Styles like list-style are tokenized as T_LIST-T_STRING
        // so convert the T_LIST to a string.
        if ($token['code'] === T_LIST) {
            $token['code'] = T_STRING;
            $token['type'] = 'T_STRING';
        }

        if ($token['code'] === T_COMMENT
            && substr($token['content'], 0, 2) === '/*'
        ) {
            // Multi-line comment. Record it so we can ignore other
            // comment tags until we get out of this one.
            $multiLineComment = true;
        }

        if ($token['code'] === T_COMMENT
            && $multiLineComment === false
            && (substr($token['content'], 0, 2) === '//'
            || $token['content'][0] === '#')
        ) {
            // Tokenize what follows the comment marker as regular code.
            $content       = ltrim($token['content'], '#/');
            $commentTokens = parent::tokenizeString('<?php ' . $content . '?>', $eolChar);

            // The first and last tokens are the open/close tags.
            array_shift($commentTokens);
            array_pop($commentTokens);

            if ($token['content'][0] === '#') {
                // The # character is not a comment in CSS files, so
                // determine what it means in this context.
                $firstContent = $commentTokens[0]['content'];

                // If the first content is just a number, it is probably a
                // colour like 8FB7DB, which PHP splits into 8 and FB7DB.
                if (($commentTokens[0]['code'] === T_LNUMBER
                    || $commentTokens[0]['code'] === T_DNUMBER)
                    && $commentTokens[1]['code'] === T_STRING
                ) {
                    $firstContent .= $commentTokens[1]['content'];
                    array_shift($commentTokens);
                }

                // If the first content looks like a colour and not a class
                // definition, join the tokens together.
                if (preg_match('/^[ABCDEF0-9]+$/i', $firstContent) === 1) {
                    array_shift($commentTokens);
                    $finalTokens[$newStackPtr] = array(
                        'type'    => 'T_COLOUR',
                        'code'    => T_COLOUR,
                        'content' => '#' . $firstContent,
                    );
                } else {
                    $finalTokens[$newStackPtr] = array(
                        'type'    => 'T_HASH',
                        'code'    => T_HASH,
                        'content' => '#',
                    );
                }
            } else {
                $finalTokens[$newStackPtr] = array(
                    'type'    => 'T_STRING',
                    'code'    => T_STRING,
                    'content' => '//',
                );
            } //end if

            $newStackPtr++;

            foreach ($commentTokens as $tokenData) {
                if ($tokenData['code'] === T_COMMENT
                    && (substr($tokenData['content'], 0, 2) === '//'
                    || $tokenData['content'][0] === '#')
                ) {
                    // This is a comment in a comment, so it needs
                    // to go through the whole process again.
                    $tokens[$stackPtr]['content'] = $tokenData['content'];
                    $stackPtr--;
                    break;
                }

                $finalTokens[$newStackPtr] = $tokenData;
                $newStackPtr++;
            }

            continue;
        } //end if

        if ($token['code'] === T_COMMENT
            && substr($token['content'], -2) === '*/'
        ) {
            // Multi-line comment is done.
            $multiLineComment = false;
        }

        $finalTokens[$newStackPtr] = $token;
        $newStackPtr++;
    } //end for

    // A flag to indicate if we are inside a style definition,
    // which is defined using curly braces. I'm assuming you can't
    // have nested curly brackets.
    $inStyleDef = false;

    $numTokens = count($finalTokens);
    for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
        $token = $finalTokens[$stackPtr];

        switch ($token['code']) {
        case T_OPEN_CURLY_BRACKET:
            $inStyleDef = true;
            break;
        case T_CLOSE_CURLY_BRACKET:
            $inStyleDef = false;
            break;
        case T_MINUS:
            // Minus signs are often used instead of spaces inside
            // class names, IDs and styles.
            if ($finalTokens[$stackPtr + 1]['code'] === T_STRING) {
                if ($finalTokens[$stackPtr - 1]['code'] === T_STRING) {
                    // Merge "a", "-" and "b" into the token before the
                    // minus, then step back so the merged token is the
                    // next one visited after re-indexing.
                    $newContent = $finalTokens[$stackPtr - 1]['content']
                        . '-'
                        . $finalTokens[$stackPtr + 1]['content'];

                    $finalTokens[$stackPtr - 1]['content'] = $newContent;
                    unset($finalTokens[$stackPtr]);
                    unset($finalTokens[$stackPtr + 1]);
                    $stackPtr -= 2;
                } else {
                    $newContent = '-' . $finalTokens[$stackPtr + 1]['content'];

                    $finalTokens[$stackPtr + 1]['content'] = $newContent;
                    unset($finalTokens[$stackPtr]);
                    $stackPtr--;
                }

                $finalTokens = array_values($finalTokens);
                $numTokens   = count($finalTokens);
            } else {
                if ($finalTokens[$stackPtr + 1]['code'] === T_LNUMBER) {
                    // They can also be used to provide negative numbers.
                    $finalTokens[$stackPtr + 1]['content'] = '-' . $finalTokens[$stackPtr + 1]['content'];
                    unset($finalTokens[$stackPtr]);

                    $finalTokens = array_values($finalTokens);
                    $numTokens   = count($finalTokens);
                }
            }
            break;
        case T_COLON:
            // Only interested in colons that are defining styles.
            if ($inStyleDef === false) {
                break;
            }

            // Walk back to the previous non-empty token; that is the style name.
            for ($x = $stackPtr - 1; $x >= 0; $x--) {
                if (in_array($finalTokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                    break;
                }
            }

            $finalTokens[$x]['type'] = 'T_STYLE';
            $finalTokens[$x]['code'] = T_STYLE;
            break;
        case T_STRING:
            if (strtolower($token['content']) === 'url') {
                // Find the next content.
                for ($x = $stackPtr + 1; $x < $numTokens; $x++) {
                    if (in_array($finalTokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                        break;
                    }
                }

                // Needs to be in the format url( for it to be a URL.
                // "continue 2" moves on to the next token; a bare "continue"
                // would target the switch and triggers a warning on PHP 7.3+.
                if ($finalTokens[$x]['code'] !== T_OPEN_PARENTHESIS) {
                    continue 2;
                }

                // Make sure the content isn't empty.
                for ($y = $x + 1; $y < $numTokens; $y++) {
                    if (in_array($finalTokens[$y]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                        break;
                    }
                }

                if ($finalTokens[$y]['code'] === T_CLOSE_PARENTHESIS) {
                    continue 2;
                }

                // Join all the content together inside the url() statement.
                $newContent = '';
                for ($i = $x + 2; $i < $numTokens; $i++) {
                    if ($finalTokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
                        break;
                    }

                    $newContent .= $finalTokens[$i]['content'];
                    unset($finalTokens[$i]);
                }

                // Only retype the first token when further tokens were
                // merged into it.
                if ($newContent !== '') {
                    $finalTokens[$x + 1]['type']     = 'T_URL';
                    $finalTokens[$x + 1]['code']     = T_URL;
                    $finalTokens[$x + 1]['content'] .= $newContent;

                    $finalTokens = array_values($finalTokens);
                    $numTokens   = count($finalTokens);
                }
            } //end if
            break;
        default:
            // Nothing special to be done with this token.
            break;
        } //end switch
    } //end for

    return $finalTokens;
}
/**
 * Creates an array of tokens when given some CSS code.
 *
 * The CSS is wrapped in PHP open/close tags and handed to the parent
 * tokenizer. The first pass re-tokenizes inline HTML found after a premature
 * closing tag, converts T_FUNCTION tokens to strings and re-tokenizes
 * PHP-style single-line comments (// and #) into T_COLOUR, T_HASH or
 * T_STRING tokens. The second pass merges hyphenated names and negative
 * numbers, marks style names inside curly brackets as T_STYLE and marks the
 * contents of url() as T_URL.
 *
 * @param string $string  The string to tokenize.
 * @param string $eolChar The EOL character to use for splitting strings.
 *
 * @return array
 */
public function tokenizeString($string, $eolChar = '\\n')
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** START CSS TOKENIZING ***" . PHP_EOL;
    }

    // If the content doesn't have an EOL char on the end, add one so
    // the open and close tags we add are parsed correctly. The negative
    // offset selects the tail of the string, which is what has to be
    // compared against the EOL char.
    if (substr($string, strlen($eolChar) * -1) !== $eolChar) {
        $string .= $eolChar;
    }

    $tokens = parent::tokenizeString('<?php ' . $string . '?>', $eolChar);

    $finalTokens      = array();
    $newStackPtr      = 0;
    $numTokens        = count($tokens);
    $multiLineComment = false;
    for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
        $token = $tokens[$stackPtr];

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $type    = $token['type'];
            $content = str_replace($eolChar, '\\n', $token['content']);
            echo "\tProcess token {$stackPtr}: {$type} => {$content}" . PHP_EOL;
        }

        // Sometimes, there are PHP tags embedded in the code, which causes issues
        // with how PHP tokenizes the string. After the first closing tag is found,
        // everything outside PHP tags is set as inline HTML tokens (1 for each line).
        // So we need to go through and find these tokens so we can re-tokenize them.
        if ($token['code'] === T_CLOSE_TAG && $stackPtr !== $numTokens - 1) {
            // Collect the run of inline HTML tokens that follows the tag.
            $content = '<?php ';
            for ($x = $stackPtr + 1; $x < $numTokens; $x++) {
                if ($tokens[$x]['code'] === T_INLINE_HTML) {
                    $content .= $tokens[$x]['content'];
                } else {
                    $x--;
                    break;
                }
            }

            if ($x < $numTokens - 1) {
                // This is not the last closing tag in the file, so we
                // have to add another closing tag here. If it is the last closing
                // tag, this additional one would have been added during the
                // original tokenize call.
                $content .= ' ?>';
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t\t=> Found premature closing tag at {$stackPtr}" . PHP_EOL;
                $cleanContent = str_replace($eolChar, '\\n', $content);
                echo "\t\tcontent: {$cleanContent}" . PHP_EOL;
                $oldNumTokens = $numTokens;
            }

            // Tokenize the string and remove the extra PHP tags we don't need.
            $moreTokens = parent::tokenizeString($content, $eolChar);
            array_shift($moreTokens);
            array_pop($moreTokens);
            array_pop($moreTokens);

            // Rebuild the tokens array.
            array_splice($tokens, $stackPtr + 1, $x - $stackPtr, $moreTokens);
            $numTokens = count($tokens);
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $count = count($moreTokens);
                $diff  = $x - $stackPtr;
                echo "\t\t* added {$count} tokens, replaced {$diff}; size changed from {$oldNumTokens} to {$numTokens} *" . PHP_EOL;
            }
        } //end if

        if ($token['code'] === T_FUNCTION) {
            // There are no functions in CSS, so convert this to a string.
            $finalTokens[$newStackPtr] = array(
                'type'    => 'T_STRING',
                'code'    => T_STRING,
                'content' => $token['content'],
            );

            $newStackPtr++;
            continue;
        }

        if ($token['code'] === T_COMMENT
            && substr($token['content'], 0, 2) === '/*'
        ) {
            // Multi-line comment. Record it so we can ignore other
            // comment tags until we get out of this one.
            $multiLineComment = true;
        }

        if ($token['code'] === T_COMMENT
            && $multiLineComment === false
            && (substr($token['content'], 0, 2) === '//'
            || $token['content'][0] === '#')
        ) {
            // Tokenize what follows the comment marker as regular code.
            $content       = ltrim($token['content'], '#/');
            $commentTokens = parent::tokenizeString('<?php ' . $content . '?>', $eolChar);

            // The first and last tokens are the open/close tags.
            array_shift($commentTokens);
            array_pop($commentTokens);

            if ($token['content'][0] === '#') {
                // The # character is not a comment in CSS files, so
                // determine what it means in this context.
                $firstContent = $commentTokens[0]['content'];

                // If the first content is just a number, it is probably a
                // colour like 8FB7DB, which PHP splits into 8 and FB7DB.
                if (($commentTokens[0]['code'] === T_LNUMBER
                    || $commentTokens[0]['code'] === T_DNUMBER)
                    && $commentTokens[1]['code'] === T_STRING
                ) {
                    $firstContent .= $commentTokens[1]['content'];
                    array_shift($commentTokens);
                }

                // If the first content looks like a colour and not a class
                // definition, join the tokens together.
                if (preg_match('/^[ABCDEF0-9]+$/i', $firstContent) === 1) {
                    array_shift($commentTokens);

                    // Work out what we trimmed off above and remember to re-add it.
                    $trimmed = substr(
                        $token['content'],
                        0,
                        strlen($token['content']) - strlen($content)
                    );

                    $finalTokens[$newStackPtr] = array(
                        'type'    => 'T_COLOUR',
                        'code'    => T_COLOUR,
                        'content' => $trimmed . $firstContent,
                    );
                } else {
                    $finalTokens[$newStackPtr] = array(
                        'type'    => 'T_HASH',
                        'code'    => T_HASH,
                        'content' => '#',
                    );
                }
            } else {
                $finalTokens[$newStackPtr] = array(
                    'type'    => 'T_STRING',
                    'code'    => T_STRING,
                    'content' => '//',
                );
            } //end if

            $newStackPtr++;

            foreach ($commentTokens as $tokenData) {
                if ($tokenData['code'] === T_COMMENT
                    && (substr($tokenData['content'], 0, 2) === '//'
                    || $tokenData['content'][0] === '#')
                ) {
                    // This is a comment in a comment, so it needs
                    // to go through the whole process again.
                    $tokens[$stackPtr]['content'] = $tokenData['content'];
                    $stackPtr--;
                    break;
                }

                $finalTokens[$newStackPtr] = $tokenData;
                $newStackPtr++;
            }

            continue;
        } //end if

        if ($token['code'] === T_COMMENT
            && substr($token['content'], -2) === '*/'
        ) {
            // Multi-line comment is done.
            $multiLineComment = false;
        }

        $finalTokens[$newStackPtr] = $token;
        $newStackPtr++;
    } //end for

    // A flag to indicate if we are inside a style definition,
    // which is defined using curly braces. I'm assuming you can't
    // have nested curly brackets.
    $inStyleDef = false;

    $numTokens = count($finalTokens);
    for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
        $token = $finalTokens[$stackPtr];

        switch ($token['code']) {
        case T_OPEN_CURLY_BRACKET:
            $inStyleDef = true;
            break;
        case T_CLOSE_CURLY_BRACKET:
            $inStyleDef = false;
            break;
        case T_MINUS:
            // Minus signs are often used instead of spaces inside
            // class names, IDs and styles.
            if ($finalTokens[$stackPtr + 1]['code'] === T_STRING) {
                if ($finalTokens[$stackPtr - 1]['code'] === T_STRING) {
                    // Merge "a", "-" and "b" into the token before the
                    // minus, then step back so the merged token is the
                    // next one visited after re-indexing.
                    $newContent = $finalTokens[$stackPtr - 1]['content']
                        . '-'
                        . $finalTokens[$stackPtr + 1]['content'];

                    $finalTokens[$stackPtr - 1]['content'] = $newContent;
                    unset($finalTokens[$stackPtr]);
                    unset($finalTokens[$stackPtr + 1]);
                    $stackPtr -= 2;
                } else {
                    $newContent = '-' . $finalTokens[$stackPtr + 1]['content'];

                    $finalTokens[$stackPtr + 1]['content'] = $newContent;
                    unset($finalTokens[$stackPtr]);
                    $stackPtr--;
                }

                $finalTokens = array_values($finalTokens);
                $numTokens   = count($finalTokens);
            } else {
                if ($finalTokens[$stackPtr + 1]['code'] === T_LNUMBER) {
                    // They can also be used to provide negative numbers.
                    $finalTokens[$stackPtr + 1]['content'] = '-' . $finalTokens[$stackPtr + 1]['content'];
                    unset($finalTokens[$stackPtr]);

                    $finalTokens = array_values($finalTokens);
                    $numTokens   = count($finalTokens);
                }
            }
            break;
        case T_COLON:
            // Only interested in colons that are defining styles.
            if ($inStyleDef === false) {
                break;
            }

            // Walk back to the previous non-empty token; that is the style name.
            for ($x = $stackPtr - 1; $x >= 0; $x--) {
                if (in_array($finalTokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                    break;
                }
            }

            $finalTokens[$x]['type'] = 'T_STYLE';
            $finalTokens[$x]['code'] = T_STYLE;
            break;
        case T_STRING:
            if (strtolower($token['content']) === 'url') {
                // Find the next content.
                for ($x = $stackPtr + 1; $x < $numTokens; $x++) {
                    if (in_array($finalTokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                        break;
                    }
                }

                // Needs to be in the format "url(" for it to be a URL.
                // "continue 2" moves on to the next token; a bare "continue"
                // would target the switch and triggers a warning on PHP 7.3+.
                if ($finalTokens[$x]['code'] !== T_OPEN_PARENTHESIS) {
                    continue 2;
                }

                // Make sure the content isn't empty.
                for ($y = $x + 1; $y < $numTokens; $y++) {
                    if (in_array($finalTokens[$y]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                        break;
                    }
                }

                if ($finalTokens[$y]['code'] === T_CLOSE_PARENTHESIS) {
                    continue 2;
                }

                // Join all the content together inside the url() statement.
                $newContent = '';
                for ($i = $x + 2; $i < $numTokens; $i++) {
                    if ($finalTokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
                        break;
                    }

                    $newContent .= $finalTokens[$i]['content'];
                    unset($finalTokens[$i]);
                }

                // If the content inside the "url()" is in double quotes
                // there will only be one token and so we don't have to do
                // anything except change its type. If it is not empty,
                // we need to do some token merging.
                $finalTokens[$x + 1]['type'] = 'T_URL';
                $finalTokens[$x + 1]['code'] = T_URL;

                if ($newContent !== '') {
                    $finalTokens[$x + 1]['content'] .= $newContent;

                    $finalTokens = array_values($finalTokens);
                    $numTokens   = count($finalTokens);
                }
            } //end if
            break;
        default:
            // Nothing special to be done with this token.
            break;
        } //end switch
    } //end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** END CSS TOKENIZING ***" . PHP_EOL;
    }

    return $finalTokens;
}
/**
 * Creates an array of tokens when given some CSS code.
 *
 * The CSS is wrapped in PHP open/close tags and handed to the parent
 * tokenizer. The first pass converts T_LIST tokens to strings and
 * re-tokenizes every comment that does not start with a slash-star into
 * T_COLOUR, T_HASH or T_STRING tokens. The second pass merges hyphenated
 * names and negative numbers, marks the token before each colon as T_STYLE
 * and merges the contents of url() into a T_URL token.
 *
 * @param string $string  The string to tokenize.
 * @param string $eolChar The EOL character to use for splitting strings.
 *
 * @return array
 */
public function tokenizeString($string, $eolChar = '\\n')
{
    $tokens = parent::tokenizeString('<?php ' . $string . ' ?>', $eolChar);

    $finalTokens = array();
    $newStackPtr = 0;
    $numTokens   = count($tokens);
    for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
        $token = $tokens[$stackPtr];

        // Styles like list-style are tokenized as T_LIST-T_STRING
        // so convert the T_LIST to a string.
        if ($token['code'] === T_LIST) {
            $token['code'] = T_STRING;
            $token['type'] = 'T_STRING';
        }

        if ($token['code'] === T_COMMENT
            && substr($token['content'], 0, 2) !== '/*'
        ) {
            // Tokenize what follows the comment marker as regular code.
            $content       = ltrim($token['content'], '#/');
            $commentTokens = parent::tokenizeString('<?php ' . $content . '?>', $eolChar);

            // The first and last tokens are the open/close tags.
            array_shift($commentTokens);
            array_pop($commentTokens);

            if ($token['content'][0] === '#') {
                // The # character is not a comment in CSS files, so determine
                // what it means in this context.
                $firstContent = $commentTokens[0]['content'];

                // If the first content is just a number, it is probably a
                // colour like 8FB7DB, which PHP splits into 8 and FB7DB.
                if ($commentTokens[0]['code'] === T_LNUMBER
                    && $commentTokens[1]['code'] === T_STRING
                ) {
                    $firstContent .= $commentTokens[1]['content'];
                    array_shift($commentTokens);
                }

                // If the first content looks like a colour and not a class
                // definition, join the tokens together.
                if (preg_match('/^[ABCDEF0-9]+$/i', $firstContent) === 1) {
                    array_shift($commentTokens);
                    $finalTokens[$newStackPtr] = array(
                        'type'    => 'T_COLOUR',
                        'code'    => T_COLOUR,
                        'content' => '#' . $firstContent,
                    );
                } else {
                    $finalTokens[$newStackPtr] = array(
                        'type'    => 'T_HASH',
                        'code'    => T_HASH,
                        'content' => '#',
                    );
                }
            } else {
                $finalTokens[$newStackPtr] = array(
                    'type'    => 'T_STRING',
                    'code'    => T_STRING,
                    'content' => '//',
                );
            } //end if

            $newStackPtr++;

            // Append the remaining re-tokenized content as-is.
            foreach ($commentTokens as $tokenData) {
                $finalTokens[$newStackPtr] = $tokenData;
                $newStackPtr++;
            }

            continue;
        } //end if

        $finalTokens[$newStackPtr] = $token;
        $newStackPtr++;
    } //end for

    $numTokens = count($finalTokens);
    for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
        $token = $finalTokens[$stackPtr];

        switch ($token['code']) {
        case T_MINUS:
            // Minus signs are often used instead of spaces inside
            // class names, IDs and styles.
            if ($finalTokens[$stackPtr + 1]['code'] === T_STRING) {
                if ($finalTokens[$stackPtr - 1]['code'] === T_STRING) {
                    // Merge "a", "-" and "b" into the token before the
                    // minus, then step back so the merged token is the
                    // next one visited after re-indexing.
                    $newContent = $finalTokens[$stackPtr - 1]['content']
                        . '-'
                        . $finalTokens[$stackPtr + 1]['content'];

                    $finalTokens[$stackPtr - 1]['content'] = $newContent;
                    unset($finalTokens[$stackPtr]);
                    unset($finalTokens[$stackPtr + 1]);
                    $stackPtr -= 2;
                } else {
                    $newContent = '-' . $finalTokens[$stackPtr + 1]['content'];

                    $finalTokens[$stackPtr + 1]['content'] = $newContent;
                    unset($finalTokens[$stackPtr]);
                    $stackPtr--;
                }

                $finalTokens = array_values($finalTokens);
                $numTokens   = count($finalTokens);
            } else {
                if ($finalTokens[$stackPtr + 1]['code'] === T_LNUMBER) {
                    // They can also be used to provide negative numbers.
                    $finalTokens[$stackPtr + 1]['content'] = '-' . $finalTokens[$stackPtr + 1]['content'];
                    unset($finalTokens[$stackPtr]);

                    $finalTokens = array_values($finalTokens);
                    $numTokens   = count($finalTokens);
                }
            }
            break;
        case T_COLON:
            // Find the previous content.
            for ($x = $stackPtr - 1; $x >= 0; $x--) {
                if (in_array($finalTokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                    break;
                }
            }

            $finalTokens[$x]['type'] = 'T_STYLE';
            $finalTokens[$x]['code'] = T_STYLE;
            break;
        case T_STRING:
            if (strtolower($token['content']) === 'url') {
                // Find the next content.
                for ($x = $stackPtr + 1; $x < $numTokens; $x++) {
                    if (in_array($finalTokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                        break;
                    }
                }

                // Needs to be in the format url( for it to be a URL.
                // "continue 2" moves on to the next token; a bare "continue"
                // would target the switch and triggers a warning on PHP 7.3+.
                if ($finalTokens[$x]['code'] !== T_OPEN_PARENTHESIS) {
                    continue 2;
                }

                // Join all the content together inside the url() statement.
                $newContent = '';
                for ($i = $x + 2; $i < $numTokens; $i++) {
                    if ($finalTokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
                        break;
                    }

                    $newContent .= $finalTokens[$i]['content'];
                    unset($finalTokens[$i]);
                }

                $finalTokens[$x + 1]['type']     = 'T_URL';
                $finalTokens[$x + 1]['code']     = T_URL;
                $finalTokens[$x + 1]['content'] .= $newContent;

                $finalTokens = array_values($finalTokens);
                $numTokens   = count($finalTokens);
            } //end if
            break;
        default:
            // Nothing special to be done with this token.
            break;
        } //end switch
    } //end for

    return $finalTokens;
}