/**
 * Parse a unicode reference and return the character it denotes.
 *
 * Three situations are distinguished by the lexer's next token:
 *  - T_SHORT_P: always rejected, unicode properties are not supported.
 *  - T_SHORT_UNICODE_X: a brace-delimited run of hex digits is expected;
 *    single-space tokens between the braces are tolerated and ignored.
 *  - T_SHORT_X: up to two further tokens are consumed and read as a hex number;
 *    a brace directly after the escape is rejected.
 *
 * @param Lexer $lexer The lexer whose next token is the escape to evaluate
 *
 * @return mixed Whatever Utf8::chr() returns for the parsed code point
 *
 * @throws ParserException If the escape is unsupported, malformed, unterminated or empty
 */
public function evaluate(Lexer $lexer)
{
    if ($lexer->isNextToken(Lexer::T_SHORT_P)) {
        throw new ParserException('Property \\p (Unicode Property) not supported use \\x to specify unicode character or range');
    }

    if ($lexer->isNextToken(Lexer::T_SHORT_UNICODE_X)) {
        $lexer->moveNext();

        // The lookahead is null once the input is exhausted, so test for that
        // before indexing into it (indexing null raises a warning on PHP >= 7.4).
        if ($lexer->lookahead === null || $lexer->lookahead['value'] !== '{') {
            throw new ParserException('Expecting character { after \\X none found');
        }

        $tokens = array();

        while ($lexer->moveNext() && $lexer->lookahead['value'] !== '}') {
            $value = $lexer->lookahead['value'];

            // Check if we are nested, e.g. {ddd{d}
            if ($value === '{') {
                throw new ParserException('Nesting hex value ranges is not allowed');
            }

            // Spaces are permitted as padding but carry no value: keeping them out of
            // the digit list stops hexdec() from receiving invalid characters and lets
            // a whitespace-only range fall through to the "no hex number" check below.
            if ($value === " ") {
                continue;
            }

            if (ctype_xdigit($value) === false) {
                throw new ParserException(sprintf('Character %s is not a hexdeciaml digit', $value));
            }

            $tokens[] = $value;
        }

        // The loop also stops when the input runs out (lookahead === null), so make
        // sure it actually stopped on the closing brace.
        if ($lexer->lookahead === null || $lexer->lookahead['value'] !== '}') {
            throw new ParserException('Closing quantifier token `}` not found');
        }

        if (count($tokens) === 0) {
            throw new ParserException('No hex number found inside the range');
        }

        return Utf8::chr(hexdec(implode('', $tokens)));
    }

    if ($lexer->isNextToken(Lexer::T_SHORT_X)) {
        // Only allow another 2 hex characters; the braced form is not valid here.
        // NOTE(review): glimpse() presumably yields null when nothing follows - the
        // null guard is harmless either way.
        $glimpse = $lexer->glimpse();

        if ($glimpse !== null && $glimpse['value'] === '{') {
            throw new ParserException('Braces not supported here');
        }

        $tokens = array();
        $count  = 2;

        while ($count > 0 && $lexer->moveNext()) {
            $tokens[] = $lexer->lookahead['value'];
            --$count;
        }

        $value = trim(implode('', $tokens));

        return Utf8::chr(hexdec($value));
    }

    throw new ParserException('No Unicode expression to evaluate');
}
/**
 * For every key of the validity map whose value is truthy, converting the key
 * with ord() and back with chr() must reproduce the key exactly.
 *
 * @covers Patchwork\Utf8::chr
 * @covers Patchwork\Utf8::ord
 */
function testChrOrd()
{
    foreach (self::$utf8ValidityMap as $sequence => $isValid) {
        // Entries flagged as falsy are not expected to round-trip; skip them.
        if (!$isValid) {
            continue;
        }

        $codePoint = u::ord($sequence);

        $this->assertSame($sequence, u::chr($codePoint));
    }
}
/**
 * Clean a value and try to prevent XSS attacks
 *
 * Arrays are cleaned recursively with the same strict-mode setting as the
 * outer call. Null, empty, boolean and numeric values are returned untouched.
 * Strings are normalised (entities decoded, carriage returns and null bytes
 * removed, spaced-out keywords compacted) and then stripped of every match of
 * a list of patterns.
 *
 * @param mixed   $varValue      A string or array
 * @param boolean $blnStrictMode If true, the function removes also JavaScript event handlers
 *
 * @return mixed The cleaned string or array
 */
public static function xssClean($varValue, $blnStrictMode = false)
{
    if ($varValue === null || $varValue == '') {
        return $varValue;
    }

    // Recursively clean arrays, forwarding the strict mode flag so that nested
    // values receive the same treatment as a top-level string would
    if (is_array($varValue)) {
        foreach ($varValue as $k => $v) {
            $varValue[$k] = static::xssClean($v, $blnStrictMode);
        }

        return $varValue;
    }

    // Return if the value is not a string (null has already been handled above)
    if (is_bool($varValue) || is_numeric($varValue)) {
        return $varValue;
    }

    // Strip control characters and spaces that sit between an entity body and
    // its terminating semicolon
    $varValue = preg_replace('/(&#*\\w+)[\\x00-\\x20]+;/i', '$1;', $varValue);

    // Remove carriage returns
    $varValue = preg_replace('/\\r+/', '', $varValue);

    // Replace unicode entities (hexadecimal first, then decimal)
    $varValue = preg_replace_callback('~&#x([0-9a-f]+);~i', function ($matches) {
        return Utf8::chr(hexdec($matches[1]));
    }, $varValue);
    $varValue = preg_replace_callback('~&#([0-9]+);~', function ($matches) {
        return Utf8::chr($matches[1]);
    }, $varValue);

    // Remove null bytes
    $varValue = str_replace(chr(0), '', $varValue);

    // Remove encoded null bytes; loop because a removal can join the
    // surrounding characters into a new encoded null byte
    while (strpos($varValue, '\\0') !== false) {
        $varValue = str_replace('\\0', '', $varValue);
    }

    // Define a list of keywords
    $arrKeywords = array(
        '/\\bj\\s*a\\s*v\\s*a\\s*s\\s*c\\s*r\\s*i\\s*p\\s*t\\b/is',
        '/\\bv\\s*b\\s*s\\s*c\\s*r\\s*i\\s*p\\s*t\\b/is',
        '/\\bv\\s*b\\s*s\\s*c\\s*r\\s*p\\s*t\\b/is',
        '/\\bs\\s*c\\s*r\\s*i\\s*p\\s*t\\b/is',
        '/\\ba\\s*p\\s*p\\s*l\\s*e\\s*t\\b/is',
        '/\\ba\\s*l\\s*e\\s*r\\s*t\\b/is',
        '/\\bd\\s*o\\s*c\\s*u\\s*m\\s*e\\s*n\\s*t\\b/is',
        '/\\bw\\s*r\\s*i\\s*t\\s*e\\b/is',
        '/\\bc\\s*o\\s*o\\s*k\\s*i\\s*e\\b/is',
        '/\\bw\\s*i\\s*n\\s*d\\s*o\\s*w\\b/is'
    );

    // Compact exploded keywords like "j a v a s c r i p t"
    foreach ($arrKeywords as $strKeyword) {
        $arrMatches = array();
        preg_match_all($strKeyword, $varValue, $arrMatches);

        foreach ($arrMatches[0] as $strMatch) {
            $varValue = str_replace($strMatch, preg_replace('/\\s*/', '', $strMatch), $varValue);
        }
    }

    // Patterns that are always removed
    $arrRegexp = array();
    $arrRegexp[] = '/<(a|img)[^>]*[^a-z](<script|<xss)[^>]*>/is';
    $arrRegexp[] = '/<(a|img)[^>]*[^a-z]document\\.cookie[^>]*>/is';
    $arrRegexp[] = '/<(a|img)[^>]*[^a-z]vbscri?pt\\s*:[^>]*>/is';
    $arrRegexp[] = '/<(a|img)[^>]*[^a-z]expression\\s*\\([^>]*>/is';

    // Also remove event handlers and JavaScript in strict mode
    if ($blnStrictMode) {
        $arrRegexp[] = '/vbscri?pt\\s*:/is';
        $arrRegexp[] = '/javascript\\s*:/is';
        $arrRegexp[] = '/<\\s*embed.*swf/is';
        $arrRegexp[] = '/<(a|img)[^>]*[^a-z]alert\\s*\\([^>]*>/is';
        $arrRegexp[] = '/<(a|img)[^>]*[^a-z]javascript\\s*:[^>]*>/is';
        $arrRegexp[] = '/<(a|img)[^>]*[^a-z]window\\.[^>]*>/is';
        $arrRegexp[] = '/<(a|img)[^>]*[^a-z]document\\.[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onabort\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onblur\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onchange\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onclick\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onerror\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onfocus\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onkeypress\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onkeydown\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onkeyup\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onload\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onmouseover\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onmouseup\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onmousedown\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onmouseout\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onreset\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onselect\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onsubmit\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onunload\\s*=[^>]*>/is';
        $arrRegexp[] = '/<[^>]*[^a-z]onresize\\s*=[^>]*>/is';
    }

    $varValue = preg_replace($arrRegexp, '', $varValue);

    // Recheck for encoded null bytes, which the removals above may have formed
    while (strpos($varValue, '\\0') !== false) {
        $varValue = str_replace('\\0', '', $varValue);
    }

    return $varValue;
}
/**
 * Register a literal on the scope for every code point from the starting
 * character up to and including the ending character.
 *
 * @param Scope $head  The scope that receives one setLiteral() call per code point
 * @param mixed $start The first character of the range
 * @param mixed $end   The last character of the range
 *
 * @return void
 * @access public
 *
 * @throws ParserException If the ending character sorts before the starting character
 */
public function fillRange(Scope $head, $start, $end)
{
    $first = Utf8::ord($start);
    $last  = Utf8::ord($end);

    if ($first > $last) {
        throw new ParserException(sprintf('Character class range %s - %s is out of order', $start, $end));
    }

    $codePoint = $first;

    while ($codePoint <= $last) {
        $head->setLiteral($codePoint, Utf8::chr($codePoint));
        $codePoint++;
    }
}
/**
 * Callback function for utf8_decode_entities
 *
 * Emits a deprecation notice, then converts the hexadecimal number captured in
 * the first sub-match into a character via Utf8::chr().
 *
 * @param array $matches The regex matches; index 1 holds the hexadecimal digits
 *
 * @return string
 *
 * @deprecated Deprecated since Contao 4.0, to be removed in Contao 5.0.
 */
function utf8_hexchr_callback($matches)
{
    $strNotice = 'Using utf8_hexchr_callback() has been deprecated and will no longer work in Contao 5.0.';

    // Silenced so the notice only reaches handlers that explicitly listen for deprecations
    @trigger_error($strNotice, E_USER_DEPRECATED);

    $intCodePoint = hexdec($matches[1]);

    return Utf8::chr($intCodePoint);
}