예제 #1
0
 /**
  * Evaluate the Unicode escape that is the lexer's next token and return the
  * character it denotes.
  *
  * Three token kinds are handled:
  *  - Lexer::T_SHORT_P: always rejected, Unicode properties are not supported.
  *  - Lexer::T_SHORT_UNICODE_X: a braced hex value `{h..h}`; spaces between the
  *    braces are ignored.
  *  - Lexer::T_SHORT_X: the next (at most two) tokens are read as a hex value.
  *
  * @param Lexer $lexer lexer whose next token is the escape to evaluate
  *
  * @return mixed the result of Utf8::chr() for the decoded hex value
  *
  * @throws ParserException when the escape is unsupported or malformed
  */
 public function evaluate(Lexer $lexer)
 {
     switch (true) {
         case $lexer->isNextToken(Lexer::T_SHORT_P):
             throw new ParserException('Property \\p (Unicode Property) not supported use \\x to specify unicode character or range');
         case $lexer->isNextToken(Lexer::T_SHORT_UNICODE_X):
             $lexer->moveNext();
             // The lookahead is null once the input is exhausted; test for that before indexing it.
             if ($lexer->lookahead === null || $lexer->lookahead['value'] !== '{') {
                 throw new ParserException('Expecting character { after \\X none found');
             }
             $tokens = array();
             while ($lexer->moveNext() && $lexer->lookahead['value'] !== '}') {
                 # check if we nested eg.{ddd{d}
                 if ($lexer->lookahead['value'] === '{') {
                     throw new ParserException('Nesting hex value ranges is not allowed');
                 }
                 if ($lexer->lookahead['value'] !== " " && ctype_xdigit($lexer->lookahead['value']) === false) {
                     throw new ParserException(sprintf('Character %s is not a hexdeciaml digit', $lexer->lookahead['value']));
                 }
                 $tokens[] = $lexer->lookahead['value'];
             }
             # the loop also stops when the input runs out (lookahead === null), so confirm we really stopped on `}`
             if ($lexer->lookahead === null || $lexer->lookahead['value'] !== '}') {
                 throw new ParserException('Closing quantifier token `}` not found');
             }
             // Only spaces and hex digits got past the loop; strip the spaces so hexdec()
             // receives digits only and a braces-with-only-spaces value is seen as empty.
             $number = str_replace(' ', '', implode('', $tokens));
             if ($number === '') {
                 throw new ParserException('No hex number found inside the range');
             }
             return Utf8::chr(hexdec($number));
         case $lexer->isNextToken(Lexer::T_SHORT_X):
             // only allow another 2 hex characters
             $glimpse = $lexer->glimpse();
             // Guard against a null glimpse before indexing it.
             if ($glimpse !== null && $glimpse['value'] === '{') {
                 throw new ParserException('Braces not supported here');
             }
             $tokens = array();
             $count = 2;
             while ($count > 0 && $lexer->moveNext()) {
                 $tokens[] = $lexer->lookahead['value'];
                 --$count;
             }
             $value = trim(implode('', $tokens));
             // hexdec() silently skips non-hex characters, so reject them here the same way
             // the braced form does. An empty value is still passed through and decodes to 0.
             if ($value !== '' && ctype_xdigit($value) === false) {
                 throw new ParserException(sprintf('Character sequence %s is not a hexadecimal value', $value));
             }
             return Utf8::chr(hexdec($value));
         default:
             throw new ParserException('No Unicode expression to evaluate');
     }
 }
예제 #2
0
 /**
  * Round-trip check: for every entry of self::$utf8ValidityMap whose value is
  * truthy, passing the key through u::ord() and then u::chr() must give back
  * the identical key.
  *
  * @covers Patchwork\Utf8::chr
  * @covers Patchwork\Utf8::ord
  */
 function testChrOrd()
 {
     foreach (self::$utf8ValidityMap as $sample => $flag) {
         // Entries with a falsy flag are left out of the round trip.
         if (!$flag) {
             continue;
         }
         $codePoint = u::ord($sample);
         $this->assertSame($sample, u::chr($codePoint));
     }
 }
예제 #3
0
파일: Input.php 프로젝트: Mozan/core-bundle
 /**
  * Clean a value and try to prevent XSS attacks
  *
  * Arrays are cleaned recursively, and the strict-mode flag is forwarded to
  * every nested value. Null, empty, boolean and numeric values are returned
  * untouched. For strings, numeric character entities are decoded, null bytes
  * are removed, spaced-out keywords are compacted and every substring matching
  * one of the blacklist patterns is removed.
  *
  * @param mixed   $varValue      A string or array
  * @param boolean $blnStrictMode If true, the function removes also JavaScript event handlers
  *
  * @return mixed The cleaned string or array
  */
 public static function xssClean($varValue, $blnStrictMode = false)
 {
     // Nothing to clean for null or values loosely equal to the empty string
     if ($varValue === null || $varValue == '') {
         return $varValue;
     }
     // Recursively clean arrays, keeping the caller's strict-mode choice for nested values
     if (is_array($varValue)) {
         foreach ($varValue as $k => $v) {
             $varValue[$k] = static::xssClean($v, $blnStrictMode);
         }
         return $varValue;
     }
     // Return if the value is not a string (null has already been handled above)
     if (is_bool($varValue) || is_numeric($varValue)) {
         return $varValue;
     }
     // Validate standard character entities and UTF16 two byte encoding
     $varValue = preg_replace('/(&#*\\w+)[\\x00-\\x20]+;/i', '$1;', $varValue);
     // Remove carriage returns
     $varValue = preg_replace('/\\r+/', '', $varValue);
     // Replace unicode entities (hexadecimal first, then decimal)
     $varValue = preg_replace_callback('~&#x([0-9a-f]+);~i', function ($matches) {
         return Utf8::chr(hexdec($matches[1]));
     }, $varValue);
     $varValue = preg_replace_callback('~&#([0-9]+);~', function ($matches) {
         return Utf8::chr($matches[1]);
     }, $varValue);
     // Remove null bytes
     $varValue = str_replace(chr(0), '', $varValue);
     // Remove encoded null bytes; loop because removing one occurrence can form a new one
     while (strpos($varValue, '\\0') !== false) {
         $varValue = str_replace('\\0', '', $varValue);
     }
     // Define a list of keywords
     $arrKeywords = array(
         '/\\bj\\s*a\\s*v\\s*a\\s*s\\s*c\\s*r\\s*i\\s*p\\s*t\\b/is',
         '/\\bv\\s*b\\s*s\\s*c\\s*r\\s*i\\s*p\\s*t\\b/is',
         '/\\bv\\s*b\\s*s\\s*c\\s*r\\s*p\\s*t\\b/is',
         '/\\bs\\s*c\\s*r\\s*i\\s*p\\s*t\\b/is',
         '/\\ba\\s*p\\s*p\\s*l\\s*e\\s*t\\b/is',
         '/\\ba\\s*l\\s*e\\s*r\\s*t\\b/is',
         '/\\bd\\s*o\\s*c\\s*u\\s*m\\s*e\\s*n\\s*t\\b/is',
         '/\\bw\\s*r\\s*i\\s*t\\s*e\\b/is',
         '/\\bc\\s*o\\s*o\\s*k\\s*i\\s*e\\b/is',
         '/\\bw\\s*i\\s*n\\s*d\\s*o\\s*w\\b/is'
     );
     // Compact exploded keywords like "j a v a s c r i p t"
     foreach ($arrKeywords as $strKeyword) {
         $arrMatches = array();
         preg_match_all($strKeyword, $varValue, $arrMatches);
         foreach ($arrMatches[0] as $strMatch) {
             $varValue = str_replace($strMatch, preg_replace('/\\s*/', '', $strMatch), $varValue);
         }
     }
     // Patterns that are always removed; initialise the list explicitly before appending
     $arrRegexp = array();
     $arrRegexp[] = '/<(a|img)[^>]*[^a-z](<script|<xss)[^>]*>/is';
     $arrRegexp[] = '/<(a|img)[^>]*[^a-z]document\\.cookie[^>]*>/is';
     $arrRegexp[] = '/<(a|img)[^>]*[^a-z]vbscri?pt\\s*:[^>]*>/is';
     $arrRegexp[] = '/<(a|img)[^>]*[^a-z]expression\\s*\\([^>]*>/is';
     // Also remove event handlers and JavaScript in strict mode
     if ($blnStrictMode) {
         $arrRegexp[] = '/vbscri?pt\\s*:/is';
         $arrRegexp[] = '/javascript\\s*:/is';
         $arrRegexp[] = '/<\\s*embed.*swf/is';
         $arrRegexp[] = '/<(a|img)[^>]*[^a-z]alert\\s*\\([^>]*>/is';
         $arrRegexp[] = '/<(a|img)[^>]*[^a-z]javascript\\s*:[^>]*>/is';
         $arrRegexp[] = '/<(a|img)[^>]*[^a-z]window\\.[^>]*>/is';
         $arrRegexp[] = '/<(a|img)[^>]*[^a-z]document\\.[^>]*>/is';
         // One pattern per event handler attribute, in the order they are applied
         $arrHandlers = array(
             'onabort', 'onblur', 'onchange', 'onclick', 'onerror', 'onfocus',
             'onkeypress', 'onkeydown', 'onkeyup', 'onload',
             'onmouseover', 'onmouseup', 'onmousedown', 'onmouseout',
             'onreset', 'onselect', 'onsubmit', 'onunload', 'onresize'
         );
         foreach ($arrHandlers as $strHandler) {
             $arrRegexp[] = '/<[^>]*[^a-z]' . $strHandler . '\\s*=[^>]*>/is';
         }
     }
     $varValue = preg_replace($arrRegexp, '', $varValue);
     // Recheck for encoded null bytes
     while (strpos($varValue, '\\0') !== false) {
         $varValue = str_replace('\\0', '', $varValue);
     }
     return $varValue;
 }
예제 #4
0
 /**
  *  Fill a scope with every character between two endpoints (inclusive).
  *
  *  Both endpoints are converted with Utf8::ord(); every value from the first
  *  to the last is then registered on $head via setLiteral(), paired with the
  *  result of Utf8::chr() for that value.
  *
  *  @param Scope $head  scope receiving the literals
  *  @param mixed $start first character of the range
  *  @param mixed $end   last character of the range
  *
  *  @return void
  *  @throws ParserException when $end comes before $start
  *  @access public
  */
 public function fillRange(Scope $head, $start, $end)
 {
     $first = Utf8::ord($start);
     $last = Utf8::ord($end);
     // A reversed range is rejected up front.
     if ($first > $last) {
         throw new ParserException(sprintf('Character class range %s - %s is out of order', $start, $end));
     }
     $current = $first;
     while ($current <= $last) {
         $head->setLiteral($current, Utf8::chr($current));
         $current++;
     }
 }
예제 #5
0
/**
 * Callback function for utf8_decode_entities
 *
 * Emits a silenced E_USER_DEPRECATED notice, then converts the hexadecimal
 * string in the first capture group with hexdec() and passes the result to
 * Utf8::chr().
 *
 * @param array $matches Match array; index 1 holds the hexadecimal digits
 *
 * @return string
 *
 * @deprecated Deprecated since Contao 4.0, to be removed in Contao 5.0.
 */
function utf8_hexchr_callback($matches)
{
    $strNotice = 'Using utf8_hexchr_callback() has been deprecated and will no longer work in Contao 5.0.';
    @trigger_error($strNotice, E_USER_DEPRECATED);
    $intCodePoint = hexdec($matches[1]);
    return Utf8::chr($intCodePoint);
}