Esempio n. 1
0
 /**
  * Filters HTML to prevent cross-site-scripting (XSS) vulnerabilities.
  *
  * Based on kses by Ulf Harnhammar, see http://sourceforge.net/projects/kses.
  * For examples of various XSS attacks, see: http://ha.ckers.org/xss.html.
  *
  * This code does five things:
  * - Removes characters and constructs that can trick browsers.
  * - Makes sure all HTML entities are well-formed.
  * - Makes sure all HTML tags and attributes are well-formed.
  * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g.
  *   javascript:).
  * - Marks the sanitized, XSS-safe version of $string as safe markup for
  *   rendering.
  *
  * @param $string
  *   The string with raw HTML in it. It will be stripped of everything that
  *   can cause an XSS attack.
  * @param array $html_tags
  *   An array of HTML tags.
  * @param bool $mode
  *   (optional) Defaults to FILTER_MODE_WHITELIST ($html_tags is used as a
  *   whitelist of allowed tags), but can also be set to FILTER_MODE_BLACKLIST
  *   ($html_tags is used as a blacklist of disallowed tags).
  *
  * @return string
  *   An XSS safe version of $string, or an empty string if $string is not
  *   valid UTF-8.
  *
  * @see \Drupal\Component\Utility\Unicode::validateUtf8()
  * @see \Drupal\Component\Utility\SafeMarkup
  *
  * @ingroup sanitization
  */
 public static function filter($string, $html_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd'), $mode = Xss::FILTER_MODE_WHITELIST)
 {
     // Only operate on valid UTF-8 strings. This is necessary to prevent cross
     // site scripting issues on Internet Explorer 6.
     if (!Unicode::validateUtf8($string)) {
         return '';
     }
     // Remove NULL characters (ignored by some browsers).
     $string = str_replace(chr(0), '', $string);
     // Remove Netscape 4 JS entities.
     $string = preg_replace('%&\\s*\\{[^}]*(\\}\\s*;?|$)%', '', $string);
     // Defuse all HTML entities: every ampersand is escaped to "&amp;" so that
     // nothing in the input is treated as an entity unless it is explicitly
     // restored by one of the whitelist patterns below. (Replacing '&' with
     // '&' here would be a no-op and leave malformed entities untouched.)
     $string = str_replace('&', '&amp;', $string);
     // Change back only well-formed entities in our whitelist. Each pattern
     // matches the escaped form ("&amp;...") produced by the step above.
     // Decimal numeric entities.
     $string = preg_replace('/&amp;#([0-9]+;)/', '&#\\1', $string);
     // Hexadecimal numeric entities. Leading zeros are dropped and the "x" is
     // normalized to lower case.
     $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\\1', $string);
     // Named entities.
     $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\\1', $string);
     // Flip the tag list so tag names become array keys.
     $html_tags = array_flip($html_tags);
     // Every fragment matched by the pattern below is handed to split(); its
     // return value replaces the fragment in the output.
     $splitter = function ($matches) use($html_tags, $mode) {
         return static::split($matches[1], $html_tags, $mode);
     };
     // The filtered result is passed through SafeMarkup::set() before being
     // returned.
     return SafeMarkup::set(preg_replace_callback('%
   (
   <(?=[^a-zA-Z!/])  # a lone <
   |                 # or
   <!--.*?-->        # a comment
   |                 # or
   <[^>]*(>|$)       # a string that starts with a <, up until the > or the end of the string
   |                 # or
   >                 # just a >
   )%x', $splitter, $string));
 }
Esempio n. 2
0
File: Xss.php Progetto: scratch/gai
 /**
  * Filters HTML to prevent cross-site-scripting (XSS) vulnerabilities.
  *
  * Based on kses by Ulf Harnhammar, see http://sourceforge.net/projects/kses.
  * For examples of various XSS attacks, see: http://ha.ckers.org/xss.html.
  *
  * This code does four things:
  * - Removes characters and constructs that can trick browsers.
  * - Makes sure all HTML entities are well-formed.
  * - Makes sure all HTML tags and attributes are well-formed.
  * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g.
  *   javascript:).
  *
  * @param $string
  *   The string with raw HTML in it. It will be stripped of everything that
  *   can cause an XSS attack.
  * @param array|null $html_tags
  *   (optional) An array of allowed HTML tags. When NULL (the default), the
  *   class's static::$htmlTags list is used.
  *
  * @return string
  *   An XSS safe version of $string, or an empty string if $string is not
  *   valid UTF-8.
  *
  * @see \Drupal\Component\Utility\Unicode::validateUtf8()
  *
  * @ingroup sanitization
  */
 public static function filter($string, array $html_tags = NULL)
 {
     // Fall back to the class-wide tag list when the caller supplied none.
     if ($html_tags === NULL) {
         $html_tags = static::$htmlTags;
     }
     // Strings that are not valid UTF-8 are rejected outright; this guards
     // against cross site scripting issues on Internet Explorer 6.
     if (!Unicode::validateUtf8($string)) {
         return '';
     }
     // Strip NULL characters, which some browsers ignore.
     $string = str_replace("\0", '', $string);
     // Strip Netscape 4 JS entities.
     $string = preg_replace('%&\\s*\\{[^}]*(\\}\\s*;?|$)%', '', $string);
     // Escape every ampersand so that no entity survives by default...
     $string = str_replace('&', '&amp;', $string);
     // ...then restore, in order, only the well-formed entity shapes:
     // decimal numeric, hexadecimal numeric, and named entities.
     $entity_rules = array(
         '/&amp;#([0-9]+;)/' => '&#\\1',
         '/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/' => '&#x\\1',
         '/&amp;([A-Za-z][A-Za-z0-9]*;)/' => '&\\1',
     );
     foreach ($entity_rules as $escaped => $restored) {
         $string = preg_replace($escaped, $restored, $string);
     }
     // Tag names become array keys.
     $allowed = array_flip($html_tags);
     // Late static binding does not work inside anonymous functions, so the
     // called class name is captured and used explicitly.
     $called_class = get_called_class();
     $handle_fragment = function ($found) use ($allowed, $called_class) {
         return $called_class::split($found[1], $allowed, $called_class);
     };
     // Matches each tag-like fragment: a lone "<", a comment, anything from a
     // "<" up to the next ">" (or end of string), or a stray ">".
     $tag_pattern = '%
   (
   <(?=[^a-zA-Z!/])  # a lone <
   |                 # or
   <!--.*?-->        # a comment
   |                 # or
   <[^>]*(>|$)       # a string that starts with a <, up until the > or the end of the string
   |                 # or
   >                 # just a >
   )%x';
     // Each matched fragment is replaced by whatever split() returns for it.
     // The resulting string is returned as-is.
     return preg_replace_callback($tag_pattern, $handle_fragment, $string);
 }