/**
 * Wraps text in an emphasized placeholder element for use inside a sentence.
 *
 * Used automatically by self::format().
 *
 * The text is first passed through static::checkPlain(), then surrounded by
 * an <em class="placeholder"> element, and the result is registered with
 * SafeMarkup::set().
 *
 * @param string $text
 *   The text to format (plain-text).
 *
 * @return string
 *   The formatted text (html).
 */
public static function placeholder($text) {
  $escaped = static::checkPlain($text);
  $markup = '<em class="placeholder">' . $escaped . '</em>';

  return SafeMarkup::set($markup);
}
/**
 * Filters HTML to prevent cross-site-scripting (XSS) vulnerabilities.
 *
 * Based on kses by Ulf Harnhammar, see http://sourceforge.net/projects/kses.
 * For examples of various XSS attacks, see: http://ha.ckers.org/xss.html.
 *
 * This code does five things:
 * - Removes characters and constructs that can trick browsers.
 * - Makes sure all HTML entities are well-formed.
 * - Makes sure all HTML tags and attributes are well-formed.
 * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g.
 *   javascript:).
 * - Marks the sanitized, XSS-safe version of $string as safe markup for
 *   rendering.
 *
 * @param $string
 *   The string with raw HTML in it. It will be stripped of everything that
 *   can cause an XSS attack.
 * @param array $html_tags
 *   An array of HTML tags. It is flipped into a lookup map and handed to
 *   split() for every tag-like fragment found in $string.
 *
 * @return string
 *   An XSS safe version of $string, or an empty string if $string is not
 *   valid UTF-8.
 *
 * @see \Drupal\Component\Utility\Unicode::validateUtf8()
 * @see \Drupal\Component\Utility\SafeMarkup
 *
 * @ingroup sanitization
 */
public static function filter($string, $html_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
  // Only operate on valid UTF-8 strings. This is necessary to prevent cross
  // site scripting issues on Internet Explorer 6.
  if (!Unicode::validateUtf8($string)) {
    return '';
  }

  // Remove NULL characters (ignored by some browsers).
  $string = str_replace(chr(0), '', $string);
  // Remove Netscape 4 JS entities.
  $string = preg_replace('%&\\s*\\{[^}]*(\\}\\s*;?|$)%', '', $string);

  // Defuse all HTML entities: every ampersand becomes &amp; so that nothing
  // in the input can act as an entity unless it is explicitly restored below.
  $string = str_replace('&', '&amp;', $string);
  // Change back only well-formed entities in our whitelist:
  // Decimal numeric entities.
  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\\1', $string);
  // Hexadecimal numeric entities (leading zeros are dropped and the "x" is
  // normalized to lower case).
  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\\1', $string);
  // Named entities.
  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\\1', $string);

  // Flip the tag list so tag names become array keys.
  $html_tags = array_flip($html_tags);
  // Late static binding does not work inside anonymous functions.
  $class = get_called_class();
  $splitter = function ($matches) use ($html_tags, $class) {
    return $class::split($matches[1], $html_tags, $class);
  };

  // Strip any tags that are not in the whitelist, then mark the text as safe
  // for output. All other known XSS vectors have been filtered out by this
  // point and any HTML tags remaining will have been deliberately allowed, so
  // it is acceptable to call SafeMarkup::set() on the resultant string.
  //
  // The pattern uses the extended (x) modifier: each "#" comment runs to the
  // end of its line, so the line breaks inside the pattern are significant.
  return SafeMarkup::set(preg_replace_callback('%
    (
    <(?=[^a-zA-Z!/])  # a lone <
    |                 # or
    <!--.*?-->        # a comment
    |                 # or
    <[^>]*(>|$)       # a string that starts with a <, up until the > or the end of the string
    |                 # or
    >                 # just a >
    )%x', $splitter, $string));
}
/**
 * Performs a str_replace() and propagates the "safe markup" flag.
 *
 * The replacement itself is delegated to str_replace(). The result is
 * registered through SafeMarkup::set() only when every replacement value and
 * the subject are already reported as safe by SafeMarkup::isSafe(); in every
 * other case the plain str_replace() result is returned. The $search value
 * is not checked.
 *
 * @param string|array $search
 *   The value being searched for. An array may be used to designate multiple
 *   values to search for.
 * @param string|array $replace
 *   The replacement value that replaces found search values. An array may be
 *   used to designate multiple replacements.
 * @param string $subject
 *   The string or array being searched and replaced on.
 *
 * @return string
 *   The passed subject with replaced values.
 */
public static function replace($search, $replace, $subject) {
  $output = str_replace($search, $replace, $subject);

  // Treat a single replacement like a one-element list so that one loop
  // covers both the scalar and the array form.
  $replacements = is_array($replace) ? $replace : array($replace);
  foreach ($replacements as $replacement) {
    // One unsafe replacement taints the whole output.
    if (!SafeMarkup::isSafe($replacement)) {
      return $output;
    }
  }

  // All replacements were safe; the output is safe only if the subject was
  // safe as well.
  if (SafeMarkup::isSafe($subject)) {
    return SafeMarkup::set($output);
  }

  return $output;
}
/**
 * Filters HTML to prevent cross-site-scripting (XSS) vulnerabilities.
 *
 * Based on kses by Ulf Harnhammar, see http://sourceforge.net/projects/kses.
 * For examples of various XSS attacks, see: http://ha.ckers.org/xss.html.
 *
 * This code does five things:
 * - Removes characters and constructs that can trick browsers.
 * - Makes sure all HTML entities are well-formed.
 * - Makes sure all HTML tags and attributes are well-formed.
 * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g.
 *   javascript:).
 * - Marks the sanitized, XSS-safe version of $string as safe markup for
 *   rendering.
 *
 * @param $string
 *   The string with raw HTML in it. It will be stripped of everything that
 *   can cause an XSS attack.
 * @param array $html_tags
 *   An array of HTML tags.
 * @param bool $mode
 *   (optional) Defaults to FILTER_MODE_WHITELIST ($html_tags is used as a
 *   whitelist of allowed tags), but can also be set to FILTER_MODE_BLACKLIST
 *   ($html_tags is used as a blacklist of disallowed tags).
 *
 * @return string
 *   An XSS safe version of $string, or an empty string if $string is not
 *   valid UTF-8.
 *
 * @see \Drupal\Component\Utility\Unicode::validateUtf8()
 * @see \Drupal\Component\Utility\SafeMarkup
 *
 * @ingroup sanitization
 */
public static function filter($string, $html_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd'), $mode = Xss::FILTER_MODE_WHITELIST) {
  // Only operate on valid UTF-8 strings. This is necessary to prevent cross
  // site scripting issues on Internet Explorer 6.
  if (!Unicode::validateUtf8($string)) {
    return '';
  }

  // Remove NULL characters (ignored by some browsers).
  $string = str_replace(chr(0), '', $string);
  // Remove Netscape 4 JS entities.
  $string = preg_replace('%&\\s*\\{[^}]*(\\}\\s*;?|$)%', '', $string);

  // Defuse all HTML entities: every ampersand becomes &amp; so that nothing
  // in the input can act as an entity unless it is explicitly restored below.
  $string = str_replace('&', '&amp;', $string);
  // Change back only well-formed entities in our whitelist:
  // Decimal numeric entities.
  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\\1', $string);
  // Hexadecimal numeric entities (leading zeros are dropped and the "x" is
  // normalized to lower case).
  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\\1', $string);
  // Named entities.
  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\\1', $string);

  // Flip the tag list so tag names become array keys.
  $html_tags = array_flip($html_tags);
  // Each tag-like fragment matched below is handed to split() together with
  // the tag map and the filter mode.
  $splitter = function ($matches) use ($html_tags, $mode) {
    return static::split($matches[1], $html_tags, $mode);
  };

  // Run every tag-like fragment through split(), then mark the result as
  // safe markup.
  //
  // The pattern uses the extended (x) modifier: each "#" comment runs to the
  // end of its line, so the line breaks inside the pattern are significant.
  return SafeMarkup::set(preg_replace_callback('%
    (
    <(?=[^a-zA-Z!/])  # a lone <
    |                 # or
    <!--.*?-->        # a comment
    |                 # or
    <[^>]*(>|$)       # a string that starts with a <, up until the > or the end of the string
    |                 # or
    >                 # just a >
    )%x', $splitter, $string));
}