示例#1
0
 /**
  * Take an array of attribute names and values and normalize or discard
  * illegal values for the given element type.
  *
  * - Discards attributes not on a whitelist for the given element
  * - Unsafe style attributes are discarded
  * - Invalid id attributes are re-encoded
  *
  * @param array $attribs
  * @param string $element
  * @return array
  *
  * @todo Check for legal values where the DTD limits things.
  * @todo Check for unique id attribute :P
  */
 static function validateTagAttributes($attribs, $element)
 {
     // Fetch the attribute whitelist for this element type first, then
     // hand both the raw attributes and that whitelist to the generic
     // validator, whose result is returned unchanged.
     $allowedForElement = Sanitizer::attributeWhitelist($element);
     $validated = Sanitizer::validateAttributes($attribs, $allowedForElement);
     return $validated;
 }
示例#2
0
 /**
  * Take an array of attribute names and values and normalize or discard
  * illegal values for the given element type.
  *
  * - Discards attributes not on a whitelist for the given element
  * - Unsafe style attributes are discarded
  * - Invalid id attributes are re-encoded
  *
  * @param array $attribs
  * @param string $element
  * @return array
  *
  * @todo Check for legal values where the DTD limits things.
  * @todo Check for unique id attribute :P
  */
 static function validateTagAttributes($attribs, $element)
 {
     // Flip the whitelist so attribute names become keys and membership
     // can be tested with isset().
     $allowed = array_flip(Sanitizer::attributeWhitelist($element));
     $clean = array();
     foreach ($attribs as $name => $val) {
         if (isset($allowed[$name])) {
             $keep = true;
             if ($name == 'style') {
                 // Sanitizer::checkCss() yields false for a style value
                 // that must be rejected (e.g. javascript "expression",
                 // see http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp).
                 $val = Sanitizer::checkCss($val);
                 $keep = ($val !== false);
             }
             if ($keep) {
                 if ($name === 'id') {
                     // Ids are passed through Sanitizer::escapeId().
                     $val = Sanitizer::escapeId($val);
                 }
                 // A later occurrence of the same name replaces the
                 // earlier one, so each name appears once in the result.
                 $clean[$name] = $val;
             }
         }
     }
     return $clean;
 }
示例#3
0
 /**
  * Take a tag soup fragment listing an HTML element's attributes
  * and normalize it to well-formed XML, discarding unwanted attributes.
  *
  * - Normalizes attribute names to lowercase
  * - Discards attributes not on a whitelist for the given element
  * - Turns broken or invalid entities into plaintext
  * - Double-quotes all attribute values
  * - Attributes without values are given the name as attribute
  * - Duplicate attributes are collapsed; the last occurrence wins
  * - Unsafe style attributes are discarded
  * - Prepends space if there are attributes.
  *
  * @param string $text
  * @param string $element
  * @return string
  *
  * @todo Check for legal values where the DTD limits things.
  * @todo Check for unique id attribute :P
  */
 function fixTagAttributes($text, $element)
 {
     // Interpolated into the protocol-matching pattern near the end of the loop.
     global $wgUrlProtocols;
     // Blank input carries no attributes at all.
     if (trim($text) === '') {
         return '';
     }
     # Unquoted attribute
     # Since we quote this later, this can be anything distinguishable
     # from the end of the attribute
     $pairs = array();
     if (!preg_match_all(MW_ATTRIBS_REGEX, $text, $pairs, PREG_SET_ORDER)) {
         return '';
     }
     // Flip the whitelist so attribute names can be tested with isset().
     $whitelist = array_flip(Sanitizer::attributeWhitelist($element));
     $attribs = array();
     foreach ($pairs as $set) {
         // Attribute names are compared and emitted in lowercase.
         $attribute = strtolower($set[1]);
         if (!isset($whitelist[$attribute])) {
             continue;
         }
         $raw = Sanitizer::getTagAttributeCallback($set);
         $value = Sanitizer::normalizeAttributeValue($raw);
         # Strip javascript "expression" from stylesheets.
         # http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp
         if ($attribute === 'style' && preg_match('/(expression|tps*:\\/\\/|url\\s*\\().*/is', Sanitizer::decodeCharReferences($value))) {
             # Suspicious style content: drop the whole attribute.
             continue;
         }
         # Templates and links may be expanded in later parsing,
         # creating invalid or dangerous output. Suppress this by
         # replacing the trigger characters with numeric entities;
         # mapping each token to itself would leave the value unprotected.
         $value = strtr($value, array('{' => '&#123;', '[' => '&#91;', "''" => '&#39;&#39;', 'ISBN' => '&#73;SBN', 'RFC' => '&#82;FC', 'PMID' => '&#80;MID'));
         # Stupid hack: every URL protocol match is rewritten by
         # Sanitizer::armorLinksCallback().
         $value = preg_replace_callback('/(' . $wgUrlProtocols . ')/', array('Sanitizer', 'armorLinksCallback'), $value);
         // If this attribute was previously set, override it.
         // Output should only have one attribute of each name.
         $attribs[$attribute] = "{$attribute}=\"{$value}\"";
     }
     if (empty($attribs)) {
         return '';
     }
     // Leading space so the result can be appended directly after a tag name.
     return ' ' . implode(' ', $attribs);
 }
示例#4
0
 /**
  * Take a tag soup fragment listing an HTML element's attributes
  * and normalize it to well-formed XML, discarding unwanted attributes.
  *
  * - Normalizes attribute names to lowercase
  * - Discards attributes not on a whitelist for the given element
  * - Turns broken or invalid entities into plaintext
  * - Double-quotes all attribute values
  * - Attributes without values are given the name as attribute
  * - Duplicate attributes are collapsed; the last occurrence wins
  * - Unsafe style attributes are discarded
  * - Prepends space if there are attributes.
  *
  * @param string $text
  * @param string $element
  * @return string
  *
  * @todo Check for legal values where the DTD limits things.
  * @todo Check for unique id attribute :P
  */
 function fixTagAttributes($text, $element)
 {
     // Blank input carries no attributes at all.
     if (trim($text) === '') {
         return '';
     }
     # Unquoted attribute
     # Since we quote this later, this can be anything distinguishable
     # from the end of the attribute
     $pairs = array();
     if (!preg_match_all(MW_ATTRIBS_REGEX, $text, $pairs, PREG_SET_ORDER)) {
         return '';
     }
     // Flip the whitelist so attribute names can be tested with isset().
     $whitelist = array_flip(Sanitizer::attributeWhitelist($element));
     $attribs = array();
     foreach ($pairs as $set) {
         // Attribute names are compared and emitted in lowercase.
         $attribute = strtolower($set[1]);
         if (!isset($whitelist[$attribute])) {
             continue;
         }
         $raw = Sanitizer::getTagAttributeCallback($set);
         $value = Sanitizer::normalizeAttributeValue($raw);
         # Strip javascript "expression" from stylesheets.
         # http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp
         if ($attribute === 'style') {
             $stripped = Sanitizer::decodeCharReferences($value);
             // Remove any comments; IE gets token splitting wrong
             $stripped = preg_replace('!/\\*.*?\\*/!S', ' ', $stripped);
             // The emitted value is the comment-free text, HTML-escaped.
             $value = htmlspecialchars($stripped);
             // ... and continue checks on a further-decoded copy:
             // resolve CSS hex escapes (backslash + 1-6 hex digits and an
             // optional trailing whitespace character). A callback is
             // used because the /e modifier no longer exists in PHP 7+,
             // where preg_replace() would return NULL and the check
             // below would silently pass everything.
             $stripped = preg_replace_callback(
                 '!\\\\([0-9A-Fa-f]{1,6})[ \\n\\r\\t\\f]?!',
                 function ($match) {
                     return codepointToUtf8(hexdec($match[1]));
                 },
                 $stripped
             );
             // Drop any remaining backslashes before pattern matching.
             $stripped = str_replace('\\', '', $stripped);
             if (preg_match('/(expression|tps*:\\/\\/|url\\s*\\().*/is', $stripped)) {
                 # Suspicious style content: drop the whole attribute.
                 continue;
             }
         }
         if ($attribute === 'id') {
             // Ids are passed through Sanitizer::escapeId().
             $value = Sanitizer::escapeId($value);
         }
         # Templates and links may be expanded in later parsing,
         # creating invalid or dangerous output. Suppress this.
         $value = strtr($value, array('<' => '&lt;', '>' => '&gt;', '"' => '&quot;', '{' => '&#123;', '[' => '&#91;', "''" => '&#39;&#39;', 'ISBN' => '&#73;SBN', 'RFC' => '&#82;FC', 'PMID' => '&#80;MID'));
         # Stupid hack: every URL protocol match is rewritten by
         # Sanitizer::armorLinksCallback().
         $value = preg_replace_callback('/(' . wfUrlProtocols() . ')/', array('Sanitizer', 'armorLinksCallback'), $value);
         // If this attribute was previously set, override it.
         // Output should only have one attribute of each name.
         $attribs[$attribute] = "{$attribute}=\"{$value}\"";
     }
     // Leading space so the result can be appended directly after a tag name.
     return count($attribs) ? ' ' . implode(' ', $attribs) : '';
 }