示例#1
0
 /**
  * Take an array of attribute names and values and normalize or discard
  * illegal values for the given whitelist.
  *
  * Attributes are visited in iteration order; each one is either copied to
  * the result (possibly with a rewritten value) or silently dropped:
  *
  * - Names matching self::XMLNS_ATTRIBUTE_PATTERN bypass the whitelist and
  *   are kept unless their value matches self::EVIL_URI_PATTERN
  * - "data-" attributes are accepted without being whitelisted, except names
  *   starting with data-ooui, data-mw or data-parsoid, or containing a colon;
  *   those are only accepted when explicitly whitelisted
  * - Any other attribute not on the whitelist is discarded
  * - style values are replaced by the result of Sanitizer::checkCss()
  * - id values are re-encoded with Sanitizer::escapeId()
  * - aria-describedby, aria-flowto, aria-labelledby and aria-owns values are
  *   re-encoded with Sanitizer::escapeIdReferenceList()
  * - RDFa and microdata attributes are discarded when their value matches
  *   self::EVIL_URI_PATTERN
  * - href and src are discarded unless the value starts with one of the
  *   protocols from wfUrlProtocols() and contains no whitespace
  * - itemtype, itemid and itemref are removed from the result when no
  *   itemscope attribute was accepted
  *
  * @param array $attribs Map of attribute name => attribute value
  * @param array $whitelist List of allowed attribute names
  * @return array Map of accepted attribute name => sanitized value
  *
  * @todo Check for legal values where the DTD limits things.
  * @todo Check for unique id attribute :P
  */
 public static function validateAttributes($attribs, $whitelist)
 {
     // Flip so that whitelist membership is a cheap isset() on the name.
     $whitelist = array_flip($whitelist);

     // Attributes whose value is a list of id references.
     $idReferenceAttribs = ['aria-describedby', 'aria-flowto', 'aria-labelledby', 'aria-owns'];
     // RDFa and microdata properties allow URLs, URIs and/or CURIs.
     $semanticAttribs = [
         'rel', 'rev', 'about', 'property', 'resource', 'datatype', 'typeof',
         'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype',
     ];

     // Built on first use only: most calls never see an href or src attribute.
     $hrefExp = null;

     $out = [];
     foreach ($attribs as $attribute => $value) {
         # Allow XML namespace declaration to allow RDFa
         if (preg_match(self::XMLNS_ATTRIBUTE_PATTERN, $attribute)) {
             if (!preg_match(self::EVIL_URI_PATTERN, $value)) {
                 $out[$attribute] = $value;
             }
             continue;
         }

         # Allow any attribute beginning with "data-"
         # However:
         # * data-ooui is reserved for ooui
         # * data-mw and data-parsoid are reserved for parsoid
         # * data-mw-<name here> is reserved for extensions (or core) if
         #   they need to communicate some data to the client and want to be
         #   sure that it isn't coming from an untrusted user.
         # * Ensure that the attribute is not namespaced by banning
         #   colons.
         if (!preg_match('/^data-(?!ooui|mw|parsoid)[^:]*$/i', $attribute) && !isset($whitelist[$attribute])) {
             continue;
         }

         # Strip javascript "expression" from stylesheets.
         # http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp
         # Strict comparison: a loose == would let an integer key 0 match
         # 'style' on PHP < 8.
         if ($attribute === 'style') {
             $value = Sanitizer::checkCss($value);
         }

         # Escape HTML id attributes
         if ($attribute === 'id') {
             $value = Sanitizer::escapeId($value, 'noninitial');
         }

         # Escape HTML id reference lists
         if (in_array($attribute, $idReferenceAttribs, true)) {
             $value = Sanitizer::escapeIdReferenceList($value, 'noninitial');
         }

         // Check RDFa and microdata values for sanity.
         // Paranoia. Allow "simple" values but suppress javascript
         if (in_array($attribute, $semanticAttribs, true) && preg_match(self::EVIL_URI_PATTERN, $value)) {
             continue;
         }

         # NOTE: even though elements using href/src are not allowed directly, supply
         #       validation code that can be used by tag hook handlers, etc
         if ($attribute === 'href' || $attribute === 'src') {
             if ($hrefExp === null) {
                 $hrefExp = '/^(' . wfUrlProtocols() . ')[^\\s]+$/';
             }
             // Drop any href or src attributes not using an allowed protocol.
             // NOTE: this also drops all relative URLs
             if (!preg_match($hrefExp, $value)) {
                 continue;
             }
         }

         // If this attribute was previously set, override it.
         // Output should only have one attribute of each name.
         $out[$attribute] = $value;
     }

     # itemtype, itemid, itemref don't make sense without itemscope
     if (!array_key_exists('itemscope', $out)) {
         unset($out['itemtype'], $out['itemid'], $out['itemref']);
     }
     # TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref.
     return $out;
 }
示例#2
0
 /**
  * Test escapeIdReferenceList for consistency with escapeId
  *
  * Escaping the reference list as a whole must produce the same string as
  * escaping the two ids individually and joining them with a single space.
  *
  * @dataProvider provideEscapeIdReferenceList
  * @covers Sanitizer::escapeIdReferenceList
  *
  * @param string $referenceList Reference list under test; presumably holds
  *   $id1 and $id2 separated by whitespace (provider not visible here, confirm)
  * @param string $id1 First id expected in the escaped list
  * @param string $id2 Second id expected in the escaped list
  */
 public function testEscapeIdReferenceList($referenceList, $id1, $id2)
 {
     // Reference value: each id escaped on its own, joined by one space.
     $expected = Sanitizer::escapeId($id1, 'noninitial') . ' ' . Sanitizer::escapeId($id2, 'noninitial');
     $actual = Sanitizer::escapeIdReferenceList($referenceList, 'noninitial');

     // Expected value goes first so failure messages label the two sides
     // correctly; assertSame avoids loose comparison of numeric-looking strings.
     $this->assertSame($expected, $actual);
 }