/**
 * Parses a raw HTML attribute string and sanitizes each attribute.
 *
 * The string is consumed from the left by a three-state scanner: attribute
 * name, then equals sign (or whitespace for a valueless attribute), then
 * attribute value. "style" and "on*" attributes are parsed but not emitted.
 * Values are passed through UrlHelper::filterBadProtocol() unless the
 * attribute is "title", "alt" or starts with "data-". Input that does not fit
 * the expected syntax is stripped and scanning resumes with the next name.
 *
 * @param string $attributes
 *   The attribute portion of an HTML tag, for example 'href="/x" selected'.
 *
 * @return array
 *   The attributes that were kept, one string per attribute, in the form
 *   name, name="value" or name='value'. Unquoted values are re-emitted in
 *   double quotes.
 */
protected static function attributes($attributes) {
  $kept = array();
  // Scanner state: 0 = expecting a name, 1 = expecting "=" or whitespace,
  // 2 = expecting a value.
  $state = 0;
  $name = '';
  $drop = FALSE;
  $keep_raw_value = FALSE;

  // The three accepted value syntaxes, tried in this order. Each entry holds
  // the capturing pattern, the pattern used to strip the value from the
  // input, and the quote character used when the attribute is emitted.
  $value_forms = array(
    // Double-quoted value.
    array('/^"([^"]*)"(\\s+|$)/', '/^"[^"]*"(\\s+|$)/', '"'),
    // Single-quoted value.
    array("/^'([^']*)'(\\s+|\$)/", "/^'[^']*'(\\s+|\$)/", "'"),
    // Unquoted value; emitted with double quotes.
    array("%^([^\\s\"']+)(\\s+|\$)%", "%^[^\\s\"']+(\\s+|\$)%", '"'),
  );

  while (strlen($attributes) > 0) {
    // Becomes TRUE once the current state managed to consume some input.
    $consumed = FALSE;

    if ($state === 0) {
      // Attribute name, href for instance.
      if (preg_match('/^([-a-zA-Z][-a-zA-Z0-9]*)/', $attributes, $found)) {
        $name = strtolower($found[1]);

        // Inline styles and event handlers are never emitted.
        $drop = ($name === 'style') || (substr($name, 0, 2) === 'on');

        // Values for attributes of type URI should be filtered for
        // potentially malicious protocols (for example, an href-attribute
        // starting with "javascript:"). For some non-URI attributes that
        // filtering mangles valid and safe data, so it is skipped for them.
        // @see \Drupal\Component\Utility\UrlHelper::filterBadProtocol()
        // @see http://www.w3.org/TR/html4/index/attributes.html
        $keep_raw_value = (substr($name, 0, 5) === 'data-')
          || $name === 'title'
          || $name === 'alt';

        $attributes = preg_replace('/^[-a-zA-Z][-a-zA-Z0-9]*/', '', $attributes);
        $state = 1;
        $consumed = TRUE;
      }
    }
    elseif ($state === 1) {
      // Equals sign or valueless ("selected").
      if (preg_match('/^\\s*=\\s*/', $attributes)) {
        $attributes = preg_replace('/^\\s*=\\s*/', '', $attributes);
        $state = 2;
        $consumed = TRUE;
      }
      elseif (preg_match('/^\\s+/', $attributes)) {
        // Whitespace right after the name: the attribute has no value.
        if (!$drop) {
          $kept[] = $name;
        }
        $attributes = preg_replace('/^\\s+/', '', $attributes);
        $state = 0;
        $consumed = TRUE;
      }
    }
    elseif ($state === 2) {
      // Attribute value, a URL after href= for instance.
      foreach ($value_forms as $form) {
        if (!preg_match($form[0], $attributes, $found)) {
          continue;
        }

        if ($keep_raw_value) {
          $value = $found[1];
        }
        else {
          $value = UrlHelper::filterBadProtocol($found[1]);
        }

        if (!$drop) {
          $kept[] = $name . '=' . $form[2] . $value . $form[2];
        }

        $attributes = preg_replace($form[1], '', $attributes);
        $state = 0;
        $consumed = TRUE;
        break;
      }
    }

    if (!$consumed) {
      // Not well formed; remove and try again.
      $attributes = preg_replace('/
        ^
        (
        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
        |               # or
        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
        |               # or
        \\S              # - a non-whitespace character
        )*              # any number of the above three
        \\s*             # any number of whitespaces
        /x', '', $attributes);
      $state = 0;
    }
  }

  // The attribute list ends with a valueless attribute like "selected".
  if ($state === 1 && !$drop) {
    $kept[] = $name;
  }

  return $kept;
}
/**
 * Processes a string of HTML attributes.
 *
 * The attribute string is scanned left to right in three modes: attribute
 * name (0), equals sign or end of a valueless attribute (1), and attribute
 * value (2). "style" and "on*" attributes are parsed but never emitted.
 * Attribute values are passed through UrlHelper::filterBadProtocol(), except
 * for "title", "alt" and "data-*" attributes, whose values are kept as-is.
 * Input that matches none of the expected forms is stripped and scanning
 * restarts at the next attribute name.
 *
 * @param string $attributes
 *   The html attribute to process.
 *
 * @return array
 *   Cleaned up version of the HTML attributes, one string per attribute in
 *   the form name, name="value" or name='value'. Unquoted values are emitted
 *   with double quotes.
 */
protected static function attributes($attributes) {
  $attributes_array = array();
  $mode = 0;
  $attribute_name = '';
  $skip = FALSE;
  $skip_protocol_filtering = FALSE;

  while (strlen($attributes) != 0) {
    // Was the last operation successful?
    $working = 0;

    switch ($mode) {
      case 0:
        // Attribute name, href for instance. Digits are allowed after the
        // first character so that names such as "data-col2" are recognized
        // in full instead of being cut at the digit and then discarded as
        // malformed input.
        if (preg_match('/^([-a-zA-Z][-a-zA-Z0-9]*)/', $attributes, $match)) {
          $attribute_name = strtolower($match[1]);
          $skip = $attribute_name === 'style' || substr($attribute_name, 0, 2) === 'on';

          // Values for attributes of type URI should be filtered for
          // potentially malicious protocols (for example, an href-attribute
          // starting with "javascript:"). However, for some non-URI
          // attributes performing this filtering causes valid and safe data
          // to be mangled. We prevent this by skipping protocol filtering on
          // such attributes.
          // @see \Drupal\Component\Utility\UrlHelper::filterBadProtocol()
          // @see http://www.w3.org/TR/html4/index/attributes.html
          $skip_protocol_filtering = substr($attribute_name, 0, 5) === 'data-' || in_array($attribute_name, array('title', 'alt'), TRUE);

          $working = $mode = 1;
          $attributes = preg_replace('/^[-a-zA-Z][-a-zA-Z0-9]*/', '', $attributes);
        }
        break;

      case 1:
        // Equals sign or valueless ("selected").
        if (preg_match('/^\\s*=\\s*/', $attributes)) {
          $working = 1;
          $mode = 2;
          $attributes = preg_replace('/^\\s*=\\s*/', '', $attributes);
          break;
        }

        if (preg_match('/^\\s+/', $attributes)) {
          $working = 1;
          $mode = 0;
          if (!$skip) {
            $attributes_array[] = $attribute_name;
          }
          $attributes = preg_replace('/^\\s+/', '', $attributes);
        }
        break;

      case 2:
        // Attribute value, a URL after href= for instance.
        // Double-quoted value.
        if (preg_match('/^"([^"]*)"(\\s+|$)/', $attributes, $match)) {
          $thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);

          if (!$skip) {
            $attributes_array[] = "{$attribute_name}=\"{$thisval}\"";
          }
          $working = 1;
          $mode = 0;
          $attributes = preg_replace('/^"[^"]*"(\\s+|$)/', '', $attributes);
          break;
        }

        // Single-quoted value; the single quotes are preserved on output.
        if (preg_match("/^'([^']*)'(\\s+|\$)/", $attributes, $match)) {
          $thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);

          if (!$skip) {
            $attributes_array[] = "{$attribute_name}='{$thisval}'";
          }
          $working = 1;
          $mode = 0;
          $attributes = preg_replace("/^'[^']*'(\\s+|\$)/", '', $attributes);
          break;
        }

        // Unquoted value; it is wrapped in double quotes on output.
        if (preg_match("%^([^\\s\"']+)(\\s+|\$)%", $attributes, $match)) {
          $thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);

          if (!$skip) {
            $attributes_array[] = "{$attribute_name}=\"{$thisval}\"";
          }
          $working = 1;
          $mode = 0;
          $attributes = preg_replace("%^[^\\s\"']+(\\s+|\$)%", '', $attributes);
        }
        break;
    }

    if ($working == 0) {
      // Not well formed; remove and try again.
      $attributes = preg_replace('/
        ^
        (
        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
        |               # or
        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
        |               # or
        \\S              # - a non-whitespace character
        )*              # any number of the above three
        \\s*             # any number of whitespaces
        /x', '', $attributes);
      $mode = 0;
    }
  }

  // The attribute list ends with a valueless attribute like "selected".
  if ($mode == 1 && !$skip) {
    $attributes_array[] = $attribute_name;
  }

  return $attributes_array;
}