/**
 * Parses the raw attribute text of an HTML tag and returns the sanitised attributes.
 *
 * Works in two passes:
 *  1. A small state machine tokenises the text into name / value pairs.
 *     Attributes whose name starts with "on" (event handlers) are dropped,
 *     and text that cannot be tokenised is stripped.
 *  2. Every attribute that carries a value is checked against the per-tag
 *     rules in $this->allowedTags[$elem] ('disallowed' / 'allowed' lists).
 *     "style" values are filtered declaration by declaration; every other
 *     value is passed through StringTool::filterBadProtocol().
 *
 * Attribute names are limited to ASCII letters and hyphens; a name containing
 * any other character (a digit, for instance) is cut at that character.
 *
 * @param string $attributes Raw attribute text taken from inside a tag.
 * @param string $elem       Tag name used as the key into $this->allowedTags.
 *
 * @return array Map keyed by lower-cased attribute name. An attribute that had
 *               a value maps to the rebuilt string `name=<delimiter>value<delimiter>`;
 *               a valueless attribute maps to an empty array and is not checked
 *               against the allowed / disallowed lists.
 */
public function attributes($attributes, $elem = '')
{
    // preg_replace()/substr() may hand back null/false; keep working on a string.
    $attributes = (string) $attributes;

    $return = array();
    $mode = 0;        // 0 = expecting a name, 1 = expecting "=" or whitespace, 2 = expecting a value
    $attrname = '';
    $skip = false;    // true while the current attribute is an on* event handler

    // Accepted value syntaxes, tried in order, mapped to the delimiter used on output.
    $valuePatterns = array(
        '/^"([^"]*)"(\\s+|$)/' => '"',
        "/^'([^']*)'(\\s+|\$)/" => "'",
        "%^([^\\s\"']+)(\\s+|\$)%" => '"',
    );

    while ($attributes !== '') {
        // Number of leading bytes recognised in this pass; 0 means nothing matched.
        $consumed = 0;

        switch ($mode) {
            // Attribute name.
            case 0:
                if (preg_match('/^[-a-zA-Z]+/', $attributes, $match)) {
                    $consumed = strlen($match[0]);
                    $mode = 1;
                    $attrname = strtolower($match[0]);
                    $skip = substr($attrname, 0, 2) == 'on';
                }
                break;

            // Either "=" (a value follows) or whitespace (valueless attribute).
            case 1:
                if (preg_match('/^\\s*=\\s*/', $attributes, $match)) {
                    $consumed = strlen($match[0]);
                    $mode = 2;
                } elseif (preg_match('/^\\s+/', $attributes, $match)) {
                    $consumed = strlen($match[0]);
                    $mode = 0;
                    if (!$skip) {
                        $return[$attrname] = array();
                    }
                }
                break;

            // Attribute value: double-quoted, single-quoted or bare.
            case 2:
                foreach ($valuePatterns as $pattern => $delimiter) {
                    if (preg_match($pattern, $attributes, $match)) {
                        $consumed = strlen($match[0]);
                        $mode = 0;
                        if (!$skip) {
                            $return[$attrname] = array('value' => $match[1], 'delimiter' => $delimiter);
                        }
                        break;
                    }
                }
                break;
        }

        if ($consumed > 0) {
            $attributes = (string) substr($attributes, $consumed);
            continue;
        }

        // Nothing matched: strip the malformed text and start over with a name.
        // The pattern (including its empty alternative) is intentionally left
        // as it was; altering it may change how much text is dropped per pass.
        $attributes = (string) preg_replace('/
            ^
            (
            "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
            |               # or
            \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
            |               # or
            \\S              # - a non-whitespace character
            )*              # any number of the above three
            \\s*             # any number of whitespaces
            /x', '', $attributes);
        $mode = 0;
    }

    // The text ended right after a valueless attribute name.
    if ($mode == 1 && !$skip) {
        $return[$attrname] = array();
    }

    // Apply the per-tag rules to the attributes that carry a value.
    $tag = isset($this->allowedTags[$elem]) ? $this->allowedTags[$elem] : array();

    // Matches one CSS url(...) token. Case-insensitive, because CSS function
    // names are: "URL(" must not slip past the checks below.
    // NOTE(review): CSS escape sequences inside the function name itself
    // (e.g. "u\72l(") are not normalised here.
    $urlPattern = '`url\\(\\s*(([\'"]?)(?:[^)]|(?<=\\\\)\\))+[\'"]?)\\s*\\)`i';

    foreach ($return as $name => $info) {
        if (!isset($info['value'])) {
            continue;
        }

        // Drop attributes that are explicitly disallowed for this tag.
        if (isset($tag['disallowed']) && in_array($name, $tag['disallowed'], true)) {
            unset($return[$name]);
            continue;
        }

        // When an allow-list exists, keep only what is on it.
        if (isset($tag['allowed']) && !in_array($name, $tag['allowed'], true)) {
            unset($return[$name]);
            continue;
        }

        if ($name == 'style') {
            // Deep-clean inline CSS one declaration at a time.
            $sanitized_properties = array();
            $properties = array_filter(array_map('trim', explode(';', StringTool::decodeEntities($info['value']))));

            foreach ($properties as $property) {
                if (!preg_match('#^([a-zA-Z][-a-zA-Z]*)\\s*:\\s*(.*)$#', $property, $property_matches)) {
                    continue;
                }

                $property_name = strtolower($property_matches[1]);
                $property_value = $property_matches[2];

                // $this->allowedStyleProperties is keyed by property name.
                if (!isset($this->allowedStyleProperties[$property_name])) {
                    continue;
                }

                if (stripos($property_value, 'url(') !== false) {
                    // Every url(...) in the value must be well formed ...
                    if (!preg_match_all($urlPattern, $property_value, $urls, PREG_SET_ORDER)) {
                        continue;
                    }

                    // ... and no unparsed "url(" may be left over once those are removed.
                    $remainder = preg_replace($urlPattern, ' ', $property_value);
                    if ($remainder === null || stripos($remainder, 'url(') !== false) {
                        continue;
                    }

                    $urls_ok = false;
                    foreach ($urls as $url) {
                        // Pessimistic: only a pass that reaches the bottom clears the URL.
                        $urls_ok = false;

                        if (empty($url[1])) {
                            break;
                        }

                        $target = $url[1];
                        if (!empty($url[2])) {
                            // Quoted URL: the closing quote must match the opening one.
                            if (substr($target, -1) != $url[2]) {
                                break;
                            }
                            $target = substr($target, 1, -1);
                        }

                        // Undo CSS backslash escapes of ( ) , quotes and whitespace.
                        $target = preg_replace('`\\\\([(),\'"\\s])`', '\\1', $target);

                        // Reject the URL if the protocol filter would alter it.
                        if (StringTool::filterBadProtocol($target) != $target) {
                            break;
                        }

                        // Anything other than a site-local absolute path must match
                        // one of the patterns in $this->allowedStyleDomain.
                        if (!preg_match('`^/[^/]+`', $target)) {
                            $domain_ok = false;
                            foreach ($this->allowedStyleDomain as $reg) {
                                if (preg_match($reg, $target)) {
                                    $domain_ok = true;
                                    break;
                                }
                            }
                            if (!$domain_ok) {
                                break;
                            }
                        }

                        $urls_ok = true;
                    }

                    if (!$urls_ok) {
                        continue;
                    }
                }

                $sanitized_properties[] = $property_name . ':' . StringTool::checkPlain($property_value);
            }

            // No declaration survived: drop the style attribute altogether.
            if (empty($sanitized_properties)) {
                unset($return[$name]);
                continue;
            }

            $info['value'] = implode('; ', $sanitized_properties);
        } else {
            $info['value'] = StringTool::filterBadProtocol($info['value']);
        }

        $return[$name] = $name . '=' . $info['delimiter'] . $info['value'] . $info['delimiter'];
    }

    return $return;
}