/**
 * Builds an attribute list from string containing attributes.
 *
 * Parses an attribute list into an array of attribute data and tries to do
 * the right thing even when it gets weird input. Unquoted attribute values
 * are reported with double quotes added around them, to make it easier to
 * produce HTML that conforms to W3C's HTML specification. Values of
 * URI-carrying attributes (href, src, ...) are passed through
 * asc_kses_bad_protocol(). Duplicate attributes are reduced to the one
 * defined first (foo='bar' foo='baz' will result in foo='bar').
 *
 * Each entry of the returned array is keyed by the attribute name and holds:
 *   'name'  => attribute name as written,
 *   'value' => attribute value ('' for valueless attributes),
 *   'whole' => the re-assembled `name="value"` / `name='value'` / `name` text,
 *   'vless' => 'y' for a valueless attribute, 'n' otherwise.
 *
 * @since 1.0.0
 *
 * @param string $attr              Attribute list from HTML element to closing HTML element tag
 * @param array  $allowed_protocols Allowed protocols to keep
 * @return array List of attributes after parsing
 */
function asc_kses_hair($attr, $allowed_protocols)
{
    $attrarr  = array();
    $mode     = 0;
    $attrname = '';
    $uris     = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');

    // Value syntaxes, tried in this order: "double quoted", 'single quoted',
    // then unquoted. The second element is the quote character used when the
    // attribute is re-assembled; unquoted values get double quotes added to
    // conform to W3C's HTML spec.
    $value_syntaxes = array(
        array('%^"([^"]*)"(\\s+|/?$)%', '"'),
        array("%^'([^']*)'(\\s+|/?\$)%", "'"),
        array("%^([^\\s\"']+)(\\s+|/?\$)%", '"'),
    );

    // Loop through the whole attribute list.
    while (strlen($attr) !== 0) {
        $working = 0; // Was the last operation successful?

        switch ($mode) {
            case 0: // attribute name, href for instance
                if (preg_match('/^([-a-zA-Z:]+)/', $attr, $match)) {
                    $attrname = $match[1];
                    $working  = $mode = 1;
                    // Consume exactly the text that was matched.
                    $attr = (string) substr($attr, strlen($match[0]));
                }
                break;

            case 1: // equals sign or valueless ("selected")
                if (preg_match('/^\\s*=\\s*/', $attr, $match)) {
                    $working = 1;
                    $mode    = 2;
                    $attr    = (string) substr($attr, strlen($match[0]));
                    break;
                }

                if (preg_match('/^\\s+/', $attr, $match)) {
                    $working = 1;
                    $mode    = 0;
                    if (!array_key_exists($attrname, $attrarr)) {
                        $attrarr[$attrname] = array('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
                    }
                    $attr = (string) substr($attr, strlen($match[0]));
                }
                break;

            case 2: // attribute value, a URL after href= for instance
                foreach ($value_syntaxes as $syntax) {
                    list($pattern, $quote) = $syntax;

                    if (preg_match($pattern, $attr, $match)) {
                        $thisval = $match[1];
                        if (in_array(strtolower($attrname), $uris, true)) {
                            $thisval = asc_kses_bad_protocol($thisval, $allowed_protocols);
                        }

                        // First definition wins; later duplicates are dropped.
                        if (!array_key_exists($attrname, $attrarr)) {
                            $attrarr[$attrname] = array(
                                'name'  => $attrname,
                                'value' => $thisval,
                                'whole' => "{$attrname}={$quote}{$thisval}{$quote}",
                                'vless' => 'n',
                            );
                        }

                        $working = 1;
                        $mode    = 0;
                        // Strip the full match, including a trailing "/" of a
                        // self-closing tag. Re-running a second pattern
                        // without the "/?" left the value unconsumed and sent
                        // valid input through the error handler.
                        $attr = (string) substr($attr, strlen($match[0]));
                        break; // leaves the foreach only
                    }
                }
                break;
        } // switch

        if ($working == 0) {
            // Not well formed: let the error handler drop the offending part
            // and start over looking for an attribute name.
            $attr = asc_kses_html_error($attr);
            $mode = 0;
        }
    } // while

    if ($mode == 1 && !array_key_exists($attrname, $attrarr)) {
        // Special case, for when the attribute list ends with a valueless
        // attribute like "selected".
        $attrarr[$attrname] = array('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
    }

    return $attrarr;
}
/**
 * Checks and cleans a URL.
 *
 * A number of characters are removed from the URL. If the URL is for displaying
 * (the default behaviour) ampersands and single quotes are also replaced by
 * numeric character references. The 'clean_url' filter is applied to the
 * returned cleaned URL; the two early returns (empty input, rejected protocol)
 * bypass the filter.
 *
 * @since 2.8.0
 * @uses asc_kses_bad_protocol() To only permit protocols in the URL set
 *     via $protocols or the common ones set in the function.
 *
 * @param string $url       The URL to be cleaned.
 * @param array  $protocols Optional. An array of acceptable protocols.
 *     Defaults to 'http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp', 'svn' if not set.
 * @param string $_context  Private. Use esc_url_raw() for database usage.
 * @return string The cleaned $url after the 'clean_url' filter is applied,
 *     or an empty string when the protocol check altered the URL.
 */
function esc_url($url, $protocols = null, $_context = 'display')
{
    $original_url = $url;

    if ('' == $url) {
        return $url;
    }

    $url   = preg_replace('|[^a-z0-9-~+_.?#=!&;,/:%@$\\|*\'()\\x80-\\xff]|i', '', $url);
    $strip = array('%0d', '%0a', '%0D', '%0A');
    $url   = _deep_replace($strip, $url);
    $url   = str_replace(';//', '://', $url);

    /* If the URL doesn't appear to contain a scheme, we
     * presume it needs http:// prepended (unless a relative
     * link starting with /, # or ? or a php file).
     */
    // Stripping may have left nothing behind, so read the first character
    // without indexing into a possibly empty string.
    $first_char = (string) substr((string) $url, 0, 1);
    if (strpos($url, ':') === false
        && !in_array($first_char, array('/', '#', '?'), true)
        && !preg_match('/^[a-z0-9-]+?\\.php/i', $url)
    ) {
        $url = 'http://' . $url;
    }

    // Replace ampersands and single quotes only when displaying.
    if ('display' == $_context) {
        $url = asc_kses_normalize_entities($url);
        $url = str_replace('&amp;', '&#038;', $url);
        $url = str_replace("'", '&#039;', $url);
    }

    if ('/' === (string) substr((string) $url, 0, 1)) {
        // Root-relative URL: there is no protocol to validate.
        $good_protocol_url = $url;
    } else {
        if (!is_array($protocols)) {
            $protocols = asc_allowed_protocols();
        }
        $good_protocol_url = asc_kses_bad_protocol($url, $protocols);
        // Any change made by the protocol check means the URL is rejected.
        if (strtolower($good_protocol_url) != strtolower($url)) {
            return '';
        }
    }

    /**
     * Filter a string cleaned and escaped for output as a URL.
     *
     * @since 2.3.0
     *
     * @param string $good_protocol_url The cleaned URL to be returned.
     * @param string $original_url      The URL prior to cleaning.
     * @param string $_context          If 'display', replace ampersands and single quotes only.
     */
    return apply_filters('clean_url', $good_protocol_url, $original_url, $_context);
}