/**
 * Checks and cleans a URL.
 *
 * A number of characters are removed from the URL. If the URL is for displaying
 * (the default behaviour) ampersands and single quotes are also replaced by
 * numeric entities. The 'clean_url' filter is applied to the returned cleaned URL.
 *
 * An empty string is returned when a scheme is present that is not in the
 * allowed protocol list, or when nothing is left of the URL after cleaning.
 *
 * @since 0.0.1
 *
 * @param string $url       The URL to be cleaned.
 * @param array  $protocols Optional. An array of acceptable protocols.
 *                          Defaults to return value of hq_allowed_protocols()
 * @param string $_context  Private. Use esc_url_raw() for database usage.
 * @return string The cleaned $url after the 'clean_url' filter is applied.
 */
function esc_url($url, $protocols = null, $_context = 'display')
{
    $original_url = $url;

    if ('' == $url) {
        return $url;
    }

    // Drop every character that is not on the URL whitelist.
    $url = preg_replace('|[^a-z0-9-~+_.?#=!&;,/:%@$\\|*\'()\\x80-\\xff]|i', '', $url);

    // Nothing usable survived the whitelist: bail out before indexing into the string.
    if ('' === $url) {
        return $url;
    }

    // Encoded line breaks are only tolerated inside mailto: links.
    if (0 !== stripos($url, 'mailto:')) {
        $strip = array('%0d', '%0a', '%0D', '%0A');
        $url = _deep_replace($strip, $url);
    }
    $url = str_replace(';//', '://', $url);

    // Removing encoded line breaks may have emptied the URL as well.
    if ('' === $url) {
        return $url;
    }

    /* If the URL doesn't appear to contain a scheme, we
     * presume it needs http:// prepended (unless a relative
     * link starting with /, # or ? or a php file).
     */
    if (strpos($url, ':') === false
        && !in_array($url[0], array('/', '#', '?'), true)
        && !preg_match('/^[a-z0-9-]+?\\.php/i', $url)
    ) {
        $url = 'http://' . $url;
    }

    // Replace ampersands and single quotes only when displaying.
    if ('display' == $_context) {
        $url = hq_kses_normalize_entities($url);
        $url = str_replace('&amp;', '&#038;', $url);
        $url = str_replace("'", '&#039;', $url);
    }

    if ('/' === $url[0]) {
        // Root-relative URLs carry no scheme, so there is no protocol to vet.
        $good_protocol_url = $url;
    } else {
        if (!is_array($protocols)) {
            $protocols = hq_allowed_protocols();
        }
        $good_protocol_url = hq_kses_bad_protocol($url, $protocols);
        // Any change made by the protocol filter means the scheme was not allowed.
        if (strtolower($good_protocol_url) != strtolower($url)) {
            return '';
        }
    }

    /**
     * Filter a string cleaned and escaped for output as a URL.
     *
     * @since 0.0.1
     *
     * @param string $good_protocol_url The cleaned URL to be returned.
     * @param string $original_url      The URL prior to cleaning.
     * @param string $_context          If 'display', replace ampersands and single quotes only.
     */
    return apply_filters('clean_url', $good_protocol_url, $original_url, $_context);
}
/**
 * Validate a URL for safe use in the HTTP API.
 *
 * The URL is rejected when it uses a scheme other than http/https, carries
 * credentials, has no host, or has a host containing any of ':#?[]'. Hosts other
 * than the site's own 'home' host that resolve to a loopback, "this network"
 * or private IPv4 range are rejected unless the
 * 'http_request_host_is_external' filter allows them. An explicit port must be
 * 80, 443 or 8080, or match the port of the 'home' option on the same host.
 *
 * @since 0.0.1
 *
 * @param string $url
 * @return false|string URL or false on failure.
 */
function hq_http_validate_url($url)
{
    $original_url = $url;
    $url = hq_kses_bad_protocol($url, array('http', 'https'));
    if (!$url || strtolower($url) !== strtolower($original_url)) {
        return false;
    }

    $parsed_url = @parse_url($url);
    if (!$parsed_url || empty($parsed_url['host'])) {
        return false;
    }

    if (isset($parsed_url['user']) || isset($parsed_url['pass'])) {
        return false;
    }

    if (false !== strpbrk($parsed_url['host'], ':#?[]')) {
        return false;
    }

    // The 'home' option may be unset or unparsable; only compare hosts when one exists.
    $parsed_home = @parse_url(get_option('home'));
    $same_host = isset($parsed_home['host'])
        && strtolower($parsed_home['host']) === strtolower($parsed_url['host']);

    if (!$same_host) {
        $host = trim($parsed_url['host'], '.');
        if (preg_match('#^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$#', $host)) {
            $ip = $host;
        } else {
            $ip = gethostbyname($host);
            if ($ip === $host) {
                // Error condition for gethostbyname()
                $ip = false;
            }
        }

        if ($ip) {
            $parts = array_map('intval', explode('.', $ip));
            // 127/8 loopback, 10/8, 172.16/12 and 192.168/16 private ranges, plus
            // 0/8 which addresses the local machine on many systems.
            $is_local = 127 === $parts[0]
                || 10 === $parts[0]
                || 0 === $parts[0]
                || (172 === $parts[0] && 16 <= $parts[1] && 31 >= $parts[1])
                || (192 === $parts[0] && 168 === $parts[1]);

            if ($is_local) {
                // If host appears local, reject unless specifically allowed.
                /**
                 * Check if HTTP request is external or not.
                 *
                 * Allows to change and allow external requests for the HTTP request.
                 *
                 * @since 0.0.1
                 *
                 * @param bool   false Whether HTTP request is external or not.
                 * @param string $host IP of the requested host.
                 * @param string $url  URL of the requested host.
                 */
                if (!apply_filters('http_request_host_is_external', false, $host, $url)) {
                    return false;
                }
            }
        }
    }

    if (empty($parsed_url['port'])) {
        return $url;
    }

    $port = $parsed_url['port'];
    if (80 === $port || 443 === $port || 8080 === $port) {
        return $url;
    }

    // A non-standard port is acceptable only when it is the site's own host and port.
    if ($same_host && isset($parsed_home['port']) && $parsed_home['port'] === $port) {
        return $url;
    }

    return false;
}
/**
 * Builds an attribute list from string containing attributes.
 *
 * This function does a lot of work. It parses an attribute list into an array
 * with attribute data, and tries to do the right thing even if it gets weird
 * input. It will add quotes around attribute values that don't have any quotes
 * or apostrophes around them, to make it easier to produce HTML code that will
 * conform to W3C's HTML specification. It will also remove bad URL protocols
 * from attribute values. It also reduces duplicate attributes by using the
 * attribute defined first (foo='bar' foo='baz' will result in foo='bar').
 *
 * Each entry of the returned array is keyed by attribute name and holds
 * 'name', 'value', 'whole' (the rebuilt attribute text) and 'vless'
 * ('y' for a valueless attribute, 'n' otherwise).
 *
 * @since 0.0.1
 *
 * @param string $attr              Attribute list from HTML element to closing HTML element tag
 * @param array  $allowed_protocols Allowed protocols to keep
 * @return array List of attributes after parsing
 */
function hq_kses_hair($attr, $allowed_protocols)
{
    $attrarr = array();
    $mode = 0;
    $attrname = '';
    $uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');

    // Value syntaxes, tried in this order. Each entry pairs the pattern with the
    // quote character used when rebuilding the attribute; unquoted values get
    // double quotes added to conform to W3C's HTML spec.
    $value_syntaxes = array(
        array('%^"([^"]*)"(\\s+|/?$)%', '"'),
        array("%^'([^']*)'(\\s+|/?\$)%", "'"),
        array("%^([^\\s\"']+)(\\s+|/?\$)%", '"'),
    );

    // Loop through the whole attribute list
    while (strlen($attr) != 0) {
        $working = 0; // Was the last operation successful?

        switch ($mode) {
            case 0: // attribute name, href for instance
                if (preg_match('/^([-a-zA-Z:]+)/', $attr, $match)) {
                    $attrname = $match[1];
                    $working = $mode = 1;
                    $attr = (string) substr($attr, strlen($match[0]));
                }
                break;

            case 1: // equals sign or valueless ("selected")
                if (preg_match('/^\\s*=\\s*/', $attr, $match)) {
                    $working = 1;
                    $mode = 2;
                    $attr = (string) substr($attr, strlen($match[0]));
                    break;
                }

                if (preg_match('/^\\s+/', $attr, $match)) {
                    $working = 1;
                    $mode = 0;
                    if (!array_key_exists($attrname, $attrarr)) {
                        $attrarr[$attrname] = array('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
                    }
                    $attr = (string) substr($attr, strlen($match[0]));
                }
                break;

            case 2: // attribute value, a URL after href= for instance
                foreach ($value_syntaxes as $syntax) {
                    list($pattern, $quote) = $syntax;
                    if (!preg_match($pattern, $attr, $match)) {
                        continue;
                    }

                    $thisval = $match[1];
                    if (in_array(strtolower($attrname), $uris, true)) {
                        $thisval = hq_kses_bad_protocol($thisval, $allowed_protocols);
                    }

                    // The first definition of an attribute wins over later duplicates.
                    if (!array_key_exists($attrname, $attrarr)) {
                        $attrarr[$attrname] = array(
                            'name' => $attrname,
                            'value' => $thisval,
                            'whole' => $attrname . '=' . $quote . $thisval . $quote,
                            'vless' => 'n',
                        );
                    }

                    $working = 1;
                    $mode = 0;
                    // Consume exactly what was matched, including a trailing "/"
                    // accepted by the pattern, so nothing already parsed is left
                    // behind for the error handler to clean up.
                    $attr = (string) substr($attr, strlen($match[0]));
                    break;
                }
                break;
        } // switch

        if ($working == 0) {
            // Not well formed: let the error handler discard the offending text.
            $attr = hq_kses_html_error($attr);
            $mode = 0;
        }
    } // while

    if ($mode == 1 && !array_key_exists($attrname, $attrarr)) {
        // special case, for when the attribute list ends with a valueless
        // attribute like "selected"
        $attrarr[$attrname] = array('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
    }

    return $attrarr;
}