/**
  * Checks and cleans a URL.
  *
  * A number of characters are removed from the URL. If the URL is for displaying
  * (the default behaviour) amperstands are also replaced. The 'esc_url' filter
  * is applied to the returned cleaned URL.
  *
  * @since 1.2.0
  * @uses nxt_kses_bad_protocol() To only permit protocols in the URL set
  *		via $protocols or the common ones set in the function.
  *
  * @param string $url The URL to be cleaned.
  * @param array $protocols Optional. An array of acceptable protocols.
  *		Defaults to 'http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp', 'svn' if not set.
  * @param string $context Optional. How the URL will be used. Default is 'display'.
  * @return string The cleaned $url after the 'cleaned_url' filter is applied.
  */
 function clean_url($url, $protocols = null, $context = 'display')
 {
     $original_url = $url;
     if ('' == $url) {
         return $url;
     }
     $url = preg_replace('|[^a-z0-9-~+_.?#=!&;,/:%@$\\|*\'()\\x80-\\xff]|i', '', $url);
     $strip = array('%0d', '%0a', '%0D', '%0A');
     $url = _deep_replace($strip, $url);
     $url = str_replace(';//', '://', $url);
     /* If the URL doesn't appear to contain a scheme, we
      * presume it needs http:// appended (unless a relative
      * link starting with / or a php file).
      */
     if (strpos($url, ':') === false && substr($url, 0, 1) != '/' && substr($url, 0, 1) != '#' && !preg_match('/^[a-z0-9-]+?\\.php/i', $url)) {
         $url = 'http://' . $url;
     }
     // Replace ampersands and single quotes only when displaying.
     if ('display' == $context) {
         $url = nxt_kses_normalize_entities($url);
         $url = str_replace('&', '&', $url);
         $url = str_replace("'", ''', $url);
     }
     if (!is_array($protocols)) {
         $protocols = array('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp', 'svn');
     }
     if (nxt_kses_bad_protocol($url, $protocols) != $url) {
         return '';
     }
     return apply_filters('clean_url', $url, $original_url, $context);
 }
/**
 * Builds an attribute list from string containing attributes.
 *
 * This function does a lot of work. It parses an attribute list into an array
 * with attribute data, and tries to do the right thing even if it gets weird
 * input. It will add quotes around attribute values that don't have any quotes
 * or apostrophes around them, to make it easier to produce HTML code that will
 * conform to W3C's HTML specification. It will also remove bad URL protocols
 * from attribute values.  It also reduces duplicate attributes by using the
 * attribute defined first (foo='bar' foo='baz' will result in foo='bar').
 *
 * @since 1.0.0
 *
 * @param string $attr Attribute list from HTML element to closing HTML element tag
 * @param array $allowed_protocols Allowed protocols to keep
 * @return array List of attributes after parsing
 */
function nxt_kses_hair($attr, $allowed_protocols)
{
    $attrarr = array();
    $mode = 0;
    $attrname = '';
    $uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');
    # Loop through the whole attribute list
    while (strlen($attr) != 0) {
        $working = 0;
        # Was the last operation successful?
        switch ($mode) {
            case 0:
                # attribute name, href for instance
                if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
                    $attrname = $match[1];
                    $working = $mode = 1;
                    $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
                }
                break;
            case 1:
                # equals sign or valueless ("selected")
                if (preg_match('/^\\s*=\\s*/', $attr)) {
                    $working = 1;
                    $mode = 2;
                    $attr = preg_replace('/^\\s*=\\s*/', '', $attr);
                    break;
                }
                if (preg_match('/^\\s+/', $attr)) {
                    $working = 1;
                    $mode = 0;
                    if (FALSE === array_key_exists($attrname, $attrarr)) {
                        $attrarr[$attrname] = array('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
                    }
                    $attr = preg_replace('/^\\s+/', '', $attr);
                }
                break;
            case 2:
                # attribute value, a URL after href= for instance
                if (preg_match('%^"([^"]*)"(\\s+|/?$)%', $attr, $match)) {
                    $thisval = $match[1];
                    if (in_array(strtolower($attrname), $uris)) {
                        $thisval = nxt_kses_bad_protocol($thisval, $allowed_protocols);
                    }
                    if (FALSE === array_key_exists($attrname, $attrarr)) {
                        $attrarr[$attrname] = array('name' => $attrname, 'value' => $thisval, 'whole' => "{$attrname}=\"{$thisval}\"", 'vless' => 'n');
                    }
                    $working = 1;
                    $mode = 0;
                    $attr = preg_replace('/^"[^"]*"(\\s+|$)/', '', $attr);
                    break;
                }
                if (preg_match("%^'([^']*)'(\\s+|/?\$)%", $attr, $match)) {
                    $thisval = $match[1];
                    if (in_array(strtolower($attrname), $uris)) {
                        $thisval = nxt_kses_bad_protocol($thisval, $allowed_protocols);
                    }
                    if (FALSE === array_key_exists($attrname, $attrarr)) {
                        $attrarr[$attrname] = array('name' => $attrname, 'value' => $thisval, 'whole' => "{$attrname}='{$thisval}'", 'vless' => 'n');
                    }
                    $working = 1;
                    $mode = 0;
                    $attr = preg_replace("/^'[^']*'(\\s+|\$)/", '', $attr);
                    break;
                }
                if (preg_match("%^([^\\s\"']+)(\\s+|/?\$)%", $attr, $match)) {
                    $thisval = $match[1];
                    if (in_array(strtolower($attrname), $uris)) {
                        $thisval = nxt_kses_bad_protocol($thisval, $allowed_protocols);
                    }
                    if (FALSE === array_key_exists($attrname, $attrarr)) {
                        $attrarr[$attrname] = array('name' => $attrname, 'value' => $thisval, 'whole' => "{$attrname}=\"{$thisval}\"", 'vless' => 'n');
                    }
                    # We add quotes to conform to W3C's HTML spec.
                    $working = 1;
                    $mode = 0;
                    $attr = preg_replace("%^[^\\s\"']+(\\s+|\$)%", '', $attr);
                }
                break;
        }
        # switch
        if ($working == 0) {
            $attr = nxt_kses_html_error($attr);
            $mode = 0;
        }
    }
    # while
    if ($mode == 1 && FALSE === array_key_exists($attrname, $attrarr)) {
        # special case, for when the attribute list ends with a valueless
        # attribute like "selected"
        $attrarr[$attrname] = array('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
    }
    return $attrarr;
}
Example #3
0
/**
 * Checks and cleans a URL.
 *
 * A number of characters are removed from the URL. If the URL is for displaying
 * (the default behaviour) ampersands are also replaced. The 'clean_url' filter
 * is applied to the returned cleaned URL.
 *
 * @since 2.8.0
 * @uses nxt_kses_bad_protocol() To only permit protocols in the URL set
 *		via $protocols or the common ones set in the function.
 *
 * @param string $url The URL to be cleaned.
 * @param array $protocols Optional. An array of acceptable protocols.
 *		Defaults to 'http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp', 'svn' if not set.
 * @param string $_context Private. Use esc_url_raw() for database usage.
 * @return string The cleaned $url after the 'clean_url' filter is applied.
 */
function esc_url($url, $protocols = null, $_context = 'display')
{
    $original_url = $url;
    if ('' == $url) {
        return $url;
    }
    $url = preg_replace('|[^a-z0-9-~+_.?#=!&;,/:%@$\\|*\'()\\x80-\\xff]|i', '', $url);
    $strip = array('%0d', '%0a', '%0D', '%0A');
    $url = _deep_replace($strip, $url);
    $url = str_replace(';//', '://', $url);
    /* If the URL doesn't appear to contain a scheme, we
     * presume it needs http:// appended (unless a relative
     * link starting with /, # or ? or a php file).
     */
    if (strpos($url, ':') === false && !in_array($url[0], array('/', '#', '?')) && !preg_match('/^[a-z0-9-]+?\\.php/i', $url)) {
        $url = 'http://' . $url;
    }
    // Replace ampersands and single quotes only when displaying.
    if ('display' == $_context) {
        $url = nxt_kses_normalize_entities($url);
        $url = str_replace('&', '&', $url);
        $url = str_replace("'", ''', $url);
    }
    if (!is_array($protocols)) {
        $protocols = nxt_allowed_protocols();
    }
    if (nxt_kses_bad_protocol($url, $protocols) != $url) {
        return '';
    }
    return apply_filters('clean_url', $url, $original_url, $_context);
}