Ejemplo n.º 1
0
/**
 * Builds an attribute list from string containing attributes.
 *
 * This function does a lot of work. It parses an attribute list into an array
 * with attribute data, and tries to do the right thing even if it gets weird
 * input. It will add quotes around attribute values that don't have any quotes
 * or apostrophes around them, to make it easier to produce HTML code that will
 * conform to W3C's HTML specification. It will also remove bad URL protocols
 * from attribute values. It also reduces duplicate attributes by using the
 * attribute defined first (foo='bar' foo='baz' will result in foo='bar').
 *
 * @since 1.0.0
 *
 * @param string $attr Attribute list from HTML element to closing HTML element tag
 * @param array $allowed_protocols Allowed protocols to keep
 * @return array List of attributes after parsing
 */
function asc_kses_hair($attr, $allowed_protocols)
{
    $attrarr = array();
    $mode = 0;
    $attrname = '';
    $uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');
    # Loop through the whole attribute list
    while (strlen($attr) != 0) {
        $working = 0;
        # Was the last operation successful?
        switch ($mode) {
            case 0:
                # attribute name, href for instance
                if (preg_match('/^([-a-zA-Z:]+)/', $attr, $match)) {
                    $attrname = $match[1];
                    $working = $mode = 1;
                    $attr = preg_replace('/^[-a-zA-Z:]+/', '', $attr);
                }
                break;
            case 1:
                # equals sign or valueless ("selected")
                if (preg_match('/^\\s*=\\s*/', $attr)) {
                    $working = 1;
                    $mode = 2;
                    $attr = preg_replace('/^\\s*=\\s*/', '', $attr);
                    break;
                }
                if (preg_match('/^\\s+/', $attr)) {
                    $working = 1;
                    $mode = 0;
                    if (false === array_key_exists($attrname, $attrarr)) {
                        $attrarr[$attrname] = array('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
                    }
                    $attr = preg_replace('/^\\s+/', '', $attr);
                }
                break;
            case 2:
                # attribute value, a URL after href= for instance
                if (preg_match('%^"([^"]*)"(\\s+|/?$)%', $attr, $match)) {
                    $thisval = $match[1];
                    if (in_array(strtolower($attrname), $uris)) {
                        $thisval = asc_kses_bad_protocol($thisval, $allowed_protocols);
                    }
                    if (false === array_key_exists($attrname, $attrarr)) {
                        $attrarr[$attrname] = array('name' => $attrname, 'value' => $thisval, 'whole' => "{$attrname}=\"{$thisval}\"", 'vless' => 'n');
                    }
                    $working = 1;
                    $mode = 0;
                    $attr = preg_replace('/^"[^"]*"(\\s+|$)/', '', $attr);
                    break;
                }
                if (preg_match("%^'([^']*)'(\\s+|/?\$)%", $attr, $match)) {
                    $thisval = $match[1];
                    if (in_array(strtolower($attrname), $uris)) {
                        $thisval = asc_kses_bad_protocol($thisval, $allowed_protocols);
                    }
                    if (false === array_key_exists($attrname, $attrarr)) {
                        $attrarr[$attrname] = array('name' => $attrname, 'value' => $thisval, 'whole' => "{$attrname}='{$thisval}'", 'vless' => 'n');
                    }
                    $working = 1;
                    $mode = 0;
                    $attr = preg_replace("/^'[^']*'(\\s+|\$)/", '', $attr);
                    break;
                }
                if (preg_match("%^([^\\s\"']+)(\\s+|/?\$)%", $attr, $match)) {
                    $thisval = $match[1];
                    if (in_array(strtolower($attrname), $uris)) {
                        $thisval = asc_kses_bad_protocol($thisval, $allowed_protocols);
                    }
                    if (false === array_key_exists($attrname, $attrarr)) {
                        $attrarr[$attrname] = array('name' => $attrname, 'value' => $thisval, 'whole' => "{$attrname}=\"{$thisval}\"", 'vless' => 'n');
                    }
                    # We add quotes to conform to W3C's HTML spec.
                    $working = 1;
                    $mode = 0;
                    $attr = preg_replace("%^[^\\s\"']+(\\s+|\$)%", '', $attr);
                }
                break;
        }
        # switch
        if ($working == 0) {
            $attr = asc_kses_html_error($attr);
            $mode = 0;
        }
    }
    # while
    if ($mode == 1 && false === array_key_exists($attrname, $attrarr)) {
        # special case, for when the attribute list ends with a valueless
        # attribute like "selected"
        $attrarr[$attrname] = array('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
    }
    return $attrarr;
}
Ejemplo n.º 2
0
/**
 * Checks and cleans a URL.
 *
 * A number of characters are removed from the URL. If the URL is for displaying
 * (the default behaviour) ampersands are also replaced. The 'clean_url' filter
 * is applied to the returned cleaned URL.
 *
 * @since 2.8.0
 * @uses asc_kses_bad_protocol() To only permit protocols in the URL set
 *		via $protocols or the common ones set in the function.
 *
 * @param string $url The URL to be cleaned.
 * @param array $protocols Optional. An array of acceptable protocols.
 *		Defaults to 'http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp', 'svn' if not set.
 * @param string $_context Private. Use esc_url_raw() for database usage.
 * @return string The cleaned $url after the 'clean_url' filter is applied.
 */
function esc_url($url, $protocols = null, $_context = 'display')
{
    $original_url = $url;
    if ('' == $url) {
        return $url;
    }
    $url = preg_replace('|[^a-z0-9-~+_.?#=!&;,/:%@$\\|*\'()\\x80-\\xff]|i', '', $url);
    $strip = array('%0d', '%0a', '%0D', '%0A');
    $url = _deep_replace($strip, $url);
    $url = str_replace(';//', '://', $url);
    /* If the URL doesn't appear to contain a scheme, we
     * presume it needs http:// appended (unless a relative
     * link starting with /, # or ? or a php file).
     */
    if (strpos($url, ':') === false && !in_array($url[0], array('/', '#', '?')) && !preg_match('/^[a-z0-9-]+?\\.php/i', $url)) {
        $url = 'http://' . $url;
    }
    // Replace ampersands and single quotes only when displaying.
    if ('display' == $_context) {
        $url = asc_kses_normalize_entities($url);
        $url = str_replace('&', '&', $url);
        $url = str_replace("'", ''', $url);
    }
    if ('/' === $url[0]) {
        $good_protocol_url = $url;
    } else {
        if (!is_array($protocols)) {
            $protocols = asc_allowed_protocols();
        }
        $good_protocol_url = asc_kses_bad_protocol($url, $protocols);
        if (strtolower($good_protocol_url) != strtolower($url)) {
            return '';
        }
    }
    /**
     * Filter a string cleaned and escaped for output as a URL.
     *
     * @since 2.3.0
     *
     * @param string $good_protocol_url The cleaned URL to be returned.
     * @param string $original_url      The URL prior to cleaning.
     * @param string $_context          If 'display', replace ampersands and single quotes only.
     */
    return apply_filters('clean_url', $good_protocol_url, $original_url, $_context);
}