Esempio n. 1
0
/**
 * Filter a piece of HTML/text according to a configuration array.
 *
 * The function normalises the configuration (filling in defaults), publishes
 * it and the element spec through $GLOBALS['C'] / $GLOBALS['spec'] so that the
 * callback helpers (hl_cmtcd, hl_ent, hl_tag, ...) can read them, runs the
 * filtering pipeline, and finally puts back any previous values of those two
 * globals. If the globals were not set before the call they stay set to this
 * call's values afterwards.
 *
 * Other side effects visible here: $GLOBALS['hl_Ids'] is created when
 * 'unique_ids' is on, and $GLOBALS[<show_setting>] receives a dump of the
 * effective configuration when 'show_setting' is a valid identifier.
 *
 * @param string $t    Text to process.
 * @param mixed  $C    Configuration array; any non-array value means "all defaults".
 * @param mixed  $spec Element/attribute spec; an array is used as-is, anything
 *                     else is handed to hl_spec() for parsing.
 *
 * @return string The processed text.
 */
function htmLawed($t, $C = 1, $spec = array())
{
    $C = is_array($C) ? $C : array();
    $blanks = array("\n", "\r", "\t", ' ');

    // 'valid_xhtml' is a preset: it only fills in options the caller left unset.
    if (!empty($C['valid_xhtml'])) {
        $C['elements'] = empty($C['elements']) ? '*-center-dir-font-isindex-menu-s-strike-u' : $C['elements'];
        $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 2;
        $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 2;
    }

    // config: permitted elements (full set: 86 incl. deprecated + embed + ruby)
    $e = array('a' => 1, 'abbr' => 1, 'acronym' => 1, 'address' => 1, 'applet' => 1, 'area' => 1, 'b' => 1, 'bdo' => 1, 'big' => 1, 'blockquote' => 1, 'br' => 1, 'button' => 1, 'caption' => 1, 'center' => 1, 'cite' => 1, 'code' => 1, 'col' => 1, 'colgroup' => 1, 'dd' => 1, 'del' => 1, 'dfn' => 1, 'dir' => 1, 'div' => 1, 'dl' => 1, 'dt' => 1, 'em' => 1, 'embed' => 1, 'fieldset' => 1, 'font' => 1, 'form' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hr' => 1, 'i' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'ins' => 1, 'isindex' => 1, 'kbd' => 1, 'label' => 1, 'legend' => 1, 'li' => 1, 'map' => 1, 'menu' => 1, 'noscript' => 1, 'object' => 1, 'ol' => 1, 'optgroup' => 1, 'option' => 1, 'p' => 1, 'param' => 1, 'pre' => 1, 'q' => 1, 'rb' => 1, 'rbc' => 1, 'rp' => 1, 'rt' => 1, 'rtc' => 1, 'ruby' => 1, 's' => 1, 'samp' => 1, 'script' => 1, 'select' => 1, 'small' => 1, 'span' => 1, 'strike' => 1, 'strong' => 1, 'sub' => 1, 'sup' => 1, 'table' => 1, 'tbody' => 1, 'td' => 1, 'textarea' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1, 'tt' => 1, 'u' => 1, 'ul' => 1, 'var' => 1);
    if (!empty($C['safe'])) {
        unset($e['applet'], $e['embed'], $e['iframe'], $e['object'], $e['script']);
    }
    $x = !empty($C['elements']) ? str_replace($blanks, '', $C['elements']) : '*';
    if ($x == '-*') {
        // Nothing is permitted.
        $e = array();
    } elseif (strpos($x, '*') === false) {
        // Plain comma-separated whitelist replaces the default set.
        $e = array_flip(explode(',', $x));
    } elseif (isset($x[1])) {
        // '*' followed by +name / -name adjustments to the default set.
        preg_match_all('`(?:^|-|\\+)[^\\-+]+?(?=-|\\+|$)`', $x, $m);
        $m = $m[0];
        foreach ($m as $v) {
            if ($v[0] == '+') {
                $e[substr($v, 1)] = 1;
            }
            // An explicit '+name' anywhere in the list wins over '-name'.
            if ($v[0] == '-' && isset($e[$v = substr($v, 1)]) && !in_array('+' . $v, $m)) {
                unset($e[$v]);
            }
        }
    }
    $C['elements'] =& $e;

    // config: denied attributes ('safe' always adds the on* event handlers)
    $C['deny_attribute'] = !empty($C['deny_attribute'])
        ? array_flip(explode(',', str_replace($blanks, '', $C['deny_attribute'] . (!empty($C['safe']) ? ',on*' : ''))))
        : (!empty($C['safe']) ? array('on*' => 1) : array());
    if (isset($C['deny_attribute']['on*'])) {
        // Expand the 'on*' shorthand into the concrete event attributes.
        unset($C['deny_attribute']['on*']);
        $C['deny_attribute'] += array('onblur' => 1, 'onchange' => 1, 'onclick' => 1, 'ondblclick' => 1, 'onfocus' => 1, 'onkeydown' => 1, 'onkeypress' => 1, 'onkeyup' => 1, 'onmousedown' => 1, 'onmousemove' => 1, 'onmouseout' => 1, 'onmouseover' => 1, 'onmouseup' => 1, 'onreset' => 1, 'onselect' => 1, 'onsubmit' => 1);
    }

    // config: URL schemes, written as "attr: scheme, scheme; attr: scheme"
    $x = isset($C['schemes'][2]) && strpos($C['schemes'], ':') ? strtolower($C['schemes']) : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https';
    $C['schemes'] = array();
    foreach (explode(';', str_replace($blanks, '', $x)) as $v) {
        // Pad so that a segment without ':' (e.g. after a trailing ';') is
        // skipped quietly instead of raising an undefined-offset error.
        list($attr, $allowed) = array_pad(explode(':', $v, 2), 2, null);
        if ($allowed) {
            $C['schemes'][$attr] = array_flip(explode(',', $allowed));
        }
    }
    if (!isset($C['schemes']['*'])) {
        $C['schemes']['*'] = array('file' => 1, 'http' => 1, 'https' => 1);
    }
    if (!empty($C['safe']) && empty($C['schemes']['style'])) {
        $C['schemes']['style'] = array('nil' => 1);
    }

    // config: absolute/relative URL handling needs a well-formed base URL
    $C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0;
    if (!isset($C['base_url']) or !preg_match('`^[a-zA-Z\\d.+\\-]+://[^/]+/(.+?/)?$`', $C['base_url'])) {
        $C['base_url'] = $C['abs_url'] = 0;
    }

    // config: remaining options and their defaults
    $C['and_mark'] = empty($C['and_mark']) ? 0 : 1;
    $C['anti_link_spam'] = isset($C['anti_link_spam']) && is_array($C['anti_link_spam']) && count($C['anti_link_spam']) == 2 && (empty($C['anti_link_spam'][0]) or hl_regex($C['anti_link_spam'][0])) && (empty($C['anti_link_spam'][1]) or hl_regex($C['anti_link_spam'][1])) ? $C['anti_link_spam'] : 0;
    $C['anti_mail_spam'] = isset($C['anti_mail_spam']) ? $C['anti_mail_spam'] : 0;
    $C['balance'] = isset($C['balance']) ? (bool) $C['balance'] : 1;
    $C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0);
    $C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char'];
    $C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0);
    $C['css_expression'] = empty($C['css_expression']) ? 0 : 1;
    $C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1;
    $C['hook'] = !empty($C['hook']) && function_exists($C['hook']) ? $C['hook'] : 0;
    $C['hook_tag'] = !empty($C['hook_tag']) && function_exists($C['hook_tag']) ? $C['hook_tag'] : 0;
    $C['keep_bad'] = isset($C['keep_bad']) ? $C['keep_bad'] : 6;
    $C['lc_std_val'] = isset($C['lc_std_val']) ? (bool) $C['lc_std_val'] : 1;
    $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 1;
    $C['named_entity'] = isset($C['named_entity']) ? (bool) $C['named_entity'] : 1;
    $C['no_deprecated_attr'] = isset($C['no_deprecated_attr']) ? $C['no_deprecated_attr'] : 1;
    $C['parent'] = isset($C['parent'][0]) ? strtolower($C['parent']) : 'body';
    $C['show_setting'] = !empty($C['show_setting']) ? $C['show_setting'] : 0;
    $C['tidy'] = empty($C['tidy']) ? 0 : $C['tidy'];
    $C['unique_ids'] = isset($C['unique_ids']) ? $C['unique_ids'] : 1;
    $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 0;

    // Publish config and spec for the callbacks, remembering earlier values
    // so they can be put back before returning.
    if (isset($GLOBALS['C'])) {
        $reC = $GLOBALS['C'];
    }
    $GLOBALS['C'] = $C;
    $spec = is_array($spec) ? $spec : hl_spec($spec);
    if (isset($GLOBALS['spec'])) {
        $reSpec = $GLOBALS['spec'];
    }
    $GLOBALS['spec'] = $spec;

    // Drop control characters other than tab, LF and CR.
    $t = preg_replace('`[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]`', '', $t);

    // Windows-1252 specific bytes (0x80-0x9f) and DEL. Keys are written as
    // byte escapes so the table survives any re-encoding of this source file;
    // values are numeric entities so the result does not depend on the
    // charset of $t. Bytes without a Windows-1252 meaning are removed.
    if ($C['clean_ms_char']) {
        $x = array(
            "\x7f" => '', "\x80" => '&#8364;', "\x81" => '', "\x83" => '&#402;',
            "\x85" => '&#8230;', "\x86" => '&#8224;', "\x87" => '&#8225;', "\x88" => '&#710;',
            "\x89" => '&#8240;', "\x8a" => '&#352;', "\x8b" => '&#8249;', "\x8c" => '&#338;',
            "\x8d" => '', "\x8e" => '&#381;', "\x8f" => '', "\x90" => '',
            "\x95" => '&#8226;', "\x96" => '&#8211;', "\x97" => '&#8212;', "\x98" => '&#732;',
            "\x99" => '&#8482;', "\x9a" => '&#353;', "\x9b" => '&#8250;', "\x9c" => '&#339;',
            "\x9d" => '', "\x9e" => '&#382;', "\x9f" => '&#376;',
        );
        // Mode 1 keeps typographic quotes (as entities); any other truthy
        // mode flattens them to plain ASCII quotes.
        $x = $x + ($C['clean_ms_char'] == 1
            ? array("\x82" => '&#8218;', "\x84" => '&#8222;', "\x91" => '&#8216;', "\x92" => '&#8217;', "\x93" => '&#8220;', "\x94" => '&#8221;')
            : array("\x82" => '\'', "\x84" => '"', "\x91" => '\'', "\x92" => '\'', "\x93" => '"', "\x94" => '"'));
        $t = strtr($t, $x);
    }

    // Comments and CDATA sections are handled by hl_cmtcd before tag parsing.
    if ($C['cdata'] or $C['comment']) {
        $t = preg_replace_callback('`<!(?:(?:--.*?--)|(?:\\[CDATA\\[.*?\\]\\]))>`sm', 'hl_cmtcd', $t);
    }

    // Escape every '&', then let hl_ent decide about things that look like entities.
    $t = preg_replace_callback('`&amp;([A-Za-z][A-Za-z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`', 'hl_ent', str_replace('&', '&amp;', $t));

    // The ID registry is global so that it persists across multiple calls.
    if ($C['unique_ids'] && !isset($GLOBALS['hl_Ids'])) {
        $GLOBALS['hl_Ids'] = array();
    }
    if ($C['hook']) {
        $t = $C['hook']($t, $C, $spec);
    }
    if ($C['show_setting'] && preg_match('`^[a-z][a-z0-9_]*$`i', $C['show_setting'])) {
        $GLOBALS[$C['show_setting']] = array('config' => $C, 'spec' => $spec, 'time' => microtime());
    }

    // main
    $t = preg_replace_callback('`<(?:(?:\\s|$)|(?:[^>]*(?:>|$)))|>`m', 'hl_tag', $t);
    $t = $C['balance'] ? hl_bal($t, $C['keep_bad'], $C['parent']) : $t;
    // Bytes 0x01-0x05 are placeholders (presumably inserted by hl_cmtcd, which
    // is not visible here): 0x01/0x02 are dropped, 0x03-0x05 become & < >.
    if (($C['cdata'] or $C['comment']) && strpos($t, "\x01") !== false) {
        $t = str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $t);
    }
    $t = $C['tidy'] ? hl_tidy($t, $C['tidy'], $C['parent']) : $t;

    // clean-up: restore whatever the globals held before this call
    unset($C, $e);
    if (isset($reC)) {
        $GLOBALS['C'] = $reC;
    }
    if (isset($reSpec)) {
        $GLOBALS['spec'] = $reSpec;
    }
    return $t;
}
/**
 * Filter a piece of HTML/text according to a configuration array.
 *
 * The configuration is normalised (defaults filled in) and, together with the
 * element spec, exposed through $GLOBALS['cf'] / $GLOBALS['spec'] for the
 * callback helpers (hl_cmtcd, hl_ent, hl_tag, ...). Previous values of those
 * globals are put back before returning; if they were not set before the call
 * they stay set to this call's values. $GLOBALS['hl_Ids'] is created when
 * 'unique_ids' is on.
 *
 * @param string $in   Text to process.
 * @param mixed  $cf   Configuration array; any non-array value means "all defaults".
 * @param mixed  $spec Element/attribute spec; an array is used as-is, anything
 *                     else is handed to hl_spec() for parsing.
 *
 * @return string The processed text.
 */
function htmLawed($in, $cf = 1, $spec = array())
{
    $cf = is_array($cf) ? $cf : array();
    $blanks = array("\n", "\r", "\t", ' ');

    // config: 'valid_xhtml' is a preset that only fills in unset options
    if (!empty($cf['valid_xhtml'])) {
        $cf['elements'] = !empty($cf['elements']) ? $cf['elements'] : '*-center-dir-font-isindex-menu-s-strike-u';
        $cf['make_tag_strict'] = isset($cf['make_tag_strict']) ? $cf['make_tag_strict'] : 2;
        $cf['xml:lang'] = isset($cf['xml:lang']) ? $cf['xml:lang'] : 2;
    }

    // config: elements (86 elements - all incl. deprecated + embed + ruby set)
    $ec = array('a' => 1, 'abbr' => 1, 'acronym' => 1, 'address' => 1, 'applet' => 1, 'area' => 1, 'b' => 1, 'bdo' => 1, 'big' => 1, 'blockquote' => 1, 'br' => 1, 'button' => 1, 'caption' => 1, 'center' => 1, 'cite' => 1, 'code' => 1, 'col' => 1, 'colgroup' => 1, 'dd' => 1, 'del' => 1, 'dfn' => 1, 'dir' => 1, 'div' => 1, 'dl' => 1, 'dt' => 1, 'em' => 1, 'embed' => 1, 'fieldset' => 1, 'font' => 1, 'form' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hr' => 1, 'i' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'ins' => 1, 'isindex' => 1, 'kbd' => 1, 'label' => 1, 'legend' => 1, 'li' => 1, 'map' => 1, 'menu' => 1, 'noscript' => 1, 'object' => 1, 'ol' => 1, 'optgroup' => 1, 'option' => 1, 'p' => 1, 'param' => 1, 'pre' => 1, 'q' => 1, 'rb' => 1, 'rbc' => 1, 'rp' => 1, 'rt' => 1, 'rtc' => 1, 'ruby' => 1, 's' => 1, 'samp' => 1, 'script' => 1, 'select' => 1, 'small' => 1, 'span' => 1, 'strike' => 1, 'strong' => 1, 'sub' => 1, 'sup' => 1, 'table' => 1, 'tbody' => 1, 'td' => 1, 'textarea' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1, 'tt' => 1, 'u' => 1, 'ul' => 1, 'var' => 1);
    $tmp = !empty($cf['elements']) ? str_replace($blanks, '', $cf['elements']) : '*';
    if ($tmp == '-*') {
        // Nothing is permitted.
        $ec = array();
    } elseif (strpos($tmp, '*') === false) {
        // Plain comma-separated whitelist replaces the default set.
        $ec = array_flip(explode(',', $tmp));
    } elseif (isset($tmp[1])) {
        // '*' followed by +name / -name adjustments, applied in order.
        preg_match_all('`(?:^|-|\\+)[^\\-+]+?(?=-|\\+|$)`', $tmp, $m, PREG_SET_ORDER);
        foreach ($m as $v) {
            $v = $v[0];
            if ($v[0] == '+') {
                $ec[substr($v, 1)] = 1;
            }
            if ($v[0] == '-' && isset($ec[$v = substr($v, 1)])) {
                unset($ec[$v]);
            }
        }
    }
    $cf['elements'] =& $ec;

    // config: denied attribute set
    $cf['deny_attribute'] = !empty($cf['deny_attribute']) ? array_flip(explode(',', str_replace($blanks, '', $cf['deny_attribute']))) : array();
    if (isset($cf['deny_attribute']['on*'])) {
        // Expand the 'on*' shorthand into the concrete event attributes.
        unset($cf['deny_attribute']['on*']);
        $cf['deny_attribute'] += array('onblur' => 1, 'onchange' => 1, 'onclick' => 1, 'ondblclick' => 1, 'onfocus' => 1, 'onkeydown' => 1, 'onkeypress' => 1, 'onkeyup' => 1, 'onmousedown' => 1, 'onmousemove' => 1, 'onmouseout' => 1, 'onmouseover' => 1, 'onmouseup' => 1, 'onreset' => 1, 'onselect' => 1, 'onsubmit' => 1);
    }

    // config: scheme, written as "attr: scheme, scheme; attr: scheme"
    $tmp = isset($cf['schemes'][2]) && strpos($cf['schemes'], ':') ? strtolower($cf['schemes']) : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https';
    $cf['schemes'] = array();
    foreach (explode(';', str_replace($blanks, '', $tmp)) as $v) {
        // Pad so that a segment without ':' (e.g. after a trailing ';') is
        // skipped quietly instead of raising an undefined-offset error.
        list($attr, $allowed) = array_pad(explode(':', $v, 2), 2, null);
        if ($allowed) {
            $cf['schemes'][$attr] = array_flip(explode(',', $allowed));
        }
    }
    if (!isset($cf['schemes']['*'])) {
        $cf['schemes']['*'] = array('file' => 1, 'http' => 1, 'https' => 1);
    }

    // config: abs/rel URL - only usable with a well-formed base URL
    $cf['abs_url'] = isset($cf['abs_url']) ? $cf['abs_url'] : 0;
    if (!isset($cf['base_url']) or !preg_match('`^[a-zA-Z\\d.+\\-]+://[^/]+/(.+?/)?$`', $cf['base_url'])) {
        $cf['base_url'] = $cf['abs_url'] = 0;
    }

    // config: other
    $cf['anti_link_spam'] = isset($cf['anti_link_spam']) && is_array($cf['anti_link_spam']) && count($cf['anti_link_spam']) == 2 && (empty($cf['anti_link_spam'][0]) or hl_regex($cf['anti_link_spam'][0])) && (empty($cf['anti_link_spam'][1]) or hl_regex($cf['anti_link_spam'][1])) ? $cf['anti_link_spam'] : 0;
    $cf['anti_mail_spam'] = isset($cf['anti_mail_spam']) ? $cf['anti_mail_spam'] : 0;
    $cf['balance'] = isset($cf['balance']) ? (bool) $cf['balance'] : 1;
    $cf['cdata'] = isset($cf['cdata']) ? $cf['cdata'] : 3;
    $cf['clean_ms_char'] = isset($cf['clean_ms_char']) ? $cf['clean_ms_char'] : 0;
    $cf['comment'] = isset($cf['comment']) ? $cf['comment'] : 3;
    $cf['css_expression'] = isset($cf['css_expression']) ? (bool) $cf['css_expression'] : 0;
    $cf['hexdec_entity'] = isset($cf['hexdec_entity']) ? $cf['hexdec_entity'] : 1;
    $cf['hook'] = !empty($cf['hook']) && function_exists($cf['hook']) ? $cf['hook'] : 0;
    $cf['keep_bad'] = isset($cf['keep_bad']) ? $cf['keep_bad'] : 6;
    $cf['lc_std_val'] = isset($cf['lc_std_val']) ? (bool) $cf['lc_std_val'] : 1;
    $cf['make_tag_strict'] = isset($cf['make_tag_strict']) ? $cf['make_tag_strict'] : 1;
    $cf['named_entity'] = isset($cf['named_entity']) ? (bool) $cf['named_entity'] : 1;
    $cf['no_deprecated_attr'] = isset($cf['no_deprecated_attr']) ? $cf['no_deprecated_attr'] : 1;
    $cf['parent'] = isset($cf['parent'][0]) ? strtolower($cf['parent']) : 'body';
    $cf['unique_ids'] = isset($cf['unique_ids']) ? $cf['unique_ids'] : 1;
    $cf['xml:lang'] = isset($cf['xml:lang']) ? $cf['xml:lang'] : 0;

    // Publish config and spec for the callbacks, remembering earlier values
    // so they can be put back before returning.
    if (isset($GLOBALS['cf'])) {
        $resetCf = $GLOBALS['cf'];
    }
    $GLOBALS['cf'] = $cf;
    $spec = is_array($spec) ? $spec : hl_spec($spec);
    if (isset($GLOBALS['spec'])) {
        $resetSpec = $GLOBALS['spec'];
    }
    $GLOBALS['spec'] = $spec;

    // chars: drop control characters other than tab, LF and CR
    $in = preg_replace('`[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]`', '', $in);

    // Windows-1252 specific bytes (0x80-0x9f) and DEL. Keys are written as
    // byte escapes so the table survives any re-encoding of this source file.
    // Bytes without a Windows-1252 meaning are removed.
    if ($cf['clean_ms_char']) {
        $el = array(
            "\x7f" => '', "\x80" => '&#8364;', "\x81" => '', "\x83" => '&#402;',
            "\x85" => '&#8230;', "\x86" => '&#8224;', "\x87" => '&#8225;', "\x88" => '&#710;',
            "\x89" => '&#8240;', "\x8a" => '&#352;', "\x8b" => '&#8249;', "\x8c" => '&#338;',
            "\x8d" => '', "\x8e" => '&#381;', "\x8f" => '', "\x90" => '',
            "\x95" => '&#8226;', "\x96" => '&#8211;', "\x97" => '&#8212;', "\x98" => '&#732;',
            "\x99" => '&#8482;', "\x9a" => '&#353;', "\x9b" => '&#8250;', "\x9c" => '&#339;',
            "\x9d" => '', "\x9e" => '&#382;', "\x9f" => '&#376;',
        );
        // Mode 1 keeps typographic quotes (as entities); any other truthy
        // mode flattens them to plain ASCII quotes.
        $el = $el + ($cf['clean_ms_char'] == 1
            ? array("\x82" => '&#8218;', "\x84" => '&#8222;', "\x91" => '&#8216;', "\x92" => '&#8217;', "\x93" => '&#8220;', "\x94" => '&#8221;')
            : array("\x82" => '\'', "\x84" => '"', "\x91" => '\'', "\x92" => '\'', "\x93" => '"', "\x94" => '"'));
        $in = strtr($in, $el);
    }

    // comments/CDATA secs are handled by hl_cmtcd before tag parsing
    if ($cf['cdata'] or $cf['comment']) {
        $in = preg_replace_callback('`<!(?:(?:--.*?--)|(?:\\[CDATA\\[.*?\\]\\]))>`sm', 'hl_cmtcd', $in);
    }

    // entities: escape every '&', then let hl_ent decide about entity look-alikes
    $in = preg_replace_callback('`&amp;([A-Za-z][A-Za-z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`', 'hl_ent', str_replace('&', '&amp;', $in));

    // for unique-ID check; global for multiple calls
    if ($cf['unique_ids'] && !isset($GLOBALS['hl_Ids'])) {
        $GLOBALS['hl_Ids'] = array();
    }

    // custom hook
    if ($cf['hook']) {
        $in = $cf['hook']($in, $cf, $spec);
    }

    // main work
    $in = preg_replace_callback('`<(?:(?:\\s|$)|(?:[^>]*(?:>|$)))|>`m', 'hl_tag', $in);
    $in = $cf['balance'] ? hl_bal($in, $cf['keep_bad'], $cf['parent']) : $in;
    // Bytes 0x01-0x05 are placeholders (presumably inserted by hl_cmtcd, which
    // is not visible here): 0x01/0x02 are dropped, 0x03-0x05 become & < >.
    if (($cf['cdata'] or $cf['comment']) && strpos($in, "\x01") !== false) {
        $in = str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $in);
    }

    // clean-up & end: restore whatever the globals held before this call
    unset($cf, $ec);
    if (isset($resetCf)) {
        $GLOBALS['cf'] = $resetCf;
    }
    if (isset($resetSpec)) {
        $GLOBALS['spec'] = $resetSpec;
    }
    return $in;
}