/**
 * Filter HTML/XHTML markup according to a configuration and an element spec.
 *
 * The function normalizes every recognised key of $C to a usable default,
 * exposes the resulting configuration and spec through $GLOBALS['C'] and
 * $GLOBALS['spec'] (presumably read by the hl_* callbacks, which are defined
 * outside this block), runs the filtering passes, and finally restores any
 * previous values of those two globals.
 *
 * Side effects visible in this block:
 *  - $GLOBALS['C'] and $GLOBALS['spec'] are overwritten for the duration of
 *    the call and restored only if they were set beforehand;
 *  - $GLOBALS['hl_Ids'] is created when 'unique_ids' is on and it is not set;
 *  - when 'show_setting' is a valid identifier, the final config/spec and
 *    microtime() are stored in the global variable of that name.
 *
 * @param string       $t    Markup to process.
 * @param array|mixed  $C    Configuration array; any non-array value means
 *                           "use defaults".
 * @param array|string $spec Element spec; non-array values are parsed by
 *                           hl_spec().
 *
 * @return string The processed markup.
 */
function htmLawed($t, $C = 1, $spec = array())
{
    $C = is_array($C) ? $C : array();

    // 'valid_xhtml' only supplies stricter defaults for three other options.
    if (!empty($C['valid_xhtml'])) {
        $C['elements'] = empty($C['elements']) ? '*-center-dir-font-isindex-menu-s-strike-u' : $C['elements'];
        $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 2;
        $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 2;
    }

    // config: elements (86 entries, including deprecated ones, embed and ruby)
    $e = array(
        'a' => 1, 'abbr' => 1, 'acronym' => 1, 'address' => 1, 'applet' => 1, 'area' => 1,
        'b' => 1, 'bdo' => 1, 'big' => 1, 'blockquote' => 1, 'br' => 1, 'button' => 1,
        'caption' => 1, 'center' => 1, 'cite' => 1, 'code' => 1, 'col' => 1, 'colgroup' => 1,
        'dd' => 1, 'del' => 1, 'dfn' => 1, 'dir' => 1, 'div' => 1, 'dl' => 1, 'dt' => 1,
        'em' => 1, 'embed' => 1, 'fieldset' => 1, 'font' => 1, 'form' => 1,
        'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hr' => 1,
        'i' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'ins' => 1, 'isindex' => 1,
        'kbd' => 1, 'label' => 1, 'legend' => 1, 'li' => 1, 'map' => 1, 'menu' => 1,
        'noscript' => 1, 'object' => 1, 'ol' => 1, 'optgroup' => 1, 'option' => 1,
        'p' => 1, 'param' => 1, 'pre' => 1, 'q' => 1,
        'rb' => 1, 'rbc' => 1, 'rp' => 1, 'rt' => 1, 'rtc' => 1, 'ruby' => 1,
        's' => 1, 'samp' => 1, 'script' => 1, 'select' => 1, 'small' => 1, 'span' => 1,
        'strike' => 1, 'strong' => 1, 'sub' => 1, 'sup' => 1,
        'table' => 1, 'tbody' => 1, 'td' => 1, 'textarea' => 1, 'tfoot' => 1, 'th' => 1,
        'thead' => 1, 'tr' => 1, 'tt' => 1, 'u' => 1, 'ul' => 1, 'var' => 1,
    );
    if (!empty($C['safe'])) {
        unset($e['applet'], $e['embed'], $e['iframe'], $e['object'], $e['script']);
    }

    // 'elements' is either '-*' (nothing), a plain comma list (only these),
    // or '*' followed by '+name' / '-name' adjustments to the default set.
    $x = !empty($C['elements']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['elements']) : '*';
    if ($x === '-*') {
        $e = array();
    } elseif (strpos($x, '*') === false) {
        $e = array_flip(explode(',', $x));
    } elseif (isset($x[1])) {
        preg_match_all('`(?:^|-|\\+)[^\\-+]+?(?=-|\\+|$)`', $x, $m);
        $m = $m[0];
        foreach ($m as $v) {
            if ($v[0] === '+') {
                $e[substr($v, 1)] = 1;
            }
            // A '-name' is ignored when the same list also contains '+name'.
            if ($v[0] === '-' && isset($e[$v = substr($v, 1)]) && !in_array('+' . $v, $m, true)) {
                unset($e[$v]);
            }
        }
    }
    $C['elements'] =& $e;

    // config: denied attributes ('safe' always adds the 'on*' group)
    $C['deny_attribute'] = !empty($C['deny_attribute'])
        ? array_flip(explode(',', str_replace(array("\n", "\r", "\t", ' '), '', $C['deny_attribute'] . (!empty($C['safe']) ? ',on*' : ''))))
        : (!empty($C['safe']) ? array('on*' => 1) : array());
    if (isset($C['deny_attribute']['on*'])) {
        unset($C['deny_attribute']['on*']);
        $C['deny_attribute'] += array(
            'onblur' => 1, 'onchange' => 1, 'onclick' => 1, 'ondblclick' => 1,
            'onfocus' => 1, 'onkeydown' => 1, 'onkeypress' => 1, 'onkeyup' => 1,
            'onmousedown' => 1, 'onmousemove' => 1, 'onmouseout' => 1, 'onmouseover' => 1,
            'onmouseup' => 1, 'onreset' => 1, 'onselect' => 1, 'onsubmit' => 1,
        );
    }

    // config: URL schemes, given as "attr: scheme, scheme; attr: scheme".
    // Non-string values fall back to the default instead of reaching strpos().
    $x = isset($C['schemes']) && is_string($C['schemes']) && isset($C['schemes'][2]) && strpos($C['schemes'], ':')
        ? strtolower($C['schemes'])
        : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https';
    $C['schemes'] = array();
    foreach (explode(';', str_replace(array(' ', "\t", "\r", "\n"), '', $x)) as $v) {
        // array_pad() keeps a segment without ':' from raising an undefined-offset warning.
        list($attr, $list) = array_pad(explode(':', $v, 2), 2, null);
        if ($list) {
            $C['schemes'][$attr] = array_flip(explode(',', $list));
        }
    }
    if (!isset($C['schemes']['*'])) {
        $C['schemes']['*'] = array('file' => 1, 'http' => 1, 'https' => 1);
    }
    if (!empty($C['safe']) && empty($C['schemes']['style'])) {
        $C['schemes']['style'] = array('nil' => 1);
    }

    // config: absolute/relative URL conversion needs a well-formed base URL
    $C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0;
    if (!isset($C['base_url']) or !preg_match('`^[a-zA-Z\\d.+\\-]+://[^/]+/(.+?/)?$`', $C['base_url'])) {
        $C['base_url'] = $C['abs_url'] = 0;
    }

    // config: everything else
    $C['and_mark'] = empty($C['and_mark']) ? 0 : 1;
    $C['anti_link_spam'] = isset($C['anti_link_spam'])
        && is_array($C['anti_link_spam'])
        && count($C['anti_link_spam']) == 2
        && (empty($C['anti_link_spam'][0]) or hl_regex($C['anti_link_spam'][0]))
        && (empty($C['anti_link_spam'][1]) or hl_regex($C['anti_link_spam'][1]))
        ? $C['anti_link_spam']
        : 0;
    $C['anti_mail_spam'] = isset($C['anti_mail_spam']) ? $C['anti_mail_spam'] : 0;
    $C['balance'] = isset($C['balance']) ? (bool) $C['balance'] : 1;
    $C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0);
    $C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char'];
    $C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0);
    $C['css_expression'] = empty($C['css_expression']) ? 0 : 1;
    $C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1;
    // Hooks must be names of existing functions; anything else disables the hook.
    $C['hook'] = !empty($C['hook']) && is_string($C['hook']) && function_exists($C['hook']) ? $C['hook'] : 0;
    $C['hook_tag'] = !empty($C['hook_tag']) && is_string($C['hook_tag']) && function_exists($C['hook_tag']) ? $C['hook_tag'] : 0;
    $C['keep_bad'] = isset($C['keep_bad']) ? $C['keep_bad'] : 6;
    $C['lc_std_val'] = isset($C['lc_std_val']) ? (bool) $C['lc_std_val'] : 1;
    $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 1;
    $C['named_entity'] = isset($C['named_entity']) ? (bool) $C['named_entity'] : 1;
    $C['no_deprecated_attr'] = isset($C['no_deprecated_attr']) ? $C['no_deprecated_attr'] : 1;
    $C['parent'] = isset($C['parent']) && is_string($C['parent']) && $C['parent'] !== '' ? strtolower($C['parent']) : 'body';
    $C['show_setting'] = !empty($C['show_setting']) ? $C['show_setting'] : 0;
    $C['tidy'] = empty($C['tidy']) ? 0 : $C['tidy'];
    $C['unique_ids'] = isset($C['unique_ids']) ? $C['unique_ids'] : 1;
    $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 0;

    // Publish config and spec as globals, remembering earlier values so a
    // nested or repeated call can put them back on exit.
    if (isset($GLOBALS['C'])) {
        $reC = $GLOBALS['C'];
    }
    $GLOBALS['C'] = $C;
    $spec = is_array($spec) ? $spec : hl_spec($spec);
    if (isset($GLOBALS['spec'])) {
        $reSpec = $GLOBALS['spec'];
    }
    $GLOBALS['spec'] = $spec;

    // chars: drop control characters other than tab, LF and CR
    $t = preg_replace('`[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]`', '', $t);
    if ($C['clean_ms_char']) {
        // Map DEL and the Windows-1252 bytes 0x80-0x9F to numeric entities.
        // Byte positions that Windows-1252 leaves undefined are removed.
        // Keys are written as escapes so they survive any file re-encoding.
        $x = array(
            "\x7f" => '', "\x80" => '&#8364;', "\x81" => '', "\x83" => '&#402;',
            "\x85" => '&#8230;', "\x86" => '&#8224;', "\x87" => '&#8225;', "\x88" => '&#710;',
            "\x89" => '&#8240;', "\x8a" => '&#352;', "\x8b" => '&#8249;', "\x8c" => '&#338;',
            "\x8d" => '', "\x8e" => '&#381;', "\x8f" => '', "\x90" => '',
            "\x95" => '&#8226;', "\x96" => '&#8211;', "\x97" => '&#8212;', "\x98" => '&#732;',
            "\x99" => '&#8482;', "\x9a" => '&#353;', "\x9b" => '&#8250;', "\x9c" => '&#339;',
            "\x9d" => '', "\x9e" => '&#382;', "\x9f" => '&#376;',
        );
        // Mode 1 keeps curly quotes as entities; any other truthy mode
        // downgrades them to plain ASCII quotes.
        $x = $x + ($C['clean_ms_char'] == 1
            ? array("\x82" => '&#8218;', "\x84" => '&#8222;', "\x91" => '&#8216;', "\x92" => '&#8217;', "\x93" => '&#8220;', "\x94" => '&#8221;')
            : array("\x82" => '\'', "\x84" => '"', "\x91" => '\'', "\x92" => '\'', "\x93" => '"', "\x94" => '"'));
        $t = strtr($t, $x);
    }

    // comments / CDATA sections
    if ($C['cdata'] or $C['comment']) {
        $t = preg_replace_callback('`<!(?:(?:--.*?--)|(?:\\[CDATA\\[.*?\\]\\]))>`sm', 'hl_cmtcd', $t);
    }

    // entities: escape every '&' first, then let hl_ent decide what to do
    // with each sequence that looks like a named or numeric entity.
    $t = preg_replace_callback(
        '`&amp;([A-Za-z][A-Za-z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`',
        'hl_ent',
        str_replace('&', '&amp;', $t)
    );

    // ID registry is global so uniqueness holds across multiple calls
    if ($C['unique_ids'] && !isset($GLOBALS['hl_Ids'])) {
        $GLOBALS['hl_Ids'] = array();
    }

    // custom hook
    if ($C['hook']) {
        $t = $C['hook']($t, $C, $spec);
    }

    // optional dump of the effective settings into a caller-named global
    if ($C['show_setting'] && preg_match('`^[a-z][a-z0-9_]*$`i', $C['show_setting'])) {
        $GLOBALS[$C['show_setting']] = array('config' => $C, 'spec' => $spec, 'time' => microtime());
    }

    // main: per-tag filtering, then balancing
    $t = preg_replace_callback('`<(?:(?:\\s|$)|(?:[^>]*(?:>|$)))|>`m', 'hl_tag', $t);
    $t = $C['balance'] ? hl_bal($t, $C['keep_bad'], $C['parent']) : $t;

    // Bytes \x01-\x05 are expected to be the placeholders hl_cmtcd leaves in
    // protected comment/CDATA sections (hl_cmtcd is outside this block --
    // verify there). \x01/\x02 are markers only; \x03-\x05 stand for & < >.
    if (($C['cdata'] or $C['comment']) && strpos($t, "\x01") !== false) {
        $t = str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $t);
    }

    $t = $C['tidy'] ? hl_tidy($t, $C['tidy'], $C['parent']) : $t;

    // clean-up: drop locals and restore any globals that were shadowed
    unset($C, $e);
    if (isset($reC)) {
        $GLOBALS['C'] = $reC;
    }
    if (isset($reSpec)) {
        $GLOBALS['spec'] = $reSpec;
    }

    return $t;
}
/**
 * Filter HTML/XHTML markup according to a configuration and an element spec.
 *
 * The function normalizes every recognised key of $cf to a usable default,
 * exposes the resulting configuration and spec through $GLOBALS['cf'] and
 * $GLOBALS['spec'] (presumably read by the hl_* callbacks, which are defined
 * outside this block), runs the filtering passes, and finally restores any
 * previous values of those two globals.
 *
 * Side effects visible in this block:
 *  - $GLOBALS['cf'] and $GLOBALS['spec'] are overwritten for the duration of
 *    the call and restored only if they were set beforehand;
 *  - $GLOBALS['hl_Ids'] is created when 'unique_ids' is on and it is not set.
 *
 * @param string       $in   Markup to process.
 * @param array|mixed  $cf   Configuration array; any non-array value means
 *                           "use defaults".
 * @param array|string $spec Element spec; non-array values are parsed by
 *                           hl_spec().
 *
 * @return string The processed markup.
 */
function htmLawed($in, $cf = 1, $spec = array())
{
    $cf = is_array($cf) ? $cf : array();

    // config: 'valid_xhtml' only supplies stricter defaults for three other options
    if (!empty($cf['valid_xhtml'])) {
        $cf['elements'] = !empty($cf['elements']) ? $cf['elements'] : '*-center-dir-font-isindex-menu-s-strike-u';
        $cf['make_tag_strict'] = isset($cf['make_tag_strict']) ? $cf['make_tag_strict'] : 2;
        $cf['xml:lang'] = isset($cf['xml:lang']) ? $cf['xml:lang'] : 2;
    }

    // config: elements (86 entries, including deprecated ones, embed and ruby)
    $ec = array(
        'a' => 1, 'abbr' => 1, 'acronym' => 1, 'address' => 1, 'applet' => 1, 'area' => 1,
        'b' => 1, 'bdo' => 1, 'big' => 1, 'blockquote' => 1, 'br' => 1, 'button' => 1,
        'caption' => 1, 'center' => 1, 'cite' => 1, 'code' => 1, 'col' => 1, 'colgroup' => 1,
        'dd' => 1, 'del' => 1, 'dfn' => 1, 'dir' => 1, 'div' => 1, 'dl' => 1, 'dt' => 1,
        'em' => 1, 'embed' => 1, 'fieldset' => 1, 'font' => 1, 'form' => 1,
        'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hr' => 1,
        'i' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'ins' => 1, 'isindex' => 1,
        'kbd' => 1, 'label' => 1, 'legend' => 1, 'li' => 1, 'map' => 1, 'menu' => 1,
        'noscript' => 1, 'object' => 1, 'ol' => 1, 'optgroup' => 1, 'option' => 1,
        'p' => 1, 'param' => 1, 'pre' => 1, 'q' => 1,
        'rb' => 1, 'rbc' => 1, 'rp' => 1, 'rt' => 1, 'rtc' => 1, 'ruby' => 1,
        's' => 1, 'samp' => 1, 'script' => 1, 'select' => 1, 'small' => 1, 'span' => 1,
        'strike' => 1, 'strong' => 1, 'sub' => 1, 'sup' => 1,
        'table' => 1, 'tbody' => 1, 'td' => 1, 'textarea' => 1, 'tfoot' => 1, 'th' => 1,
        'thead' => 1, 'tr' => 1, 'tt' => 1, 'u' => 1, 'ul' => 1, 'var' => 1,
    );

    // 'elements' is either '-*' (nothing), a plain comma list (only these),
    // or '*' followed by '+name' / '-name' adjustments applied in order.
    $tmp = !empty($cf['elements']) ? str_replace(array("\n", "\r", "\t", ' '), '', $cf['elements']) : '*';
    if ($tmp === '-*') {
        $ec = array();
    } elseif (strpos($tmp, '*') === false) {
        $ec = array_flip(explode(',', $tmp));
    } elseif (isset($tmp[1])) {
        preg_match_all('`(?:^|-|\\+)[^\\-+]+?(?=-|\\+|$)`', $tmp, $m);
        foreach ($m[0] as $v) {
            if ($v[0] === '+') {
                $ec[substr($v, 1)] = 1;
            }
            if ($v[0] === '-' && isset($ec[$v = substr($v, 1)])) {
                unset($ec[$v]);
            }
        }
    }
    $cf['elements'] =& $ec;

    // config: denied attribute set ('on*' expands to the listed event handlers)
    $cf['deny_attribute'] = !empty($cf['deny_attribute'])
        ? array_flip(explode(',', str_replace(array("\n", "\r", "\t", ' '), '', $cf['deny_attribute'])))
        : array();
    if (isset($cf['deny_attribute']['on*'])) {
        unset($cf['deny_attribute']['on*']);
        $cf['deny_attribute'] += array(
            'onblur' => 1, 'onchange' => 1, 'onclick' => 1, 'ondblclick' => 1,
            'onfocus' => 1, 'onkeydown' => 1, 'onkeypress' => 1, 'onkeyup' => 1,
            'onmousedown' => 1, 'onmousemove' => 1, 'onmouseout' => 1, 'onmouseover' => 1,
            'onmouseup' => 1, 'onreset' => 1, 'onselect' => 1, 'onsubmit' => 1,
        );
    }

    // config: URL schemes, given as "attr: scheme, scheme; attr: scheme".
    // Non-string values fall back to the default instead of reaching strpos().
    $tmp = isset($cf['schemes']) && is_string($cf['schemes']) && isset($cf['schemes'][2]) && strpos($cf['schemes'], ':')
        ? strtolower($cf['schemes'])
        : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https';
    $cf['schemes'] = array();
    foreach (explode(';', str_replace(array(' ', "\t", "\r", "\n"), '', $tmp)) as $v) {
        // array_pad() keeps a segment without ':' from raising an undefined-offset warning.
        list($attr, $list) = array_pad(explode(':', $v, 2), 2, null);
        if ($list) {
            $cf['schemes'][$attr] = array_flip(explode(',', $list));
        }
    }
    if (!isset($cf['schemes']['*'])) {
        $cf['schemes']['*'] = array('file' => 1, 'http' => 1, 'https' => 1);
    }

    // config: abs/rel URL conversion needs a well-formed base URL
    $cf['abs_url'] = isset($cf['abs_url']) ? $cf['abs_url'] : 0;
    if (!isset($cf['base_url']) or !preg_match('`^[a-zA-Z\\d.+\\-]+://[^/]+/(.+?/)?$`', $cf['base_url'])) {
        $cf['base_url'] = $cf['abs_url'] = 0;
    }

    // config: other
    $cf['anti_link_spam'] = isset($cf['anti_link_spam'])
        && is_array($cf['anti_link_spam'])
        && count($cf['anti_link_spam']) == 2
        && (empty($cf['anti_link_spam'][0]) or hl_regex($cf['anti_link_spam'][0]))
        && (empty($cf['anti_link_spam'][1]) or hl_regex($cf['anti_link_spam'][1]))
        ? $cf['anti_link_spam']
        : 0;
    $cf['anti_mail_spam'] = isset($cf['anti_mail_spam']) ? $cf['anti_mail_spam'] : 0;
    $cf['balance'] = isset($cf['balance']) ? (bool) $cf['balance'] : 1;
    $cf['cdata'] = isset($cf['cdata']) ? $cf['cdata'] : 3;
    $cf['clean_ms_char'] = isset($cf['clean_ms_char']) ? $cf['clean_ms_char'] : 0;
    $cf['comment'] = isset($cf['comment']) ? $cf['comment'] : 3;
    $cf['css_expression'] = isset($cf['css_expression']) ? (bool) $cf['css_expression'] : 0;
    $cf['hexdec_entity'] = isset($cf['hexdec_entity']) ? $cf['hexdec_entity'] : 1;
    // The hook must be the name of an existing function; anything else disables it.
    $cf['hook'] = !empty($cf['hook']) && is_string($cf['hook']) && function_exists($cf['hook']) ? $cf['hook'] : 0;
    $cf['keep_bad'] = isset($cf['keep_bad']) ? $cf['keep_bad'] : 6;
    $cf['lc_std_val'] = isset($cf['lc_std_val']) ? (bool) $cf['lc_std_val'] : 1;
    $cf['make_tag_strict'] = isset($cf['make_tag_strict']) ? $cf['make_tag_strict'] : 1;
    $cf['named_entity'] = isset($cf['named_entity']) ? (bool) $cf['named_entity'] : 1;
    $cf['no_deprecated_attr'] = isset($cf['no_deprecated_attr']) ? $cf['no_deprecated_attr'] : 1;
    $cf['parent'] = isset($cf['parent']) && is_string($cf['parent']) && $cf['parent'] !== '' ? strtolower($cf['parent']) : 'body';
    $cf['unique_ids'] = isset($cf['unique_ids']) ? $cf['unique_ids'] : 1;
    $cf['xml:lang'] = isset($cf['xml:lang']) ? $cf['xml:lang'] : 0;

    // Publish config and spec as globals, remembering earlier values so a
    // nested or repeated call can put them back on exit.
    if (isset($GLOBALS['cf'])) {
        $resetCf = $GLOBALS['cf'];
    }
    $GLOBALS['cf'] = $cf;

    // $spec
    $spec = is_array($spec) ? $spec : hl_spec($spec);
    if (isset($GLOBALS['spec'])) {
        $resetSpec = $GLOBALS['spec'];
    }
    $GLOBALS['spec'] = $spec;

    // chars: drop control characters other than tab, LF and CR
    $in = preg_replace('`[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]`', '', $in);
    if ($cf['clean_ms_char']) {
        // Map DEL and the Windows-1252 bytes 0x80-0x9F to numeric entities.
        // Byte positions that Windows-1252 leaves undefined are removed.
        // Keys are written as escapes so they survive any file re-encoding.
        $el = array(
            "\x7f" => '', "\x80" => '&#8364;', "\x81" => '', "\x83" => '&#402;',
            "\x85" => '&#8230;', "\x86" => '&#8224;', "\x87" => '&#8225;', "\x88" => '&#710;',
            "\x89" => '&#8240;', "\x8a" => '&#352;', "\x8b" => '&#8249;', "\x8c" => '&#338;',
            "\x8d" => '', "\x8e" => '&#381;', "\x8f" => '', "\x90" => '',
            "\x95" => '&#8226;', "\x96" => '&#8211;', "\x97" => '&#8212;', "\x98" => '&#732;',
            "\x99" => '&#8482;', "\x9a" => '&#353;', "\x9b" => '&#8250;', "\x9c" => '&#339;',
            "\x9d" => '', "\x9e" => '&#382;', "\x9f" => '&#376;',
        );
        // Mode 1 keeps curly quotes as entities; any other truthy mode
        // downgrades them to plain ASCII quotes.
        $el = $el + ($cf['clean_ms_char'] == 1
            ? array("\x82" => '&#8218;', "\x84" => '&#8222;', "\x91" => '&#8216;', "\x92" => '&#8217;', "\x93" => '&#8220;', "\x94" => '&#8221;')
            : array("\x82" => '\'', "\x84" => '"', "\x91" => '\'', "\x92" => '\'', "\x93" => '"', "\x94" => '"'));
        $in = strtr($in, $el);
    }

    // comments/CDATA secs
    if ($cf['cdata'] or $cf['comment']) {
        $in = preg_replace_callback('`<!(?:(?:--.*?--)|(?:\\[CDATA\\[.*?\\]\\]))>`sm', 'hl_cmtcd', $in);
    }

    // entities: escape every '&' first, then let hl_ent decide what to do
    // with each sequence that looks like a named or numeric entity.
    $in = preg_replace_callback(
        '`&amp;([A-Za-z][A-Za-z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`',
        'hl_ent',
        str_replace('&', '&amp;', $in)
    );

    // for unique-ID check; global so it holds across multiple calls
    if ($cf['unique_ids'] && !isset($GLOBALS['hl_Ids'])) {
        $GLOBALS['hl_Ids'] = array();
    }

    // custom hook
    if ($cf['hook']) {
        $in = $cf['hook']($in, $cf, $spec);
    }

    // main work: per-tag filtering, then balancing
    $in = preg_replace_callback('`<(?:(?:\\s|$)|(?:[^>]*(?:>|$)))|>`m', 'hl_tag', $in);
    $in = $cf['balance'] ? hl_bal($in, $cf['keep_bad'], $cf['parent']) : $in;

    // Bytes \x01-\x05 are expected to be the placeholders hl_cmtcd leaves in
    // protected comment/CDATA sections (hl_cmtcd is outside this block --
    // verify there). \x01/\x02 are markers only; \x03-\x05 stand for & < >.
    if (($cf['cdata'] or $cf['comment']) && strpos($in, "\x01") !== false) {
        $in = str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $in);
    }

    // clean-up & end: drop locals and restore any globals that were shadowed
    unset($cf, $ec);
    if (isset($resetCf)) {
        $GLOBALS['cf'] = $resetCf;
    }
    if (isset($resetSpec)) {
        $GLOBALS['spec'] = $resetSpec;
    }

    return $in;
}