/**
 * Test {@link XHTML_Validator::check()} with every optional permission switched off.
 *
 * Two inputs are checked against a validator built for the "posting" context:
 * - an unknown tag must be reported as an illegal tag;
 * - character data inside an <img> tag must be reported as not allowed.
 *
 * NOTE: the historical note on this test says that assignment by "& new" is required
 * for PHP4 (see http://de3.php.net/manual/en/function.xml-set-object.php#46107) and that,
 * alternatively, using one variable per check or unsetting the previous one may work.
 * The code below uses a plain "new" and re-uses a single variable for both validators.
 */
function test_htmlchecker_check()
{
	global $Messages;

	// Validator configuration shared by both checks: nothing optional is allowed.
	$ctx = 'posting';
	$css_ok = false;
	$iframes_ok = false;
	$js_ok = false;
	$objects_ok = false;

	// Check 1: a tag the validator does not know about.
	$Validator = new XHTML_Validator($ctx, $css_ok, $iframes_ok, $js_ok, $objects_ok);
	$Validator->check('<moo>foo</moo>');

	$reported = $Messages->messages['error'][0];
	$wanted = T_('Illegal tag') . ': <code>moo</code>';
	$this->assertEqual($reported, $wanted);

	// Start the second check with an empty message log, so index 0 is its own first error.
	$Messages->clear();

	// Check 2: raw character data inside <img>.
	$Validator = new XHTML_Validator($ctx, $css_ok, $iframes_ok, $js_ok, $objects_ok);
	$Validator->check('<img>foo</img>');

	$reported = $Messages->messages['error'][0];
	$wanted = sprintf(T_('Tag <%s> may not contain raw character data'), '<code>img</code>');
	$this->assertEqual($reported, $wanted);
}
/**
 * Check raw HTML input for different levels of sanity including:
 * - XHTML validation
 * - Javascript injection
 * - antispam
 *
 * Also cleans up the content on some levels:
 * - escaping ampersands that do not start a character or entity reference
 * - trimming
 * - balancing tags
 *
 * WARNING: this does *NOT* (necessarily) make the HTML code safe.
 * It only checks on it and produces error messages.
 * It is NOT (necessarily) safe to use the output.
 *
 * @param string The content to format
 * @param string Context: 'posting', 'xmlrpc_posting', 'commenting', 'general_array_params'
 *               or 'head_extension'. Any other value ends in debug_die().
 * @param User User (used for "posting" and "xmlrpc_posting" context). Default: $current_User
 * @param string Encoding (used for XHTML_Validator only!). It is handed to the validator as is;
 *               presumably the validator falls back to $io_charset for NULL -- not visible here.
 * @return boolean|string false if any check failed, the cleaned up content otherwise
 */
function check_html_sanity($content, $context = 'posting', $User = NULL, $encoding = NULL)
{
	global $use_balanceTags, $admin_url;
	global $use_xhtmlvalidation_for_comments, $comment_allowed_tags, $comments_allow_css_tweaks;
	global $Messages;

	if (empty($User))
	{
		/**
		 * @var User
		 */
		global $current_User;
		$User = $current_User;
		// NOTE(review): if $current_User is empty as well, the posting contexts below
		// will fail on $User->get_Group() -- confirm callers always have a user there.
	}

	// Add error messages
	$verbose = true;

	switch ($context)
	{
		case 'posting':
		case 'xmlrpc_posting':
			// Everything is driven by the permissions of the user's group:
			$Group = $User->get_Group();
			if ($context == 'posting')
			{
				$xhtmlvalidation = $Group->perm_xhtmlvalidation == 'always';
			}
			else
			{
				$xhtmlvalidation = $Group->perm_xhtmlvalidation_xmlrpc == 'always';
			}
			$allow_css_tweaks = $Group->perm_xhtml_css_tweaks;
			$allow_javascript = $Group->perm_xhtml_javascript;
			$allow_iframes = $Group->perm_xhtml_iframes;
			$allow_objects = $Group->perm_xhtml_objects;
			$bypass_antispam = $Group->perm_bypass_antispam;
			break;

		case 'commenting':
			$xhtmlvalidation = $use_xhtmlvalidation_for_comments;
			$allow_css_tweaks = $comments_allow_css_tweaks;
			$allow_javascript = false;
			$allow_iframes = false;
			$allow_objects = false;
			// fp> I don't know if it makes sense to bypass antispam in commenting context if the user has that kind of permissions.
			// If so, then we also need to bypass in several other places.
			$bypass_antispam = false;
			break;

		case 'general_array_params':
			$xhtmlvalidation = false;
			$allow_css_tweaks = true;
			$allow_javascript = false;
			$allow_iframes = false;
			$allow_objects = false;
			$bypass_antispam = false;
			// Do not add error messages in this context
			$verbose = false;
			break;

		case 'head_extension':
			$xhtmlvalidation = true;
			// We disable everything else, because the XHTML validator will set explicit rules for the 'head_extension' context
			$allow_css_tweaks = false;
			$allow_javascript = false;
			$allow_iframes = false;
			$allow_objects = false;
			$bypass_antispam = false;
			// Do not add error messages in this context
			$verbose = false;
			break;

		default:
			debug_die('unknown context: ' . $context);
	}

	// Replace any & that is not a character or entity reference with &amp;
	// (the replacement must be the entity itself; replacing "&" with "&" would be a no-op)
	$content = preg_replace('/&(?!#[0-9]+;|#x[0-9a-fA-F]+;|[a-zA-Z_:][a-zA-Z0-9._:-]*;)/', '&amp;', $content);

	// ANTISPAM check:
	// $block receives whatever antispam_check() returns; it is only used when that value is truthy.
	$error = !$bypass_antispam && ($block = antispam_check($content));
	if ($error && $verbose)
	{	// Add error message
		if ($context == 'commenting')
		{	// Generic message that does not include the matched word
			$errmsg = T_('Illegal content found (spam?).');
		}
		elseif ($context == 'xmlrpc_posting')
		{	// The matched word is inserted unescaped in this context
			$errmsg = sprintf(T_('Illegal content found: blacklisted word "%s".'), $block);
		}
		else
		{
			$errmsg = sprintf(T_('Illegal content found: blacklisted word «%s».'), htmlspecialchars($block));
		}
		$Messages->add($errmsg, 'error');
	}

	$content = trim($content);

	if ($use_balanceTags && $context != 'general_array_params')
	{	// Auto close open tags:
		// Auto close only if the content is NOT from a general array param where open and closed html tags may appear separately
		$content = balance_tags($content);
	}

	if ($xhtmlvalidation)
	{	// We want to validate XHTML:
		load_class('xhtml_validator/_xhtml_validator.class.php', 'XHTML_Validator');

		$XHTML_Validator = new XHTML_Validator($context, $allow_css_tweaks, $allow_iframes, $allow_javascript, $allow_objects, $encoding);

		if (!$XHTML_Validator->check($content))
		{
			$error = true;
		}
	}
	else
	{	// We do not WANT to validate XHTML, fall back to basic security checking:
		// This is only as strong as its regexps can parse xhtml. This is significantly inferior to the XHTML checker above.
		// The only advantage of this checker is that it can check for a little security without requiring VALID XHTML.

		if ($context == 'commenting')
		{	// DEPRECATED but still...
			$content = strip_tags($content, $comment_allowed_tags);
		}

		// Security checking (done on a copy, the returned content is not altered by it):
		$check = $content;

		// Open comments or '<![CDATA[' are dangerous
		$check = str_replace('<!', '<', $check);

		// # # and ~ ~ are delimiters
		// i modifier at the end means caseless
		// x modifier means whitespace inside the pattern is ignored
		//
		// The tag patterns below use "/?" so that BOTH opening and closing tags match.
		// (A pattern with "//?" requires at least one slash and therefore only sees closing tags,
		// which lets unclosed or void tags such as <embed ...> slip through.)

		// CHECK Styling restrictions:
		$css_tweaks_error = !$allow_css_tweaks
			&& preg_match('#\\s((style|class|id)\\s*=)#i', $check, $matches);
		if ($css_tweaks_error && $verbose)
		{
			$Messages->add(T_('Illegal CSS markup found: ') . htmlspecialchars($matches[1]), 'error');
		}

		// CHECK JAVASCRIPT: script tags, on* event handler attributes, script-like URL schemes
		$javascript_error = !$allow_javascript
			&& (preg_match('~( < \\s* /? \\s* (script|noscript) )~xi', $check, $matches)
				|| preg_match('#\\s((on[a-z]+)\\s*=)#i', $check, $matches)
				|| preg_match('#=["\'\\s]*((javascript|vbscript|about):)#i', $check, $matches));
		if ($javascript_error && $verbose)
		{
			$Messages->add(T_('Illegal javascript markup found: ') . htmlspecialchars($matches[1]), 'error');
		}

		// CHECK IFRAMES:
		$iframe_error = !$allow_iframes
			&& preg_match('~( < \\s* /? \\s* (frame|iframe) )~xi', $check, $matches);
		if ($iframe_error && $verbose)
		{
			$Messages->add(T_('Illegal frame markup found: ') . htmlspecialchars($matches[1]), 'error');
		}

		// CHECK OBJECTS:
		$object_error = !$allow_objects
			&& preg_match('~( < \\s* /? \\s* (applet|object|param|embed) )~xi', $check, $matches);
		if ($object_error && $verbose)
		{
			$Messages->add(T_('Illegal object markup found: ') . htmlspecialchars($matches[1]), 'error');
		}

		// Set the final error value based on all of the results
		$error = $error || $css_tweaks_error || $javascript_error || $iframe_error || $object_error;
	}

	if ($error)
	{
		// $Group is only set in the posting contexts, so the hint below is limited to those.
		if ($verbose && !empty($User) && !empty($Group) && $User->check_perm('users', 'edit', false))
		{
			$Messages->add(sprintf(T_('(Note: To get rid of the above validation warnings, you can deactivate unwanted validation rules in your <a %s>Group settings</a>.)'), 'href="' . $admin_url . '?ctrl=groups&grp_ID=' . $Group->ID . '"'), 'error');
		}
		return false;
	}

	// Return sanitized content
	return $content;
}