/**
  * Test {@link SafeHtmlChecker::check()}.
  * NOTE: assignment by "& new" is required for PHP4! See also http://de3.php.net/manual/en/function.xml-set-object.php#46107
  *       Alternatively, multiple vars for each test may work, or unsetting the last one..
  */
 function test_htmlchecker_check()
 {
     global $Messages;

     // Validator configuration shared by both scenarios: the 'posting' context
     // with every optional permission (CSS tweaks, iframes, javascript, objects) switched off.
     $validator_context = 'posting';
     $css_tweaks_allowed = false;
     $iframes_allowed = false;
     $javascript_allowed = false;
     $objects_allowed = false;

     // Scenario 1: a tag the validator does not know must be reported as illegal.
     $Validator = new XHTML_Validator($validator_context, $css_tweaks_allowed, $iframes_allowed, $javascript_allowed, $objects_allowed);
     $Validator->check('<moo>foo</moo>');
     $first_error = $GLOBALS['Messages']->messages['error'][0];
     $this->assertEqual($first_error, T_('Illegal tag') . ': <code>moo</code>');

     // Start from an empty message list so index 0 refers to the next scenario's error.
     $Messages->clear();

     // Scenario 2: <img> must not contain raw character data.
     $Validator = new XHTML_Validator($validator_context, $css_tweaks_allowed, $iframes_allowed, $javascript_allowed, $objects_allowed);
     $Validator->check('<img>foo</img>');
     $first_error = $GLOBALS['Messages']->messages['error'][0];
     $this->assertEqual($first_error, sprintf(T_('Tag &lt;%s&gt; may not contain raw character data'), '<code>img</code>'));
 }
Example #2
0
/**
 * Check raw HTML input for different levels of sanity including:
 * - XHTML validation
 * - Javascript injection
 * - antispam
 *
 * Also cleans up the content on some levels:
 * - escaping stray ampersands
 * - trimming
 * - balancing tags
 *
 * WARNING: this does *NOT* (necessarily) make the HTML code safe.
 * It only checks on it and produces error messages.
 * It is NOT (necessarily) safe to use the output.
 *
 * Supported contexts: 'posting', 'xmlrpc_posting', 'commenting', 'general_array_params'
 * and 'head_extension'. Any other value ends in debug_die().
 * No error messages are added to $Messages by this function in the
 * 'general_array_params' and 'head_extension' contexts.
 *
 * @param string $content The content to format
 * @param string $context Checking context (see list above); selects which rules apply
 * @param User $User User (used for "posting" and "xmlrpc_posting" context). Default: $current_User
 * @param string $encoding Encoding (used for XHTML_Validator only!); defaults to $io_charset
 * @return boolean|string false if any check failed, the cleaned-up content otherwise
 */
function check_html_sanity($content, $context = 'posting', $User = NULL, $encoding = NULL)
{
    global $use_balanceTags, $admin_url;
    global $io_charset, $use_xhtmlvalidation_for_comments, $comment_allowed_tags, $comments_allow_css_tweaks;
    global $Messages;
    if (empty($User)) {
        /**
         * @var User
         */
        global $current_User;
        $User = $current_User;
    }
    // Add error messages
    $verbose = true;
    switch ($context) {
        case 'posting':
        case 'xmlrpc_posting':
            // Rules come from the permissions of the user's group
            $Group = $User->get_Group();
            if ($context == 'posting') {
                $xhtmlvalidation = $Group->perm_xhtmlvalidation == 'always';
            } else {
                $xhtmlvalidation = $Group->perm_xhtmlvalidation_xmlrpc == 'always';
            }
            $allow_css_tweaks = $Group->perm_xhtml_css_tweaks;
            $allow_javascript = $Group->perm_xhtml_javascript;
            $allow_iframes = $Group->perm_xhtml_iframes;
            $allow_objects = $Group->perm_xhtml_objects;
            $bypass_antispam = $Group->perm_bypass_antispam;
            break;
        case 'commenting':
            $xhtmlvalidation = $use_xhtmlvalidation_for_comments;
            $allow_css_tweaks = $comments_allow_css_tweaks;
            $allow_javascript = false;
            $allow_iframes = false;
            $allow_objects = false;
            // fp> I don't know if it makes sense to bypass antispam in commenting context if the user has that kind of permissions.
            // If so, then we also need to bypass in several other places.
            $bypass_antispam = false;
            break;
        case 'general_array_params':
            $xhtmlvalidation = false;
            $allow_css_tweaks = true;
            $allow_javascript = false;
            $allow_iframes = false;
            $allow_objects = false;
            $bypass_antispam = false;
            // Do not add error messages in this context
            $verbose = false;
            break;
        case 'head_extension':
            $xhtmlvalidation = true;
            // We disable everything else, because the XHTML validator will set explicit rules for the 'head_extension' context
            $allow_css_tweaks = false;
            $allow_javascript = false;
            $allow_iframes = false;
            $allow_objects = false;
            $bypass_antispam = false;
            // Do not add error messages in this context
            $verbose = false;
            break;
        default:
            debug_die('unknown context: ' . $context);
    }
    $error = false;
    // Replace any & that is not a character or entity reference with &amp;
    $content = preg_replace('/&(?!#[0-9]+;|#x[0-9a-fA-F]+;|[a-zA-Z_:][a-zA-Z0-9._:-]*;)/', '&amp;', $content);
    // ANTISPAM check ($block receives whatever antispam_check() reports as the offending item):
    $error = !$bypass_antispam && ($block = antispam_check($content));
    if ($error && $verbose) {
        // Add error message
        if ($context == 'xmlrpc_posting') {
            // The blacklisted word is inserted unescaped here
            // NOTE(review): presumably because XML-RPC messages are not rendered as HTML - confirm
            $errmsg = sprintf(T_('Illegal content found: blacklisted word "%s".'), $block);
        } elseif ($context == 'commenting') {
            // Do not reveal the blacklisted word to commenters
            $errmsg = T_('Illegal content found (spam?).');
        } else {
            $errmsg = sprintf(T_('Illegal content found: blacklisted word &laquo;%s&raquo;.'), htmlspecialchars($block));
        }
        $Messages->add($errmsg, 'error');
    }
    $content = trim($content);
    if ($use_balanceTags && $context != 'general_array_params') {
        // Auto close open tags:
        // Auto close only if the content is NOT from a general array param where open and closed html tags may appear separately
        $content = balance_tags($content);
    }
    if ($xhtmlvalidation) {
        // We want to validate XHTML:
        load_class('xhtml_validator/_xhtml_validator.class.php', 'XHTML_Validator');
        $XHTML_Validator = new XHTML_Validator($context, $allow_css_tweaks, $allow_iframes, $allow_javascript, $allow_objects, $encoding);
        if (!$XHTML_Validator->check($content)) {
            $error = true;
        }
    } else {
        // We do not WANT to validate XHTML, fall back to basic security checking:
        // This is only as strong as its regexps can parse xhtml. This is significantly inferior to the XHTML checker above.
        // The only advantage of this checker is that it can check for a little security without requiring VALID XHTML.
        if ($context == 'commenting') {
            // DEPRECATED but still...
            $content = strip_tags($content, $comment_allowed_tags);
        }
        // Security checking is done on a copy; the returned content is not altered by it:
        $check = $content;
        // Open comments or '<![CDATA[' are dangerous
        $check = str_replace('<!', '<', $check);
        // Tag patterns below use ~ as delimiter with the x (whitespace ignored) and i (caseless) modifiers.
        // "/*" makes the slash optional so that OPENING tags (<script) are caught as well as
        // closing ones (</script); requiring a slash would let an unclosed opening tag through.
        // CHECK Styling restrictions:
        $css_tweaks_error = !$allow_css_tweaks && preg_match('#\\s((style|class|id)\\s*=)#i', $check, $matches);
        if ($css_tweaks_error && $verbose) {
            $Messages->add(T_('Illegal CSS markup found: ') . htmlspecialchars($matches[1]), 'error');
        }
        // CHECK JAVASCRIPT: script tags, on* event handler attributes and script-like URL schemes
        $javascript_error = !$allow_javascript && (preg_match('~( < \\s* /* \\s* (script|noscript) )~xi', $check, $matches) || preg_match('#\\s((on[a-z]+)\\s*=)#i', $check, $matches) || preg_match('#=["\'\\s]*((javascript|vbscript|about):)#i', $check, $matches));
        if ($javascript_error && $verbose) {
            $Messages->add(T_('Illegal javascript markup found: ') . htmlspecialchars($matches[1]), 'error');
        }
        // CHECK IFRAMES:
        $iframe_error = !$allow_iframes && preg_match('~( < \\s* /* \\s* (frame|iframe) )~xi', $check, $matches);
        if ($iframe_error && $verbose) {
            $Messages->add(T_('Illegal frame markup found: ') . htmlspecialchars($matches[1]), 'error');
        }
        // CHECK OBJECTS:
        $object_error = !$allow_objects && preg_match('~( < \\s* /* \\s* (applet|object|param|embed) )~xi', $check, $matches);
        if ($object_error && $verbose) {
            $Messages->add(T_('Illegal object markup found: ') . htmlspecialchars($matches[1]), 'error');
        }
        // Set the final error value based on all of the results (including the antispam result from above)
        $error = $error || $css_tweaks_error || $javascript_error || $iframe_error || $object_error;
    }
    if ($error) {
        // $Group is only set in the posting contexts, so the hint is only shown there,
        // and only to users who are allowed to edit users.
        if ($verbose && !empty($User) && !empty($Group) && $User->check_perm('users', 'edit', false)) {
            $Messages->add(sprintf(T_('(Note: To get rid of the above validation warnings, you can deactivate unwanted validation rules in your <a %s>Group settings</a>.)'), 'href="' . $admin_url . '?ctrl=groups&amp;grp_ID=' . $Group->ID . '"'), 'error');
        }
        return false;
    }
    // Return sanitized content
    return $content;
}