/**
  * Test {@link balance_tags()}
  *
  * The expectations below cover: already balanced markup (returned as is),
  * markup with a missing closing tag (expected to be closed) and markup with
  * an orphan closing tag (expected to be removed), each with an HTML comment
  * and with plain text as payload.
  */
 function test_balanceTags()
 {
     // Each entry is array( input, expected result ); entries are checked in order.
     $expectations = array(
         array('<div><!-- comment --></div>', '<div><!-- comment --></div>'),
         array('<div><!-- comment -->', '<div><!-- comment --></div>'),
         array('<!-- comment --></div>', '<!-- comment -->'),
         array('<div> text </div>', '<div> text </div>'),
         array('<div> text ', '<div> text </div>'),
         array(' text </div>', ' text '),
     );
     foreach ($expectations as $expectation) {
         $input = $expectation[0];
         $expected = $expectation[1];
         $this->assertEqual(balance_tags($input), $expected);
     }
 }
Example #2
0
/**
 * Crop string to maxwords preserving tags.
 *
 * A word gap is a run of whitespace found outside of "<...>" markup;
 * whitespace inside a tag is ignored. Scanning stops once $maxwords gaps
 * have been consumed, the string is cut there and passed through
 * balance_tags().
 *
 * @param string Text (may contain tags) to crop
 * @param int Maximum number words
 * @param mixed array Optional parameters:
 *   - 'continued_link': URL used to wrap the continued text in a link (default '')
 *   - 'continued_text': text appended after a cropped string (default '&hellip;')
 *   - 'always_continue': append the continued text even when nothing was cropped (default false)
 * @return string
 */
function strmaxwords($str, $maxwords = 50, $params = array())
{
    $params = array_merge(array('continued_link' => '', 'continued_text' => '&hellip;', 'always_continue' => false), $params);
    $open = false;
    $have_seen_non_whitespace = false;
    // NOTE(review): the loop indexes $str by byte offset ($str[$i]) while the
    // bound and the final cut use evo_strlen()/evo_substr(). If those helpers
    // count multibyte characters (not visible here), the offsets can disagree
    // for non-ASCII input - TODO confirm.
    $end = evo_strlen($str);
    for ($i = 0; $i < $end; $i++) {
        $char = $str[$i];
        // An explicit if/elseif chain is used on purpose: the former
        // "case ctype_space($char):" inside switch($char) compared $char loosely
        // with a boolean, so the character '0' ('0' == false) was counted as
        // whitespace and digits could end words.
        if ($char === '<') {
            // start of a tag
            $open = true;
        } elseif ($char === '>') {
            // end of a tag
            $open = false;
        } elseif (ctype_space($char)) {
            if (!$open) {
                // it's a word gap
                // Eat any other whitespace.
                while (isset($str[$i + 1]) && ctype_space($str[$i + 1])) {
                    $i++;
                }
                if (isset($str[$i + 1]) && $have_seen_non_whitespace) {
                    // only decrement words, if there's a non-space char left.
                    --$maxwords;
                }
            }
        } else {
            $have_seen_non_whitespace = true;
        }
        if ($maxwords < 1) {
            break;
        }
    }
    // restrict content to required number of words and balance the tags out
    $str = balance_tags(evo_substr($str, 0, $i));
    // $maxwords only reaches 0 when the word budget was used up, i.e. the text was cropped
    if ($params['always_continue'] || $maxwords == false) {
        // we want a continued text
        if ($params['continued_link']) {
            // we have a url
            $str .= ' <a href="' . $params['continued_link'] . '">' . $params['continued_text'] . '</a>';
        } else {
            // we don't have a url
            $str .= ' ' . $params['continued_text'];
        }
    }
    // remove empty tags
    // NOTE(review): the capture group ([\s]+?) only matches whitespace, so this
    // pattern does not match ordinary empty elements such as "<p></p>".
    // Left unchanged because widening it could also strip elements like
    // "<td></td>" or "<iframe ...></iframe>" - confirm the intent before changing.
    $str = preg_replace('~<([\\s]+?)[^>]*?></\\1>~is', '', $str);
    return $str;
}
Example #3
0
/**
 * Cut an excerpt out of a text at word (space) boundaries.
 *
 * Texts of at most $chars characters (as counted by mb_strlen()) are returned
 * unchanged. Longer texts are cut to a $chars wide window, trimmed back to the
 * last space inside that window and returned with ' …' appended.
 *
 * @param string $text Text to shorten
 * @param int $chars Width of the window, in characters
 * @param bool $balance_tags Pass the excerpt through balance_tags() (default true)
 * @param bool $middle Take the window from the middle of the text instead of
 *                     its start; the leading partial word is dropped as well
 * @return string
 */
function utf8_excerpt($text, $chars, $balance_tags = true, $middle = false)
{
    $len = mb_strlen($text);
    if ($len > $chars) {
        // Trailing space so that a window ending exactly on a word end still finds a space to cut at
        $text = $text . " ";
        if ($middle) {
            // Cast the result of the division: "(int) ($len - $chars) / 2" cast
            // first and divided afterwards, handing a float offset to mb_substr().
            $start = (int) (($len - $chars) / 2);
        } else {
            $start = 0;
        }
        $text = mb_substr($text, $start, $chars);
        $last_space = mb_strrpos($text, ' ');
        if ($last_space !== false) {
            $text = mb_substr($text, 0, $last_space);
        }
        if ($middle) {
            $first_space = mb_strpos($text, ' ');
            if ($first_space !== false) {
                $text = mb_substr($text, $first_space);
            }
        }
        if ($balance_tags) {
            $text = balance_tags($text);
        }
        // Strip leading characters that are neither letters nor digits.
        // Unicode classes are used because the former ASCII-only, byte-wise
        // pattern also removed leading non-ASCII letters (e.g. an excerpt
        // written in Cyrillic was emptied completely).
        // NOTE(review): as before, this also strips the "<" of a leading tag
        // when the excerpt starts with markup - confirm whether that is wanted.
        $stripped = preg_replace('/^[^\\p{L}\\p{N}]+/u', '', $text);
        if ($stripped === null) {
            // PCRE rejected the input (e.g. invalid UTF-8): fall back to the byte-wise pattern
            $stripped = preg_replace("/^[^a-zA-Z0-9]+/", "", $text);
        }
        $text = $stripped;
        return $text . ' …';
    }
    return $text;
}
Example #4
0
 /**
  * Callback function to load a dot from NOT rendered content
  *
  * Elements of $matches read by this method:
  *  - [0] the whole matched source (echoed in the "Invalid Link ID" message)
  *  - [3] ID of the Link object
  *  - [4], [5] x and y coordinates of the dot
  *  - [6] optional width; its first character is dropped, and 'px' is appended
  *        when the remainder is a plain integer
  *  - [7] content of the tooltip (passed through balance_tags())
  *
  * @param array Matches
  * @param boolean TRUE is used only to load dot without returning of tooltip template
  * @return string|NULL Tooltip template of the dot or an error message for an invalid Link ID;
  *                     nothing is returned for a skipped match or when only loading the dot
  */
 function load_infodot_from_source($matches, $only_load_dot = false)
 {
     // Validate the match BEFORE reading from it: reading $matches[3] first
     // raised an undefined index warning for an empty/short match array.
     if (empty($matches) || !isset($matches[3]) || empty($this->object_ID)) {
         // Skip this incorrect match
         return;
     }
     $link_ID = intval($matches[3]);
     if (empty($link_ID)) {
         // Skip this incorrect match
         return;
     }
     $LinkCache =& get_LinkCache();
     $Link =& $LinkCache->get_by_ID($link_ID, false, false);
     if (!$Link) {
         // Inform about invalid Link ID
         return '<div style="color:#F00"><b>' . T_('Invalid Link ID') . ' - ' . $matches[0] . '</b></div>';
     }
     if ($this->dot_numbers === NULL) {
         // Init dot numbers array first time
         $this->dot_numbers = array();
     }
     if (!isset($this->dot_numbers[$link_ID])) {
         // Start to calculate number of the dots for current Link object
         $this->dot_numbers[$link_ID] = 1;
     }
     if (!isset($this->loaded_objects[$this->object_ID])) {
         // Load dots only once
         if ($this->dots === NULL) {
             // Init dots array first time
             $this->dots = array();
         }
         if (!isset($this->dots[$link_ID])) {
             // Init sub array for each Link
             $this->dots[$link_ID] = array();
         }
         // Add dot
         $this->dots[$link_ID][] = array('x' => intval($matches[4]), 'y' => intval($matches[5]));
     }
     if ($only_load_dot) {
         // Exit here to don't execute a code below
         return;
     }
     // 1-based number of the current dot of this Link
     $dot_num = $this->dot_numbers[$link_ID];
     if (empty($matches[6])) {
         // No defined width
         $tooltip_width = '';
     } else {
         // Set css style for width
         $tooltip_width = substr($matches[6], 1);
         // A plain integer gets the 'px' unit, anything else is used as given
         $tooltip_width = strlen(intval($tooltip_width)) == strlen($tooltip_width) ? $tooltip_width . 'px' : $tooltip_width;
         $tooltip_width = ' style="width:' . $tooltip_width . '"';
     }
     // Coordinates come from the dots loaded above ($dot_num is 1-based, the array is 0-based)
     $dot_xy = ' xy="' . $this->dots[$link_ID][$dot_num - 1]['x'] . ':' . $this->dots[$link_ID][$dot_num - 1]['y'] . '"';
     $this->dot_numbers[$link_ID]++;
     // Print this element that will be used for tooltip of the dot
     return '<div class="infodots_info" id="infodot_' . $link_ID . '_' . $dot_num . '"' . $dot_xy . $tooltip_width . '>' . balance_tags($matches[7]) . '</div>' . "\n";
 }
/**
 * Prepare html message
 *
 * Extracts the body of an HTML message, normalizes its newlines, decodes the
 * entity-encoded 'category', 'title' and 'auth' tags, reads the auth tag,
 * balances the tags and strips some mail client specific attributes.
 * The original and the processed body are reported through pbm_msg().
 *
 * @param string Message
 * @return array|boolean array( auth, content ), or FALSE when no 'auth' tag was found
 */
function pbm_prepare_html_message($message)
{
    pbm_msg('Message body (original): <pre style="font-size:10px">' . htmlspecialchars($message) . '</pre>');

    // Unless one of the patterns below matches, the original message body is used as is
    $content = $message;
    if (preg_match('~<body[^>]*>(.*?)</body>~is', $message, $found)) {
        // Preferred source: contents of the <body> tag
        $content = $found[1];
    } elseif (preg_match('~<html[^>]*>(.*?)</html>~is', $message, $found)) {
        // No <body>: take the contents of <html> without its <head> section
        $content = preg_replace('~<head[^>]*>(.*?)</head>~is', '', $found[1]);
    }

    // Unify line-endings (dos, mac, unix) and collapse double newlines
    $content = trim($content);
    $content = str_replace(array("\r", "\n\n"), "\n", $content);

    // Turn &lt;category&gt;, &lt;title&gt; and &lt;auth&gt; (opening and closing) back into real tags
    $content = preg_replace('~&lt;(/)?(category|title|auth)&gt;~i', '<\\1\\2>', $content);

    $auth = pbm_get_auth_tag($content);
    if ($auth === false) {
        // Without an 'auth' tag the message cannot be processed
        pbm_msg(sprintf(T_('&lt;auth&gt; tag not found! Please add username and password in message body in format %s.'), '"&lt;auth&gt;username:password&lt;/auth&gt;"'), true);
        return false;
    }

    // Close/open tags as needed
    $content = balance_tags($content);

    // Drop attributes that cause validator errors
    $invalid_markup = array('~ moz-do-not-send="true"~', '~ class="moz-signature" cols="\\d+"~', '~ goomoji="[^"]+"~');
    $content = preg_replace($invalid_markup, '', $content);

    pbm_msg('Message body (processed): <pre style="font-size:10px">' . htmlspecialchars($content) . '</pre>');
    return array($auth, $content);
}
/**
 * Prepare html message
 *
 * Extracts the body of an HTML message, normalizes its newlines, decodes the
 * entity-encoded 'category', 'title' and 'auth' tags, balances the tags and
 * strips some mail client specific attributes.
 *
 * @param string Message
 * @param boolean TRUE if script is executed by cron (not read inside this function)
 * @return string Content
 */
function dre_prepare_html_message($message, $cron = false)
{
    // Unless one of the patterns below matches, the original message body is used as is
    $content = $message;
    if (preg_match('~<body[^>]*>(.*?)</body>~is', $message, $found)) {
        // Preferred source: contents of the <body> tag
        $content = $found[1];
    } elseif (preg_match('~<html[^>]*>(.*?)</html>~is', $message, $found)) {
        // No <body>: take the contents of <html> without its <head> section
        $content = preg_replace('~<head[^>]*>(.*?)</head>~is', '', $found[1]);
    }

    // Unify line-endings (dos, mac, unix) and collapse double newlines
    $content = trim($content);
    $content = str_replace(array("\r", "\n\n"), "\n", $content);

    // Turn &lt;category&gt;, &lt;title&gt; and &lt;auth&gt; (opening and closing) back into real tags
    $content = preg_replace('~&lt;(/)?(category|title|auth)&gt;~i', '<\\1\\2>', $content);

    // Close/open tags as needed
    $content = balance_tags($content);

    // Drop attributes that cause validator errors
    $invalid_markup = array('~ moz-do-not-send="true"~', '~ class="moz-signature" cols="\\d+"~', '~ goomoji="[^"]+"~');
    return preg_replace($invalid_markup, '', $content);
}
Example #7
0
/**
 * Check raw HTML input for different levels of sanity including:
 * - XHTML validation
 * - Javascript injection
 * - antispam
 *
 * Also cleans up the content on some levels:
 * - trimming
 * - balancing tags
 *
 * WARNING: this does *NOT* (necessarily) make the HTML code safe.
 * It only checks on it and produces error messages.
 * It is NOT (necessarily) safe to use the output.
 *
 * @param string The content to format
 * @param string Context: 'posting', 'xmlrpc_posting' or 'commenting'; anything else ends in debug_die()
 * @param integer Create automated <br /> tags?
 * @param string Encoding (used for SafeHtmlChecker() only!); defaults to $io_charset
 * @return boolean|string FALSE if an error was found, the cleaned up content otherwise
 */
function check_html_sanity($content, $context = 'posting', $autobr = false, $encoding = NULL)
{
    global $use_balanceTags, $admin_url;
    global $io_charset, $use_xhtmlvalidation_for_comments, $comment_allowed_tags, $comments_allow_css_tweaks;
    global $Messages;
    /**
     * @var User
     */
    global $current_User;
    switch ($context) {
        case 'posting':
        case 'xmlrpc_posting':
            // Rules come from the permissions of the current user's group
            $Group =& $current_User->get_Group();
            if ($context == 'posting') {
                $xhtmlvalidation = $Group->perm_xhtmlvalidation == 'always';
            } else {
                $xhtmlvalidation = $Group->perm_xhtmlvalidation_xmlrpc == 'always';
            }
            $allow_css_tweaks = $Group->perm_xhtml_css_tweaks;
            $allow_javascript = $Group->perm_xhtml_javascript;
            $allow_iframes = $Group->perm_xhtml_iframes;
            $allow_objects = $Group->perm_xhtml_objects;
            $bypass_antispam = $Group->perm_bypass_antispam;
            break;
        case 'commenting':
            $xhtmlvalidation = $use_xhtmlvalidation_for_comments;
            $allow_css_tweaks = $comments_allow_css_tweaks;
            $allow_javascript = false;
            $allow_iframes = false;
            $allow_objects = false;
            // fp> I don't know if it makes sense to bypass antispam in commenting context if the user has that kind of permissions.
            // If so, then we also need to bypass in several other places.
            $bypass_antispam = false;
            break;
        default:
            debug_die('unknown context: ' . $context);
    }
    $error = false;
    // Replace any & that is not a character or entity reference with &amp;
    $content = preg_replace('/&(?!#[0-9]+;|#x[0-9a-fA-F]+;|[a-zA-Z_:][a-zA-Z0-9._:-]*;)/', '&amp;', $content);
    // ANTISPAM check:
    if (!$bypass_antispam && ($block = antispam_check($content))) {
        if ($context == 'commenting') {
            // Do not reveal the blacklisted word to commenters
            $errmsg = T_('Illegal content found (spam?)');
        } elseif ($context == 'xmlrpc_posting') {
            // Plain quotes and no HTML escaping for XML-RPC
            $errmsg = sprintf(T_('Illegal content found: blacklisted word "%s"'), $block);
        } else {
            $errmsg = sprintf(T_('Illegal content found: blacklisted word &laquo;%s&raquo;'), htmlspecialchars($block));
        }
        $Messages->add($errmsg, 'error');
        $error = true;
    }
    if ($autobr) {
        // Auto <br />:
        // may put brs in the middle of multiline tags...
        // TODO: this may create "<br />" tags in "<UL>" (outside of <LI>) and make the HTML invalid! -> use autoP plugin?
        $content = autobrize($content);
    }
    $content = trim($content);
    if ($use_balanceTags) {
        // Auto close open tags:
        $content = balance_tags($content);
    }
    if ($xhtmlvalidation) {
        // We want to validate XHTML:
        load_class('xhtml_validator/_xhtml_validator.class.php');
        // Plain assignment: "=& new" is a parse error since PHP 7
        $XHTML_Validator = new XHTML_Validator($context, $allow_css_tweaks, $allow_iframes, $allow_javascript, $allow_objects, $encoding);
        if (!$XHTML_Validator->check($content)) {
            $error = true;
        }
    } else {
        // We do not WANT to validate XHTML, fall back to basic security checking:
        // This is only as strong as its regexps can parse xhtml. This is significantly inferior to the XHTML checker above.
        // The only advantage of this checker is that it can check for a little security without requiring VALID XHTML.
        if ($context == 'commenting') {
            // DEPRECATED but still...
            // echo 'allowed tags:',htmlspecialchars($comment_allowed_tags);
            $content = strip_tags($content, $comment_allowed_tags);
        }
        // Security checking:
        $check = $content;
        // Open comments or '<![CDATA[' are dangerous
        $check = str_replace('<!', '<', $check);
        // # # and ~ ~ are delimiters
        // i modifier at the end means caseless, x means whitespace in the pattern is ignored
        // The tag patterns use "~" as delimiter: the former non-ASCII delimiter is
        // multi-byte in an UTF-8 encoded file, which makes preg_match() fail.
        // They use "/?" (optional slash) so that opening AND closing tags are
        // detected; the former "//?" required a slash and missed "<script>" etc.
        // CHECK Styling restrictions:
        if (!$allow_css_tweaks && preg_match('#\\s((style|class|id)\\s*=)#i', $check, $matches)) {
            $Messages->add(T_('Illegal CSS markup found: ') . htmlspecialchars($matches[1]), 'error');
            $error = true;
        }
        // CHECK JAVASCRIPT:
        if (!$allow_javascript && (preg_match('~( < \\s* /? \\s* (script|noscript) )~xi', $check, $matches) || preg_match('#\\s((on[a-z]+)\\s*=)#i', $check, $matches) || preg_match('#=["\'\\s]*((javascript|vbscript|about):)#i', $check, $matches))) {
            $Messages->add(T_('Illegal javascript markup found: ') . htmlspecialchars($matches[1]), 'error');
            $error = true;
        }
        // CHECK IFRAMES:
        if (!$allow_iframes && preg_match('~( < \\s* /? \\s* (frame|iframe) )~xi', $check, $matches)) {
            $Messages->add(T_('Illegal frame markup found: ') . htmlspecialchars($matches[1]), 'error');
            $error = true;
        }
        // CHECK OBJECTS:
        if (!$allow_objects && preg_match('~( < \\s* /? \\s* (applet|object|param|embed) )~xi', $check, $matches)) {
            $Messages->add(T_('Illegal object markup found: ') . htmlspecialchars($matches[1]), 'error');
            $error = true;
        }
    }
    if ($error) {
        // $Group is only set in the posting contexts
        if (!empty($current_User) && !empty($Group) && $current_User->check_perm('users', 'edit', false)) {
            $Messages->add(sprintf(T_('(Note: To get rid of the above validation warnings, you can deactivate unwanted validation rules in your <a %s>Group settings</a>.)'), 'href="' . $admin_url . '?ctrl=users&amp;grp_ID=' . $Group->ID . '"'), 'error');
        }
        return false;
    }
    // Return sanitized content
    return $content;
}
Example #8
0
/**
 * Check raw HTML input for different levels of sanity including:
 * - XHTML validation
 * - Javascript injection
 * - antispam
 *
 * Also cleans up the content on some levels:
 * - trimming
 * - balancing tags
 *
 * WARNING: this does *NOT* (necessarily) make the HTML code safe.
 * It only checks on it and produces error messages.
 * It is NOT (necessarily) safe to use the output.
 *
 * @param string The content to format
 * @param string Context: 'posting', 'xmlrpc_posting', 'commenting', 'general_array_params'
 *               or 'head_extension'; anything else ends in debug_die().
 *               The last two contexts do not add error messages.
 * @param User User (used for "posting" and "xmlrpc_posting" context). Default: $current_User
 * @param string Encoding (used for XHTML_Validator only!); defaults to $io_charset
 * @return boolean|string FALSE if an error was found, the cleaned up content otherwise
 */
function check_html_sanity($content, $context = 'posting', $User = NULL, $encoding = NULL)
{
    global $use_balanceTags, $admin_url;
    global $io_charset, $use_xhtmlvalidation_for_comments, $comment_allowed_tags, $comments_allow_css_tweaks;
    global $Messages;
    if (empty($User)) {
        /**
         * @var User
         */
        global $current_User;
        $User = $current_User;
    }
    // Add error messages
    $verbose = true;
    switch ($context) {
        case 'posting':
        case 'xmlrpc_posting':
            // Rules come from the permissions of the user's group
            $Group = $User->get_Group();
            if ($context == 'posting') {
                $xhtmlvalidation = $Group->perm_xhtmlvalidation == 'always';
            } else {
                $xhtmlvalidation = $Group->perm_xhtmlvalidation_xmlrpc == 'always';
            }
            $allow_css_tweaks = $Group->perm_xhtml_css_tweaks;
            $allow_javascript = $Group->perm_xhtml_javascript;
            $allow_iframes = $Group->perm_xhtml_iframes;
            $allow_objects = $Group->perm_xhtml_objects;
            $bypass_antispam = $Group->perm_bypass_antispam;
            break;
        case 'commenting':
            $xhtmlvalidation = $use_xhtmlvalidation_for_comments;
            $allow_css_tweaks = $comments_allow_css_tweaks;
            $allow_javascript = false;
            $allow_iframes = false;
            $allow_objects = false;
            // fp> I don't know if it makes sense to bypass antispam in commenting context if the user has that kind of permissions.
            // If so, then we also need to bypass in several other places.
            $bypass_antispam = false;
            break;
        case 'general_array_params':
            $xhtmlvalidation = false;
            $allow_css_tweaks = true;
            $allow_javascript = false;
            $allow_iframes = false;
            $allow_objects = false;
            $bypass_antispam = false;
            // Do not add error messages in this context
            $verbose = false;
            break;
        case 'head_extension':
            $xhtmlvalidation = true;
            // We disable everything else, because the XHTML validator will set explicit rules for the 'head_extension' context
            $allow_css_tweaks = false;
            $allow_javascript = false;
            $allow_iframes = false;
            $allow_objects = false;
            $bypass_antispam = false;
            // Do not add error messages in this context
            $verbose = false;
            break;
        default:
            debug_die('unknown context: ' . $context);
    }
    $error = false;
    // Replace any & that is not a character or entity reference with &amp;
    $content = preg_replace('/&(?!#[0-9]+;|#x[0-9a-fA-F]+;|[a-zA-Z_:][a-zA-Z0-9._:-]*;)/', '&amp;', $content);
    // ANTISPAM check:
    $error = !$bypass_antispam && ($block = antispam_check($content));
    if ($error && $verbose) {
        // Add error message
        if ($context == 'commenting') {
            // Do not reveal the blacklisted word to commenters
            $errmsg = T_('Illegal content found (spam?).');
        } elseif ($context == 'xmlrpc_posting') {
            // Plain quotes and no HTML escaping for XML-RPC
            $errmsg = sprintf(T_('Illegal content found: blacklisted word "%s".'), $block);
        } else {
            $errmsg = sprintf(T_('Illegal content found: blacklisted word &laquo;%s&raquo;.'), htmlspecialchars($block));
        }
        $Messages->add($errmsg, 'error');
    }
    $content = trim($content);
    if ($use_balanceTags && $context != 'general_array_params') {
        // Auto close open tags:
        // Auto close only if the content is NOT from a general array param where open and closed html tags may appear separately
        $content = balance_tags($content);
    }
    if ($xhtmlvalidation) {
        // We want to validate XHTML:
        load_class('xhtml_validator/_xhtml_validator.class.php', 'XHTML_Validator');
        $XHTML_Validator = new XHTML_Validator($context, $allow_css_tweaks, $allow_iframes, $allow_javascript, $allow_objects, $encoding);
        if (!$XHTML_Validator->check($content)) {
            $error = true;
        }
    } else {
        // We do not WANT to validate XHTML, fall back to basic security checking:
        // This is only as strong as its regexps can parse xhtml. This is significantly inferior to the XHTML checker above.
        // The only advantage of this checker is that it can check for a little security without requiring VALID XHTML.
        if ($context == 'commenting') {
            // DEPRECATED but still...
            // echo 'allowed tags:',htmlspecialchars($comment_allowed_tags);
            $content = strip_tags($content, $comment_allowed_tags);
        }
        // Security checking:
        $check = $content;
        // Open comments or '<![CDATA[' are dangerous
        $check = str_replace('<!', '<', $check);
        // # # and ~ ~ are delimiters
        // i modifier at the end means caseless, x means whitespace in the pattern is ignored
        // The tag patterns use "/?" (optional slash) so that opening AND closing
        // tags are detected; the former "//?" required a slash and therefore
        // missed opening tags such as "<script>".
        // CHECK Styling restrictions:
        $css_tweaks_error = !$allow_css_tweaks && preg_match('#\\s((style|class|id)\\s*=)#i', $check, $matches);
        if ($css_tweaks_error && $verbose) {
            $Messages->add(T_('Illegal CSS markup found: ') . htmlspecialchars($matches[1]), 'error');
        }
        // CHECK JAVASCRIPT:
        $javascript_error = !$allow_javascript && (preg_match('~( < \\s* /? \\s* (script|noscript) )~xi', $check, $matches) || preg_match('#\\s((on[a-z]+)\\s*=)#i', $check, $matches) || preg_match('#=["\'\\s]*((javascript|vbscript|about):)#i', $check, $matches));
        if ($javascript_error && $verbose) {
            $Messages->add(T_('Illegal javascript markup found: ') . htmlspecialchars($matches[1]), 'error');
        }
        // CHECK IFRAMES:
        $iframe_error = !$allow_iframes && preg_match('~( < \\s* /? \\s* (frame|iframe) )~xi', $check, $matches);
        if ($iframe_error && $verbose) {
            $Messages->add(T_('Illegal frame markup found: ') . htmlspecialchars($matches[1]), 'error');
        }
        // CHECK OBJECTS:
        $object_error = !$allow_objects && preg_match('~( < \\s* /? \\s* (applet|object|param|embed) )~xi', $check, $matches);
        if ($object_error && $verbose) {
            $Messages->add(T_('Illegal object markup found: ') . htmlspecialchars($matches[1]), 'error');
        }
        // Set the final error value based on all of the results
        $error = $error || $css_tweaks_error || $javascript_error || $iframe_error || $object_error;
    }
    if ($error) {
        // $Group is only set in the posting contexts
        if ($verbose && !empty($User) && !empty($Group) && $User->check_perm('users', 'edit', false)) {
            $Messages->add(sprintf(T_('(Note: To get rid of the above validation warnings, you can deactivate unwanted validation rules in your <a %s>Group settings</a>.)'), 'href="' . $admin_url . '?ctrl=groups&amp;grp_ID=' . $Group->ID . '"'), 'error');
        }
        return false;
    }
    // Return sanitized content
    return $content;
}