/**
 * Test {@link balance_tags()}
 *
 * Each case is a pair of ( input, expected output ); the cases are checked
 * in order with assertEqual().
 */
function test_balanceTags()
{
    $cases = array(
        // Comments inside tags must be left untouched:
        array('<div><!-- comment --></div>', '<div><!-- comment --></div>'),
        // An unclosed tag gets closed:
        array('<div><!-- comment -->', '<div><!-- comment --></div>'),
        // A stray closing tag gets dropped:
        array('<!-- comment --></div>', '<!-- comment -->'),
        // Same three situations with plain text instead of a comment:
        array('<div> text </div>', '<div> text </div>'),
        array('<div> text ', '<div> text </div>'),
        array(' text </div>', ' text '),
    );

    foreach ($cases as $case) {
        $this->assertEqual(balance_tags($case[0]), $case[1]);
    }
}
/**
 * Crop string to maxwords preserving tags.
 *
 * The string is scanned character by character. Whitespace outside of a tag
 * (i.e. not between '<' and '>') counts as a word gap; runs of whitespace
 * count as a single gap. Once $maxwords gaps have been consumed the string is
 * cut at that position, the tags are balanced with balance_tags() and, if
 * requested or if the text was actually cropped, a "continued" text/link is
 * appended.
 *
 * NOTE(review): the length comes from evo_strlen() and the cut from
 * evo_substr() while characters are read with byte indexing ($str[$i]);
 * if those helpers count multibyte characters the offsets may disagree for
 * non-ASCII input -- confirm against their implementation.
 *
 * @param string Text to crop
 * @param int Maximum number words
 * @param mixed array Optional parameters:
 *              - 'continued_link': URL used to link the continued text (default '')
 *              - 'continued_text': text appended after cropped content (default '…')
 *              - 'always_continue': append the continued text even if nothing was cropped
 * @return string
 */
function strmaxwords($str, $maxwords = 50, $params = array())
{
    $params = array_merge(array(
        'continued_link' => '',
        'continued_text' => '…',
        'always_continue' => false,
    ), $params);

    $open = false;
    $have_seen_non_whitespace = false;
    $end = evo_strlen($str);

    for ($i = 0; $i < $end; $i++) {
        $char = $str[$i];

        // Explicit strict tests instead of "switch ($char) { case ctype_space($char): }":
        // switch compares loosely, so the character '0' matched the boolean FALSE
        // returned by ctype_space('0') and was wrongly counted as a word gap.
        if ($char === '<') { // start of a tag
            $open = true;
        } elseif ($char === '>') { // end of a tag
            $open = false;
        } elseif (ctype_space($char)) {
            if (!$open) { // it's a word gap
                // Eat any other whitespace.
                while (isset($str[$i + 1]) && ctype_space($str[$i + 1])) {
                    $i++;
                }
                if (isset($str[$i + 1]) && $have_seen_non_whitespace) {
                    // only decrement words, if there's a non-space char left.
                    --$maxwords;
                }
            }
        } else {
            $have_seen_non_whitespace = true;
        }

        if ($maxwords < 1) {
            // Word budget used up: $i now points at the last whitespace of the gap.
            break;
        }
    }

    // restrict content to required number of words and balance the tags out
    $str = balance_tags(evo_substr($str, 0, $i));

    if ($params['always_continue'] || !$maxwords) {
        // we want a continued text
        if ($params['continued_link']) {
            // we have a url
            $str .= ' <a href="' . $params['continued_link'] . '">' . $params['continued_text'] . '</a>';
        } else {
            // we don't have a url
            $str .= ' ' . $params['continued_text'];
        }
    }

    // remove empty tags
    // NOTE(review): the captured "tag name" is [\s]+? (whitespace only), so this
    // pattern does not match ordinary empty pairs such as <p></p>; left unchanged
    // here because callers may depend on the current output -- confirm the intent.
    $str = preg_replace('~<([\\s]+?)[^>]*?></\\1>~is', '', $str);

    return $str;
}
/**
 * Build a short excerpt of a text, cut on word boundaries.
 *
 * If the text is not longer than $chars characters it is returned unchanged.
 * Otherwise a window of $chars characters is taken (from the beginning, or
 * centered when $middle is TRUE), the trailing partial word is dropped (and
 * the leading partial word too when $middle is TRUE), tags are optionally
 * balanced, leading non-alphanumeric characters are stripped and ' …' is
 * appended.
 *
 * @param string Text to shorten
 * @param int Maximum number of characters to keep (before word trimming)
 * @param boolean TRUE to run the result through balance_tags()
 * @param boolean TRUE to take the excerpt from the middle of the text
 * @return string
 */
function utf8_excerpt($text, $chars, $balance_tags = true, $middle = false)
{
    $len = mb_strlen($text);
    if ($len <= $chars) {
        // Short enough, nothing to do
        return $text;
    }

    $text = $text . " ";

    // Cast the RESULT of the division: "(int) ($len - $chars) / 2" cast only the
    // difference and handed a float offset (e.g. 4.5) to mb_substr().
    $start = $middle ? (int) (($len - $chars) / 2) : 0;

    $text = mb_substr($text, $start, $chars);

    // Drop the (possibly partial) last word
    $last_space = mb_strrpos($text, ' ');
    if ($last_space !== false) {
        $text = mb_substr($text, 0, $last_space);
    }

    if ($middle) {
        // Drop the (possibly partial) first word as well
        $first_space = mb_strpos($text, ' ');
        if ($first_space !== false) {
            $text = mb_substr($text, $first_space);
        }
    }

    if ($balance_tags) {
        $text = balance_tags($text);
    }

    // Strip leading characters that are neither letters nor digits.
    // Unicode-aware so that excerpts starting with non-ASCII letters
    // (accented, Cyrillic, CJK...) are not eaten.
    $stripped = preg_replace('/^[^\p{L}\p{N}]+/u', '', $text);
    if ($stripped === null) {
        // The /u pattern fails on malformed UTF-8: fall back to the byte-wise ASCII rule
        $stripped = preg_replace("/^[^a-zA-Z0-9]+/", "", $text);
    }

    return $stripped . ' …';
}
/**
 * Callback function to load a dot from NOT rendered content
 *
 * Reads the link ID from $matches[3], the dot coordinates from $matches[4] and
 * $matches[5], an optional width from $matches[6] and the tooltip content from
 * $matches[7]. Dots are collected in $this->dots (only while the current object
 * is not yet flagged in $this->loaded_objects) and numbered per link through
 * $this->dot_numbers.
 *
 * @param array Matches
 * @param boolean TRUE is used only to load dot without returning of tooltip template
 * @return string Empty string to don't display the dot template in content, It is printed out before image tag
 */
function load_infodot_from_source($matches, $only_load_dot = false)
{
    $link_ID = intval($matches[3]);

    if (empty($link_ID) || empty($matches) || empty($this->object_ID)) {
        // Skip this incorrect match
        return;
    }

    $LinkCache =& get_LinkCache();
    $Link =& $LinkCache->get_by_ID($link_ID, false, false);
    if (!$Link) {
        // Inform about invalid Link ID
        return '<div style="color:#F00"><b>' . T_('Invalid Link ID') . ' - ' . $matches[0] . '</b></div>';
    }

    // Lazily create the per-link counters
    if ($this->dot_numbers === NULL) {
        $this->dot_numbers = array();
    }
    if (!isset($this->dot_numbers[$link_ID])) {
        // First dot seen for this Link object
        $this->dot_numbers[$link_ID] = 1;
    }

    // Dots of an object are collected only once
    if (!isset($this->loaded_objects[$this->object_ID])) {
        if ($this->dots === NULL) {
            $this->dots = array();
        }
        if (!isset($this->dots[$link_ID])) {
            $this->dots[$link_ID] = array();
        }
        $this->dots[$link_ID][] = array(
            'x' => intval($matches[4]),
            'y' => intval($matches[5]),
        );
    }

    if ($only_load_dot) {
        // Caller only wanted the dot registered, no tooltip markup
        return;
    }

    $dot_num = $this->dot_numbers[$link_ID];

    // Optional css width of the tooltip
    $tooltip_width = '';
    if (!empty($matches[6])) {
        // Drop the first character of the match, the rest is the width value
        $width_value = substr($matches[6], 1);
        if (strlen(intval($width_value)) == strlen($width_value)) {
            // Pure number => pixels
            $width_value = $width_value . 'px';
        }
        $tooltip_width = ' style="width:' . $width_value . '"';
    }

    // Coordinates of the dot this tooltip belongs to
    $dot_xy = ' xy="' . $this->dots[$link_ID][$dot_num - 1]['x'] . ':' . $this->dots[$link_ID][$dot_num - 1]['y'] . '"';

    $this->dot_numbers[$link_ID]++;

    // Print this element that will be used for tooltip of the dot
    $tooltip_html = '<div class="infodots_info" id="infodot_' . $link_ID . '_' . $dot_num . '"' . $dot_xy . $tooltip_width . '>';
    $tooltip_html .= balance_tags($matches[7]);
    $tooltip_html .= '</div>' . "\n";

    return $tooltip_html;
}
/**
 * Prepare the HTML body of a message
 *
 * Extracts the contents of <body> (or of <html> minus <head>, or the whole
 * message as a last resort), normalizes line endings, decodes the entity-encoded
 * 'category', 'title' and 'auth' tags, reads the auth tag through
 * pbm_get_auth_tag(), balances the HTML tags and strips a few mail-client
 * specific attributes. Progress is reported through pbm_msg().
 *
 * @param string Raw HTML message
 * @return array|false array( auth, content ) or FALSE if no auth tag was found
 */
function pbm_prepare_html_message($message)
{
    pbm_msg('Message body (original): <pre style="font-size:10px">' . htmlspecialchars($message) . '</pre>');

    if (preg_match('~<body[^>]*>(.*?)</body>~is', $message, $result)) {
        // First see if we can get contents of <body> tag
        $content = $result[1];
    } elseif (preg_match('~<html[^>]*>(.*?)</html>~is', $message, $result)) {
        // <body> was not found, use <html> contents and delete <head> section from it
        $content = preg_replace('~<head[^>]*>(.*?)</head>~is', '', $result[1]);
    } else {
        // None of the above methods worked, just use the original message body
        $content = $message;
    }

    // First fix different line-endings (dos, mac, unix), remove double newlines
    $content = str_replace(array("\r", "\n\n"), "\n", trim($content));

    // Decode 'category', 'title' and 'auth' tags:
    // a tag typed as text in an HTML mail arrives entity-encoded (&lt;auth&gt;).
    // The previous pattern matched the already-decoded form and replaced it with
    // itself, so encoded tags were never decoded.
    $content = preg_replace('~&lt;(/)?(category|title|auth)&gt;~i', '<\\1\\2>', $content);

    if (($auth = pbm_get_auth_tag($content)) === false) {
        // No 'auth' tag provided, exit
        pbm_msg(sprintf(T_('<auth> tag not found! Please add username and password in message body in format %s.'), '"<auth>username:password</auth>"'), true);
        return false;
    }

    // Balance tags
    $content = balance_tags($content);

    // Remove markup that cause validator errors
    $patterns = array(
        '~ moz-do-not-send="true"~',
        '~ class="moz-signature" cols="\\d+"~',
        '~ goomoji="[^"]+"~',
    );
    $content = preg_replace($patterns, '', $content);

    pbm_msg('Message body (processed): <pre style="font-size:10px">' . htmlspecialchars($content) . '</pre>');

    return array($auth, $content);
}
/**
 * Prepare html message
 *
 * Extracts the contents of <body> (or of <html> minus <head>, or the whole
 * message as a last resort), normalizes line endings, decodes the entity-encoded
 * 'category', 'title' and 'auth' tags, balances the HTML tags and strips a few
 * mail-client specific attributes.
 *
 * @param string Message
 * @param boolean TRUE if script is executed by cron (not used inside this function)
 * @return string Content
 */
function dre_prepare_html_message($message, $cron = false)
{
    if (preg_match('~<body[^>]*>(.*?)</body>~is', $message, $result)) {
        // First see if we can get contents of <body> tag
        $content = $result[1];
    } elseif (preg_match('~<html[^>]*>(.*?)</html>~is', $message, $result)) {
        // <body> was not found, use <html> contents and delete <head> section from it
        $content = preg_replace('~<head[^>]*>(.*?)</head>~is', '', $result[1]);
    } else {
        // None of the above methods worked, just use the original message body
        $content = $message;
    }

    // First fix different line-endings (dos, mac, unix), remove double newlines
    $content = str_replace(array("\r", "\n\n"), "\n", trim($content));

    // Decode 'category', 'title' and 'auth' tags:
    // a tag typed as text in an HTML mail arrives entity-encoded (&lt;title&gt;).
    // The previous pattern matched the already-decoded form and replaced it with
    // itself, so encoded tags were never decoded.
    $content = preg_replace('~&lt;(/)?(category|title|auth)&gt;~i', '<\\1\\2>', $content);

    // Balance tags
    $content = balance_tags($content);

    // Remove markup that cause validator errors
    $patterns = array(
        '~ moz-do-not-send="true"~',
        '~ class="moz-signature" cols="\\d+"~',
        '~ goomoji="[^"]+"~',
    );
    $content = preg_replace($patterns, '', $content);

    return $content;
}
/**
 * Check raw HTML input for different levels of sanity including:
 * - XHTML validation
 * - Javascript injection
 * - antispam
 *
 * Also cleans up the content on some levels:
 * - trimming
 * - balancing tags
 *
 * WARNING: this does *NOT* (necessarilly) make the HTML code safe.
 * It only checks on it and produces error messages.
 * It is NOT (necessarily) safe to use the output.
 *
 * Permissions are read from the Group of $current_User in the 'posting' and
 * 'xmlrpc_posting' contexts and from global settings in the 'commenting'
 * context. Any other context ends in debug_die(). Errors are reported through
 * $Messages.
 *
 * @param string The content to format
 * @param string Context: 'posting', 'xmlrpc_posting' or 'commenting'
 * @param integer Create automated <br /> tags?
 * @param string Encoding (passed on to XHTML_Validator only!)
 * @return boolean|string FALSE if an error was found, the cleaned up content otherwise
 */
function check_html_sanity($content, $context = 'posting', $autobr = false, $encoding = NULL)
{
    global $use_balanceTags, $admin_url;
    global $io_charset, $use_xhtmlvalidation_for_comments, $comment_allowed_tags, $comments_allow_css_tweaks;
    global $Messages;

    /**
     * @var User
     */
    global $current_User;

    switch ($context) {
        case 'posting':
        case 'xmlrpc_posting':
            $Group =& $current_User->get_Group();
            if ($context == 'posting') {
                $xhtmlvalidation = $Group->perm_xhtmlvalidation == 'always';
            } else {
                $xhtmlvalidation = $Group->perm_xhtmlvalidation_xmlrpc == 'always';
            }
            $allow_css_tweaks = $Group->perm_xhtml_css_tweaks;
            $allow_javascript = $Group->perm_xhtml_javascript;
            $allow_iframes = $Group->perm_xhtml_iframes;
            $allow_objects = $Group->perm_xhtml_objects;
            $bypass_antispam = $Group->perm_bypass_antispam;
            break;

        case 'commenting':
            $xhtmlvalidation = $use_xhtmlvalidation_for_comments;
            $allow_css_tweaks = $comments_allow_css_tweaks;
            $allow_javascript = false;
            $allow_iframes = false;
            $allow_objects = false;
            // fp> I don't know if it makes sense to bypass antispam in commenting context if the user has that kind of permissions.
            // If so, then we also need to bypass in several other places.
            $bypass_antispam = false;
            break;

        default:
            debug_die('unknown context: ' . $context);
    }

    $error = false;

    // Replace any & that is not a character or entity reference with &amp;
    // (the replacement used to be a bare '&', which made this a no-op)
    $content = preg_replace('/&(?!#[0-9]+;|#x[0-9a-fA-F]+;|[a-zA-Z_:][a-zA-Z0-9._:-]*;)/', '&amp;', $content);

    // ANTISPAM check:
    if (!$bypass_antispam && ($block = antispam_check($content))) {
        if ($context == 'commenting') {
            // Do not reveal the blacklisted word to commenters
            $errmsg = T_('Illegal content found (spam?)');
        } elseif ($context == 'xmlrpc_posting') {
            // The word is not HTML-escaped for XML-RPC clients
            $errmsg = sprintf(T_('Illegal content found: blacklisted word "%s"'), $block);
        } else {
            $errmsg = sprintf(T_('Illegal content found: blacklisted word «%s»'), htmlspecialchars($block));
        }
        $Messages->add($errmsg, 'error');
        $error = true;
    }

    if ($autobr) {
        // Auto <br />:
        // may put brs in the middle of multiline tags...
        // TODO: this may create "<br />" tags in "<UL>" (outside of <LI>) and make the HTML invalid! -> use autoP pugin?
        $content = autobrize($content);
    }

    $content = trim($content);

    if ($use_balanceTags) {
        // Auto close open tags:
        $content = balance_tags($content);
    }

    if ($xhtmlvalidation) {
        // We want to validate XHTML:
        load_class('xhtml_validator/_xhtml_validator.class.php');
        // Plain assignment: "=& new" is a parse error since PHP 7
        $XHTML_Validator = new XHTML_Validator($context, $allow_css_tweaks, $allow_iframes, $allow_javascript, $allow_objects, $encoding);
        if (!$XHTML_Validator->check($content)) {
            $error = true;
        }
    } else {
        // We do not WANT to validate XHTML, fall back to basic security checking:
        // This is only as strong as its regexps can parse xhtml. This is significantly inferior to the XHTML checker above.
        // The only advantage of this checker is that it can check for a little security without requiring VALID XHTML.
        if ($context == 'commenting') {
            // DEPRECATED but still...
            // echo 'allowed tags:',htmlspecialchars($comment_allowed_tags);
            $content = strip_tags($content, $comment_allowed_tags);
        }

        // Security checking:
        $check = $content;

        // Open comments or '<![CDATA[' are dangerous
        $check = str_replace('<!', '<', $check);

        // # # and ~ ~ are delimiters (single-byte on purpose: a multi-byte delimiter breaks PCRE in UTF-8 source files)
        // i modifier at the end means caseless, x means whitespace in the pattern is ignored
        // The tag patterns use "/?" (optional slash) so that OPENING tags are caught too;
        // the former "//?" required at least one slash and only matched closing tags.

        // CHECK Styling restictions:
        if (!$allow_css_tweaks && preg_match('#\\s((style|class|id)\\s*=)#i', $check, $matches)) {
            $Messages->add(T_('Illegal CSS markup found: ') . htmlspecialchars($matches[1]), 'error');
            $error = true;
        }

        // CHECK JAVASCRIPT:
        if (!$allow_javascript
            && (preg_match('~( < \\s* /? \\s* (script|noscript) )~xi', $check, $matches)
                || preg_match('#\\s((on[a-z]+)\\s*=)#i', $check, $matches)
                || preg_match('#=["\'\\s]*((javascript|vbscript|about):)#i', $check, $matches))) {
            $Messages->add(T_('Illegal javascript markup found: ') . htmlspecialchars($matches[1]), 'error');
            $error = true;
        }

        // CHECK IFRAMES:
        if (!$allow_iframes && preg_match('~( < \\s* /? \\s* (frame|iframe) )~xi', $check, $matches)) {
            $Messages->add(T_('Illegal frame markup found: ') . htmlspecialchars($matches[1]), 'error');
            $error = true;
        }

        // CHECK OBJECTS:
        if (!$allow_objects && preg_match('~( < \\s* /? \\s* (applet|object|param|embed) )~xi', $check, $matches)) {
            $Messages->add(T_('Illegal object markup found: ') . htmlspecialchars($matches[1]), 'error');
            $error = true;
        }
    }

    if ($error) {
        // $Group is only set in the posting contexts, hence the empty() guard
        if (!empty($current_User) && !empty($Group) && $current_User->check_perm('users', 'edit', false)) {
            $Messages->add(sprintf(T_('(Note: To get rid of the above validation warnings, you can deactivate unwanted validation rules in your <a %s>Group settings</a>.)'), 'href="' . $admin_url . '?ctrl=users&grp_ID=' . $Group->ID . '"'), 'error');
        }
        return false;
    }

    // Return sanitized content
    return $content;
}
/**
 * Check raw HTML input for different levels of sanity including:
 * - XHTML validation
 * - Javascript injection
 * - antispam
 *
 * Also cleans up the content on some levels:
 * - trimming
 * - balancing tags
 *
 * WARNING: this does *NOT* (necessarilly) make the HTML code safe.
 * It only checks on it and produces error messages.
 * It is NOT (necessarily) safe to use the output.
 *
 * Permissions are read from the Group of the given User in the 'posting' and
 * 'xmlrpc_posting' contexts and are fixed (or read from global settings) in the
 * 'commenting', 'general_array_params' and 'head_extension' contexts. Any
 * other context ends in debug_die(). Error messages are added to $Messages
 * except in the two non-verbose contexts.
 *
 * @param string The content to format
 * @param string Context: 'posting', 'xmlrpc_posting', 'commenting', 'general_array_params' or 'head_extension'
 * @param User User (used for "posting" and "xmlrpc_posting" context). Default: $current_User
 * @param string Encoding (passed on to XHTML_Validator only!)
 * @return boolean|string FALSE if an error was found, the cleaned up content otherwise
 */
function check_html_sanity($content, $context = 'posting', $User = NULL, $encoding = NULL)
{
    global $use_balanceTags, $admin_url;
    global $io_charset, $use_xhtmlvalidation_for_comments, $comment_allowed_tags, $comments_allow_css_tweaks;
    global $Messages;

    if (empty($User)) {
        /**
         * @var User
         */
        global $current_User;
        $User = $current_User;
    }

    // Add error messages
    $verbose = true;

    switch ($context) {
        case 'posting':
        case 'xmlrpc_posting':
            $Group = $User->get_Group();
            if ($context == 'posting') {
                $xhtmlvalidation = $Group->perm_xhtmlvalidation == 'always';
            } else {
                $xhtmlvalidation = $Group->perm_xhtmlvalidation_xmlrpc == 'always';
            }
            $allow_css_tweaks = $Group->perm_xhtml_css_tweaks;
            $allow_javascript = $Group->perm_xhtml_javascript;
            $allow_iframes = $Group->perm_xhtml_iframes;
            $allow_objects = $Group->perm_xhtml_objects;
            $bypass_antispam = $Group->perm_bypass_antispam;
            break;

        case 'commenting':
            $xhtmlvalidation = $use_xhtmlvalidation_for_comments;
            $allow_css_tweaks = $comments_allow_css_tweaks;
            $allow_javascript = false;
            $allow_iframes = false;
            $allow_objects = false;
            // fp> I don't know if it makes sense to bypass antispam in commenting context if the user has that kind of permissions.
            // If so, then we also need to bypass in several other places.
            $bypass_antispam = false;
            break;

        case 'general_array_params':
            $xhtmlvalidation = false;
            $allow_css_tweaks = true;
            $allow_javascript = false;
            $allow_iframes = false;
            $allow_objects = false;
            $bypass_antispam = false;
            // Do not add error messages in this context
            $verbose = false;
            break;

        case 'head_extension':
            $xhtmlvalidation = true;
            // We disable everything else, because the XMHTML validator will set explicit rules for the 'head_extension' context
            $allow_css_tweaks = false;
            $allow_javascript = false;
            $allow_iframes = false;
            $allow_objects = false;
            $bypass_antispam = false;
            // Do not add error messages in this context
            $verbose = false;
            break;

        default:
            debug_die('unknown context: ' . $context);
    }

    $error = false;

    // Replace any & that is not a character or entity reference with &amp;
    // (the replacement used to be a bare '&', which made this a no-op)
    $content = preg_replace('/&(?!#[0-9]+;|#x[0-9a-fA-F]+;|[a-zA-Z_:][a-zA-Z0-9._:-]*;)/', '&amp;', $content);

    // ANTISPAM check:
    $error = !$bypass_antispam && ($block = antispam_check($content));
    if ($error && $verbose) {
        // Add error message
        if ($context == 'commenting') {
            // Do not reveal the blacklisted word to commenters
            $errmsg = T_('Illegal content found (spam?).');
        } elseif ($context == 'xmlrpc_posting') {
            // The word is not HTML-escaped for XML-RPC clients
            $errmsg = sprintf(T_('Illegal content found: blacklisted word "%s".'), $block);
        } else {
            $errmsg = sprintf(T_('Illegal content found: blacklisted word «%s».'), htmlspecialchars($block));
        }
        $Messages->add($errmsg, 'error');
    }

    $content = trim($content);

    if ($use_balanceTags && $context != 'general_array_params') {
        // Auto close open tags:
        // Auto close only if the content is NOT from a general array param where open and closed html tags may appear separately
        $content = balance_tags($content);
    }

    if ($xhtmlvalidation) {
        // We want to validate XHTML:
        load_class('xhtml_validator/_xhtml_validator.class.php', 'XHTML_Validator');
        $XHTML_Validator = new XHTML_Validator($context, $allow_css_tweaks, $allow_iframes, $allow_javascript, $allow_objects, $encoding);
        if (!$XHTML_Validator->check($content)) {
            $error = true;
        }
    } else {
        // We do not WANT to validate XHTML, fall back to basic security checking:
        // This is only as strong as its regexps can parse xhtml. This is significantly inferior to the XHTML checker above.
        // The only advantage of this checker is that it can check for a little security without requiring VALID XHTML.
        if ($context == 'commenting') {
            // DEPRECATED but still...
            // echo 'allowed tags:',htmlspecialchars($comment_allowed_tags);
            $content = strip_tags($content, $comment_allowed_tags);
        }

        // Security checking:
        $check = $content;

        // Open comments or '<![CDATA[' are dangerous
        $check = str_replace('<!', '<', $check);

        // # # and ~ ~ are delimiters
        // i modifier at the end means caseless, x means whitespace in the pattern is ignored
        // The tag patterns use "/?" (optional slash) so that OPENING tags are caught too;
        // the former "//?" required at least one slash and only matched closing tags.

        // CHECK Styling restictions:
        $css_tweaks_error = !$allow_css_tweaks && preg_match('#\\s((style|class|id)\\s*=)#i', $check, $matches);
        if ($css_tweaks_error && $verbose) {
            $Messages->add(T_('Illegal CSS markup found: ') . htmlspecialchars($matches[1]), 'error');
        }

        // CHECK JAVASCRIPT:
        $javascript_error = !$allow_javascript
            && (preg_match('~( < \\s* /? \\s* (script|noscript) )~xi', $check, $matches)
                || preg_match('#\\s((on[a-z]+)\\s*=)#i', $check, $matches)
                || preg_match('#=["\'\\s]*((javascript|vbscript|about):)#i', $check, $matches));
        if ($javascript_error && $verbose) {
            $Messages->add(T_('Illegal javascript markup found: ') . htmlspecialchars($matches[1]), 'error');
        }

        // CHECK IFRAMES:
        $iframe_error = !$allow_iframes && preg_match('~( < \\s* /? \\s* (frame|iframe) )~xi', $check, $matches);
        if ($iframe_error && $verbose) {
            $Messages->add(T_('Illegal frame markup found: ') . htmlspecialchars($matches[1]), 'error');
        }

        // CHECK OBJECTS:
        $object_error = !$allow_objects && preg_match('~( < \\s* /? \\s* (applet|object|param|embed) )~xi', $check, $matches);
        if ($object_error && $verbose) {
            $Messages->add(T_('Illegal object markup found: ') . htmlspecialchars($matches[1]), 'error');
        }

        // Set the final error value based on all of the results
        $error = $error || $css_tweaks_error || $javascript_error || $iframe_error || $object_error;
    }

    if ($error) {
        // $Group is only set in the posting contexts, hence the empty() guard
        if ($verbose && !empty($User) && !empty($Group) && $User->check_perm('users', 'edit', false)) {
            $Messages->add(sprintf(T_('(Note: To get rid of the above validation warnings, you can deactivate unwanted validation rules in your <a %s>Group settings</a>.)'), 'href="' . $admin_url . '?ctrl=groups&grp_ID=' . $Group->ID . '"'), 'error');
        }
        return false;
    }

    // Return sanitized content
    return $content;
}