/**
 * Search only inside HTML elements for shortcodes and process them.
 *
 * Any [ or ] characters remaining inside elements will be HTML encoded
 * to prevent interference with shortcodes that are outside the elements.
 * Assumes $content processed by KSES already.  Users with unfiltered_html
 * capability may get unexpected output if angle braces are nested in tags.
 *
 * @since 4.2.3
 *
 * @param string $content     Content to search for shortcodes
 * @param bool   $ignore_html When true, all square braces inside elements will be encoded.
 * @return string Content with shortcodes filtered out.
 */
function do_shortcodes_in_html_tags( $content, $ignore_html ) {
	// Normalize entities in unfiltered HTML before adding placeholders:
	// bracket entities already present in the input are rewritten to their
	// zero-padded form so they cannot be confused with the entities that
	// this function writes below.
	$trans   = array(
		'&#91;' => '&#091;',
		'&#93;' => '&#093;',
	);
	$content = strtr( $content, $trans );

	// From here on $trans encodes literal square braces as entities.
	$trans = array(
		'[' => '&#91;',
		']' => '&#93;',
	);

	$pattern         = get_shortcode_regex();
	$shortcode_regex = "/{$pattern}/s";
	$textarr         = wp_html_split( $content );

	foreach ( $textarr as &$element ) {
		// Only pieces that start with "<" are elements; plain text is left alone.
		if ( '' == $element || '<' !== $element[0] ) {
			continue;
		}

		$noopen  = false === strpos( $element, '[' );
		$noclose = false === strpos( $element, ']' );
		if ( $noopen || $noclose ) {
			// This element does not contain shortcodes.
			if ( $noopen xor $noclose ) {
				// Need to encode stray [ or ] chars.
				$element = strtr( $element, $trans );
			}
			continue;
		}

		if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) || '<![CDATA[' === substr( $element, 0, 9 ) ) {
			// Encode all [ and ] chars.
			$element = strtr( $element, $trans );
			continue;
		}

		$attributes = wp_kses_attr_parse( $element );
		if ( false === $attributes ) {
			// Some plugins are doing things like [name] <[email]>.
			if ( 1 === preg_match( '%^<\\s*\\[\\[?[^\\[\\]]+\\]%', $element ) ) {
				$element = preg_replace_callback( $shortcode_regex, 'do_shortcode_tag', $element );
			}

			// Looks like we found some crazy unfiltered HTML.  Skipping it for sanity.
			$element = strtr( $element, $trans );
			continue;
		}

		// Get element name. The first and last pieces of the parsed element
		// are kept aside and glued back unchanged after the attributes.
		$front   = array_shift( $attributes );
		$back    = array_pop( $attributes );
		$matches = array();
		preg_match( '%[a-zA-Z0-9]+%', $front, $matches );
		// Avoid an undefined-offset notice if no name could be extracted.
		$elname = isset( $matches[0] ) ? $matches[0] : '';

		// Look for shortcodes in each attribute separately.
		foreach ( $attributes as &$attr ) {
			$open  = strpos( $attr, '[' );
			$close = strpos( $attr, ']' );
			if ( false === $open || false === $close ) {
				continue; // Go to next attribute.  Square braces will be escaped at end of loop.
			}

			$double = strpos( $attr, '"' );
			$single = strpos( $attr, "'" );
			if ( ( false === $single || $open < $single ) && ( false === $double || $open < $double ) ) {
				// $attr like '[shortcode]' or 'name = [shortcode]' implies unfiltered_html.
				// In this specific situation we assume KSES did not run because the input
				// was written by an administrator, so we should avoid changing the output
				// and we do not need to run KSES here.
				$attr = preg_replace_callback( $shortcode_regex, 'do_shortcode_tag', $attr );
			} else {
				// $attr like 'name = "[shortcode]"' or "name = '[shortcode]'"
				// We do not know if $content was unfiltered. Assume KSES ran before shortcodes.
				$count    = 0;
				$new_attr = preg_replace_callback( $shortcode_regex, 'do_shortcode_tag', $attr, -1, $count );
				if ( $count > 0 ) {
					// Sanitize the shortcode output using KSES.
					$new_attr = wp_kses_one_attr( $new_attr, $elname );
					if ( '' !== trim( $new_attr ) ) {
						// The shortcode is safe to use now.
						$attr = $new_attr;
					}
				}
			}
		}
		// Drop the reference so later writes to $attr cannot touch $attributes.
		unset( $attr );

		$element = $front . implode( '', $attributes ) . $back;

		// Now encode any remaining [ or ] chars.
		$element = strtr( $element, $trans );
	}
	// Drop the reference so later writes to $element cannot touch $textarr.
	unset( $element );

	$content = implode( '', $textarr );

	return $content;
}
/**
 * Search only inside HTML elements for shortcodes and process them.
 *
 * Any [ or ] characters remaining inside elements will be HTML encoded
 * to prevent interference with shortcodes that are outside the elements.
 * Assumes $content processed by KSES already.  Users with unfiltered_html
 * capability may get unexpected output if angle braces are nested in tags.
 *
 * @since 4.2.3
 *
 * @param string $content     Content to search for shortcodes
 * @param bool   $ignore_html When true, all square braces inside elements will be encoded.
 * @return string Content with shortcodes filtered out.
 */
function do_shortcodes_in_html_tags( $content, $ignore_html ) {
	// Normalize entities in unfiltered HTML before adding placeholders:
	// bracket entities already present in the input are rewritten to their
	// zero-padded form so they cannot be confused with the entities that
	// this function writes below.
	$trans   = array(
		'&#91;' => '&#091;',
		'&#93;' => '&#093;',
	);
	$content = strtr( $content, $trans );

	// From here on $trans encodes literal square braces as entities.
	$trans = array(
		'[' => '&#91;',
		']' => '&#93;',
	);

	$pattern         = get_shortcode_regex();
	$shortcode_regex = "/$pattern/s";

	$comment_regex =
		  '!'           // Start of comment, after the <.
		. '(?:'         // Unroll the loop: Consume everything until --> is found.
		.     '-(?!->)' // Dash not followed by end of comment.
		.     '[^\-]*+' // Consume non-dashes.
		. ')*+'         // Loop possessively.
		. '(?:-->)?';   // End of comment. If not found, match all input.

	$regex =
		  '/('                   // Capture the entire match.
		.     '<'                // Find start of element.
		.     '(?(?=!--)'        // Is this a comment?
		.         $comment_regex // Find end of comment.
		.     '|'
		.         '[^>]*>?'      // Find end of element. If not found, match all input.
		.     ')'
		. ')/s';

	// Split into alternating text and element pieces; empty pieces are dropped,
	// so every piece has at least one character.
	$textarr = preg_split( $regex, $content, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
	if ( false === $textarr ) {
		// The split failed; hand back the normalized content untouched
		// rather than iterating over a non-array.
		return $content;
	}

	foreach ( $textarr as &$element ) {
		// Only pieces that start with "<" are elements; plain text is left alone.
		if ( '<' !== $element[0] ) {
			continue;
		}

		$noopen  = false === strpos( $element, '[' );
		$noclose = false === strpos( $element, ']' );
		if ( $noopen || $noclose ) {
			// This element does not contain shortcodes.
			if ( $noopen xor $noclose ) {
				// Need to encode stray [ or ] chars.
				$element = strtr( $element, $trans );
			}
			continue;
		}

		if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) ) {
			// Encode all [ and ] chars.
			$element = strtr( $element, $trans );
			continue;
		}

		$attributes = wp_kses_attr_parse( $element );
		if ( false === $attributes ) {
			// Looks like we found some crazy unfiltered HTML.  Skipping it for sanity.
			$element = strtr( $element, $trans );
			continue;
		}

		// Get element name. The first and last pieces of the parsed element
		// are kept aside and glued back unchanged after the attributes.
		$front   = array_shift( $attributes );
		$back    = array_pop( $attributes );
		$matches = array();
		preg_match( '%[a-zA-Z0-9]+%', $front, $matches );
		// Avoid an undefined-offset notice if no name could be extracted.
		$elname = isset( $matches[0] ) ? $matches[0] : '';

		// Look for shortcodes in each attribute separately.
		foreach ( $attributes as &$attr ) {
			$open  = strpos( $attr, '[' );
			$close = strpos( $attr, ']' );
			if ( false === $open || false === $close ) {
				continue; // Go to next attribute.  Square braces will be escaped at end of loop.
			}

			$double = strpos( $attr, '"' );
			$single = strpos( $attr, "'" );
			if ( ( false === $single || $open < $single ) && ( false === $double || $open < $double ) ) {
				// $attr like '[shortcode]' or 'name = [shortcode]' implies unfiltered_html.
				// In this specific situation we assume KSES did not run because the input
				// was written by an administrator, so we should avoid changing the output
				// and we do not need to run KSES here.
				$attr = preg_replace_callback( $shortcode_regex, 'do_shortcode_tag', $attr );
			} else {
				// $attr like 'name = "[shortcode]"' or "name = '[shortcode]'"
				// We do not know if $content was unfiltered. Assume KSES ran before shortcodes.
				$count    = 0;
				$new_attr = preg_replace_callback( $shortcode_regex, 'do_shortcode_tag', $attr, -1, $count );
				if ( $count > 0 ) {
					// Sanitize the shortcode output using KSES.
					$new_attr = wp_kses_one_attr( $new_attr, $elname );
					// Only adopt the sanitized value when something other than
					// whitespace survives; otherwise keep the original attribute.
					if ( '' !== trim( $new_attr ) ) {
						// The shortcode is safe to use now.
						$attr = $new_attr;
					}
				}
			}
		}
		// Drop the reference so later writes to $attr cannot touch $attributes.
		unset( $attr );

		$element = $front . implode( '', $attributes ) . $back;

		// Now encode any remaining [ or ] chars.
		$element = strtr( $element, $trans );
	}
	// Drop the reference so later writes to $element cannot touch $textarr.
	unset( $element );

	$content = implode( '', $textarr );

	return $content;
}
/**
 * Runs wp_kses_one_attr() on one data set and compares the result
 * with the expected string.
 *
 * @dataProvider data_one_attr
 *
 * @param string $element Element name handed to wp_kses_one_attr() as its second argument.
 * @param string $input   Attribute string handed to wp_kses_one_attr() as its first argument.
 * @param string $output  Value the call is expected to produce.
 */
function test_one_attr( $element, $input, $output ) {
	$actual = wp_kses_one_attr( $input, $element );

	return $this->assertEquals( $output, $actual );
}