Example #1
0
/**
 * Replaces common plain text characters with formatted entities.
 *
 * As an example,
 *
 *     'cause today's effort makes it worth tomorrow's "holiday" ...
 *
 * Becomes:
 *
 *     ’cause today’s effort makes it worth tomorrow’s “holiday” …
 *
 * Text inside certain HTML elements and shortcodes (see the 'no_texturize_tags'
 * and 'no_texturize_shortcodes' filters) is skipped, as are HTML comments and
 * escaped shortcodes.
 *
 * Do not use this function before the 'init' action hook; everything will break.
 *
 * The translated entities and replacement patterns are built on the first call
 * (or whenever $reset is true) and cached in static variables afterwards.
 *
 * @since 0.71
 *
 * @global array $wp_cockneyreplace Array of formatted entities for certain common phrases
 * @global array $shortcode_tags
 * @staticvar array  $static_characters
 * @staticvar array  $static_replacements
 * @staticvar array  $dynamic_characters
 * @staticvar array  $dynamic_replacements
 * @staticvar array  $default_no_texturize_tags
 * @staticvar array  $default_no_texturize_shortcodes
 * @staticvar bool   $run_texturize
 * @staticvar string $apos
 * @staticvar string $prime
 * @staticvar string $double_prime
 * @staticvar string $opening_quote
 * @staticvar string $closing_quote
 * @staticvar string $opening_single_quote
 * @staticvar string $closing_single_quote
 * @staticvar string $open_q_flag
 * @staticvar string $open_sq_flag
 * @staticvar string $apos_flag
 *
 * @param string $text The text to be formatted
 * @param bool   $reset Set to true for unit testing. Translated patterns will reset.
 * @return string The string replaced with html entities. The input is returned unchanged
 *                when it is empty, when the 'run_wptexturize' filter returned false, or
 *                when the text could not be split into segments.
 */
function wptexturize($text, $reset = false)
{
    global $wp_cockneyreplace, $shortcode_tags;
    static $static_characters = null, $static_replacements = null, $dynamic_characters = null, $dynamic_replacements = null, $default_no_texturize_tags = null, $default_no_texturize_shortcodes = null, $run_texturize = true, $apos = null, $prime = null, $double_prime = null, $opening_quote = null, $closing_quote = null, $opening_single_quote = null, $closing_single_quote = null, $open_q_flag = '<!--oq-->', $open_sq_flag = '<!--osq-->', $apos_flag = '<!--apos-->';
    // If there's nothing to do, just stop.
    if (empty($text) || false === $run_texturize) {
        return $text;
    }
    // Set up static variables. Run once only.
    if ($reset || !isset($static_characters)) {
        /**
         * Filter whether to skip running wptexturize().
         *
         * Passing false to the filter will effectively short-circuit wptexturize().
         * returning the original text passed to the function instead.
         *
         * The filter runs only once, the first time wptexturize() is called.
         *
         * @since 4.0.0
         *
         * @see wptexturize()
         *
         * @param bool $run_texturize Whether to short-circuit wptexturize().
         */
        $run_texturize = apply_filters('run_wptexturize', $run_texturize);
        if (false === $run_texturize) {
            return $text;
        }
        /* translators: opening curly double quote */
        $opening_quote = _x('&#8220;', 'opening curly double quote');
        /* translators: closing curly double quote */
        $closing_quote = _x('&#8221;', 'closing curly double quote');
        /* translators: apostrophe, for example in 'cause or can't */
        $apos = _x('&#8217;', 'apostrophe');
        /* translators: prime, for example in 9' (nine feet) */
        $prime = _x('&#8242;', 'prime');
        /* translators: double prime, for example in 9" (nine inches) */
        $double_prime = _x('&#8243;', 'double prime');
        /* translators: opening curly single quote */
        $opening_single_quote = _x('&#8216;', 'opening curly single quote');
        /* translators: closing curly single quote */
        $closing_single_quote = _x('&#8217;', 'closing curly single quote');
        /* translators: en dash */
        $en_dash = _x('&#8211;', 'en dash');
        /* translators: em dash */
        $em_dash = _x('&#8212;', 'em dash');
        $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt');
        $default_no_texturize_shortcodes = array('code');
        // if a plugin has provided an autocorrect array, use it
        if (isset($wp_cockneyreplace)) {
            $cockney = array_keys($wp_cockneyreplace);
            $cockneyreplace = array_values($wp_cockneyreplace);
        } else {
            /* translators: This is a comma-separated list of words that defy the syntax of quotations in normal use,
             * for example...  'We do not have enough words yet' ... is a typical quoted phrase.  But when we write
             * lines of code 'til we have enough of 'em, then we need to insert apostrophes instead of quotes.
             */
            $cockney = explode(',', _x("'tain't,'twere,'twas,'tis,'twill,'til,'bout,'nuff,'round,'cause,'em", 'Comma-separated list of words to texturize in your language'));
            $cockneyreplace = explode(',', _x('&#8217;tain&#8217;t,&#8217;twere,&#8217;twas,&#8217;tis,&#8217;twill,&#8217;til,&#8217;bout,&#8217;nuff,&#8217;round,&#8217;cause,&#8217;em', 'Comma-separated list of replacement words in your language'));
        }
        // Literal (non-regex) replacements, applied with str_replace() below.
        $static_characters = array_merge(array('...', '``', '\'\'', ' (tm)'), $cockney);
        $static_replacements = array_merge(array('&#8230;', $opening_quote, $closing_quote, ' &#8482;'), $cockneyreplace);
        // Pattern-based replacements of characters.
        // Sort the remaining patterns into several arrays for performance tuning.
        $dynamic_characters = array('apos' => array(), 'quote' => array(), 'dash' => array());
        $dynamic_replacements = array('apos' => array(), 'quote' => array(), 'dash' => array());
        $dynamic = array();
        $spaces = wp_spaces_regexp();
        // The patterns below insert the *_flag placeholders rather than the final entities;
        // the placeholders are swapped for the real entities after wptexturize_primes() has run.
        // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation.
        if ("'" !== $apos || "'" !== $closing_single_quote) {
            $dynamic['/\'(\\d\\d)\'(?=\\Z|[.,:;!?)}\\-\\]]|&gt;|' . $spaces . ')/'] = $apos_flag . '$1' . $closing_single_quote;
        }
        if ("'" !== $apos || '"' !== $closing_quote) {
            $dynamic['/\'(\\d\\d)"(?=\\Z|[.,:;!?)}\\-\\]]|&gt;|' . $spaces . ')/'] = $apos_flag . '$1' . $closing_quote;
        }
        // '99 '99s '99's (apostrophe)  But never '9 or '99% or '999 or '99.0.
        if ("'" !== $apos) {
            $dynamic['/\'(?=\\d\\d(?:\\Z|(?![%\\d]|[.,]\\d)))/'] = $apos_flag;
        }
        // Quoted Numbers like '0.42'
        if ("'" !== $opening_single_quote && "'" !== $closing_single_quote) {
            $dynamic['/(?<=\\A|' . $spaces . ')\'(\\d[.,\\d]*)\'/'] = $open_sq_flag . '$1' . $closing_single_quote;
        }
        // Single quote at start, or preceded by (, {, <, [, ", -, or spaces.
        if ("'" !== $opening_single_quote) {
            $dynamic['/(?<=\\A|[([{"\\-]|&lt;|' . $spaces . ')\'/'] = $open_sq_flag;
        }
        // Apostrophe in a word.  No spaces, double apostrophes, or other punctuation.
        if ("'" !== $apos) {
            $dynamic['/(?<!' . $spaces . ')\'(?!\\Z|[.,:;!?"\'(){}[\\]\\-]|&[lg]t;|' . $spaces . ')/'] = $apos_flag;
        }
        $dynamic_characters['apos'] = array_keys($dynamic);
        $dynamic_replacements['apos'] = array_values($dynamic);
        $dynamic = array();
        // Quoted Numbers like "42"
        if ('"' !== $opening_quote && '"' !== $closing_quote) {
            $dynamic['/(?<=\\A|' . $spaces . ')"(\\d[.,\\d]*)"/'] = $open_q_flag . '$1' . $closing_quote;
        }
        // Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces.
        if ('"' !== $opening_quote) {
            $dynamic['/(?<=\\A|[([{\\-]|&lt;|' . $spaces . ')"(?!' . $spaces . ')/'] = $open_q_flag;
        }
        $dynamic_characters['quote'] = array_keys($dynamic);
        $dynamic_replacements['quote'] = array_values($dynamic);
        $dynamic = array();
        // Dashes and spaces
        $dynamic['/---/'] = $em_dash;
        $dynamic['/(?<=^|' . $spaces . ')--(?=$|' . $spaces . ')/'] = $em_dash;
        $dynamic['/(?<!xn)--/'] = $en_dash;
        $dynamic['/(?<=^|' . $spaces . ')-(?=$|' . $spaces . ')/'] = $en_dash;
        $dynamic_characters['dash'] = array_keys($dynamic);
        $dynamic_replacements['dash'] = array_values($dynamic);
    }
    // Must do this every time in case plugins use these filters in a context sensitive manner
    /**
     * Filter the list of HTML elements not to texturize.
     *
     * @since 2.8.0
     *
     * @param array $default_no_texturize_tags An array of HTML element names.
     */
    $no_texturize_tags = apply_filters('no_texturize_tags', $default_no_texturize_tags);
    /**
     * Filter the list of shortcodes not to texturize.
     *
     * @since 2.8.0
     *
     * @param array $default_no_texturize_shortcodes An array of shortcode names.
     */
    $no_texturize_shortcodes = apply_filters('no_texturize_shortcodes', $default_no_texturize_shortcodes);
    $no_texturize_tags_stack = array();
    $no_texturize_shortcodes_stack = array();
    // Look for shortcodes and HTML elements.
    // Only registered shortcode names that actually appear in the text are fed into the regex.
    preg_match_all('@\\[/?([^<>&/\\[\\]\\x00-\\x20]++)@', $text, $matches);
    $tagnames = array_intersect(array_keys($shortcode_tags), $matches[1]);
    $found_shortcodes = !empty($tagnames);
    $shortcode_regex = $found_shortcodes ? _get_wptexturize_shortcode_regex($tagnames) : '';
    $regex = _get_wptexturize_split_regex($shortcode_regex);
    $textarr = preg_split($regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
    if (false === $textarr) {
        // preg_split() returns false on failure. Iterating and imploding that value would
        // lose the text (or raise a TypeError on PHP 8), so hand back the input untouched.
        return $text;
    }
    foreach ($textarr as &$curl) {
        // Only call _wptexturize_pushpop_element if $curl is a delimiter.
        // PREG_SPLIT_NO_EMPTY guarantees every segment has at least one character.
        $first = $curl[0];
        if ('<' === $first) {
            if ('<!--' === substr($curl, 0, 4)) {
                // This is an HTML comment delimiter.
                continue;
            } else {
                // This is an HTML element delimiter.
                // Replace each & with &#038; unless it already looks like an entity.
                $curl = preg_replace('/&(?!#(?:\\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $curl);
                _wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags);
            }
        } elseif ('' === trim($curl)) {
            // This is a newline between delimiters.  Performance improves when we check this.
            continue;
        } elseif ('[' === $first && $found_shortcodes && 1 === preg_match('/^' . $shortcode_regex . '$/', $curl)) {
            // This is a shortcode delimiter.
            if ('[[' !== substr($curl, 0, 2) && ']]' !== substr($curl, -2)) {
                // Looks like a normal shortcode.
                _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes);
            } else {
                // Looks like an escaped shortcode.
                continue;
            }
        } elseif (empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack)) {
            // This is neither a delimiter, nor is this content inside of no_texturize pairs.  Do texturize.
            $curl = str_replace($static_characters, $static_replacements, $curl);
            // Each regex group only runs when its trigger character is present in the segment.
            if (false !== strpos($curl, "'")) {
                $curl = preg_replace($dynamic_characters['apos'], $dynamic_replacements['apos'], $curl);
                $curl = wptexturize_primes($curl, "'", $prime, $open_sq_flag, $closing_single_quote);
                // Swap the placeholders for the translated entities.
                $curl = str_replace($apos_flag, $apos, $curl);
                $curl = str_replace($open_sq_flag, $opening_single_quote, $curl);
            }
            if (false !== strpos($curl, '"')) {
                $curl = preg_replace($dynamic_characters['quote'], $dynamic_replacements['quote'], $curl);
                $curl = wptexturize_primes($curl, '"', $double_prime, $open_q_flag, $closing_quote);
                $curl = str_replace($open_q_flag, $opening_quote, $curl);
            }
            if (false !== strpos($curl, '-')) {
                $curl = preg_replace($dynamic_characters['dash'], $dynamic_replacements['dash'], $curl);
            }
            // 9x9 (times), but never 0x9999
            if (1 === preg_match('/(?<=\\d)x\\d/', $curl)) {
                // Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one!
                $curl = preg_replace('/\\b(\\d(?(?<=0)[\\d\\.,]+|[\\d\\.,]*))x(\\d[\\d\\.,]*)\\b/', '$1&#215;$2', $curl);
            }
            // Replace each & with &#038; unless it already looks like an entity.
            $curl = preg_replace('/&(?!#(?:\\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $curl);
        }
    }
    // Break the by-reference binding so $curl no longer aliases the last array element.
    unset($curl);
    return implode('', $textarr);
}
 /**
  * Automated performance testing of the main regex.
  *
  * Benchmarks the wptexturize split regex against the given input twice: once
  * built without a shortcode pattern and once built from every registered
  * shortcode tag. Each benchmark result must be below 200.
  *
  * @dataProvider data_whole_posts
  *
  * @param string $input Text to run the regex against, supplied by the data provider.
  */
 function test_pcre_performance($input)
 {
     global $shortcode_tags;

     // Pass 1: split regex with shortcodes disabled.
     $plain_regex = _get_wptexturize_split_regex();
     $plain_score = benchmark_pcre_backtracking($plain_regex, $input, 'split');
     $this->assertLessThan(200, $plain_score);

     // Pass 2: split regex with all registered shortcodes enabled.
     $registered_tags = array_keys($shortcode_tags);
     $tags_pattern = _get_wptexturize_shortcode_regex($registered_tags);
     $full_regex = _get_wptexturize_split_regex($tags_pattern);
     $full_score = benchmark_pcre_backtracking($full_regex, $input, 'split');

     return $this->assertLessThan(200, $full_score);
 }