/**
 * Replaces common plain text characters with formatted typographic characters.
 *
 * As an example,
 * <code>
 * 'cause today's effort makes it worth tomorrow's "holiday"...
 * </code>
 * Becomes:
 * <code>
 * ’cause today’s effort makes it worth tomorrow’s “holiday”…
 * </code>
 *
 * The text is split on HTML tags and bracketed shortcodes. Tag and shortcode
 * chunks are handed to _wptexturize_pushpop_element(); plain text chunks are
 * only texturized while both no-texturize stacks are empty.
 *
 * @since 0.71
 * @uses $wp_cockneyreplace Array of formatted entities for certain common phrases
 *
 * @param string $text The text to be formatted
 * @return string The string replaced with html entities
 */
function wptexturize($text) {
    global $wp_cockneyreplace;
    static $static_setup = false, $opening_quote, $closing_quote, $default_no_texturize_tags, $default_no_texturize_shortcodes, $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements;

    // No need to set up these variables more than once
    if (!$static_setup) {
        /* translators: opening curly quote */
        $opening_quote = _x('“', 'opening curly quote');
        /* translators: closing curly quote */
        $closing_quote = _x('”', 'closing curly quote');

        $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt');
        $default_no_texturize_shortcodes = array('code');

        // if a plugin has provided an autocorrect array, use it
        if (isset($wp_cockneyreplace)) {
            $cockney = array_keys($wp_cockneyreplace);
            $cockneyreplace = array_values($wp_cockneyreplace);
        } else {
            $cockney = array("'tain't", "'twere", "'twas", "'tis", "'twill", "'til", "'bout", "'nuff", "'round", "'cause");
            $cockneyreplace = array("’tain’t", "’twere", "’twas", "’tis", "’twill", "’til", "’bout", "’nuff", "’round", "’cause");
        }

        // Plain string replacements. Order matters: '--' is converted before
        // the 'xn–' entry turns the dash back for punycode prefixes.
        $static_characters = array_merge(array('---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)'), $cockney);
        $static_replacements = array_merge(array('—', ' — ', '–', ' – ', 'xn--', '…', $opening_quote, $closing_quote, ' ™'), $cockneyreplace);

        // Pattern => replacement, applied in this order by preg_replace().
        $dynamic = array(
            '/\'(\\d\\d(?:’|\')?s)/'         => '’$1',                // '99s and '99's
            '/\'(\\d+)/'                     => '’$1',                // '99
            '/(\\s|\\A|[([{<]|")\'/'         => '$1‘',                // opening single quote
            '/(\\d+)"/'                      => '$1″',                // 9" (double prime)
            '/(\\d+)\'/'                     => '$1′',                // 9' (prime)
            '/(\\S)\'([^\'\\s])/'            => '$1’$2',              // apostrophe in a word
            // The pattern has a single capture group, so only $1 is referenced.
            '/(\\s|\\A|[([{<])"(?!\\s)/'     => '$1' . $opening_quote, // opening double quote
            '/"(\\s|\\S|\\Z)/'               => $closing_quote . '$1', // closing double quote
            '/\'([\\s.]|\\Z)/'               => '’$1',                // closing single quote
            '/\\b(\\d+)x(\\d+)\\b/'          => '$1×$2',              // 9x9 (times)
        );
        $dynamic_characters = array_keys($dynamic);
        $dynamic_replacements = array_values($dynamic);

        $static_setup = true;
    }

    // Transform into regexp sub-expression used in _wptexturize_pushpop_element
    // Must do this every time in case plugins use these filters in a context sensitive manner
    $no_texturize_tags = '(' . implode('|', apply_filters('no_texturize_tags', $default_no_texturize_tags)) . ')';
    $no_texturize_shortcodes = '(' . implode('|', apply_filters('no_texturize_shortcodes', $default_no_texturize_shortcodes)) . ')';

    $no_texturize_tags_stack = array();
    $no_texturize_shortcodes_stack = array();

    // Split into alternating text / delimiter chunks (ungreedy, dot matches newline).
    $textarr = preg_split('/(<.*>|\\[.*\\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

    $output = '';
    foreach ($textarr as $curl) {
        if (!empty($curl)) {
            $first = $curl[0];
            if ('<' === $first) {
                // Tag chunk: update the no-texturize tag stack.
                _wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>');
            } elseif ('[' === $first) {
                // Shortcode chunk: update the no-texturize shortcode stack.
                _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']');
            } elseif (empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack)) {
                // This is not a tag, nor is the texturization disabled
                // static strings
                $curl = str_replace($static_characters, $static_replacements, $curl);
                // regular expressions
                $curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl);
            }
        }

        // Encode each bare & as &#038; unless it already looks like an entity.
        // (The replacement previously equalled the match, making this a no-op.)
        $curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&#038;$1', $curl);
        $output .= $curl;
    }

    return $output;
}
/**
 * Replaces common plain text characters with formatted typographic characters.
 *
 * As an example,
 * <code>
 * 'cause today's effort makes it worth tomorrow's "holiday"...
 * </code>
 * Becomes:
 * <code>
 * ’cause today’s effort makes it worth tomorrow’s “holiday”…
 * </code>
 *
 * Every replacement character is translatable; a replacement is skipped when
 * the translation equals the plain ASCII character. Tag and shortcode chunks
 * are handed to _wptexturize_pushpop_element(); plain text chunks are only
 * texturized while both no-texturize stacks are empty.
 *
 * @since 0.71
 * @uses $wp_cockneyreplace Array of formatted entities for certain common phrases
 *
 * @param string $text The text to be formatted
 * @return string The string replaced with html entities
 */
function wptexturize($text) {
    global $wp_cockneyreplace;
    static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements, $default_no_texturize_tags, $default_no_texturize_shortcodes;

    // No need to set up these static variables more than once
    if (!isset($static_characters)) {
        /* translators: opening curly double quote */
        $opening_quote = _x('“', 'opening curly double quote');
        /* translators: closing curly double quote */
        $closing_quote = _x('”', 'closing curly double quote');

        /* translators: apostrophe, for example in 'cause or can't */
        $apos = _x('’', 'apostrophe');

        /* translators: prime, for example in 9' (nine feet) */
        $prime = _x('′', 'prime');
        /* translators: double prime, for example in 9" (nine inches) */
        $double_prime = _x('″', 'double prime');

        /* translators: opening curly single quote */
        $opening_single_quote = _x('‘', 'opening curly single quote');
        /* translators: closing curly single quote */
        $closing_single_quote = _x('’', 'closing curly single quote');

        /* translators: en dash */
        $en_dash = _x('–', 'en dash');
        /* translators: em dash */
        $em_dash = _x('—', 'em dash');

        $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt');
        $default_no_texturize_shortcodes = array('code');

        // if a plugin has provided an autocorrect array, use it
        if (isset($wp_cockneyreplace)) {
            $cockney = array_keys($wp_cockneyreplace);
            $cockneyreplace = array_values($wp_cockneyreplace);
        } elseif ("'" !== $apos) {
            // Only bother if we're doing a replacement.
            $cockney = array("'tain't", "'twere", "'twas", "'tis", "'twill", "'til", "'bout", "'nuff", "'round", "'cause");
            $cockneyreplace = array($apos . "tain" . $apos . "t", $apos . "twere", $apos . "twas", $apos . "tis", $apos . "twill", $apos . "til", $apos . "bout", $apos . "nuff", $apos . "round", $apos . "cause");
        } else {
            $cockney = $cockneyreplace = array();
        }

        // Plain string replacements. Order matters: '--' is converted before
        // the 'xn–' entry turns the dash back for punycode prefixes.
        $static_characters = array_merge(array('---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)'), $cockney);
        $static_replacements = array_merge(array($em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '…', $opening_quote, $closing_quote, ' ™'), $cockneyreplace);

        // Pattern => replacement, applied in insertion order by preg_replace().
        $dynamic = array();

        if ("'" !== $apos) {
            $dynamic['/\'(\\d\\d(?:’|\')?s)/'] = $apos . '$1'; // '99's
            $dynamic['/\'(\\d)/'] = $apos . '$1'; // '99
        }
        if ("'" !== $opening_single_quote) {
            // opening single quote, even after (, {, <, [
            $dynamic['/(\\s|\\A|[([{<]|")\'/'] = '$1' . $opening_single_quote;
        }
        if ('"' !== $double_prime) {
            $dynamic['/(\\d)"/'] = '$1' . $double_prime; // 9" (double prime)
        }
        if ("'" !== $prime) {
            $dynamic['/(\\d)\'/'] = '$1' . $prime; // 9' (prime)
        }
        if ("'" !== $apos) {
            $dynamic['/(\\S)\'([^\'\\s])/'] = '$1' . $apos . '$2'; // apostrophe in a word
        }
        if ('"' !== $opening_quote) {
            // opening double quote, even after (, {, <, [
            // The pattern has a single capture group, so only $1 is referenced.
            $dynamic['/(\\s|\\A|[([{<])"(?!\\s)/'] = '$1' . $opening_quote;
        }
        if ('"' !== $closing_quote) {
            $dynamic['/"(\\s|\\S|\\Z)/'] = $closing_quote . '$1'; // closing double quote
        }
        if ("'" !== $closing_single_quote) {
            $dynamic['/\'([\\s.]|\\Z)/'] = $closing_single_quote . '$1'; // closing single quote
        }

        $dynamic['/\\b(\\d+)x(\\d+)\\b/'] = '$1×$2'; // 9x9 (times)

        $dynamic_characters = array_keys($dynamic);
        $dynamic_replacements = array_values($dynamic);
    }

    // Transform into regexp sub-expression used in _wptexturize_pushpop_element
    // Must do this every time in case plugins use these filters in a context sensitive manner
    $no_texturize_tags = '(' . implode('|', apply_filters('no_texturize_tags', $default_no_texturize_tags)) . ')';
    $no_texturize_shortcodes = '(' . implode('|', apply_filters('no_texturize_shortcodes', $default_no_texturize_shortcodes)) . ')';

    $no_texturize_tags_stack = array();
    $no_texturize_shortcodes_stack = array();

    // Split into alternating text / delimiter chunks (ungreedy, dot matches newline).
    $textarr = preg_split('/(<.*>|\\[.*\\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

    foreach ($textarr as &$curl) {
        if (empty($curl)) {
            continue;
        }

        // Only call _wptexturize_pushpop_element if first char is correct tag opening
        $first = $curl[0];
        if ('<' === $first) {
            _wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>');
        } elseif ('[' === $first) {
            _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']');
        } elseif (empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack)) {
            // This is not a tag, nor is the texturization disabled
            // static strings
            $curl = str_replace($static_characters, $static_replacements, $curl);
            // regular expressions
            $curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl);
        }

        // Encode each bare & as &#038; unless it already looks like an entity.
        // (The replacement previously equalled the match, making this a no-op.)
        $curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&#038;$1', $curl);
    }
    // Break the reference left behind by the by-reference foreach.
    unset($curl);

    return implode('', $textarr);
}
/**
 * Replaces common plain text characters with formatted typographic characters.
 *
 * As an example,
 *
 *     'cause today's effort makes it worth tomorrow's "holiday" ...
 *
 * Becomes:
 *
 *     ’cause today’s effort makes it worth tomorrow’s “holiday” …
 *
 * HTML comments and escaped shortcodes are passed through untouched. HTML
 * elements and registered shortcodes are handed to
 * _wptexturize_pushpop_element(); plain text chunks are only texturized while
 * both no-texturize stacks are empty.
 *
 * @since 0.71
 * @uses $wp_cockneyreplace Array of formatted entities for certain common phrases
 *
 * @param string $text The text to be formatted
 * @param bool $reset Set to true for unit testing. Translated patterns will reset.
 * @return string The string replaced with html entities
 */
function wptexturize($text, $reset = false) {
    global $wp_cockneyreplace, $shortcode_tags;
    static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements, $default_no_texturize_tags, $default_no_texturize_shortcodes, $run_texturize = true;

    // If there's nothing to do, just stop.
    if (empty($text) || false === $run_texturize) {
        return $text;
    }

    // Set up static variables. Run once only.
    if ($reset || !isset($static_characters)) {
        /**
         * Filter whether to skip running wptexturize().
         *
         * Passing false to the filter will effectively short-circuit wptexturize().
         * returning the original text passed to the function instead.
         *
         * The filter runs only once, the first time wptexturize() is called.
         *
         * @since 4.0.0
         *
         * @see wptexturize()
         *
         * @param bool $run_texturize Whether to short-circuit wptexturize().
         */
        $run_texturize = apply_filters('run_wptexturize', $run_texturize);
        if (false === $run_texturize) {
            return $text;
        }

        /* translators: opening curly double quote */
        $opening_quote = _x('“', 'opening curly double quote');
        /* translators: closing curly double quote */
        $closing_quote = _x('”', 'closing curly double quote');

        /* translators: apostrophe, for example in 'cause or can't */
        $apos = _x('’', 'apostrophe');

        /* translators: prime, for example in 9' (nine feet) */
        $prime = _x('′', 'prime');
        /* translators: double prime, for example in 9" (nine inches) */
        $double_prime = _x('″', 'double prime');

        /* translators: opening curly single quote */
        $opening_single_quote = _x('‘', 'opening curly single quote');
        /* translators: closing curly single quote */
        $closing_single_quote = _x('’', 'closing curly single quote');

        /* translators: en dash */
        $en_dash = _x('–', 'en dash');
        /* translators: em dash */
        $em_dash = _x('—', 'em dash');

        $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt');
        $default_no_texturize_shortcodes = array('code');

        // if a plugin has provided an autocorrect array, use it
        if (isset($wp_cockneyreplace)) {
            $cockney = array_keys($wp_cockneyreplace);
            $cockneyreplace = array_values($wp_cockneyreplace);
        } elseif ("'" != $apos) {
            // Only bother if we're doing a replacement.
            $cockney = array("'tain't", "'twere", "'twas", "'tis", "'twill", "'til", "'bout", "'nuff", "'round", "'cause", "'em");
            $cockneyreplace = array($apos . "tain" . $apos . "t", $apos . "twere", $apos . "twas", $apos . "tis", $apos . "twill", $apos . "til", $apos . "bout", $apos . "nuff", $apos . "round", $apos . "cause", $apos . "em");
        } else {
            $cockney = $cockneyreplace = array();
        }

        $static_characters = array_merge(array('...', '``', '\'\'', ' (tm)'), $cockney);
        $static_replacements = array_merge(array('…', $opening_quote, $closing_quote, ' ™'), $cockneyreplace);

        // Pattern-based replacements of characters.
        // Sort the remaining patterns into several arrays for performance tuning.
        $dynamic_characters = array('apos' => array(), 'quote' => array(), 'dash' => array());
        $dynamic_replacements = array('apos' => array(), 'quote' => array(), 'dash' => array());
        $dynamic = array();
        $spaces = wp_spaces_regexp();

        // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation.
        if ("'" !== $apos || "'" !== $closing_single_quote) {
            $dynamic['/\'(\\d\\d)\'(?=\\Z|[.,)}\\-\\]]|>|' . $spaces . ')/'] = $apos . '$1' . $closing_single_quote;
        }
        if ("'" !== $apos || '"' !== $closing_quote) {
            $dynamic['/\'(\\d\\d)"(?=\\Z|[.,)}\\-\\]]|>|' . $spaces . ')/'] = $apos . '$1' . $closing_quote;
        }

        // '99 '99s '99's (apostrophe)  But never '9 or '99% or '999 or '99.0.
        if ("'" !== $apos) {
            $dynamic['/\'(?=\\d\\d(?:\\Z|(?![%\\d]|[.,]\\d)))/'] = $apos;
        }

        // Quoted Numbers like '0.42'
        if ("'" !== $opening_single_quote && "'" !== $closing_single_quote) {
            $dynamic['/(?<=\\A|' . $spaces . ')\'(\\d[.,\\d]*)\'/'] = $opening_single_quote . '$1' . $closing_single_quote;
        }

        // Single quote at start, or preceded by (, {, <, [, ", -, or spaces.
        if ("'" !== $opening_single_quote) {
            $dynamic['/(?<=\\A|[([{"\\-]|<|' . $spaces . ')\'/'] = $opening_single_quote;
        }

        // Apostrophe in a word.  No spaces, double apostrophes, or other punctuation.
        if ("'" !== $apos) {
            $dynamic['/(?<!' . $spaces . ')\'(?!\\Z|[.,:;"\'(){}[\\]\\-]|&[lg]t;|' . $spaces . ')/'] = $apos;
        }

        // 9' (prime)
        if ("'" !== $prime) {
            $dynamic['/(?<=\\d)\'/'] = $prime;
        }

        // Single quotes followed by spaces or ending punctuation.
        if ("'" !== $closing_single_quote) {
            $dynamic['/\'(?=\\Z|[.,)}\\-\\]]|>|' . $spaces . ')/'] = $closing_single_quote;
        }

        $dynamic_characters['apos'] = array_keys($dynamic);
        $dynamic_replacements['apos'] = array_values($dynamic);
        $dynamic = array();

        // Quoted Numbers like "42"
        if ('"' !== $opening_quote && '"' !== $closing_quote) {
            $dynamic['/(?<=\\A|' . $spaces . ')"(\\d[.,\\d]*)"/'] = $opening_quote . '$1' . $closing_quote;
        }

        // 9" (double prime)
        if ('"' !== $double_prime) {
            $dynamic['/(?<=\\d)"/'] = $double_prime;
        }

        // Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces.
        if ('"' !== $opening_quote) {
            $dynamic['/(?<=\\A|[([{\\-]|<|' . $spaces . ')"(?!' . $spaces . ')/'] = $opening_quote;
        }

        // Any remaining double quotes.
        if ('"' !== $closing_quote) {
            $dynamic['/"/'] = $closing_quote;
        }

        $dynamic_characters['quote'] = array_keys($dynamic);
        $dynamic_replacements['quote'] = array_values($dynamic);
        $dynamic = array();

        // Dashes and spaces
        $dynamic['/---/'] = $em_dash;
        $dynamic['/(?<=^|' . $spaces . ')--(?=$|' . $spaces . ')/'] = $em_dash;
        $dynamic['/(?<!xn)--/'] = $en_dash;
        $dynamic['/(?<=^|' . $spaces . ')-(?=$|' . $spaces . ')/'] = $en_dash;

        $dynamic_characters['dash'] = array_keys($dynamic);
        $dynamic_replacements['dash'] = array_values($dynamic);
    }

    // Must do this every time in case plugins use these filters in a context sensitive manner
    /**
     * Filter the list of HTML elements not to texturize.
     *
     * @since 2.8.0
     *
     * @param array $default_no_texturize_tags An array of HTML element names.
     */
    $no_texturize_tags = apply_filters('no_texturize_tags', $default_no_texturize_tags);
    /**
     * Filter the list of shortcodes not to texturize.
     *
     * @since 2.8.0
     *
     * @param array $default_no_texturize_shortcodes An array of shortcode names.
     */
    $no_texturize_shortcodes = apply_filters('no_texturize_shortcodes', $default_no_texturize_shortcodes);

    $no_texturize_tags_stack = array();
    $no_texturize_shortcodes_stack = array();

    // Look for shortcodes and HTML elements.

    // Quote each registered tag name for use inside a '/'-delimited pattern.
    // The delimiter must be passed explicitly: preg_quote() does not escape
    // '/' on its own, and a tag name containing it would end the pattern early.
    $quoted_tagnames = array();
    foreach (array_keys($shortcode_tags) as $tagname) {
        $quoted_tagnames[] = preg_quote($tagname, '/');
    }
    $tagregexp = join('|', $quoted_tagnames);
    $tagregexp = "(?:{$tagregexp})(?![\\w-])"; // Excerpt of get_shortcode_regex().

    $comment_regex =
          '!'           // Start of comment, after the <.
        . '(?:'         // Consume everything until --> is found.
        .     '-(?!->)' // Dash not followed by end of comment.
        .     '[^\\-]*+' // Consume non-dashes.
        . ')*+'         // Loop possessively.
        . '(?:-->)?';   // End of comment. If not found, match all input.

    $shortcode_regex =
          '\\['              // Find start of shortcode.
        . '[\\/\\[]?'        // Shortcodes may begin with [/ or [[
        . $tagregexp         // Only match registered shortcodes.
        . '(?:'
        .     '[^\\[\\]<>]+' // Anything except brackets and angle brackets.
        . '|'
        .     '<[^\\[\\]>]*>' // A complete HTML element without brackets inside.
        . ')*+'              // Loop possessively.
        . '\\]'              // Find end of shortcode.
        . '\\]?';            // Shortcodes may end with ]]

    $regex =
          '/('                   // Capture the entire match.
        .     '<'                // Find start of element.
        .     '(?(?=!--)'        // Is this a comment?
        .         $comment_regex // Find end of comment.
        .     '|'
        .         '[^>]*>'       // Find end of element.
        .     ')'
        . '|'
        .     $shortcode_regex   // Find shortcodes.
        . ')/s';

    $textarr = preg_split($regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

    foreach ($textarr as &$curl) {
        // Only call _wptexturize_pushpop_element if $curl is a delimiter.
        $first = $curl[0];
        if ('<' === $first && '<!--' === substr($curl, 0, 4)) {
            // This is an HTML comment delimiter.
            continue;
        } elseif ('<' === $first && '>' === substr($curl, -1)) {
            // This is an HTML element delimiter.
            _wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags);
        } elseif ('' === trim($curl)) {
            // This is a newline between delimiters.  Performance improves when we check this.
            continue;
        } elseif ('[' === $first && 1 === preg_match('/^' . $shortcode_regex . '$/', $curl)) {
            // This is a shortcode delimiter.
            if ('[[' !== substr($curl, 0, 2) && ']]' !== substr($curl, -2)) {
                // Looks like a normal shortcode.
                _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes);
            } else {
                // Looks like an escaped shortcode.
                continue;
            }
        } elseif (empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack)) {
            // This is neither a delimiter, nor is this content inside of no_texturize pairs.  Do texturize.
            $curl = str_replace($static_characters, $static_replacements, $curl);

            // Each pattern group is skipped when its trigger character is absent.
            if (false !== strpos($curl, "'")) {
                $curl = preg_replace($dynamic_characters['apos'], $dynamic_replacements['apos'], $curl);
            }
            if (false !== strpos($curl, '"')) {
                $curl = preg_replace($dynamic_characters['quote'], $dynamic_replacements['quote'], $curl);
            }
            if (false !== strpos($curl, '-')) {
                $curl = preg_replace($dynamic_characters['dash'], $dynamic_replacements['dash'], $curl);
            }

            // 9x9 (times), but never 0x9999
            if (1 === preg_match('/(?<=\\d)x\\d/', $curl)) {
                // Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one!
                $curl = preg_replace('/\\b(\\d(?(?<=0)[\\d\\.,]+|[\\d\\.,]*))x(\\d[\\d\\.,]*)\\b/', '$1×$2', $curl);
            }
        }
    }
    // Break the reference left behind by the by-reference foreach.
    unset($curl);

    $text = implode('', $textarr);

    // Replace each & with &#038; unless it already looks like an entity.
    // (The replacement previously equalled the match, making this a no-op.)
    $text = preg_replace('/&(?!#(?:\\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $text);

    return $text;
}
/**
 * Replaces common plain text characters with formatted typographic characters.
 *
 * As an example,
 *
 *     'cause today's effort makes it worth tomorrow's "holiday" ...
 *
 * Becomes:
 *
 *     ’cause today’s effort makes it worth tomorrow’s “holiday” …
 *
 * HTML comments and escaped shortcodes are passed through untouched. HTML
 * elements and registered shortcodes are handed to
 * _wptexturize_pushpop_element(); plain text chunks are only texturized while
 * both no-texturize stacks are empty.
 *
 * Do not use this function before the 'init' action hook; everything will break.
 *
 * @since 0.71
 *
 * @global array $wp_cockneyreplace Array of formatted entities for certain common phrases
 * @global array $shortcode_tags
 * @staticvar array $static_characters
 * @staticvar array $static_replacements
 * @staticvar array $dynamic_characters
 * @staticvar array $dynamic_replacements
 * @staticvar array $default_no_texturize_tags
 * @staticvar array $default_no_texturize_shortcodes
 * @staticvar bool  $run_texturize
 *
 * @param string $text The text to be formatted
 * @param bool   $reset Set to true for unit testing. Translated patterns will reset.
 * @return string The string replaced with html entities
 */
function wptexturize($text, $reset = false) {
    global $wp_cockneyreplace, $shortcode_tags;
    static $static_characters = null,
        $static_replacements = null,
        $dynamic_characters = null,
        $dynamic_replacements = null,
        $default_no_texturize_tags = null,
        $default_no_texturize_shortcodes = null,
        $run_texturize = true,
        $apos = null,
        $prime = null,
        $double_prime = null,
        $opening_quote = null,
        $closing_quote = null,
        $opening_single_quote = null,
        $closing_single_quote = null,
        $open_q_flag = '<!--oq-->',
        $open_sq_flag = '<!--osq-->',
        $apos_flag = '<!--apos-->';

    // If there's nothing to do, just stop.
    if (empty($text) || false === $run_texturize) {
        return $text;
    }

    // Set up static variables. Run once only.
    if ($reset || !isset($static_characters)) {
        /**
         * Filter whether to skip running wptexturize().
         *
         * Passing false to the filter will effectively short-circuit wptexturize().
         * returning the original text passed to the function instead.
         *
         * The filter runs only once, the first time wptexturize() is called.
         *
         * @since 4.0.0
         *
         * @see wptexturize()
         *
         * @param bool $run_texturize Whether to short-circuit wptexturize().
         */
        $run_texturize = apply_filters('run_wptexturize', $run_texturize);
        if (false === $run_texturize) {
            return $text;
        }

        /* translators: opening curly double quote */
        $opening_quote = _x('“', 'opening curly double quote');
        /* translators: closing curly double quote */
        $closing_quote = _x('”', 'closing curly double quote');

        /* translators: apostrophe, for example in 'cause or can't */
        $apos = _x('’', 'apostrophe');

        /* translators: prime, for example in 9' (nine feet) */
        $prime = _x('′', 'prime');
        /* translators: double prime, for example in 9" (nine inches) */
        $double_prime = _x('″', 'double prime');

        /* translators: opening curly single quote */
        $opening_single_quote = _x('‘', 'opening curly single quote');
        /* translators: closing curly single quote */
        $closing_single_quote = _x('’', 'closing curly single quote');

        /* translators: en dash */
        $en_dash = _x('–', 'en dash');
        /* translators: em dash */
        $em_dash = _x('—', 'em dash');

        $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt');
        $default_no_texturize_shortcodes = array('code');

        // if a plugin has provided an autocorrect array, use it
        if (isset($wp_cockneyreplace)) {
            $cockney = array_keys($wp_cockneyreplace);
            $cockneyreplace = array_values($wp_cockneyreplace);
        } else {
            /* translators: This is a comma-separated list of words that defy the syntax of quotations in normal use,
             * for example...  'We do not have enough words yet' ... is a typical quoted phrase.  But when we write
             * lines of code 'til we have enough of 'em, then we need to insert apostrophes instead of quotes.
             */
            $cockney = explode(',', _x("'tain't,'twere,'twas,'tis,'twill,'til,'bout,'nuff,'round,'cause,'em", 'Comma-separated list of words to texturize in your language'));

            $cockneyreplace = explode(',', _x('’tain’t,’twere,’twas,’tis,’twill,’til,’bout,’nuff,’round,’cause,’em', 'Comma-separated list of replacement words in your language'));
        }

        $static_characters = array_merge(array('...', '``', '\'\'', ' (tm)'), $cockney);
        $static_replacements = array_merge(array('…', $opening_quote, $closing_quote, ' ™'), $cockneyreplace);

        // Pattern-based replacements of characters.
        // Sort the remaining patterns into several arrays for performance tuning.
        $dynamic_characters = array('apos' => array(), 'quote' => array(), 'dash' => array());
        $dynamic_replacements = array('apos' => array(), 'quote' => array(), 'dash' => array());
        $dynamic = array();
        $spaces = wp_spaces_regexp();

        // The apostrophe and opening-quote patterns insert placeholder flags
        // here; the flags are swapped for the real characters in the main loop
        // after wptexturize_primes() has run.

        // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation.
        if ("'" !== $apos || "'" !== $closing_single_quote) {
            $dynamic['/\'(\\d\\d)\'(?=\\Z|[.,:;!?)}\\-\\]]|>|' . $spaces . ')/'] = $apos_flag . '$1' . $closing_single_quote;
        }
        if ("'" !== $apos || '"' !== $closing_quote) {
            $dynamic['/\'(\\d\\d)"(?=\\Z|[.,:;!?)}\\-\\]]|>|' . $spaces . ')/'] = $apos_flag . '$1' . $closing_quote;
        }

        // '99 '99s '99's (apostrophe)  But never '9 or '99% or '999 or '99.0.
        if ("'" !== $apos) {
            $dynamic['/\'(?=\\d\\d(?:\\Z|(?![%\\d]|[.,]\\d)))/'] = $apos_flag;
        }

        // Quoted Numbers like '0.42'
        if ("'" !== $opening_single_quote && "'" !== $closing_single_quote) {
            $dynamic['/(?<=\\A|' . $spaces . ')\'(\\d[.,\\d]*)\'/'] = $open_sq_flag . '$1' . $closing_single_quote;
        }

        // Single quote at start, or preceded by (, {, <, [, ", -, or spaces.
        if ("'" !== $opening_single_quote) {
            $dynamic['/(?<=\\A|[([{"\\-]|<|' . $spaces . ')\'/'] = $open_sq_flag;
        }

        // Apostrophe in a word.  No spaces, double apostrophes, or other punctuation.
        if ("'" !== $apos) {
            $dynamic['/(?<!' . $spaces . ')\'(?!\\Z|[.,:;!?"\'(){}[\\]\\-]|&[lg]t;|' . $spaces . ')/'] = $apos_flag;
        }

        $dynamic_characters['apos'] = array_keys($dynamic);
        $dynamic_replacements['apos'] = array_values($dynamic);
        $dynamic = array();

        // Quoted Numbers like "42"
        if ('"' !== $opening_quote && '"' !== $closing_quote) {
            $dynamic['/(?<=\\A|' . $spaces . ')"(\\d[.,\\d]*)"/'] = $open_q_flag . '$1' . $closing_quote;
        }

        // Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces.
        if ('"' !== $opening_quote) {
            $dynamic['/(?<=\\A|[([{\\-]|<|' . $spaces . ')"(?!' . $spaces . ')/'] = $open_q_flag;
        }

        $dynamic_characters['quote'] = array_keys($dynamic);
        $dynamic_replacements['quote'] = array_values($dynamic);
        $dynamic = array();

        // Dashes and spaces
        $dynamic['/---/'] = $em_dash;
        $dynamic['/(?<=^|' . $spaces . ')--(?=$|' . $spaces . ')/'] = $em_dash;
        $dynamic['/(?<!xn)--/'] = $en_dash;
        $dynamic['/(?<=^|' . $spaces . ')-(?=$|' . $spaces . ')/'] = $en_dash;

        $dynamic_characters['dash'] = array_keys($dynamic);
        $dynamic_replacements['dash'] = array_values($dynamic);
    }

    // Must do this every time in case plugins use these filters in a context sensitive manner
    /**
     * Filter the list of HTML elements not to texturize.
     *
     * @since 2.8.0
     *
     * @param array $default_no_texturize_tags An array of HTML element names.
     */
    $no_texturize_tags = apply_filters('no_texturize_tags', $default_no_texturize_tags);
    /**
     * Filter the list of shortcodes not to texturize.
     *
     * @since 2.8.0
     *
     * @param array $default_no_texturize_shortcodes An array of shortcode names.
     */
    $no_texturize_shortcodes = apply_filters('no_texturize_shortcodes', $default_no_texturize_shortcodes);

    $no_texturize_tags_stack = array();
    $no_texturize_shortcodes_stack = array();

    // Look for shortcodes and HTML elements.

    // Collect candidate tag names that actually occur in the text, then keep
    // only those that are registered shortcodes.
    preg_match_all('@\\[/?([^<>&/\\[\\]\\x00-\\x20]++)@', $text, $matches);
    $tagnames = array_intersect(array_keys($shortcode_tags), $matches[1]);
    $found_shortcodes = !empty($tagnames);
    $shortcode_regex = $found_shortcodes ? _get_wptexturize_shortcode_regex($tagnames) : '';
    $regex = _get_wptexturize_split_regex($shortcode_regex);

    $textarr = preg_split($regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

    foreach ($textarr as &$curl) {
        // Only call _wptexturize_pushpop_element if $curl is a delimiter.
        $first = $curl[0];
        if ('<' === $first) {
            if ('<!--' === substr($curl, 0, 4)) {
                // This is an HTML comment delimiter.
                continue;
            } else {
                // This is an HTML element delimiter.

                // Replace each & with &#038; unless it already looks like an entity.
                // (The replacement previously equalled the match, making this a no-op.)
                $curl = preg_replace('/&(?!#(?:\\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $curl);

                _wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags);
            }
        } elseif ('' === trim($curl)) {
            // This is a newline between delimiters.  Performance improves when we check this.
            continue;
        } elseif ('[' === $first && $found_shortcodes && 1 === preg_match('/^' . $shortcode_regex . '$/', $curl)) {
            // This is a shortcode delimiter.
            if ('[[' !== substr($curl, 0, 2) && ']]' !== substr($curl, -2)) {
                // Looks like a normal shortcode.
                _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes);
            } else {
                // Looks like an escaped shortcode.
                continue;
            }
        } elseif (empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack)) {
            // This is neither a delimiter, nor is this content inside of no_texturize pairs.  Do texturize.
            $curl = str_replace($static_characters, $static_replacements, $curl);

            if (false !== strpos($curl, "'")) {
                $curl = preg_replace($dynamic_characters['apos'], $dynamic_replacements['apos'], $curl);
                $curl = wptexturize_primes($curl, "'", $prime, $open_sq_flag, $closing_single_quote);
                // Swap the placeholder flags for the real characters.
                $curl = str_replace($apos_flag, $apos, $curl);
                $curl = str_replace($open_sq_flag, $opening_single_quote, $curl);
            }
            if (false !== strpos($curl, '"')) {
                $curl = preg_replace($dynamic_characters['quote'], $dynamic_replacements['quote'], $curl);
                $curl = wptexturize_primes($curl, '"', $double_prime, $open_q_flag, $closing_quote);
                $curl = str_replace($open_q_flag, $opening_quote, $curl);
            }
            if (false !== strpos($curl, '-')) {
                $curl = preg_replace($dynamic_characters['dash'], $dynamic_replacements['dash'], $curl);
            }

            // 9x9 (times), but never 0x9999
            if (1 === preg_match('/(?<=\\d)x\\d/', $curl)) {
                // Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one!
                $curl = preg_replace('/\\b(\\d(?(?<=0)[\\d\\.,]+|[\\d\\.,]*))x(\\d[\\d\\.,]*)\\b/', '$1×$2', $curl);
            }

            // Replace each & with &#038; unless it already looks like an entity.
            // (The replacement previously equalled the match, making this a no-op.)
            $curl = preg_replace('/&(?!#(?:\\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $curl);
        }
    }
    // Break the reference left behind by the by-reference foreach.
    unset($curl);

    return implode('', $textarr);
}
/**
 * Replaces common plain text characters with formatted typographic characters.
 *
 * As an example,
 * <code>
 * 'cause today's effort makes it worth tomorrow's "holiday"...
 * </code>
 * Becomes:
 * <code>
 * ’cause today’s effort makes it worth tomorrow’s “holiday”…
 * </code>
 *
 * Every replacement character is translatable; a replacement is skipped when
 * the translation equals the plain ASCII character. Tag and shortcode chunks
 * are handed to _wptexturize_pushpop_element(); plain text chunks are only
 * texturized while both no-texturize stacks are empty.
 *
 * @since 0.71
 * @uses $wp_cockneyreplace Array of formatted entities for certain common phrases
 *
 * @param string $text The text to be formatted
 * @return string The string replaced with html entities
 */
function wptexturize($text) {
    global $wp_cockneyreplace;
    static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements,
        $default_no_texturize_tags, $default_no_texturize_shortcodes;

    // No need to set up these static variables more than once
    if ( ! isset( $static_characters ) ) {
        /* translators: opening curly double quote */
        $opening_quote = _x( '“', 'opening curly double quote' );
        /* translators: closing curly double quote */
        $closing_quote = _x( '”', 'closing curly double quote' );

        /* translators: apostrophe, for example in 'cause or can't */
        $apos = _x( '’', 'apostrophe' );

        /* translators: prime, for example in 9' (nine feet) */
        $prime = _x( '′', 'prime' );
        /* translators: double prime, for example in 9" (nine inches) */
        $double_prime = _x( '″', 'double prime' );

        /* translators: opening curly single quote */
        $opening_single_quote = _x( '‘', 'opening curly single quote' );
        /* translators: closing curly single quote */
        $closing_single_quote = _x( '’', 'closing curly single quote' );

        /* translators: en dash */
        $en_dash = _x( '–', 'en dash' );
        /* translators: em dash */
        $em_dash = _x( '—', 'em dash' );

        $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt');
        $default_no_texturize_shortcodes = array('code');

        // if a plugin has provided an autocorrect array, use it
        if ( isset($wp_cockneyreplace) ) {
            $cockney = array_keys($wp_cockneyreplace);
            $cockneyreplace = array_values($wp_cockneyreplace);
        } elseif ( "'" != $apos ) {
            // Only bother if we're doing a replacement.
            $cockney = array( "'tain't", "'twere", "'twas", "'tis", "'twill", "'til", "'bout", "'nuff", "'round", "'cause" );
            $cockneyreplace = array( $apos . "tain" . $apos . "t", $apos . "twere", $apos . "twas", $apos . "tis", $apos . "twill", $apos . "til", $apos . "bout", $apos . "nuff", $apos . "round", $apos . "cause" );
        } else {
            $cockney = $cockneyreplace = array();
        }

        // Plain string replacements. Order matters: '--' is converted before
        // the 'xn–' entry turns the dash back for punycode prefixes.
        $static_characters = array_merge( array( '---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)' ), $cockney );
        $static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace );

        /*
         * Regex for common whitespace characters.
         *
         * By default, spaces include new lines, tabs, nbsp entities, and the UTF-8 nbsp.
         * This is designed to replace the PCRE \s sequence.  In #WP22692, that sequence
         * was found to be unreliable due to random inclusion of the A0 byte.
         *
         * The last alternative is the literal "&nbsp;" entity; it was previously a
         * bare blank, which duplicated an earlier alternative and never matched
         * the entity described above.
         */
        $spaces = '[\r\n\t ]|\xC2\xA0|&nbsp;';

        // Pattern-based replacements of characters.
        $dynamic = array();

        // '99 '99s '99's (apostrophe)
        if ( "'" !== $apos ) {
            $dynamic[ '/\'(?=\d)/' ] = $apos;
        }

        // Single quote at start, or preceded by (, {, <, [, ", or spaces.
        if ( "'" !== $opening_single_quote ) {
            $dynamic[ '/(?<=\A|[([{<"]|' . $spaces . ')\'/' ] = $opening_single_quote;
        }

        // 9" (double prime)
        if ( '"' !== $double_prime ) {
            $dynamic[ '/(?<=\d)"/' ] = $double_prime;
        }

        // 9' (prime)
        if ( "'" !== $prime ) {
            $dynamic[ '/(?<=\d)\'/' ] = $prime;
        }

        // Apostrophe in a word.  No spaces or double primes.
        if ( "'" !== $apos ) {
            $dynamic[ '/(?<!' . $spaces . ')\'(?!\'|' . $spaces . ')/' ] = $apos;
        }

        // Double quote at start, or preceded by (, {, <, [, or spaces, and not followed by spaces.
        if ( '"' !== $opening_quote ) {
            $dynamic[ '/(?<=\A|[([{<]|' . $spaces . ')"(?!' . $spaces . ')/' ] = $opening_quote;
        }

        // Any remaining double quotes.
        if ( '"' !== $closing_quote ) {
            $dynamic[ '/"/' ] = $closing_quote;
        }

        // Single quotes followed by spaces or a period.
        if ( "'" !== $closing_single_quote ) {
            $dynamic[ '/\'(?=\Z|\.|' . $spaces . ')/' ] = $closing_single_quote;
        }

        $dynamic_characters = array_keys( $dynamic );
        $dynamic_replacements = array_values( $dynamic );
    }

    // Transform into regexp sub-expression used in _wptexturize_pushpop_element
    // Must do this every time in case plugins use these filters in a context sensitive manner
    /**
     * Filter the list of HTML elements not to texturize.
     *
     * @since 2.8.0
     *
     * @param array $default_no_texturize_tags An array of HTML element names.
     */
    $no_texturize_tags = '(' . implode( '|', apply_filters( 'no_texturize_tags', $default_no_texturize_tags ) ) . ')';
    /**
     * Filter the list of shortcodes not to texturize.
     *
     * @since 2.8.0
     *
     * @param array $default_no_texturize_shortcodes An array of shortcode names.
     */
    $no_texturize_shortcodes = '(' . implode( '|', apply_filters( 'no_texturize_shortcodes', $default_no_texturize_shortcodes ) ) . ')';

    $no_texturize_tags_stack = array();
    $no_texturize_shortcodes_stack = array();

    // Look for shortcodes and HTML elements.

    $shortcode_regex =
          '\['           // Find start of shortcode.
        . '[^\[\]<>]++'  // Shortcodes do not contain other shortcodes. Possessive critical.
        . '\]';          // Find end of shortcode.

    $textarr = preg_split("/(<[^>]*>|$shortcode_regex)/s", $text, -1, PREG_SPLIT_DELIM_CAPTURE);

    foreach ( $textarr as &$curl ) {
        if ( empty( $curl ) ) {
            continue;
        }

        // Only call _wptexturize_pushpop_element if first char is correct tag opening
        $first = $curl[0];
        if ( '<' === $first ) {
            _wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>');
        } elseif ( '[' === $first && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) {
            _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']');
        } elseif ( empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack) ) {
            // This is not a tag, nor is the texturization disabled
            // static strings
            $curl = str_replace($static_characters, $static_replacements, $curl);

            // regular expressions
            $curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl);

            // 9x9 (times)
            // Test the current chunk, not the whole input: checking $text made
            // the guard pass for every chunk as soon as any chunk contained a match.
            if ( 1 === preg_match( '/(?<=\d)x\d/', $curl ) ) {
                // Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one!
                $curl = preg_replace( '/\b(\d+)x(\d+)\b/', '$1×$2', $curl );
            }
        }

        // Replace each & with &#038; unless it already looks like an entity.
        // (The replacement previously equalled the match, making this a no-op.)
        $curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&#038;$1', $curl);
    }
    // Break the reference left behind by the by-reference foreach.
    unset( $curl );

    return implode( '', $textarr );
}