/**
 * Perform character conversion.
 *
 * The text is split into HTML tokens with _TokenizeHTML(). Tag tokens are
 * copied through untouched. Text tokens are passed through ProcessEscapes()
 * and then have every requested ASCII sequence replaced by its mapped string,
 * unless the token sits inside a tag matched by SMARTYPANTS_TAGS_TO_SKIP.
 *
 * @param string $text
 *   Text to be parsed.
 * @param array $characters_to_convert
 *   Array of ASCII characters to convert. Entries that have no mapping in
 *   unicode_conversion_map() are ignored instead of being stripped from the
 *   text.
 *
 * @return string
 *   The result of the conversion.
 */
function convert_characters($text, $characters_to_convert) {
  if ($characters_to_convert == NULL || count($characters_to_convert) < 1) {
    // Nothing was requested: hand the text back untouched.
    return $text;
  }

  // Build aligned search/replace lists from the ASCII-to-unicode map. Only
  // sequences that actually have a mapping are kept; pushing a missing entry
  // (NULL) into the replace list would make str_replace() delete the sequence.
  $unicode_map = unicode_conversion_map();
  $search = array();
  $replace = array();
  foreach ($characters_to_convert as $ascii_string) {
    if (isset($unicode_map[$ascii_string])) {
      $search[] = $ascii_string;
      $replace[] = $unicode_map[$ascii_string];
    }
  }

  $tokens = _TokenizeHTML($text);
  $result = '';
  // Keep track of when we're inside a tag that must not be altered.
  $in_pre = 0;

  foreach ($tokens as $cur_token) {
    if ($cur_token[0] == "tag") {
      // Don't mess with the markup itself.
      $result .= $cur_token[1];
      // Capture group 1 holds the "/" of a closing tag: a closing tag ends
      // the protected region, an opening tag starts it.
      if (preg_match(SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
        $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
      }
    }
    else {
      $t = $cur_token[1];
      if ($in_pre == 0) {
        $t = ProcessEscapes($t);
        $t = str_replace($search, $replace, $t);
      }
      $result .= $t;
    }
  }

  return $result;
}
/**
 * Apply a regular-expression search/replace to the text parts of an HTML string.
 *
 * The string is split with _TokenizeHTML(). Tags are copied through untouched
 * and text between <pre>, <code>, <kbd>, <script> or <math> tags is left alone;
 * every other text token is run through preg_replace().
 *
 * @param string $str
 *   Text to process. When it compares equal to '' the global $TMPL->tagdata is
 *   used instead. Note that because required parameters follow it, callers
 *   still have to pass this argument explicitly.
 * @param string|array $search
 *   Pattern(s) handed to preg_replace().
 * @param string|array $replace
 *   Replacement(s) handed to preg_replace().
 *
 * @return string
 *   The processed text.
 */
function _apply_search_replace($str = '', $search, $replace) {
    global $TMPL;

    if ($str == '') {
        $str = $TMPL->tagdata;
    }

    // Tags whose contents must not be rewritten. Capture group 1 holds the "/"
    // of a closing tag. The previous pattern was empty, so it matched every tag
    // and permanently switched processing off after the first tag of any kind.
    $tags_to_skip = '@<(/?)(?:pre|code|kbd|script|math)[\s>]@i';

    $tokens = _TokenizeHTML($str);
    $result = '';
    $in_skipped_tag = false;

    foreach ($tokens as $token) {
        if ($token[0] == 'tag') {
            $result .= $token[1];
            if (preg_match($tags_to_skip, $token[1], $matches)) {
                // Opening tag starts a protected region, closing tag ends it.
                $in_skipped_tag = isset($matches[1]) && $matches[1] == '/' ? false : true;
            }
        } elseif ($in_skipped_tag) {
            $result .= $token[1];
        } else {
            $result .= preg_replace($search, $replace, $token[1]);
        }
    }

    return $result;
}
/**
 * Stylable capitals
 *
 * Wraps multiple capital letters in ``<span class="caps">``
 * so they can be styled with CSS.
 *
 * Uses the smartypants tokenizer so that HTML tags themselves, and text inside
 * <pre>, <code>, <kbd>, <script> and <math> elements, are left untouched.
 * Matches are handed to Typogrify::_cap_wrapper() for the actual wrapping.
 *
 * @param string $text Text to process.
 *
 * @return string The processed text, or $text unchanged when _TokenizeHTML()
 *                is not available.
 */
public static function caps($text)
{
    // If _TokenizeHTML from Smartypants is not present, don't do anything.
    if (!function_exists('_TokenizeHTML')) {
        return $text;
    }

    $tokens = _TokenizeHTML($text);
    $result = array();
    $in_skipped_tag = false;

    $cap_finder = "/(\n"
        . " (\\b[A-Z\\d]* # Group 2: Any amount of caps and digits\n"
        . " [A-Z]\\d*[A-Z] # A cap string much at least include two caps (but they can have digits between them)\n"
        . " [A-Z\\d]*\\b) # Any amount of caps and digits\n"
        . " | (\\b[A-Z]+\\.\\s? # OR: Group 3: Some caps, followed by a '.' and an optional space\n"
        . " (?:[A-Z]+\\.\\s?)+) # Followed by the same thing at least once more\n"
        . " (?:\\s|\\b|\$))/x";

    // Group 1 captures the "/" of a closing tag. The tag name must be followed
    // by whitespace or ">" so that e.g. <preview> is not mistaken for <pre>.
    $tags_to_skip_regex = "/<(\\/)?(?:pre|code|kbd|script|math)[\\s>]/i";

    foreach ($tokens as $token) {
        if ($token[0] == "tag") {
            // Don't mess with tags.
            $result[] = $token[1];
            if (preg_match($tags_to_skip_regex, $token[1], $matches)) {
                // An opening tag starts a protected region and a closing tag
                // ends it. Unrelated tags must not change the state, otherwise
                // inline markup inside <pre> would re-enable processing.
                $in_skipped_tag = empty($matches[1]);
            }
        } elseif ($in_skipped_tag) {
            $result[] = $token[1];
        } else {
            $result[] = preg_replace_callback($cap_finder, array('Typogrify', '_cap_wrapper'), $token[1]);
        }
    }

    return join("", $result);
}
/**
 * Protect "*" and "_" inside HTML tags and encode backslash escapes in text.
 *
 * The input is split with _TokenizeHTML() and rebuilt token by token:
 * - tag tokens get every "*" and "_" swapped for the matching entry of the
 *   global $g_escape_table;
 * - every other token is passed through _EncodeBackslashEscapes().
 *
 * No distinction is made for text inside <pre>/<code>/<kbd>/<script>; all text
 * tokens are treated the same way.
 *
 * @param string $text Text to process.
 *
 * @return string The rebuilt text.
 */
function _EscapeSpecialChars($text) {
    global $g_escape_table;

    $rebuilt = '';
    foreach (_TokenizeHTML($text) as $piece) {
        if ($piece[0] == 'tag') {
            // Swap the two characters for their escape-table substitutes.
            $substitutes = array($g_escape_table['*'], $g_escape_table['_']);
            $rebuilt .= str_replace(array('*', '_'), $substitutes, $piece[1]);
            continue;
        }

        $rebuilt .= _EncodeBackslashEscapes($piece[1]);
    }

    return $rebuilt;
}
/**
 * Protect "*" and "_" inside HTML tags and encode backslash escapes in text.
 *
 * The input is split with _TokenizeHTML() and reassembled token by token.
 *
 * @param string $text Text to process.
 *
 * @return string The reassembled text.
 */
function _EscapeSpecialChars($text) {
    global $md_escape_table;

    $output = '';
    foreach (_TokenizeHTML($text) as $token) {
        // Inside tags, "*" and "_" would clash with Markdown's emphasis
        // syntax, so each is swapped for its $md_escape_table substitute
        // (per the original author's note, an MD5 checksum value, chosen to
        // avoid accidental collisions). All other tokens only get their
        // backslash escapes encoded.
        $output .= ($token[0] == 'tag')
            ? str_replace(
                array('*', '_'),
                array($md_escape_table['*'], $md_escape_table['_']),
                $token[1]
            )
            : _EncodeBackslashEscapes($token[1]);
    }

    return $output;
}
/**
 * Convert ellipses in the text portions of an HTML string.
 *
 * The string is split with _TokenizeHTML(). Tags are copied through untouched;
 * text tokens outside the tags matched by $sp_tags_to_skip are passed through
 * ProcessEscapes() and then EducateEllipses().
 *
 * @param string $text Text to be parsed.
 * @param mixed  $attr Value of the smart_ellipses="" attribute. When it
 *                     compares equal to NULL the global $smartypants_attr is
 *                     used; a value comparing equal to 0 disables processing.
 * @param mixed  $ctx  MT context object (unused).
 *
 * @return string The processed text.
 */
function SmartEllipses($text, $attr = NULL, $ctx = NULL) {
    // Both settings live in the global scope. Without this declaration they
    // were undefined locals: the default attribute always disabled processing
    // and the skip pattern collapsed to an empty regex matching every tag.
    global $smartypants_attr, $sp_tags_to_skip;

    if ($attr == NULL) {
        $attr = $smartypants_attr;
    }
    if ($attr == 0) {
        # do nothing;
        return $text;
    }

    // Fall back to a built-in tag list when the global pattern is not set, so
    // the regex below can never be empty.
    $tags_to_skip = !empty($sp_tags_to_skip)
        ? $sp_tags_to_skip
        : '<(/?)(?:pre|code|kbd|script|math)[\s>]';

    $tokens = _TokenizeHTML($text);
    $result = '';
    $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags

    foreach ($tokens as $cur_token) {
        if ($cur_token[0] == "tag") {
            # Don't mess with quotes inside tags
            $result .= $cur_token[1];
            // Capture group 1 holds the "/" of a closing tag.
            if (preg_match("@{$tags_to_skip}@", $cur_token[1], $matches)) {
                $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
            }
        } else {
            $t = $cur_token[1];
            if (!$in_pre) {
                $t = ProcessEscapes($t);
                $t = EducateEllipses($t);
            }
            $result .= $t;
        }
    }

    return $result;
}
/**
 * space_hyphens
 *
 * Replaces a run of one to three hyphens that has a whitespace character on
 * both sides with an em-dash. HTML tags are copied through unchanged, and text
 * inside tags matched by SMARTYPANTS_TAGS_TO_SKIP is left alone.
 *
 * @param string $text
 *   Text to process.
 *
 * @return string
 *   The processed text.
 */
function typogrify_space_hyphens($text) {
  $output = '';
  // Non-zero while we are between an opening and closing protected tag.
  $skipping = 0;

  foreach (_TokenizeHTML($text) as $token) {
    if ($token[0] != "tag") {
      // Plain text: rewrite it only when outside a protected region.
      $output .= $skipping
        ? $token[1]
        : preg_replace("/\\s(-{1,3})\\s/", ' — ', $token[1]);
      continue;
    }

    // Markup is never altered.
    $output .= $token[1];
    if (preg_match(SMARTYPANTS_TAGS_TO_SKIP, $token[1], $found)) {
      // Capture group 1 holds the "/" of a closing tag.
      if (isset($found[1]) && $found[1] == '/') {
        $skipping = 0;
      }
      else {
        $skipping = 1;
      }
    }
  }

  return $output;
}