/**
 * SmartyPants.
 *
 * Walks the tokens returned by _TokenizeHTML(), copies tag tokens through
 * untouched and applies the enabled typographic transformations to every
 * text token that is not inside a tag matched by SMARTYPANTS_TAGS_TO_SKIP.
 *
 * @param string $text
 *   Text to be parsed.
 * @param string $attr
 *   Value of the smart_quotes="" attribute. When it compares loosely equal
 *   to NULL, the global $_typogrify_smartypants_attr is used instead.
 * @param array $ctx
 *   Optional context. Only $ctx['langcode'] is read, and only when quotes
 *   run in mode 2 ("1", "2", "3" or the "Q" flag).
 *
 * @return string
 *   The processed text.
 */
function SmartyPants($text, $attr = NULL, $ctx = NULL) {
  if ($attr == NULL) {
    global $_typogrify_smartypants_attr;
    $attr = $_typogrify_smartypants_attr;
  }

  // Options to specify which transformations to make. Every option starts
  // switched off so that modes which enable only a subset (the "-1" stupefy
  // mode, or a list of single-letter flags) never read an undefined variable.
  $do_quotes = 0;
  $do_backticks = 0;
  $do_dashes = 0;
  $do_ellipses = 0;
  $do_stupefy = FALSE;
  // Should we translate &quot; entities into normal quotes?
  $convert_quot = 0;

  // Parse attributes:
  // 0 : do nothing
  // 1 : set all
  // 2 : set all, using old school en- and em- dash shortcuts
  // 3 : set all, using inverted old school en and em- dash shortcuts
  //
  // q : quotes
  // Q : quotes, with the document language taken from $ctx['langcode']
  // b : backtick quotes (``double'' and ,,double`` only)
  // B : backtick quotes (``double'', ,,double``, ,single` and `single')
  // d : dashes
  // D : old school dashes
  // i : inverted old school dashes
  // e : ellipses
  // w : convert &quot; entities to " for Dreamweaver users.
  if ($attr == "0") {
    // Do nothing.
    return $text;
  }
  elseif ($attr == "1") {
    // Do everything, turn all options on.
    $do_quotes = 2;
    $do_backticks = 1;
    $do_dashes = 1;
    $do_ellipses = 1;
  }
  elseif ($attr == "2") {
    // Do everything, turn all options on, use old school dash shorthand.
    $do_quotes = 2;
    $do_backticks = 1;
    $do_dashes = 2;
    $do_ellipses = 1;
  }
  elseif ($attr == "3") {
    // Do everything, turn all options on,
    // use inverted old school dash shorthand.
    $do_quotes = 2;
    $do_backticks = 1;
    $do_dashes = 3;
    $do_ellipses = 1;
  }
  elseif ($attr == "-1") {
    // Special "stupefy" mode.
    $do_stupefy = 1;
  }
  else {
    // One flag per character; unknown characters are ignored. The cast keeps
    // preg_split() from receiving NULL when no default attribute is set.
    $chars = preg_split('//', (string) $attr, -1, PREG_SPLIT_NO_EMPTY);
    foreach ($chars as $c) {
      switch ($c) {
        case 'q':
          $do_quotes = 1;
          break;

        case 'Q':
          $do_quotes = 2;
          break;

        case 'b':
          $do_backticks = 1;
          break;

        case 'B':
          $do_backticks = 2;
          break;

        case 'd':
          $do_dashes = 1;
          break;

        case 'D':
          $do_dashes = 2;
          break;

        case 'i':
          $do_dashes = 3;
          break;

        case 'e':
          $do_ellipses = 1;
          break;

        case 'w':
          $convert_quot = 1;
          break;
      }
    }
  }

  if ($do_quotes == 2) {
    // $ctx defaults to NULL, so guard the lookup instead of indexing blindly.
    // A missing langcode is still handed on as NULL, exactly as before.
    $doc_lang = isset($ctx['langcode']) ? $ctx['langcode'] : NULL;
  }
  else {
    $doc_lang = 'en';
  }

  $tokens = _TokenizeHTML($text);
  $result = '';
  // Keep track of when we're inside <pre> or <code> tags.
  $in_pre = 0;

  // This is a cheat, used to get some context for one-character tokens that
  // consist of just a quote char. What we do is remember the last character
  // of the previous text token, to use as context to curl single-character
  // quote tokens correctly.
  $prev_token_last_char = '';

  foreach ($tokens as $cur_token) {
    if ($cur_token[0] == 'tag') {
      // Don't mess with quotes inside tags.
      $result .= $cur_token[1];
      if (preg_match(SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
        // A leading "/" captured in group 1 marks the closing tag.
        $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
      }
      else {
        // Reading language from span.
        if (preg_match('/<span .*(xml:)?lang="(..)"/', $cur_token[1], $matches)) {
          $span_lang = $matches[2];
        }
        elseif ($cur_token[1] == '</span>') {
          unset($span_lang);
        }
      }
    }
    else {
      $t = $cur_token[1];
      // Remember last char of this token before processing.
      $last_char = mb_substr($t, -1);
      if (!$in_pre) {
        // A language set on an enclosing span wins over the document one.
        $quotes = typogrify_i18n_quotes(isset($span_lang) ? $span_lang : $doc_lang);

        $t = ProcessEscapes($t);

        if ($convert_quot) {
          // Decode the entity; replacing '"' with itself would do nothing.
          $t = str_replace('&quot;', '"', $t);
        }

        if ($do_dashes == 1) {
          $t = EducateDashes($t);
        }
        elseif ($do_dashes == 2) {
          $t = EducateDashesOldSchool($t);
        }
        elseif ($do_dashes == 3) {
          $t = EducateDashesOldSchoolInverted($t);
        }

        if ($do_ellipses) {
          $t = EducateEllipses($t);
        }

        // Note: backticks need to be processed before quotes.
        if ($do_backticks) {
          $t = EducateBackticks($t);
          if ($do_backticks == 2) {
            $t = EducateSingleBackticks($t);
          }
        }

        if ($do_quotes) {
          $t = EducateBackticks($t);
          if ($t == "'") {
            // Special case: single-character ' token.
            if (preg_match('/\\S/', $prev_token_last_char)) {
              $t = $quotes[3];
            }
            else {
              $t = $quotes[2];
            }
          }
          elseif ($t == '"') {
            // Special case: single-character " token.
            if (preg_match('/\\S/', $prev_token_last_char)) {
              $t = $quotes[1];
            }
            else {
              $t = $quotes[0];
            }
          }
          else {
            // Normal case:
            $t = EducateQuotes($t, $quotes);
          }
        }

        if ($do_stupefy) {
          $t = StupefyEntities($t);
        }
      }
      $prev_token_last_char = $last_char;
      $result .= $t;
    }
  }

  return $result;
}
/**
 * Applies the SmartyPants typographic transformations to a piece of HTML.
 *
 * The text is split by _TokenizeHTML(); tag tokens are copied through
 * unchanged, and text tokens are transformed unless they sit inside a tag
 * matched by the global $sp_tags_to_skip pattern.
 *
 * @param string $text Text to be parsed.
 * @param string $attr Value of the smart_quotes="" attribute. When it
 *                     compares loosely equal to NULL, the global
 *                     $smartypants_attr is used instead.
 * @param mixed  $ctx  MT context object (unused).
 *
 * @return string The processed text.
 */
function SmartyPants($text, $attr = NULL, $ctx = NULL) {
    global $smartypants_attr, $sp_tags_to_skip;

    if ($attr == NULL) {
        $attr = $smartypants_attr;
    }

    # Options to specify which transformations to make. Everything starts
    # switched off so that modes enabling only a subset (the "-1" stupefy
    # mode, or a list of single-letter flags) never read an undefined variable.
    $do_quotes    = 0;
    $do_backticks = 0;
    $do_dashes    = 0;
    $do_ellipses  = 0;
    $do_stupefy   = FALSE;
    $convert_quot = 0;  # should we translate &quot; entities into normal quotes?

    # Parse attributes:
    # 0 : do nothing
    # 1 : set all
    # 2 : set all, using old school en- and em- dash shortcuts
    # 3 : set all, using inverted old school en and em- dash shortcuts
    #
    # q : quotes
    # b : backtick quotes (``double'' only)
    # B : backtick quotes (``double'' and `single')
    # d : dashes
    # D : old school dashes
    # i : inverted old school dashes
    # e : ellipses
    # w : convert &quot; entities to " for Dreamweaver users
    if ($attr == "0") {
        # Do nothing.
        return $text;
    } elseif ($attr == "1") {
        # Do everything, turn all options on.
        $do_quotes    = 1;
        $do_backticks = 1;
        $do_dashes    = 1;
        $do_ellipses  = 1;
    } elseif ($attr == "2") {
        # Do everything, turn all options on, use old school dash shorthand.
        $do_quotes    = 1;
        $do_backticks = 1;
        $do_dashes    = 2;
        $do_ellipses  = 1;
    } elseif ($attr == "3") {
        # Do everything, turn all options on, use inverted old school dash shorthand.
        $do_quotes    = 1;
        $do_backticks = 1;
        $do_dashes    = 3;
        $do_ellipses  = 1;
    } elseif ($attr == "-1") {
        # Special "stupefy" mode.
        $do_stupefy = 1;
    } else {
        # One flag per character. The cast keeps preg_split() from receiving
        # NULL when no default attribute has been configured.
        $chars = preg_split('//', (string) $attr, -1, PREG_SPLIT_NO_EMPTY);
        foreach ($chars as $c) {
            switch ($c) {
                case 'q': $do_quotes    = 1; break;
                case 'b': $do_backticks = 1; break;
                case 'B': $do_backticks = 2; break;
                case 'd': $do_dashes    = 1; break;
                case 'D': $do_dashes    = 2; break;
                case 'i': $do_dashes    = 3; break;
                case 'e': $do_ellipses  = 1; break;
                case 'w': $convert_quot = 1; break;
                default:
                    # Unknown attribute option, ignore.
                    break;
            }
        }
    }

    $tokens = _TokenizeHTML($text);
    $result = '';
    $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags.

    # This is a cheat, used to get some context for one-character tokens that
    # consist of just a quote char. What we do is remember the last character
    # of the previous text token, to use as context to curl single-character
    # quote tokens correctly.
    $prev_token_last_char = "";

    foreach ($tokens as $cur_token) {
        if ($cur_token[0] == "tag") {
            # Don't mess with quotes inside tags.
            $result .= $cur_token[1];
            if (preg_match("@{$sp_tags_to_skip}@", $cur_token[1], $matches)) {
                # A leading "/" captured in group 1 marks the closing tag.
                $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
            }
            continue;
        }

        $t = $cur_token[1];
        $last_char = substr($t, -1);  # Remember last char of this token before processing.

        if (!$in_pre) {
            $t = ProcessEscapes($t);

            if ($convert_quot) {
                # Decode the entity; replacing '"' with itself would do nothing.
                $t = str_replace('&quot;', '"', $t);
            }

            if ($do_dashes == 1) {
                $t = EducateDashes($t);
            } elseif ($do_dashes == 2) {
                $t = EducateDashesOldSchool($t);
            } elseif ($do_dashes == 3) {
                $t = EducateDashesOldSchoolInverted($t);
            }

            if ($do_ellipses) {
                $t = EducateEllipses($t);
            }

            # Note: backticks need to be processed before quotes.
            if ($do_backticks) {
                $t = EducateBackticks($t);
                if ($do_backticks == 2) {
                    $t = EducateSingleBackticks($t);
                }
            }

            if ($do_quotes) {
                if ($t == "'") {
                    # Special case: single-character ' token
                    if (preg_match('/\\S/', $prev_token_last_char)) {
                        $t = "’";
                    } else {
                        $t = "‘";
                    }
                } elseif ($t == '"') {
                    # Special case: single-character " token
                    if (preg_match('/\\S/', $prev_token_last_char)) {
                        $t = "”";
                    } else {
                        $t = "“";
                    }
                } else {
                    # Normal case:
                    $t = EducateQuotes($t);
                }
            }

            if ($do_stupefy) {
                $t = StupefyEntities($t);
            }
        }

        $prev_token_last_char = $last_char;
        $result .= $t;
    }

    return $result;
}
/**
 * Applies the SmartyPants typographic transformations selected by a bitmask.
 *
 * The text is split by _tokenize() into tag and text tokens. Tag tokens are
 * copied through unchanged; text tokens are transformed unless they sit
 * inside a tag matched by TAGS_TO_SKIP.
 *
 * @param string $text Text to be parsed; an empty value returns '' at once.
 * @param int    $mode Bitwise combination of the DO_* constants.
 *
 * @return string The processed text.
 */
function SmartyPants($text = '', $mode = DEFAULT_OPERATION_MODE) {
    // quick return for empty string
    if ($text == '') {
        return '';
    }

    // Translate the bitmask into one boolean per transformation.
    $do_quotes                    = (bool) ($mode & DO_QUOTES);
    $do_backticks_double          = (bool) ($mode & DO_BACKTICKS_DOUBLE);
    $do_backticks_all             = (bool) ($mode & DO_BACKTICKS_ALL);
    $do_dashes                    = (bool) ($mode & DO_DASHES);
    $do_oldschool_dashes          = (bool) ($mode & DO_OLDSCHOOL_DASHES);
    $do_inverted_oldschool_dashes = (bool) ($mode & DO_INVERTED_OLDSCHOOL_DASHES);
    $do_ellipses                  = (bool) ($mode & DO_ELLIPSES);
    $convert_quot                 = (bool) ($mode & DO_QUOT_CONV);
    // NOTE(review): no mode bit visible here enables stupefy mode, so this
    // stays FALSE and the StupefyEntities() call below is never reached.
    $do_stupefy = FALSE;

    // tokenize input string -- break it into HTML tags and the text between them.
    $tokens = array();
    _tokenize($text, $tokens);

    $result = '';
    $in_pre = FALSE;  # Keep track of when we're inside <pre> or <code> tags.

    # This is a cheat, used to get some context for one-character tokens that
    # consist of just a quote char. What we do is remember the last character
    # of the previous text token, to use as context to curl single-character
    # quote tokens correctly.
    $prev_token_last_char = '';

    foreach ($tokens as $data) {
        if ($data['type'] == TOKENS_TYPE_TAG) {
            # Don't mess with quotes inside tags.
            $result .= $data['body'];

            // if the current tag contains text that should not be
            // modified, set $in_pre to TRUE; an empty first capture group
            // is treated as the opening tag.
            if (preg_match(TAGS_TO_SKIP, $data['body'], $hits)) {
                $in_pre = ($hits[1] == '');
            }
            continue;
        }

        $t = $data['body'];
        $last_char = substr($t, -1);  # Remember last char of this token before processing.

        if (!$in_pre) {
            $t = ProcessEscapes($t);

            if ($convert_quot) {
                // Decode the entity; replacing '"' with itself would do nothing.
                $t = str_replace('&quot;', '"', $t);
            }

            // Only one dash style is applied; plain dashes take precedence.
            if ($do_dashes) {
                $t = EducateDashes($t);
            } elseif ($do_oldschool_dashes) {
                $t = EducateDashesOldSchool($t);
            } elseif ($do_inverted_oldschool_dashes) {
                $t = EducateDashesOldSchoolInverted($t);
            }

            if ($do_ellipses) {
                $t = EducateEllipses($t);
            }

            # Note: backticks need to be processed before quotes.
            if ($do_backticks_double || $do_backticks_all) {
                $t = EducateBackticks($t);
            }
            if ($do_backticks_all) {
                $t = EducateSingleBackticks($t);
            }

            if ($do_quotes) {
                if ($t == "'") {
                    # Special case: single-character ' token
                    if (preg_match("/\\S/", $prev_token_last_char)) {
                        $t = "’";
                    } else {
                        $t = "‘";
                    }
                } elseif ($t == '"') {
                    # Special case: single-character " token
                    if (preg_match("/\\S/", $prev_token_last_char)) {
                        $t = "”";
                    } else {
                        $t = "“";
                    }
                } else {
                    $t = EducateQuotes($t);
                }
            }

            if ($do_stupefy) {
                $t = StupefyEntities($t);
            }
        }

        $prev_token_last_char = $last_char;
        $result .= $t;
    }

    return $result;
}