Esempio n. 1
0
/**
 * SmartyPants.
 *
 * Tokenizes $text with _TokenizeHTML(), copies tag tokens through verbatim
 * and applies the selected punctuation transformations (dashes, ellipses,
 * backticks, quotes, stupefy) to the text tokens. Text that follows an
 * opening tag matched by SMARTYPANTS_TAGS_TO_SKIP is left untouched until
 * the matching closing tag is seen.
 *
 * @param string $text
 *   Text to be parsed.
 * @param string $attr
 *   Value of the smart_quotes="" attribute. When it compares equal to NULL
 *   the global $_typogrify_smartypants_attr is used instead.
 * @param array $ctx
 *   Context. Only the 'langcode' entry is read, and only when the selected
 *   mode sets quotes to level 2 (modes "1", "2", "3" or the "Q" option).
 *
 * @return string
 *   The processed text, or $text unchanged when $attr is "0".
 */
function SmartyPants($text, $attr = NULL, $ctx = NULL)
{
    if ($attr == NULL) {
        global $_typogrify_smartypants_attr;
        $attr = $_typogrify_smartypants_attr;
    }
    // Options to specify which transformations to make. Every option starts
    // disabled so that modes which only enable some of them (stupefy mode,
    // or a partial letter list such as "e") never read an undefined variable.
    $do_quotes = 0;
    $do_backticks = 0;
    $do_dashes = 0;
    $do_ellipses = 0;
    $do_stupefy = FALSE;
    // Should we translate &quot; entities into normal quotes?
    $convert_quot = 0;
    // Parse attributes:
    // 0 : do nothing
    // 1 : set all
    // 2 : set all, using old school en- and em- dash shortcuts
    // 3 : set all, using inverted old school en and em- dash shortcuts
    // -1 : stupefy mode
    //
    // q : quotes
    // Q : quotes, using the language taken from $ctx['langcode']
    // b : backtick quotes (``double'' and ,,double`` only)
    // B : backtick quotes (``double'', ,,double``, ,single` and `single')
    // d : dashes
    // D : old school dashes
    // i : inverted old school dashes
    // e : ellipses
    // w : convert &quot; entities to " for Dreamweaver users.
    if ($attr == "0") {
        // Do nothing.
        return $text;
    } elseif ($attr == "1") {
        // Do everything, turn all options on.
        $do_quotes = 2;
        $do_backticks = 1;
        $do_dashes = 1;
        $do_ellipses = 1;
    } elseif ($attr == "2") {
        // Do everything, turn all options on, use old school dash shorthand.
        $do_quotes = 2;
        $do_backticks = 1;
        $do_dashes = 2;
        $do_ellipses = 1;
    } elseif ($attr == "3") {
        // Do everything, turn all options on,
        // use inverted old school dash shorthand.
        $do_quotes = 2;
        $do_backticks = 1;
        $do_dashes = 3;
        $do_ellipses = 1;
    } elseif ($attr == "-1") {
        // Special "stupefy" mode.
        $do_stupefy = 1;
    } else {
        // One option letter per character; unknown letters are ignored.
        $chars = preg_split('//', $attr, -1, PREG_SPLIT_NO_EMPTY);
        foreach ($chars as $c) {
            if ($c == "q") {
                $do_quotes = 1;
            } elseif ($c == "Q") {
                $do_quotes = 2;
            } elseif ($c == "b") {
                $do_backticks = 1;
            } elseif ($c == "B") {
                $do_backticks = 2;
            } elseif ($c == "d") {
                $do_dashes = 1;
            } elseif ($c == "D") {
                $do_dashes = 2;
            } elseif ($c == "i") {
                $do_dashes = 3;
            } elseif ($c == "e") {
                $do_ellipses = 1;
            } elseif ($c == "w") {
                $convert_quot = 1;
            }
        }
    }
    if ($do_quotes == 2) {
        // $ctx defaults to NULL, so guard the lookup. A missing langcode is
        // passed on as NULL, exactly the value the unguarded read produced.
        $doc_lang = isset($ctx['langcode']) ? $ctx['langcode'] : NULL;
    } else {
        $doc_lang = 'en';
    }
    $tokens = _TokenizeHTML($text);
    $result = '';
    // Keep track of when we're inside <pre> or <code> tags.
    $in_pre = 0;
    // This is a cheat, used to get some context
    // for one-character tokens that consist of
    // just a quote char. What we do is remember
    // the last character of the previous text
    // token, to use as context to curl single-
    // character quote tokens correctly.
    $prev_token_last_char = '';
    foreach ($tokens as $cur_token) {
        if ($cur_token[0] == 'tag') {
            // Don't mess with quotes inside tags.
            $result .= $cur_token[1];
            if (preg_match(SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
                // A captured "/" marks a closing tag, which ends the skip.
                $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
            } else {
                // Reading language from span.
                if (preg_match('/<span .*(xml:)?lang="(..)"/', $cur_token[1], $matches)) {
                    $span_lang = $matches[2];
                } elseif ($cur_token[1] == '</span>') {
                    unset($span_lang);
                }
            }
        } else {
            $t = $cur_token[1];
            // Remember last char of this token before processing.
            $last_char = mb_substr($t, -1);
            if (!$in_pre) {
                // A language set on an enclosing span overrides the document one.
                $quotes = typogrify_i18n_quotes(isset($span_lang) ? $span_lang : $doc_lang);
                $t = ProcessEscapes($t);
                if ($convert_quot) {
                    $t = preg_replace('/&quot;/', '"', $t);
                }
                if ($do_dashes) {
                    if ($do_dashes == 1) {
                        $t = EducateDashes($t);
                    } elseif ($do_dashes == 2) {
                        $t = EducateDashesOldSchool($t);
                    } elseif ($do_dashes == 3) {
                        $t = EducateDashesOldSchoolInverted($t);
                    }
                }
                if ($do_ellipses) {
                    $t = EducateEllipses($t);
                }
                // Note: backticks need to be processed before quotes.
                if ($do_backticks) {
                    $t = EducateBackticks($t);
                    if ($do_backticks == 2) {
                        $t = EducateSingleBackticks($t);
                    }
                }
                if ($do_quotes) {
                    $t = EducateBackticks($t);
                    if ($t == "'") {
                        // Special case: single-character ' token.
                        if (preg_match('/\\S/', $prev_token_last_char)) {
                            $t = $quotes[3];
                        } else {
                            $t = $quotes[2];
                        }
                    } elseif ($t == '"') {
                        // Special case: single-character " token.
                        if (preg_match('/\\S/', $prev_token_last_char)) {
                            $t = $quotes[1];
                        } else {
                            $t = $quotes[0];
                        }
                    } else {
                        // Normal case:
                        $t = EducateQuotes($t, $quotes);
                    }
                }
                if ($do_stupefy) {
                    $t = StupefyEntities($t);
                }
            }
            $prev_token_last_char = $last_char;
            $result .= $t;
        }
    }
    return $result;
}
Esempio n. 2
0
/**
 * Applies the SmartyPants punctuation transformations to an HTML string.
 *
 * The text is tokenized with _TokenizeHTML(); tag tokens are copied through
 * unchanged and text tokens are run through the transformations selected by
 * $attr. Text following an opening tag matched by the global
 * $sp_tags_to_skip pattern is left untouched until the closing tag is seen.
 *
 * @param string $text  Text to be parsed.
 * @param string $attr  Value of the smart_quotes="" attribute; when it
 *                      compares equal to NULL the global $smartypants_attr
 *                      is used instead.
 * @param mixed  $ctx   MT context object (unused).
 *
 * @return string The processed text, or $text unchanged when $attr is "0".
 */
function SmartyPants($text, $attr = NULL, $ctx = NULL)
{
    global $smartypants_attr, $sp_tags_to_skip;

    if ($attr == NULL) {
        $attr = $smartypants_attr;
    }

    # Options to specify which transformations to make. Everything starts
    # disabled so that modes which enable only some options (stupefy mode, or
    # a partial letter list such as "e") never read an undefined variable.
    $do_quotes = 0;
    $do_backticks = 0;
    $do_dashes = 0;
    $do_ellipses = 0;
    $do_stupefy = FALSE;
    # Should we translate &quot; entities into normal quotes?
    $convert_quot = 0;

    # Parse attributes:
    # 0 : do nothing
    # 1 : set all
    # 2 : set all, using old school en- and em- dash shortcuts
    # 3 : set all, using inverted old school en and em- dash shortcuts
    # -1 : stupefy mode
    #
    # q : quotes
    # b : backtick quotes (``double'' only)
    # B : backtick quotes (``double'' and `single')
    # d : dashes
    # D : old school dashes
    # i : inverted old school dashes
    # e : ellipses
    # w : convert &quot; entities to " for Dreamweaver users
    if ($attr == "0") {
        # Do nothing.
        return $text;
    } elseif ($attr == "1") {
        # Do everything, turn all options on.
        $do_quotes = 1;
        $do_backticks = 1;
        $do_dashes = 1;
        $do_ellipses = 1;
    } elseif ($attr == "2") {
        # Do everything, turn all options on, use old school dash shorthand.
        $do_quotes = 1;
        $do_backticks = 1;
        $do_dashes = 2;
        $do_ellipses = 1;
    } elseif ($attr == "3") {
        # Do everything, turn all options on, use inverted old school dash shorthand.
        $do_quotes = 1;
        $do_backticks = 1;
        $do_dashes = 3;
        $do_ellipses = 1;
    } elseif ($attr == "-1") {
        # Special "stupefy" mode.
        $do_stupefy = 1;
    } else {
        # One option letter per character.
        $chars = preg_split('//', $attr, -1, PREG_SPLIT_NO_EMPTY);
        foreach ($chars as $c) {
            switch ($c) {
                case "q":
                    $do_quotes = 1;
                    break;
                case "b":
                    $do_backticks = 1;
                    break;
                case "B":
                    $do_backticks = 2;
                    break;
                case "d":
                    $do_dashes = 1;
                    break;
                case "D":
                    $do_dashes = 2;
                    break;
                case "i":
                    $do_dashes = 3;
                    break;
                case "e":
                    $do_ellipses = 1;
                    break;
                case "w":
                    $convert_quot = 1;
                    break;
                default:
                    # Unknown attribute option, ignore.
                    break;
            }
        }
    }

    $tokens = _TokenizeHTML($text);
    $result = '';
    # Keep track of when we're inside <pre> or <code> tags.
    $in_pre = 0;
    # This is a cheat, used to get some context
    # for one-character tokens that consist of
    # just a quote char. What we do is remember
    # the last character of the previous text
    # token, to use as context to curl single-
    # character quote tokens correctly.
    $prev_token_last_char = "";

    foreach ($tokens as $cur_token) {
        if ($cur_token[0] == "tag") {
            # Don't mess with quotes inside tags.
            $result .= $cur_token[1];
            if (preg_match("@{$sp_tags_to_skip}@", $cur_token[1], $matches)) {
                # A captured "/" marks a closing tag, which ends the skip.
                $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
            }
        } else {
            $t = $cur_token[1];
            # Remember last char of this token before processing.
            $last_char = substr($t, -1);
            if (!$in_pre) {
                $t = ProcessEscapes($t);
                if ($convert_quot) {
                    $t = preg_replace('/&quot;/', '"', $t);
                }
                if ($do_dashes == 1) {
                    $t = EducateDashes($t);
                } elseif ($do_dashes == 2) {
                    $t = EducateDashesOldSchool($t);
                } elseif ($do_dashes == 3) {
                    $t = EducateDashesOldSchoolInverted($t);
                }
                if ($do_ellipses) {
                    $t = EducateEllipses($t);
                }
                # Note: backticks need to be processed before quotes.
                if ($do_backticks) {
                    $t = EducateBackticks($t);
                    if ($do_backticks == 2) {
                        $t = EducateSingleBackticks($t);
                    }
                }
                if ($do_quotes) {
                    if ($t == "'") {
                        # Special case: single-character ' token
                        if (preg_match('/\\S/', $prev_token_last_char)) {
                            $t = "&#8217;";
                        } else {
                            $t = "&#8216;";
                        }
                    } elseif ($t == '"') {
                        # Special case: single-character " token
                        if (preg_match('/\\S/', $prev_token_last_char)) {
                            $t = "&#8221;";
                        } else {
                            $t = "&#8220;";
                        }
                    } else {
                        # Normal case:
                        $t = EducateQuotes($t);
                    }
                }
                if ($do_stupefy) {
                    $t = StupefyEntities($t);
                }
            }
            $prev_token_last_char = $last_char;
            $result .= $t;
        }
    }
    return $result;
}
Esempio n. 3
0
/**
 * Runs the SmartyPants transformations selected by a bitmask over $text.
 *
 * The input is split into tag and text tokens by _tokenize(). Tags are
 * copied through untouched; text tokens are transformed unless they follow
 * an opening tag matched by TAGS_TO_SKIP.
 *
 * @param string $text HTML/text to process.
 * @param int    $mode Bitwise combination of the DO_* constants.
 *
 * @return string The processed text ('' for empty input).
 */
function SmartyPants($text = '', $mode = DEFAULT_OPERATION_MODE)
{
    // Nothing to do for an empty string.
    if ($text == '') {
        return '';
    }

    // Decode the bitmask into one boolean per transformation.
    $want_quotes = (bool) ($mode & DO_QUOTES);
    $want_double_backticks = (bool) ($mode & DO_BACKTICKS_DOUBLE);
    $want_all_backticks = (bool) ($mode & DO_BACKTICKS_ALL);
    $want_dashes = (bool) ($mode & DO_DASHES);
    $want_oldschool_dashes = (bool) ($mode & DO_OLDSCHOOL_DASHES);
    $want_inverted_dashes = (bool) ($mode & DO_INVERTED_OLDSCHOOL_DASHES);
    $want_ellipses = (bool) ($mode & DO_ELLIPSES);
    $want_quot_conversion = (bool) ($mode & DO_QUOT_CONV);
    // No bit of $mode enables stupefy mode in this function.
    $want_stupefy = FALSE;

    // Break the input into HTML tags and the text between them.
    $pieces = array();
    _tokenize($text, $pieces);

    $output = '';
    // TRUE while inside a tag whose content must not be modified.
    $skipping = FALSE;
    // Last character of the previous text token. It gives context for
    // tokens that consist of nothing but a single quote character, so
    // they can be curled in the right direction.
    $previous_tail = '';

    foreach ($pieces as $piece) {
        if ($piece['type'] == TOKENS_TYPE_TAG) {
            // Tags pass through verbatim; they only toggle the skip state.
            $output .= $piece['body'];
            if (preg_match(TAGS_TO_SKIP, $piece['body'], $found)) {
                // An empty first group means an opening tag.
                $skipping = ($found[1] == '');
            }
            continue;
        }

        $chunk = $piece['body'];
        // Capture the tail before any transformation changes it.
        $tail = substr($chunk, -1);

        if (!$skipping) {
            $chunk = ProcessEscapes($chunk);

            if ($want_quot_conversion) {
                $chunk = str_replace('&quot;', '"', $chunk);
            }

            // Only one dash style is applied; plain dashes take priority.
            if ($want_dashes) {
                $chunk = EducateDashes($chunk);
            } elseif ($want_oldschool_dashes) {
                $chunk = EducateDashesOldSchool($chunk);
            } elseif ($want_inverted_dashes) {
                $chunk = EducateDashesOldSchoolInverted($chunk);
            }

            if ($want_ellipses) {
                $chunk = EducateEllipses($chunk);
            }

            // Backticks have to be handled before quotes.
            if ($want_double_backticks || $want_all_backticks) {
                $chunk = EducateBackticks($chunk);
            }
            if ($want_all_backticks) {
                $chunk = EducateSingleBackticks($chunk);
            }

            if ($want_quotes) {
                if ($chunk == "'") {
                    // Lone ' token: closing after non-space, else opening.
                    $chunk = preg_match("/\\S/", $previous_tail) ? "&#8217;" : "&#8216;";
                } elseif ($chunk == '"') {
                    // Lone " token: closing after non-space, else opening.
                    $chunk = preg_match("/\\S/", $previous_tail) ? "&#8221;" : "&#8220;";
                } else {
                    $chunk = EducateQuotes($chunk);
                }
            }

            if ($want_stupefy) {
                $chunk = StupefyEntities($chunk);
            }
        }

        $previous_tail = $tail;
        $output .= $chunk;
    }

    return $output;
}