_TokenizeHTML PHP Code Examples

Example #1

0

Show file

File: unicode-conversion.php Project: beltofte/ding

/**
 * Perform character conversion.
 *
 * Replaces each requested ASCII sequence with its entry from
 * unicode_conversion_map() inside the text tokens of $text. Tag tokens are
 * copied through untouched. After a tag that matches
 * SMARTYPANTS_TAGS_TO_SKIP, text is left alone until a matching tag whose
 * first capture group is '/' is seen (the constant is assumed to capture the
 * closing slash in group 1 -- verify against its definition).
 *
 * Requested sequences that have no entry in the conversion map are left in
 * the text unchanged.
 *
 * @param string $text
 *    Text to be parsed.
 * @param array $characters_to_convert
 *    Array of ASCII characters to convert.
 * @return string
 *    The result of the conversion.
 */
function convert_characters($text, $characters_to_convert)
{
    if (empty($characters_to_convert) || !is_array($characters_to_convert)) {
        // do nothing
        return $text;
    }
    // get ascii to unicode mappings
    $unicode_map = unicode_conversion_map();
    // Build parallel search/replace lists. A sequence without a mapping is
    // dropped from both lists; pairing it with a missing (null) replacement
    // would make str_replace() delete it from the text.
    $ascii_strings = array();
    $unicode_strings = array();
    foreach ($characters_to_convert as $ascii_string) {
        if (isset($unicode_map[$ascii_string])) {
            $ascii_strings[] = $ascii_string;
            $unicode_strings[] = $unicode_map[$ascii_string];
        }
    }
    $tokens = _TokenizeHTML($text);
    $result = '';
    $in_pre = 0;
    // Keep track of when we're inside <pre> or <code> tags
    foreach ($tokens as $cur_token) {
        if ($cur_token[0] == "tag") {
            // Don't mess with text inside tags, <pre> blocks, or <code> blocks
            $result .= $cur_token[1];
            // Get the tags to skip regex from SmartyPants
            if (preg_match(SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
                $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
            }
        } else {
            $t = $cur_token[1];
            if ($in_pre == 0) {
                $t = ProcessEscapes($t);
                $t = str_replace($ascii_strings, $unicode_strings, $t);
            }
            $result .= $t;
        }
    }
    return $result;
}

Example #2

0

Show file

File: pi.sb_better.php Project: scottboms/sb_betteramp.ee_addon

 /**
  * Run a regex search/replace over the text portions of an HTML string.
  *
  * The string is split with _TokenizeHTML(). Tag tokens are copied through
  * unchanged, and text between an opening and closing pre/code/kbd/script/math
  * tag is left untouched. All other text tokens go through preg_replace().
  *
  * @param string $str     HTML to process. When it compares equal to '',
  *                        the global $TMPL->tagdata is used instead. The
  *                        default cannot be used by omission because required
  *                        parameters follow it; pass '' explicitly.
  * @param mixed  $search  Pattern(s) handed to preg_replace().
  * @param mixed  $replace Replacement(s) handed to preg_replace().
  * @return string The rebuilt string.
  */
 function _apply_search_replace($str = '', $search, $replace)
 {
     global $TMPL;
     if ($str == '') {
         $str = $TMPL->tagdata;
     }
     // Group 1 captures the '/' of a closing tag. The previous pattern was
     // empty, so it matched every tag and never reported a closing tag, which
     // disabled the replacement for everything after the first tag.
     $tags_to_skip = '!<(/?)(?:pre|code|kbd|script|math)[\s>]!i';
     $tokens = _TokenizeHTML($str);
     $result = '';
     $in_skipped_tag = false;
     foreach ($tokens as $token) {
         if ($token[0] == 'tag') {
             $result .= $token[1];
             if (preg_match($tags_to_skip, $token[1], $matches)) {
                 $in_skipped_tag = !(isset($matches[1]) && $matches[1] == '/');
             }
         } elseif ($in_skipped_tag) {
             $result .= $token[1];
         } else {
             $result .= preg_replace($search, $replace, $token[1]);
         }
     }
     return $result;
 }

Example #3

0

Show file

File: typogrify.class.php Project: beltofte/ding

 /**
  * Stylable capitals
  *
  * Wraps multiple capital letters in ``<span class="caps">``
  * so they can be styled with CSS.
  *
  * Uses the smartypants tokenizer to not screw with HTML or with tags it shouldn't.
  * Text between an opening and a closing pre/code/kbd/script/math tag is
  * passed through untouched; every other text token is run through
  * Typogrify::_cap_wrapper via preg_replace_callback().
  *
  * @param string $text HTML to process.
  * @return string The processed HTML, or $text unchanged when _TokenizeHTML()
  *                is not available.
  */
 public static function caps($text)
 {
     // If _TokenizeHTML from Smartypants is not present, don't do anything.
     if (!function_exists('_TokenizeHTML')) {
         return $text;
     }
     $tokens = _TokenizeHTML($text);
     $result = array();
     $in_skipped_tag = false;
     $cap_finder = "/(\n            (\\b[A-Z\\d]*        # Group 2: Any amount of caps and digits\n            [A-Z]\\d*[A-Z]      # A cap string much at least include two caps (but they can have digits between them)\n            [A-Z\\d]*\\b)        # Any amount of caps and digits\n            | (\\b[A-Z]+\\.\\s?   # OR: Group 3: Some caps, followed by a '.' and an optional space\n            (?:[A-Z]+\\.\\s?)+)  # Followed by the same thing at least once more\n            (?:\\s|\\b|\$))/x";
     $tags_to_skip_regex = "/<(\\/)?(?:pre|code|kbd|script|math)[^>]*>/i";
     foreach ($tokens as $token) {
         if ($token[0] == "tag") {
             // Don't mess with tags.
             $result[] = $token[1];
             // Only skip-list tags change the state: an opening tag starts a
             // skipped region and a closing tag (group 1 is '/') ends it.
             // Other tags leave the state alone, so markup nested inside
             // <pre>/<code> does not re-enable processing.
             if (preg_match($tags_to_skip_regex, $token[1], $matches)) {
                 $in_skipped_tag = !(isset($matches[1]) && $matches[1] == '/');
             }
         } elseif ($in_skipped_tag) {
             $result[] = $token[1];
         } else {
             $result[] = preg_replace_callback($cap_finder, array('Typogrify', '_cap_wrapper'), $token[1]);
         }
     }
     return join("", $result);
 }

Example #4

0

Show file

File: markdown.php Project: BackupTheBerlios/nobunobuxoops-svn

/**
 * Rebuild $text from its HTML tokens, escaping characters along the way.
 *
 * Tag tokens get every '*' and '_' replaced by the matching entries of the
 * global $g_escape_table. All other tokens are passed through
 * _EncodeBackslashEscapes().
 *
 * @param string $text Text to tokenize with _TokenizeHTML().
 * @return string The re-assembled text.
 */
function _EscapeSpecialChars($text)
{
    global $g_escape_table;
    $tokens = _TokenizeHTML($text);
    $text = '';
    # rebuild $text from the tokens
    # (No <pre>/<code> tracking here: the former $in_pre flag was never
    # updated, so every non-tag token was always encoded.)
    foreach ($tokens as $cur_token) {
        if ($cur_token[0] == 'tag') {
            # Inside tags, swap * and _ for their escape-table values.
            $text .= str_replace(array('*', '_'), array($g_escape_table['*'], $g_escape_table['_']), $cur_token[1]);
        } else {
            $text .= _EncodeBackslashEscapes($cur_token[1]);
        }
    }
    return $text;
}

Example #5

0

Show file

File: Markdown.php Project: BackupTheBerlios/milaninegw-svn

/**
 * Re-assemble $text from its HTML tokens with Markdown characters escaped.
 *
 * Tag tokens have '*' and '_' replaced by the corresponding values of the
 * global $md_escape_table; all remaining tokens go through
 * _EncodeBackslashEscapes().
 *
 * @param string $text Text to tokenize with _TokenizeHTML().
 * @return string The rebuilt text.
 */
function _EscapeSpecialChars($text)
{
    global $md_escape_table;

    $pieces = array();
    foreach (_TokenizeHTML($text) as $token) {
        $content = $token[1];
        if ($token[0] != 'tag') {
            $pieces[] = _EncodeBackslashEscapes($content);
            continue;
        }
        # Within tags, * and _ are swapped for their escape-table values so
        # they cannot be mistaken for Markdown italics/strong markers.
        $pieces[] = str_replace(
            array('*', '_'),
            array($md_escape_table['*'], $md_escape_table['_']),
            $content
        );
    }

    return implode('', $pieces);
}

Example #6

0

Show file

File: smartypants.php Project: BackupTheBerlios/oos-svn

/**
 * Convert ellipses in the text portions of an HTML string.
 *
 * The text is split with _TokenizeHTML(). Tags are copied through unchanged;
 * text tokens outside skipped tags are run through ProcessEscapes() and then
 * EducateEllipses(). Which tags are skipped is decided by the global
 * $sp_tags_to_skip pattern, whose first capture group is expected to hold the
 * '/' of a closing tag.
 *
 * @param string $text Text to be parsed.
 * @param mixed  $attr Value of the smart_ellipses="" attribute. When it
 *                     compares equal to NULL the global $smartypants_attr is
 *                     used; a value equal to 0 disables processing.
 * @param mixed  $ctx  MT context object (unused).
 * @return string The processed text.
 */
function SmartEllipses($text, $attr = NULL, $ctx = NULL)
{
    # Both settings are globals; without this declaration they would be
    # undefined locals, so the default attribute would resolve to NULL (always
    # "do nothing") and the skip pattern would collapse to an empty regex.
    global $smartypants_attr, $sp_tags_to_skip;
    if ($attr == NULL) {
        $attr = $smartypants_attr;
    }
    if ($attr == 0) {
        # do nothing;
        return $text;
    }
    $tokens = _TokenizeHTML($text);
    $result = '';
    $in_pre = 0;
    # Keep track of when we're inside <pre> or <code> tags
    foreach ($tokens as $cur_token) {
        if ($cur_token[0] == "tag") {
            # Don't mess with quotes inside tags
            $result .= $cur_token[1];
            if (preg_match("@{$sp_tags_to_skip}@", $cur_token[1], $matches)) {
                $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
            }
        } else {
            $t = $cur_token[1];
            if (!$in_pre) {
                $t = ProcessEscapes($t);
                $t = EducateEllipses($t);
            }
            $result .= $t;
        }
    }
    return $result;
}

Example #7

0

Show file

File: smartypants.php Project: kreynen/elmsln

/**
 * space_hyphens
 *
 * Turns a run of one to three hyphens that has whitespace on both sides into
 * an em dash, with a narrow no-break space entity (&#8239;) before it and a
 * thin space entity (&thinsp;) after it. The surrounding whitespace is
 * consumed by the replacement. Tags, and text inside tags matched by
 * SMARTYPANTS_TAGS_TO_SKIP, are left as they are.
 */
function typogrify_space_hyphens($text)
{
    $output = '';
    // Non-zero while we are inside a tag matched by SMARTYPANTS_TAGS_TO_SKIP.
    $skipping = 0;
    foreach (_TokenizeHTML($text) as $token) {
        $content = $token[1];
        if ($token[0] == "tag") {
            // Tags are emitted verbatim; they only drive the skip state.
            if (preg_match(SMARTYPANTS_TAGS_TO_SKIP, $content, $found)) {
                $is_closing = isset($found[1]) && $found[1] == '/';
                $skipping = $is_closing ? 0 : 1;
            }
        } elseif (!$skipping) {
            $content = preg_replace("/\\s(-{1,3})\\s/", '&#8239;—&thinsp;', $content);
        }
        $output .= $content;
    }
    return $output;
}

PHP _TokenizeHTML Examples