Beispiel #1
0
/**
 * Returns a printable representation of the character built by utf8_chr().
 *
 * The encoded character is tested against three byte patterns: a byte in
 * 0x00-0x1F, the byte 0x7F, or 0xC2 followed by a byte in 0x80-0xA0. When one
 * of them matches, the HTML-escaped placeholder "<control>" is returned
 * instead of the character itself.
 *
 * @param  mixed  $ascii Value handed unchanged to utf8_chr()
 * @return string The encoded character, or the escaped "<control>" marker
 */
function print_utf8($ascii)
{
    $encoded = utf8_chr($ascii);
    $is_control = preg_match("#[\\x00-\\x1F]|\\x7F|(?:\\xC2[\\x80-\\xA0])#", $encoded);

    return $is_control ? htmlentities("<control>") : $encoded;
}
Beispiel #2
0
/**
 * Builds a UTF-8 string from a list of Unicode code points.
 *
 * Every element is converted with utf8_chr() and the results are joined in
 * iteration order.
 *
 * @param array $array array of unicode code points representing a string
 * @return string UTF-8 character string
 */
function utf8_from_unicode($array)
{
    $pieces = array();
    foreach ($array as $codepoint) {
        $pieces[] = utf8_chr($codepoint);
    }

    return implode('', $pieces);
}
/**
 * Replaces named HTML entities in $content, in place.
 *
 * Each key of the global $html_named_entities_mapping_mine is wrapped as
 * "&key;" and replaced by utf8_chr() of the mapped value. The pairs are
 * applied one after another in the mapping's order, so an earlier replacement
 * can produce text that a later pair matches.
 *
 * @param  mixed $content Text to rewrite (modified by reference)
 * @return void
 */
function filter_named_entities(&$content)
{
    global $html_named_entities_mapping_mine;

    $search = array();
    $replace = array();
    foreach ($html_named_entities_mapping_mine as $name => $value) {
        $search[] = '&' . $name . ';';
        $replace[] = utf8_chr($value);
    }

    // Ugly hack: one extra literal substitution, applied after all entity pairs.
    $search[] = 'í';
    $replace[] = 'i';

    // Array-form str_replace() applies the pairs sequentially, left to right.
    $content = str_replace($search, $replace, $content);
}
Beispiel #4
0
 /**
  * Asserts that utf8_chr() called with its second argument set to true
  * returns false for the supplied code point. Diagnostics raised by the
  * call are suppressed with the @ operator.
  *
  * @dataProvider providerFailingUtf8Chr
  */
 public function testFailingUtf8ChrReturnValue($cp)
 {
     $result = @utf8_chr($cp, true);

     $this->assertFalse($result);
 }
Beispiel #5
0
/**
 * Callback for decode_ncr().
 * Most (but not all) malformed NCRs are ignored by this function.
 *
 * $m[1] holds the body of the numeric character reference. When it starts
 * with "x" or "X" the rest is parsed as hexadecimal; otherwise the body is
 * passed to utf8_chr() unchanged.
 *
 * @param  array $m Match array; only index 1 is read
 * @return mixed Whatever utf8_chr() returns for the decoded code point
 */
function utf8_from_ncr_callback($m)
{
    $reference = $m[1];

    if (strncasecmp($reference, 'x', 1) === 0) {
        // Hexadecimal form: drop the leading marker and convert the digits.
        return utf8_chr(hexdec(substr($reference, 1)));
    }

    return utf8_chr($reference);
}
Beispiel #6
0
/**
 * UTF-8 aware replacement for trim().
 *
 * Strip whitespace (or other characters) from the beginning and end of
 * a string.
 *
 * @param  string $str        The UTF-8 encoded string
 * @param  mixed  $stripchars The stripped characters: null uses one character
 *                            per entry of the global $unicode_separators_array
 *                            (built once and cached), an array contributes the
 *                            result of utf8_get_char() for each element, and
 *                            any other value is split with utf8_split($stripchars, 1).
 * @param  int    $striptype  The optional argument $striptype can be
 *                            UTF8_STRIP_BOTH, UTF8_STRIP_LEFT, or UTF8_STRIP_RIGHT.
 *                            If $striptype is not specified it is assumed to be
 *                            UTF8_STRIP_BOTH.
 * @return string The stripped string
 */
function utf8_strip($str, $stripchars = null, $striptype = UTF8_STRIP_BOTH)
{
    static $defaults;
    global $unicode_separators_array;
    if ($stripchars === null) {
        if ($defaults === null) {
            foreach ($unicode_separators_array as $cp) {
                $defaults[] = utf8_chr($cp);
            }
        }
        $stripchars = $defaults;
    } elseif (is_array($stripchars)) {
        $chars = array();
        foreach ($stripchars as $char) {
            if (($char = utf8_get_char($char)) !== false) {
                $chars[] = $char;
            }
        }
        $stripchars = $chars;
    } else {
        $stripchars = utf8_split($stripchars, 1);
    }
    $left = $striptype & UTF8_STRIP_LEFT;
    $right = $striptype & UTF8_STRIP_RIGHT;
    // $buffer holds strippable characters seen after the last kept one; they
    // are only emitted if another kept character follows them.
    $rv = $buffer = '';
    // Compare against false explicitly: a plain truthiness test would end the
    // loop at the character "0" and silently truncate the input.
    // $i is created by the by-reference call and left uninitialised on purpose,
    // matching how utf8_capwords() drives the same helper.
    while (($char = utf8_get_char($str, $i)) !== false) {
        // Strict match so "0" cannot loosely equal a non-string entry.
        $state = in_array($char, $stripchars, true);
        if ($left) {
            if ($state) {
                continue;
            } else {
                // First kept character reached: stop stripping on the left.
                $left = false;
            }
        }
        if ($right) {
            if ($state) {
                $buffer .= $char;
                continue;
            } else {
                $rv .= $buffer;
                $buffer = '';
            }
        }
        $rv .= $char;
    }
    return $rv;
}
        }
    }
    // do some tests for things that transform into something with the number one
    if (strpos($temp_hold, utf8_chr(0x31)) !== false) {
        // any kind of letter L?
        if (strpos($value[0], 'LETTER L') !== false || strpos($value[0], 'IOTA') !== false || strpos($value[0], 'SMALL L ') !== false || preg_match('/SMALL LIGATURE [^L]*L /', $value[0])) {
            // replace all of the mappings that transform some sort of letter l to number one instead to some sort of letter l to latin small letter l
            $temp_hold = str_replace(utf8_chr(0x31), utf8_chr(0x6c), $temp_hold);
        }
    }
    // uppercased chars that were folded do not exist in this universe,
    // no amount of normalization could ever "trick" this into not working
    if (in_array($value[1], $casefold_array[1])) {
        continue;
    }
    $uniarray[utf8_chr(hexdec((string) $value[1]))] = $temp_hold;
}
// Report progress, then write $uniarray out as a PHP file that returns it.
echo 'Writing to confusables.' . $phpEx . "\n";
$fp = fopen("{$phpbb_root_path}includes/utf/data/confusables.{$phpEx}", 'wb');
fwrite($fp, sprintf('<?php return %s;', my_var_export($uniarray)));
fclose($fp);
/**
* Return a parsable string representation of a variable
*
* This function is limited to arrays/strings/integers
*
* @param	mixed	$var		Variable
* @return	string				PHP code representing the variable
*/
function my_var_export($var)
{
    } else {
        if ($cp > 0x7ff) {
            return chr(0xe0 | $cp >> 12) . chr(0x80 | $cp >> 6 & 0x3f) . chr(0x80 | $cp & 0x3f);
        } else {
            if ($cp > 0x7f) {
                return chr(0xc0 | $cp >> 6) . chr(0x80 | $cp & 0x3f);
            } else {
                return chr($cp);
            }
        }
    }
}
// Each matching line of $unidata yields: [1] a hex code point, [2] a status
// letter (C, F or S; the pattern is case-insensitive) and [3] one or more
// space-separated hex code points. NOTE(review): this looks like the Unicode
// CaseFolding.txt layout - confirm against where $unidata is loaded.
preg_match_all('/^([0-9A-F]+); ([CFS]); ([0-9A-F]+(?: [0-9A-F]+)*);/im', $unidata, $array, PREG_SET_ORDER);
// Group the mappings by status letter: encoded source character => encoded
// replacement string.
$uniarray = array();
foreach ($array as $value) {
    $uniarray[$value[2]][utf8_chr(hexdec((string) $value[1]))] = implode('', array_map('utf8_chr', array_map('hexdec', explode(' ', $value[3]))));
}
// Write one includable PHP file per status letter.
foreach ($uniarray as $idx => $contents) {
    // Build the name once so the progress message reports the file that is
    // actually written (the status letter is lower-cased in the file name).
    $case_fold_file = 'case_fold_' . strtolower($idx) . '.' . $phpEx;
    echo "Writing to {$case_fold_file}\n";
    $fp = fopen($phpbb_root_path . 'includes/utf/data/' . $case_fold_file, 'wb');
    fwrite($fp, '<?php return ' . my_var_export($contents) . ';');
    fclose($fp);
}
/**
* Return a parsable string representation of a variable
*
* This function is limited to arrays/strings/integers
*
* @param	mixed	$var		Variable
* @return	string				PHP code representing the variable
*/
Beispiel #9
0
/**
 * Callback function for utf8_decode_entities.
 *
 * Parses $matches[1] as a hexadecimal number and converts the result with
 * utf8_chr().
 *
 * @param array $matches Match array; only index 1 is read
 * @return string
 */
function utf8_hexchr_callback($matches)
{
    $codepoint = hexdec($matches[1]);

    return utf8_chr($codepoint);
}
/**
 * utf8_strrpos( )
 *
 * Find the position of the last occurrence of a needle in a UTF-8 string.
 * @since 1.3
 *
 * @param    string $haystack The string to search in
 * @param    string $needle The string to search for; a non-negative integer
 *                  is converted with utf8_chr() first
 * @param    int $offset Number of characters to ignore from the start
 *                  (positive) or from the end (negative)
 * @return   int|false The position of the last occurrence of needle; the
 *                  fallback search returns false when needle is not found
 */
function utf8_strrpos($haystack, $needle, $offset = 0)
{
    // An integer needle is treated as a code point rather than as text.
    $needle_is_codepoint = (int) $needle === $needle && $needle >= 0;
    if ($needle_is_codepoint) {
        $needle = utf8_chr($needle);
    }

    $needle = utf8_clean((string) $needle);
    $offset = (int) $offset;
    $haystack = utf8_clean($haystack);

    if (mbstring_loaded()) {
        // mb_strrpos returns a wrong position if invalid characters are found
        // in $haystack before $needle, hence the cleaning above.
        return mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    if (iconv_loaded() && $offset === 0) {
        // iconv_strrpos is not tolerant to invalid characters and does not
        // accept $offset, so it is only usable without an offset.
        return iconv_strrpos($haystack, $needle, 'UTF-8');
    }

    // Fallback: cut the haystack down to the searched window, locate the
    // needle by bytes, then count the characters that precede it.
    $skipped = 0;
    if ($offset > 0) {
        $haystack = utf8_substr($haystack, $offset);
        $skipped = $offset;
    } elseif ($offset < 0) {
        $haystack = utf8_substr($haystack, 0, $offset);
    }

    $byte_pos = strrpos($haystack, $needle);
    if ($byte_pos === false) {
        return false;
    }

    return $skipped + utf8_strlen(substr($haystack, 0, $byte_pos));
}
Beispiel #11
0
/**
 * Capitalises words: the first character of each word is upper-cased and
 * every other character is lower-cased.
 *
 * A word starts at the beginning of the string and after any character for
 * which unicode_is_separator() is true; separators are copied unchanged.
 *
 * @param  string $str The UTF-8 encoded string
 * @return string with the first character of each word converted to uppercase
 *                and the remainder to lowercase
 */
function utf8_capwords($str)
{
    $result = '';
    $at_word_start = true;

    // $pos and $codepoint are filled in by utf8_get_char() through its
    // by-reference parameters; both start out unset.
    while (($char = utf8_get_char($str, $pos, $codepoint)) !== false) {
        $is_separator = unicode_is_separator($codepoint);

        if (!$is_separator) {
            if ($at_word_start) {
                $mapped = unicode_upcase($codepoint);
            } else {
                $mapped = unicode_downcase($codepoint);
            }
            // Re-encode only when the case mapping changed the code point.
            if ($mapped != $codepoint) {
                $char = utf8_chr($mapped);
            }
        }

        $at_word_start = $is_separator;
        $result .= $char;
    }

    return $result;
}