예제 #1
0
/**
 * UTF-8 aware replacement for strpos(): find the first occurrence of
 * $needle in $haystack, counted in characters rather than bytes.
 *
 * Uses mb_strpos() when the mbstring extension is available and the
 * UTF8_NOMBSTRING constant is not defined. Otherwise both strings are
 * converted to code point arrays with utf8_to_unicode() and compared
 * element by element.
 *
 * In the fallback path:
 *  - an empty needle, or input that utf8_to_unicode() cannot convert to an
 *    array, yields false;
 *  - a negative $offset counts from the end of the haystack (as mb_strpos
 *    does) and is clamped to 0;
 *  - candidate positions where the needle would run past the end of the
 *    haystack are never examined, so no out-of-range element is read.
 *
 * @param string $haystack UTF-8 string to search in
 * @param string $needle   UTF-8 string to search for
 * @param int    $offset   character offset at which to start searching
 * @return int|false character position of the first match, or false if not found
 */
function utf8_strpos($haystack, $needle, $offset = 0)
{
    if (!defined('UTF8_NOMBSTRING') && function_exists('mb_strpos')) {
        return mb_strpos($haystack, $needle, $offset, 'utf-8');
    }

    $haystack = utf8_to_unicode($haystack);
    $needle = utf8_to_unicode($needle);
    if (!is_array($haystack) || !is_array($needle)) {
        return false;
    }

    $haystackLength = count($haystack);
    $needleLength = count($needle);
    if ($needleLength === 0) {
        return false;
    }

    $offset = (int) $offset;
    if ($offset < 0) {
        $offset = max(0, $haystackLength + $offset);
    }

    // Last index at which the whole needle still fits inside the haystack.
    $lastStart = $haystackLength - $needleLength;
    for ($position = $offset; $position <= $lastStart; $position++) {
        $matched = 0;
        while ($matched < $needleLength && $needle[$matched] === $haystack[$position + $matched]) {
            $matched++;
        }
        if ($matched === $needleLength) {
            return $position;
        }
    }

    return false;
}
예제 #2
0
 /**
  * utf8_to_unicode() must decode the sample string into exactly these
  * twenty code points, in order.
  */
 function testString()
 {
     $expected = array(
         73, 241, 116, 235, 114,
         110, 226, 116, 105, 244,
         110, 224, 108, 105, 122,
         230, 116, 105, 248, 110,
     );
     $decoded = utf8_to_unicode('Iñtërnâtiônàlizætiøn');
     $this->assertEqual($decoded, $expected);
 }
예제 #3
0
/**
 * Tell whether every character of a string is accepted by detect_CJK().
 *
 * The string is decoded with utf8_to_unicode() and each resulting code
 * point is passed to detect_CJK(); the first rejected code point ends the
 * scan. A string that decodes to no code points at all yields TRUE.
 *
 * @param string $str UTF-8 encoded string
 * @return bool TRUE when no code point is rejected, FALSE otherwise
 */
function is_chinese($str)
{
    foreach (utf8_to_unicode($str) as $codepoint) {
        if (detect_CJK($codepoint)) {
            continue;
        }
        return FALSE;
    }
    return TRUE;
}
예제 #4
0
/**
 * Strip every character whose code point is not listed in the global
 * $UTF8_ALPHA_CHARS table.
 *
 * The string is decoded with utf8_to_unicode(), unlisted code points are
 * removed from the array (leaving gaps in its keys), and the remainder is
 * re-encoded with unicode_to_utf8().
 *
 * @param string $string UTF-8 input
 * @return string whatever unicode_to_utf8() produces for the kept code points
 */
function utf8_keepalphanum($string)
{
    global $UTF8_ALPHA_CHARS;

    $codepoints = utf8_to_unicode($string);
    $total = count($codepoints);
    $index = 0;
    while ($index < $total) {
        $allowed = in_array($codepoints[$index], $UTF8_ALPHA_CHARS);
        if (!$allowed) {
            unset($codepoints[$index]);
        }
        ++$index;
    }

    return unicode_to_utf8($codepoints);
}
예제 #5
0
/**
 * Strip every character whose code point is not in the built-in whitelist.
 *
 * The string is decoded with utf8_to_unicode(), code points missing from
 * the table below are removed from the array (leaving gaps in its keys),
 * and the remainder is re-encoded with unicode_to_utf8().
 *
 * @param string $string UTF-8 input
 * @return string whatever unicode_to_utf8() produces for the kept code points
 */
function utf8_keepalphanum($string)
{
    // Whitelist: A-Z, a-z, 0-9, ". - _" and space, extended Latin letters,
    // Greek and Cyrillic.
    static $UTF8_ALPHA_CHARS = array(
        // A-Z
        0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d,
        0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a,
        // a-z
        0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
        0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a,
        // 0-9, ". - _" and space
        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2e, 0x2d, 0x5f, 0x20,
        // extended Latin
        0xc1, 0xe1, 0x106, 0x107, 0xc9, 0xe9, 0xcd, 0xed, 0x139, 0x13a, 0x143, 0x144,
        0xd3, 0xf3, 0x154, 0x155, 0x15a, 0x15b, 0xda, 0xfa, 0xdd, 0xfd, 0x179, 0x17a,
        0x10f, 0x13d, 0x13e, 0x165, 0x102, 0x103, 0x11e, 0x11f, 0x16c, 0x16d, 0x10c, 0x10d,
        0x10e, 0x11a, 0x11b, 0x147, 0x148, 0x158, 0x159, 0x160, 0x161, 0x164, 0x17d, 0x17e,
        0xc7, 0xe7, 0x122, 0x123, 0x136, 0x137, 0x13b, 0x13c,
        0x145, 0x146, 0x156, 0x157, 0x15e, 0x15f, 0x162, 0x163,
        0xc2, 0xe2, 0x108, 0x109, 0xca, 0xea, 0x11c, 0x11d, 0x124, 0x125, 0xce, 0xee,
        0x134, 0x135, 0xd4, 0xf4, 0x15c, 0x15d, 0xdb, 0xfb, 0x174, 0x175, 0x176, 0x177,
        0xc4, 0xe4, 0xcb, 0xeb, 0xcf, 0xef, 0xd6, 0xf6, 0xdc, 0xfc, 0x178, 0xff,
        0x10a, 0x10b, 0x116, 0x117, 0x120, 0x121, 0x130, 0x131, 0x17b, 0x17c, 0x150, 0x151, 0x170, 0x171,
        0xc0, 0xe0, 0xc8, 0xe8, 0xcc, 0xec, 0xd2, 0xf2, 0xd9, 0xf9, 0x1a0, 0x1a1, 0x1af, 0x1b0,
        0x100, 0x101, 0x112, 0x113, 0x12a, 0x12b, 0x14c, 0x14d, 0x16a, 0x16b,
        0x104, 0x105, 0x118, 0x119, 0x12e, 0x12f, 0x172, 0x173,
        0xc5, 0xe5, 0x16e, 0x16f, 0x110, 0x111, 0x126, 0x127, 0x141, 0x142, 0xd8, 0xf8,
        0xc3, 0xe3, 0xd1, 0xf1, 0xd5, 0xf5, 0xc6, 0xe6,
        0x152, 0x153, 0xd0, 0xf0, 0xde, 0xfe, 0xdf, 0x17f,
        // Greek
        0x391, 0x392, 0x393, 0x394, 0x395, 0x396, 0x397, 0x398, 0x399, 0x39a, 0x39b, 0x39c,
        0x39d, 0x39e, 0x39f, 0x3a0, 0x3a1, 0x3a3, 0x3a4, 0x3a5, 0x3a6, 0x3a7, 0x3a8, 0x3a9,
        0x386, 0x388, 0x389, 0x38a, 0x38c, 0x38e, 0x38f, 0x3aa, 0x3ab,
        0x3b1, 0x3b2, 0x3b3, 0x3b4, 0x3b5, 0x3b6, 0x3b7, 0x3b8, 0x3b9, 0x3ba, 0x3bb, 0x3bc, 0x3bd,
        0x3be, 0x3bf, 0x3c0, 0x3c1, 0x3c3, 0x3c2, 0x3c4, 0x3c5, 0x3c6, 0x3c7, 0x3c8, 0x3c9,
        0x3ac, 0x3ad, 0x3ae, 0x3af, 0x3cc, 0x3cd, 0x3ce, 0x3ca, 0x3cb, 0x390, 0x3b0,
        // Cyrillic
        0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x401, 0x416, 0x417, 0x406, 0x419,
        0x41a, 0x41b, 0x41c, 0x41d, 0x41e, 0x41f, 0x420, 0x421, 0x422, 0x423, 0x40e,
        0x424, 0x425, 0x426, 0x427, 0x428, 0x42b, 0x42c, 0x42d, 0x42e, 0x42f,
        0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x451, 0x436, 0x437, 0x456, 0x439,
        0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f, 0x440, 0x441, 0x442, 0x443, 0x45e,
        0x444, 0x445, 0x446, 0x447, 0x448, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f,
        0x418, 0x429, 0x42a, 0x438, 0x449, 0x44a,
        0x403, 0x405, 0x408, 0x409, 0x40a, 0x40c, 0x40f,
        0x453, 0x455, 0x458, 0x459, 0x45a, 0x45c, 0x45f,
        0x402, 0x40b, 0x452, 0x45b, 0x490, 0x404, 0x407,
        0x491, 0x454, 0x457, 0x4e8, 0x4ae, 0x4e9, 0x4af
    );

    $codepoints = utf8_to_unicode($string);
    $total = count($codepoints);
    for ($pos = 0; $pos < $total; ++$pos) {
        if (in_array($codepoints[$pos], $UTF8_ALPHA_CHARS)) {
            continue;
        }
        unset($codepoints[$pos]);
    }

    return unicode_to_utf8($codepoints);
}
예제 #6
0
파일: case.php 프로젝트: stonyyi/anahita
/**
* UTF-8 aware alternative to strtoupper
* Make a string uppercase
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
* Note: requires utf8_to_unicode and utf8_from_unicode, and the global
* $UTF8_LOWER_TO_UPPER map (lowercase code point => uppercase code point)
*
* An empty string is returned as whatever utf8_from_unicode() produces for
* an empty array; FALSE is reserved for input that utf8_to_unicode() could
* not convert to an array.
* NOTE(review): assumes utf8_to_unicode() signals invalid UTF-8 with a
* non-array value (FALSE in phputf8) - confirm against its definition.
*
* @author Andreas Gohr <*****@*****.**>
* @param string
* @return mixed either string in uppercase or FALSE if UTF-8 invalid
* @see http://www.php.net/strtoupper
* @see utf8_to_unicode
* @see utf8_from_unicode
* @see http://www.unicode.org/reports/tr21/tr21-5.html
* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php
* @package utf8
* @subpackage strings
*/
function utf8_strtoupper($string)
{
    global $UTF8_LOWER_TO_UPPER;
    $uni = utf8_to_unicode($string);
    // Test the type, not the truthiness: an empty array (empty input) is a
    // valid result and must not be reported as invalid UTF-8.
    if (!is_array($uni)) {
        return FALSE;
    }
    foreach ($uni as $i => $codepoint) {
        if (isset($UTF8_LOWER_TO_UPPER[$codepoint])) {
            $uni[$i] = $UTF8_LOWER_TO_UPPER[$codepoint];
        }
    }
    return utf8_from_unicode($uni);
}
 /**
  * UTF-8 aware strtoupper(): map every code point that has an entry in the
  * built-in lowercase => uppercase table and leave all others untouched.
  *
  * The table is built once and kept in a static variable. A few keys occur
  * more than once in the literal with identical values; that is harmless.
  *
  * An empty string is returned as whatever unicode_to_utf8() produces for
  * an empty array; false is reserved for input that utf8_to_unicode() could
  * not convert to an array.
  * NOTE(review): assumes utf8_to_unicode() signals invalid UTF-8 with a
  * non-array value - confirm against its definition.
  *
  * @param string $string UTF-8 input
  * @return string|false uppercased string, or false if the input could not be decoded
  */
 function utf8_strtoupper($string)
 {
     static $lower_to_upper;
     if ($lower_to_upper === null) {
         $lower_to_upper = array(
             0x61 => 0x41, 0x3c6 => 0x3a6, 0x163 => 0x162, 0xe5 => 0xc5, 0x62 => 0x42, 0x13a => 0x139,
             0xe1 => 0xc1, 0x142 => 0x141, 0x3cd => 0x38e, 0x101 => 0x100, 0x491 => 0x490, 0x3b4 => 0x394,
             0x15b => 0x15a, 0x64 => 0x44, 0x3b3 => 0x393, 0xf4 => 0xd4, 0x44a => 0x42a, 0x439 => 0x419,
             0x113 => 0x112, 0x43c => 0x41c, 0x15f => 0x15e, 0x144 => 0x143, 0xee => 0xce, 0x45e => 0x40e,
             0x44f => 0x42f, 0x3ba => 0x39a, 0x155 => 0x154, 0x69 => 0x49, 0x73 => 0x53, 0x1e1f => 0x1e1e,
             0x135 => 0x134, 0x447 => 0x427, 0x3c0 => 0x3a0, 0x438 => 0x418, 0xf3 => 0xd3, 0x440 => 0x420,
             0x454 => 0x404, 0x435 => 0x415, 0x449 => 0x429, 0x14b => 0x14a, 0x431 => 0x411, 0x459 => 0x409,
             0x1e03 => 0x1e02, 0xf6 => 0xd6, 0xf9 => 0xd9, 0x6e => 0x4e, 0x451 => 0x401, 0x3c4 => 0x3a4,
             0x443 => 0x423, 0x15d => 0x15c, 0x453 => 0x403, 0x3c8 => 0x3a8, 0x159 => 0x158, 0x67 => 0x47,
             0xe4 => 0xc4, 0x3ac => 0x386, 0x3ae => 0x389, 0x167 => 0x166, 0x3be => 0x39e, 0x165 => 0x164,
             0x117 => 0x116, 0x109 => 0x108, 0x76 => 0x56, 0xfe => 0xde, 0x157 => 0x156, 0xfa => 0xda,
             0x1e61 => 0x1e60, 0x1e83 => 0x1e82, 0xe2 => 0xc2, 0x119 => 0x118, 0x146 => 0x145, 0x70 => 0x50,
             0x151 => 0x150, 0x44e => 0x42e, 0x129 => 0x128, 0x3c7 => 0x3a7, 0x13e => 0x13d, 0x442 => 0x422,
             0x7a => 0x5a, 0x448 => 0x428, 0x3c1 => 0x3a1, 0x1e81 => 0x1e80, 0x16d => 0x16c, 0xf5 => 0xd5,
             0x75 => 0x55, 0x177 => 0x176, 0xfc => 0xdc, 0x1e57 => 0x1e56, 0x3c3 => 0x3a3, 0x43a => 0x41a,
             0x6d => 0x4d, 0x16b => 0x16a, 0x171 => 0x170, 0x444 => 0x424, 0xec => 0xcc, 0x169 => 0x168,
             0x3bf => 0x39f, 0x6b => 0x4b, 0xf2 => 0xd2, 0xe0 => 0xc0, 0x434 => 0x414, 0x3c9 => 0x3a9,
             0x1e6b => 0x1e6a, 0xe3 => 0xc3, 0x44d => 0x42d, 0x436 => 0x416, 0x1a1 => 0x1a0, 0x10d => 0x10c,
             0x11d => 0x11c, 0xf0 => 0xd0, 0x13c => 0x13b, 0x45f => 0x40f, 0x45a => 0x40a, 0xe8 => 0xc8,
             0x3c5 => 0x3a5, 0x66 => 0x46, 0xfd => 0xdd, 0x63 => 0x43, 0x21b => 0x21a, 0xea => 0xca,
             0x3b9 => 0x399, 0x17a => 0x179, 0xef => 0xcf, 0x1b0 => 0x1af, 0x65 => 0x45, 0x3bb => 0x39b,
             0x3b8 => 0x398, 0x3bc => 0x39c, 0x45c => 0x40c, 0x43f => 0x41f, 0x44c => 0x42c, 0xfe => 0xde,
             0xf0 => 0xd0, 0x1ef3 => 0x1ef2, 0x68 => 0x48, 0xeb => 0xcb, 0x111 => 0x110, 0x433 => 0x413,
             0x12f => 0x12e, 0xe6 => 0xc6, 0x78 => 0x58, 0x161 => 0x160, 0x16f => 0x16e, 0x3b1 => 0x391,
             0x457 => 0x407, 0x173 => 0x172, 0xff => 0x178, 0x6f => 0x4f, 0x43b => 0x41b, 0x3b5 => 0x395,
             0x445 => 0x425, 0x121 => 0x120, 0x17e => 0x17d, 0x17c => 0x17b, 0x3b6 => 0x396, 0x3b2 => 0x392,
             0x3ad => 0x388, 0x1e85 => 0x1e84, 0x175 => 0x174, 0x71 => 0x51, 0x437 => 0x417, 0x1e0b => 0x1e0a,
             0x148 => 0x147, 0x105 => 0x104, 0x458 => 0x408, 0x14d => 0x14c, 0xed => 0xcd, 0x79 => 0x59,
             0x10b => 0x10a, 0x3ce => 0x38f, 0x72 => 0x52, 0x430 => 0x410, 0x455 => 0x405, 0x452 => 0x402,
             0x127 => 0x126, 0x137 => 0x136, 0x12b => 0x12a, 0x3af => 0x38a, 0x44b => 0x42b, 0x6c => 0x4c,
             0x3b7 => 0x397, 0x125 => 0x124, 0x219 => 0x218, 0xfb => 0xdb, 0x11f => 0x11e, 0x43e => 0x41e,
             0x1e41 => 0x1e40, 0x3bd => 0x39d, 0x107 => 0x106, 0x3cb => 0x3ab, 0x446 => 0x426, 0xfe => 0xde,
             0xe7 => 0xc7, 0x3ca => 0x3aa, 0x441 => 0x421, 0x432 => 0x412, 0x10f => 0x10e, 0xf8 => 0xd8,
             0x77 => 0x57, 0x11b => 0x11a, 0x74 => 0x54, 0x6a => 0x4a, 0x45b => 0x40b, 0x456 => 0x406,
             0x103 => 0x102, 0x3bb => 0x39b, 0xf1 => 0xd1, 0x43d => 0x41d, 0x3cc => 0x38c, 0xe9 => 0xc9,
             0xf0 => 0xd0, 0x457 => 0x407, 0x123 => 0x122,
         );
     }
     $unicode = utf8_to_unicode($string);
     // Test the type, not the truthiness: an empty array (empty input) is a
     // valid result and must not be reported as a decoding failure.
     if (!is_array($unicode)) {
         return false;
     }
     foreach ($unicode as $i => $codepoint) {
         if (isset($lower_to_upper[$codepoint])) {
             $unicode[$i] = $lower_to_upper[$codepoint];
         }
     }
     return unicode_to_utf8($unicode);
 }
예제 #8
0
 /**
  * utf8strtolower
  *
  * UTF-8 aware strtolower(): maps every code point that has an entry in the
  * built-in uppercase => lowercase table and leaves all others untouched.
  * The table is built on first use and kept in a static variable. A few keys
  * occur more than once in the literal with identical values; that is
  * harmless.
  *
  * An empty string is returned as whatever static::fromUnicode() produces
  * for an empty array; false is reserved for input that utf8_to_unicode()
  * could not convert to an array.
  * NOTE(review): assumes utf8_to_unicode() signals invalid UTF-8 with a
  * non-array value - confirm against its definition.
  *
  * @param string $string
  *
  * @note  Port of phputf8's utf8_strtolower()
  *
  * @return  string|false  lowercased string, or false if the input could not be decoded
  */
 public static function utf8strtolower($string)
 {
     static $UTF8_UPPER_TO_LOWER = null;
     if (is_null($UTF8_UPPER_TO_LOWER)) {
         $UTF8_UPPER_TO_LOWER = array(
             0x41 => 0x61, 0x3a6 => 0x3c6, 0x162 => 0x163, 0xc5 => 0xe5, 0x42 => 0x62, 0x139 => 0x13a,
             0xc1 => 0xe1, 0x141 => 0x142, 0x38e => 0x3cd, 0x100 => 0x101, 0x490 => 0x491, 0x394 => 0x3b4,
             0x15a => 0x15b, 0x44 => 0x64, 0x393 => 0x3b3, 0xd4 => 0xf4, 0x42a => 0x44a, 0x419 => 0x439,
             0x112 => 0x113, 0x41c => 0x43c, 0x15e => 0x15f, 0x143 => 0x144, 0xce => 0xee, 0x40e => 0x45e,
             0x42f => 0x44f, 0x39a => 0x3ba, 0x154 => 0x155, 0x49 => 0x69, 0x53 => 0x73, 0x1e1e => 0x1e1f,
             0x134 => 0x135, 0x427 => 0x447, 0x3a0 => 0x3c0, 0x418 => 0x438, 0xd3 => 0xf3, 0x420 => 0x440,
             0x404 => 0x454, 0x415 => 0x435, 0x429 => 0x449, 0x14a => 0x14b, 0x411 => 0x431, 0x409 => 0x459,
             0x1e02 => 0x1e03, 0xd6 => 0xf6, 0xd9 => 0xf9, 0x4e => 0x6e, 0x401 => 0x451, 0x3a4 => 0x3c4,
             0x423 => 0x443, 0x15c => 0x15d, 0x403 => 0x453, 0x3a8 => 0x3c8, 0x158 => 0x159, 0x47 => 0x67,
             0xc4 => 0xe4, 0x386 => 0x3ac, 0x389 => 0x3ae, 0x166 => 0x167, 0x39e => 0x3be, 0x164 => 0x165,
             0x116 => 0x117, 0x108 => 0x109, 0x56 => 0x76, 0xde => 0xfe, 0x156 => 0x157, 0xda => 0xfa,
             0x1e60 => 0x1e61, 0x1e82 => 0x1e83, 0xc2 => 0xe2, 0x118 => 0x119, 0x145 => 0x146, 0x50 => 0x70,
             0x150 => 0x151, 0x42e => 0x44e, 0x128 => 0x129, 0x3a7 => 0x3c7, 0x13d => 0x13e, 0x422 => 0x442,
             0x5a => 0x7a, 0x428 => 0x448, 0x3a1 => 0x3c1, 0x1e80 => 0x1e81, 0x16c => 0x16d, 0xd5 => 0xf5,
             0x55 => 0x75, 0x176 => 0x177, 0xdc => 0xfc, 0x1e56 => 0x1e57, 0x3a3 => 0x3c3, 0x41a => 0x43a,
             0x4d => 0x6d, 0x16a => 0x16b, 0x170 => 0x171, 0x424 => 0x444, 0xcc => 0xec, 0x168 => 0x169,
             0x39f => 0x3bf, 0x4b => 0x6b, 0xd2 => 0xf2, 0xc0 => 0xe0, 0x414 => 0x434, 0x3a9 => 0x3c9,
             0x1e6a => 0x1e6b, 0xc3 => 0xe3, 0x42d => 0x44d, 0x416 => 0x436, 0x1a0 => 0x1a1, 0x10c => 0x10d,
             0x11c => 0x11d, 0xd0 => 0xf0, 0x13b => 0x13c, 0x40f => 0x45f, 0x40a => 0x45a, 0xc8 => 0xe8,
             0x3a5 => 0x3c5, 0x46 => 0x66, 0xdd => 0xfd, 0x43 => 0x63, 0x21a => 0x21b, 0xca => 0xea,
             0x399 => 0x3b9, 0x179 => 0x17a, 0xcf => 0xef, 0x1af => 0x1b0, 0x45 => 0x65, 0x39b => 0x3bb,
             0x398 => 0x3b8, 0x39c => 0x3bc, 0x40c => 0x45c, 0x41f => 0x43f, 0x42c => 0x44c, 0xde => 0xfe,
             0xd0 => 0xf0, 0x1ef2 => 0x1ef3, 0x48 => 0x68, 0xcb => 0xeb, 0x110 => 0x111, 0x413 => 0x433,
             0x12e => 0x12f, 0xc6 => 0xe6, 0x58 => 0x78, 0x160 => 0x161, 0x16e => 0x16f, 0x391 => 0x3b1,
             0x407 => 0x457, 0x172 => 0x173, 0x178 => 0xff, 0x4f => 0x6f, 0x41b => 0x43b, 0x395 => 0x3b5,
             0x425 => 0x445, 0x120 => 0x121, 0x17d => 0x17e, 0x17b => 0x17c, 0x396 => 0x3b6, 0x392 => 0x3b2,
             0x388 => 0x3ad, 0x1e84 => 0x1e85, 0x174 => 0x175, 0x51 => 0x71, 0x417 => 0x437, 0x1e0a => 0x1e0b,
             0x147 => 0x148, 0x104 => 0x105, 0x408 => 0x458, 0x14c => 0x14d, 0xcd => 0xed, 0x59 => 0x79,
             0x10a => 0x10b, 0x38f => 0x3ce, 0x52 => 0x72, 0x410 => 0x430, 0x405 => 0x455, 0x402 => 0x452,
             0x126 => 0x127, 0x136 => 0x137, 0x12a => 0x12b, 0x38a => 0x3af, 0x42b => 0x44b, 0x4c => 0x6c,
             0x397 => 0x3b7, 0x124 => 0x125, 0x218 => 0x219, 0xdb => 0xfb, 0x11e => 0x11f, 0x41e => 0x43e,
             0x1e40 => 0x1e41, 0x39d => 0x3bd, 0x106 => 0x107, 0x3ab => 0x3cb, 0x426 => 0x446, 0xde => 0xfe,
             0xc7 => 0xe7, 0x3aa => 0x3ca, 0x421 => 0x441, 0x412 => 0x432, 0x10e => 0x10f, 0xd8 => 0xf8,
             0x57 => 0x77, 0x11a => 0x11b, 0x54 => 0x74, 0x4a => 0x6a, 0x40b => 0x45b, 0x406 => 0x456,
             0x102 => 0x103, 0x39b => 0x3bb, 0xd1 => 0xf1, 0x41d => 0x43d, 0x38c => 0x3cc, 0xc9 => 0xe9,
             0xd0 => 0xf0, 0x407 => 0x457, 0x122 => 0x123,
         );
     }
     $uni = utf8_to_unicode($string);
     // Test the type, not the truthiness: an empty array (empty input) is a
     // valid result and must not be reported as a decoding failure.
     if (!is_array($uni)) {
         return false;
     }
     foreach ($uni as $i => $codepoint) {
         if (isset($UTF8_UPPER_TO_LOWER[$codepoint])) {
             $uni[$i] = $UTF8_UPPER_TO_LOWER[$codepoint];
         }
     }
     return static::fromUnicode($uni);
 }
예제 #9
0
/**
 * Convert a UTF-8 string to GBK using the "gb-unicode.table" code table.
 *
 * The table is loaded into the global $UC2GBTABLE on first use and reused
 * afterwards. ASCII bytes are copied through unchanged. Each multi-byte
 * sequence is passed to utf8_to_unicode(); when the result is a key of the
 * table, the mapped value plus 0x8080 is emitted as two bytes, otherwise
 * the result is emitted as a numeric reference "&#...;". The output is
 * trimmed before it is returned.
 *
 * NOTE(review): utf8_to_unicode() is called here with the bytes of ONE
 * character and its result is used as an array key, so it presumably
 * returns a single integer code point - confirm against its definition.
 *
 * @param string $_obfuscate_lEJkeU8 UTF-8 encoded input
 * @return string converted, trimmed string
 */
function utf8_to_gbk($_obfuscate_lEJkeU8)
{
    global $UC2GBTABLE;

    // Load the table only while the shared cache is still empty.
    if (empty($UC2GBTABLE)) {
        $table_path = CODETABLEDIR . "gb-unicode.table";
        $handle = fopen($table_path, "rb");
        if ($handle !== false) {
            while (($line = fgets($handle, 15)) !== false) {
                $UC2GBTABLE[hexdec(substr($line, 7, 6))] = hexdec(substr($line, 0, 6));
            }
            fclose($handle);
        }
    }

    $output = "";
    $length = strlen($_obfuscate_lEJkeU8);
    for ($i = 0; $i < $length; ++$i) {
        $lead = ord($_obfuscate_lEJkeU8[$i]);
        if ($lead < 0x80) {
            // Single-byte (ASCII) character: copy as is.
            $output .= $_obfuscate_lEJkeU8[$i];
            continue;
        }

        // Number of continuation bytes announced by the UTF-8 lead byte.
        if ($lead >= 0xC0 && $lead <= 0xDF) {
            $extra = 1;
        } elseif ($lead >= 0xE0 && $lead <= 0xEF) {
            $extra = 2;
        } elseif ($lead >= 0xF0 && $lead <= 0xF7) {
            $extra = 3;
        } else {
            // Stray continuation byte or invalid lead byte: take it alone.
            $extra = 0;
        }

        // substr() never reads past the end of a truncated sequence.
        $char = substr($_obfuscate_lEJkeU8, $i, $extra + 1);
        $i += $extra;

        $char = utf8_to_unicode($char);
        if (isset($UC2GBTABLE[$char])) {
            $hex = dechex($UC2GBTABLE[$char] + 32896);
            $output .= chr(hexdec($hex[0] . $hex[1])) . chr(hexdec($hex[2] . $hex[3]));
        } else {
            $output .= "&#" . $char . ";";
        }
    }

    return trim($output);
}
예제 #10
0
/**
 * Convert a UTF-8 string to GBK using the "gb-unicode.table" code table.
 *
 * The table is loaded into the global $UC2GBTABLE on first use and reused
 * afterwards. ASCII bytes are copied through unchanged. Each multi-byte
 * sequence is passed to utf8_to_unicode(); when the result is a key of the
 * table, the mapped value plus 0x8080 is emitted as two bytes, otherwise
 * the result is emitted as a numeric reference "&#...;". The output is
 * trimmed before it is returned.
 *
 * NOTE(review): utf8_to_unicode() is called here with the bytes of ONE
 * character and its result is used as an array key, so it presumably
 * returns a single integer code point - confirm against its definition.
 *
 * @param string $utfstr UTF-8 encoded input
 * @return string converted, trimmed string
 */
function utf8_to_gbk($utfstr)
{
    global $UC2GBTABLE;

    if (empty($UC2GBTABLE)) {
        $filename = CODETABLEDIR . 'gb-unicode.table';
        $fp = fopen($filename, 'rb');
        if ($fp !== false) {
            while (($l = fgets($fp, 15)) !== false) {
                $UC2GBTABLE[hexdec(substr($l, 7, 6))] = hexdec(substr($l, 0, 6));
            }
            fclose($fp);
        }
    }

    $okstr = '';
    $ulen = strlen($utfstr);
    for ($i = 0; $i < $ulen; $i++) {
        $lead = ord($utfstr[$i]);
        if ($lead < 0x80) {
            // Single-byte (ASCII) character: copy as is.
            $okstr .= $utfstr[$i];
            continue;
        }

        // Number of continuation bytes announced by the UTF-8 lead byte.
        if ($lead >= 0xC0 && $lead <= 0xDF) {
            $csize = 1;
        } elseif ($lead >= 0xE0 && $lead <= 0xEF) {
            $csize = 2;
        } elseif ($lead >= 0xF0 && $lead <= 0xF7) {
            $csize = 3;
        } else {
            // Stray continuation byte or invalid lead byte: take it alone.
            $csize = 0;
        }

        // substr() never reads past the end of a truncated sequence.
        $c = substr($utfstr, $i, $csize + 1);
        $i += $csize;

        $c = utf8_to_unicode($c);
        if (isset($UC2GBTABLE[$c])) {
            $c = dechex($UC2GBTABLE[$c] + 0x8080);
            $okstr .= chr(hexdec($c[0] . $c[1])) . chr(hexdec($c[2] . $c[3]));
        } else {
            $okstr .= '&#' . $c . ';';
        }
    }

    return trim($okstr);
}
예제 #11
0
 /**
  * Decide which encoding class a text falls into.
  *
  * A string is first decoded with utf8_to_unicode(); an array is used as
  * given. If any element is missing from int_gsm_7bit_combined_map() the
  * result is UTF16 and $ex_chars is left untouched. Otherwise $ex_chars
  * receives the elements that also occur in int_gsm_7bit_ex_map(), and the
  * result is GSM_7BIT_EX when there are any, GSM_7BIT when there are none.
  *
  * @param string|array $text      text to classify
  * @param array        $ex_chars  by reference; set only when the result is not UTF16
  * @return mixed one of self::GSM_7BIT, self::GSM_7BIT_EX, self::UTF16
  */
 public static function detect_encoding($text, &$ex_chars)
 {
     $codepoints = is_array($text) ? $text : utf8_to_unicode($text);

     $outside_gsm = array_diff($codepoints, self::int_gsm_7bit_combined_map());
     if (count($outside_gsm)) {
         return self::UTF16;
     }

     $ex_chars = array_intersect($codepoints, self::int_gsm_7bit_ex_map());
     return count($ex_chars) ? self::GSM_7BIT_EX : self::GSM_7BIT;
 }
예제 #12
0
 /**
  * Undo irregular conjugations and contractions at the boundary between a
  * Korean verb stem and its ending.
  *
  * The last syllable of $stem is split into jamo and compared with the
  * first syllable of the ending held in $match[1]. Depending on the
  * pattern, jamo or whole syllables are moved between the stem and the
  * ending. Both are then re-encoded with unicode_to_utf8().
  *
  * NOTE(review): from the way they are used here, hangul_to_jamo() is
  * assumed to return a 0-indexed array of (initial, vowel[, final]) jamo
  * code points and jamo_to_syllable() an array whose element 0 is the
  * composed syllable - confirm against their definitions.
  *
  * @param string $stem   UTF-8 stem candidate
  * @param array  $match  by reference; $match[1] holds the ending and is rewritten
  * @return string the adjusted stem, or $stem unchanged when its last
  *                character is not Hangul according to $this->isHangul()
  */
 function verbIrr($stem, &$match)
 {
     // Handle the various regular and irregular conjugation rules.
     $ustem = utf8_to_unicode($stem);
     $uend = utf8_to_unicode($match[1]);
     $ch = array_pop($ustem);
     $ed = $uend[0];
     $save = '';
     if ($this->isHangul($ch)) {
         $j = hangul_to_jamo($ch);
         $ej = hangul_to_jamo($ed);
         $sj = sizeof($j);
         if ($sj == 3 and $j[2] == 0x11bb) {
             // Syllable ends in ㅆ: 랐-다, 었-다, 겠-다, 였-다
             if (in_array($j[1], array(0x1161, 0x1165, 0x1166, 0x1167))) {
                 if ($j[0] == 0x1105 and in_array($j[1], array(0x1161, 0x1165, 0x1167))) {
                     // 랐, 렀, 렸 (e.g. 갈렸-다): left as is.
                 } else {
                     if (in_array($j[0], array(0x1100, 0x110b, 0x110c))) {
                         // 겠, 았: the whole syllable is moved to the ending.
                         array_unshift($uend, $ch);
                         unset($ch);
                     } else {
                         if ($j[1] == 0x1167 and in_array($j[0], array(0x1101, 0x1102, 0x1103, 0x1105, 0x1106, 0x1107, 0x1109, 0x110c, 0x110e, 0x110f, 0x1110, 0x1111, 0x1112))) {
                             // ㅕ conversion: 혔 -> ㅎ + 었 -> 히 + 었
                             $j[1] = 0x1165;
                             $syll = jamo_to_syllable(array(0x110b, $j[1], $j[2]));
                             array_unshift($uend, $syll[0]);
                             // 혔 -> 히 + 었, 폈 -> 피 + 었
                             $j[1] = 0x1175;
                             $syll = jamo_to_syllable(array($j[0], $j[1]));
                             $ch = $syll[0];
                         } else {
                             if (in_array($j[0], array(0x1101, 0x1104, 0x110a, 0x1111, 0x1112))) {
                                 // 우 irregular: 떴 -> ㄸ + 었
                                 $syll = jamo_to_syllable(array(0x110b, $j[1], $j[2]));
                                 array_unshift($uend, $syll[0]);
                                 // ㄸ -> 뜨 (vowel ㅡ)
                                 $j[1] = 0x1173;
                                 if ($j[0] == 0x1111) {
                                     // 펐 -> 푸 + 었 (vowel ㅜ)
                                     $j[1] = 0x116e;
                                 }
                                 // Keep the recomposed stem syllable (e.g. 쓰); the
                                 // result used to be discarded, which left $ch
                                 // holding the ending syllable built above.
                                 $syll = jamo_to_syllable(array($j[0], $j[1]));
                                 $ch = $syll[0];
                             }
                         }
                     }
                 }
             } else {
                 if ($j[0] == 0x1112 and in_array($j[1], array(0x1162))) {
                     // 했 -> 하 + 였
                     array_push($ustem, 0xd558);
                     $syll = jamo_to_syllable(array(0x110b, 0x1167, 0x11bb));
                     array_unshift($uend, $syll[0]);
                     unset($ch);
                 } else {
                     // Split off the final ㅆ and hand it to the ending.
                     $syll = jamo_to_syllable(array($j[0], $j[1]));
                     array_unshift($uend, $j[2]);
                     $ch = $syll[0];
                     unset($j[2]);
                 }
             }
             // $ch may have been unset above; empty() avoids reading an
             // undefined variable while testing the same truthiness.
             if (empty($ch)) {
                 $ch = array_pop($ustem);
                 $j = hangul_to_jamo($ch);
             }
             $ed = $uend[0];
             $ej = hangul_to_jamo($ed);
         } else {
             if (!empty($j[2]) and in_array($j[2], array(0x11ab, 0x11af, 0x11b8))) {
                 // 합-시다   갑-시다   갈-래
                 // 하-ㅂ시다 가-ㅂ시다 가-ㄹ래
                 if ($j[2] == 0x11af and $ej[0] == 0x1105) {
                     // 르 irregular: 흘-러 -> 흐르 + 러
                     unset($j[2]);
                     $syll = jamo_to_syllable($j);
                     // 흐
                     array_push($ustem, $syll[0]);
                     $j[0] = $ej[0];
                     $j[1] = 0x1173;
                     $syll = jamo_to_syllable($j);
                     // 르
                     $ch = $syll[0];
                 } else {
                     array_unshift($uend, $j[2]);
                     $syll = jamo_to_syllable(array($j[0], $j[1]));
                     $ch = $syll[0];
                     $ed = $j[2];
                     unset($j[2]);
                 }
             }
         }
         // ㄷ irregular: 들-어 -> 듣-다
         $sj = sizeof($j);
         if ($sj == 3 and $j[2] == 0x11af and in_array($ej[0], array(0x110b, 0x1105))) {
             // Runs at most once; "while" is only used so that "break" can
             // leave the block early.
             while (in_array($ej[1], array(0x1161, 0x1165, 0x1173))) {
                 // Ending vowel ㅏ / ㅓ / ㅡ (아 어 으, 라 러 르)
                 $se = sizeof($ej);
                 if ($se == 3) {
                     // Among ㅡ endings only 은 / 을 qualify. The finals must be
                     // passed to in_array() as one array.
                     if ($ej[1] == 0x1173 and !in_array($ej[2], array(0x11ab, 0x11af))) {
                         break;
                     }
                 } else {
                     if ($j[2] == 0x11af and sizeof($ej) == 2 and $ej[0] == 0x1105) {
                         break;
                     }
                 }
                 $syll = jamo_to_syllable(array($j[0], $j[1], 0x11ae));
                 $ch = $syll[0];
                 break;
             }
         }
         // ㅅ irregular
         // * 지-어: 짓-어
         // * 이-어: 잇-어
         if (sizeof($ej) == 2) {
             if ($ej[0] == 0x110b) {
                 // Append final ㅅ.
                 $j[2] = 0x11ba;
                 $syll = jamo_to_syllable($j);
                 $ch = $syll[0];
                 $sj = 3;
             }
         }
         if ($sj == 2) {
             if (in_array($j[0], array(0x110c)) and in_array($j[1], array(0x116e, 0x1175))) {
                 // 주, 지
                 array_unshift($uend, $ch);
                 unset($ch);
                 $ch = array_pop($ustem);
                 $j = hangul_to_jamo($ch);
             }
             if ($j[1] == 0x1165 and in_array($j[0], array(0x1101, 0x1104, 0x110a, 0x1111))) {
                 // 꺼, 떠, 써, 퍼: prepend 어 to the ending
                 $syll = jamo_to_syllable(array(0x110b, 0x1165));
                 array_unshift($uend, $syll[0]);
                 if ($j[0] == 0x1111) {
                     $syll = jamo_to_syllable(array($j[0], 0x116e));
                 } else {
                     $syll = jamo_to_syllable(array($j[0], 0x1173));
                 }
                 // e.g. 쓰
                 array_push($ustem, $syll[0]);
                 unset($ch);
                 $ch = array_pop($ustem);
                 $j = hangul_to_jamo($ch);
             }
             // Phonological contraction
             if (in_array($j[0], array(0x1105, 0x1112)) and $j[1] == 0x1162) {
                 // ㅎ irregular (ending): 파랗 + 아서 -> 파라 + 아서 -> 파래서
                 // 파래-서 -> 파라-아서
                 $j[1] = 0x1161;
                 $syll = jamo_to_syllable($j);
                 // 래 -> 라 + 아
                 $ch = $syll[0];
                 $syll = jamo_to_syllable(array(0x110b, 0x1161));
                 // 아
                 $ed = $syll[0];
                 array_unshift($uend, $ed);
                 $ej[0] = 0x110b;
                 $ej[1] = 0x1161;
             } else {
                 // NOTE(review): this condition is already covered by the branch
                 // above (0x1112 with 0x1162), so the 하 + 여 handling below never
                 // runs as written - confirm which branch should take precedence.
                 if ($j[0] == 0x1112 and in_array($j[1], array(0x1162))) {
                     // 해-서 = 하-여서
                     $j[1] = 0x1161;
                     $syll = jamo_to_syllable($j);
                     // 해 -> 하 + 여
                     $ch = $syll[0];
                     $syll = jamo_to_syllable(array(0x110b, 0x1167));
                     // 여
                     $ed = $syll[0];
                     array_unshift($uend, $ed);
                     $ej[0] = 0x110b;
                     $ej[1] = 0x1167;
                 } else {
                     if (in_array($j[0], array(0x1105, 0x1109)) and in_array($j[1], array(0x1167))) {
                         // 하셔-서 = 하시-어서
                         // 가려-서 = 가리-어서
                         // Vowel ㅕ -> ㅣ, with 어 moved to the ending.
                         $j[1] = 0x1175;
                         $syll = jamo_to_syllable($j);
                         $ch = $syll[0];
                         $syll = jamo_to_syllable(array(0x110b, 0x1165));
                         // 어
                         $ed = $syll[0];
                         array_unshift($uend, $ed);
                         $ej[0] = 0x110b;
                         $ej[1] = 0x1165;
                     }
                 }
             }
             if ($j[0] == 0x1109 and $j[1] == 0x1175) {
                 // 시: honorific handling
                 array_unshift($uend, $ch);
                 $ej = $j;
                 $ch = array_pop($ustem);
                 $j = hangul_to_jamo($ch);
             }
             // ㅎ irregular
             if (in_array($j[0], array(0x1105, 0x1106)) and in_array($j[1], array(0x1161, 0x1165))) {
                 // 랗, 렇
                 $syll = jamo_to_syllable(array($j[0], $j[1], 0x11c2));
                 array_push($ustem, $syll[0]);
                 unset($ch);
                 unset($j);
             }
         }
         // Runs at most once ("while" used for its "break"). $j may have been
         // unset just above, hence the isset() guard.
         while ($sj == 2 and isset($j) and $j[0] == 0x110b and in_array($j[1], array(0x116a, 0x116e, 0x116f)) and sizeof($ustem) >= 1) {
             // XXX
             // 와 / 우 / 워, e.g. 그리워: 그리우 + 어 -> 그립 + 워
             $ch1 = array_pop($ustem);
             $jamo = hangul_to_jamo($ch1);
             if (sizeof($jamo) == 2) {
                 if ($jamo[1] != 0x1175) {
                     // Add final ㅂ to the preceding syllable.
                     $syll = jamo_to_syllable(array($jamo[0], $jamo[1], 0x11b8));
                     array_push($ustem, $syll[0]);
                 } else {
                     array_push($ustem, $ch1);
                 }
                 array_unshift($uend, $ch);
                 unset($ch);
             } else {
                 array_push($ustem, $ch1);
             }
             break;
         }
         // $ch may have been unset above; !empty() tests the same truthiness
         // without reading an undefined variable.
         if (!empty($ch)) {
             array_push($ustem, $ch);
         }
         $match[1] = unicode_to_utf8($uend);
         return unicode_to_utf8($ustem);
     }
     $match[1] = $save . $match[1];
     return $stem;
 }
예제 #13
0
 /**
  * Convert a UTF-8 string to a safe ASCII string.
  *
  * The string is decoded with utf8_to_unicode() and the result is handed to
  * self::unicode_to_safe(), which performs the actual encoding. The scheme,
  * as described for this converter:
  *
  *    - plain and post_indicator characters are emitted as their ASCII
  *      byte; if the previous character was "converted", a post_indicator
  *      is emitted first and the "converted" flag is cleared
  *
  *    - a pre_indicator character is emitted as its ASCII byte and sets the
  *      "converted" flag
  *
  *    - every other code point is reduced so that the non-printable range
  *      0x00 - 0x1f is skipped (space becomes zero), converted to base36
  *      (0-9a-z) and emitted after a pre_indicator character; the
  *      "converted" flag is set
  *
  * @param    string    $filename     a utf8 string, should only include printable characters - not 0x00-0x1f
  * @return   string    an encoded representation of $filename using only 'safe' ASCII characters
  *
  * @author   Christopher Smith <*****@*****.**>
  */
 public function encode($filename)
 {
     $codepoints = utf8_to_unicode($filename);

     return self::unicode_to_safe($codepoints);
 }
예제 #14
0
파일: fn.php 프로젝트: laiello/ya-playsms
/**
 * Send one SMS through the HTTP gateway configured in the global
 * $clktl_param and record the resulting delivery status.
 *
 * The request is a GET to "<send_url>/sendmsg?..." issued with file(). The
 * sender is the global $gateway_number when it is set, otherwise
 * $mobile_sender. After the request, setsmsdeliverystatus() is always
 * called with the computed status (DLR_FAILED unless the response says
 * otherwise).
 *
 * NOTE(review): in this copy the user/password part of both query strings
 * has been replaced by "******", which leaves those two statements
 * syntactically invalid. Restore them from the upstream source before use.
 *
 * @param string $mobile_sender sender number, used when no gateway number is configured
 * @param string $sms_to        destination number, inserted into the URL as is
 * @param string $sms_msg       message text
 * @param string $gp_code       not used in this function body
 * @param string $uid           passed on to setsmsdeliverystatus()
 * @param string $smslog_id     log id used for the API message id and status calls
 * @param string $msg_type      "flash", "logo", "picture", "ringtone", "rtttl" or "text"; anything else is treated as text
 * @param string $unicode       truthy to send via utf8_to_unicode(); the default "0" is falsy
 * @return bool true when file() returned a truthy value, false otherwise
 */
function gw_send_sms($mobile_sender, $sms_to, $sms_msg, $gp_code = "", $uid = "", $smslog_id = "", $msg_type = "text", $unicode = "0")
{
    global $clktl_param;
    global $gateway_number;
    // A configured gateway number takes precedence over the caller's sender.
    if ($gateway_number) {
        $sms_from = $gateway_number;
    } else {
        $sms_from = $mobile_sender;
    }
    // Map the message type onto the gateway's msg_type value.
    switch ($msg_type) {
        case "flash":
            $sms_type = "SMS_FLASH";
            break;
        case "logo":
            $sms_type = "SMS_NOKIA_OLOGO";
            break;
        case "picture":
            $sms_type = "SMS_NOKIA_PICTURE";
            break;
        case "ringtone":
        case "rtttl":
            $sms_type = "SMS_NOKIA_RTTTL";
            break;
        case "text":
        default:
            $sms_type = "SMS_TEXT";
    }
    // $query_string = "sendmsg?api_id=".$clktl_param[api_id]."&user="******"&password="******"&to=$sms_to&msg_type=$sms_type&text=".rawurlencode($sms_msg)."&deliv_ack=1&callback=3&unicode=$unicode&concat=3&from=".rawurlencode($sms_from);
    // no concat
    // NOTE(review): api_id and send_url are used as bare (unquoted) array keys
    // below - confirm they are not meant to be the strings 'api_id' / 'send_url'.
    if ($unicode) {
        // Unicode mode: the converted message goes into the URL without rawurlencode().
        $sms_msg = utf8_to_unicode($sms_msg);
        $query_string = "sendmsg?api_id=" . $clktl_param[api_id] . "&user="******"&password="******"&to={$sms_to}&msg_type={$sms_type}&text={$sms_msg}&deliv_ack=1&callback=3&unicode={$unicode}&from=" . rawurlencode($sms_from);
    } else {
        $query_string = "sendmsg?api_id=" . $clktl_param[api_id] . "&user="******"&password="******"&to={$sms_to}&msg_type={$sms_type}&text=" . rawurlencode($sms_msg) . "&deliv_ack=1&callback=3&unicode={$unicode}&from=" . rawurlencode($sms_from);
    }
    $url = $clktl_param[send_url] . "/" . $query_string;
    // file() returns the response as an array of lines, or false on failure.
    $fd = file($url);
    $ok = false;
    $p_status = DLR_FAILED;
    if ($fd) {
        // NOTE(review): split() was removed in PHP 7, and $fd is an array
        // here rather than a string - verify how the response is meant to be parsed.
        $response = split(":", $fd);
        // $err_code is assigned but not read again in this function.
        $err_code = trim($response[1]);
        if (strtoupper($response[0]) == "ID") {
            // "ID:<apimsgid>" response: store the id, then look up the status.
            if ($apimsgid = trim($response[1])) {
                clktl_setsmsapimsgid($smslog_id, $apimsgid);
                list($c_sms_credit, $c_sms_status) = clktl_getsmsstatus($smslog_id);
                if ($c_sms_status) {
                    $p_status = $c_sms_status;
                } else {
                    $p_status = DLR_PENDING;
                }
            } else {
                $p_status = DLR_SENT;
            }
        }
        // Any truthy response counts as success for the return value,
        // whatever the response contained.
        $ok = true;
    }
    setsmsdeliverystatus($smslog_id, $uid, $p_status);
    return $ok;
}
예제 #15
0
/**
 * UTF-8 to UTF-16BE conversion.
 *
 * Uses mb_convert_encoding() when the mbstring extension is available and
 * the UTF8_NOMBSTRING constant is not defined. Otherwise the string is
 * decoded with utf8_to_unicode() and each code point is packed as a
 * big-endian 16-bit unit; code points above U+FFFF are written as a
 * surrogate pair. If utf8_to_unicode() does not return an array, only the
 * optional BOM is returned.
 *
 * NOTE(review): whether code points above U+FFFF ever reach the fallback
 * depends on what utf8_to_unicode() can decode - confirm there.
 *
 * @param string $str UTF-8 input (taken by reference, not modified)
 * @param bool   $bom prepend the UTF-16BE byte order mark (0xFE 0xFF)
 * @return string UTF-16BE encoded bytes
 */
function utf8_to_utf16be(&$str, $bom = false)
{
    // The BOM is written with byte escapes so it survives any re-encoding
    // of this source file.
    $out = $bom ? "\xFE\xFF" : '';
    if (!defined('UTF8_NOMBSTRING') && function_exists('mb_convert_encoding')) {
        return $out . mb_convert_encoding($str, 'UTF-16BE', 'UTF-8');
    }
    $uni = utf8_to_unicode($str);
    if (!is_array($uni)) {
        return $out;
    }
    foreach ($uni as $cp) {
        if ($cp > 0xFFFF) {
            // Outside the BMP: encode as high + low surrogate.
            $cp -= 0x10000;
            $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
        } else {
            $out .= pack('n', $cp);
        }
    }
    return $out;
}
예제 #16
0
파일: utf8.php 프로젝트: halfhope/ocStore
/**
 * Upper-cases a UTF-8 string using a fixed lower-to-upper code point table.
 *
 * The string is decoded with utf8_to_unicode(), every code point that has an
 * entry in the table is replaced by its upper-case counterpart, and the result
 * is re-encoded with utf8_from_unicode(). Code points without an entry are
 * left untouched.
 *
 * @param string $string UTF-8 text
 * @return string|false the upper-cased text, '' for an empty string, or false
 *                      when utf8_to_unicode() yields no code points for a
 *                      non-empty input
 */
function utf8_strtoupper($string) {
	// Built once and kept for the lifetime of the request.
	static $UTF8_LOWER_TO_UPPER = NULL;

	// An empty string has nothing to convert; returning it directly avoids
	// reporting the empty code point list as a failure.
	if ($string === '') {
		return '';
	}

	if (is_null($UTF8_LOWER_TO_UPPER)) {
		$UTF8_LOWER_TO_UPPER = array(
			0x0061 => 0x0041,
			0x03C6 => 0x03A6,
			0x0163 => 0x0162,
			0x00E5 => 0x00C5,
			0x0062 => 0x0042,
			0x013A => 0x0139,
			0x00E1 => 0x00C1,
			0x0142 => 0x0141,
			0x03CD => 0x038E,
			0x0101 => 0x0100,
			0x0491 => 0x0490,
			0x03B4 => 0x0394,
			0x015B => 0x015A,
			0x0064 => 0x0044,
			0x03B3 => 0x0393,
			0x00F4 => 0x00D4,
			0x044A => 0x042A,
			0x0439 => 0x0419,
			0x0113 => 0x0112,
			0x043C => 0x041C,
			0x015F => 0x015E,
			0x0144 => 0x0143,
			0x00EE => 0x00CE,
			0x045E => 0x040E,
			0x044F => 0x042F,
			0x03BA => 0x039A,
			0x0155 => 0x0154,
			0x0069 => 0x0049,
			0x0073 => 0x0053,
			0x1E1F => 0x1E1E,
			0x0135 => 0x0134,
			0x0447 => 0x0427,
			0x03C0 => 0x03A0,
			0x0438 => 0x0418,
			0x00F3 => 0x00D3,
			0x0440 => 0x0420,
			0x0454 => 0x0404,
			0x0435 => 0x0415,
			0x0449 => 0x0429,
			0x014B => 0x014A,
			0x0431 => 0x0411,
			0x0459 => 0x0409,
			0x1E03 => 0x1E02,
			0x00F6 => 0x00D6,
			0x00F9 => 0x00D9,
			0x006E => 0x004E,
			0x0451 => 0x0401,
			0x03C4 => 0x03A4,
			0x0443 => 0x0423,
			0x015D => 0x015C,
			0x0453 => 0x0403,
			0x03C8 => 0x03A8,
			0x0159 => 0x0158,
			0x0067 => 0x0047,
			0x00E4 => 0x00C4,
			0x03AC => 0x0386,
			0x03AE => 0x0389,
			0x0167 => 0x0166,
			0x03BE => 0x039E,
			0x0165 => 0x0164,
			0x0117 => 0x0116,
			0x0109 => 0x0108,
			0x0076 => 0x0056,
			0x00FE => 0x00DE,
			0x0157 => 0x0156,
			0x00FA => 0x00DA,
			0x1E61 => 0x1E60,
			0x1E83 => 0x1E82,
			0x00E2 => 0x00C2,
			0x0119 => 0x0118,
			0x0146 => 0x0145,
			0x0070 => 0x0050,
			0x0151 => 0x0150,
			0x044E => 0x042E,
			0x0129 => 0x0128,
			0x03C7 => 0x03A7,
			0x013E => 0x013D,
			0x0442 => 0x0422,
			0x007A => 0x005A,
			0x0448 => 0x0428,
			0x03C1 => 0x03A1,
			0x1E81 => 0x1E80,
			0x016D => 0x016C,
			0x00F5 => 0x00D5,
			0x0075 => 0x0055,
			0x0177 => 0x0176,
			0x00FC => 0x00DC,
			0x1E57 => 0x1E56,
			0x03C3 => 0x03A3,
			0x043A => 0x041A,
			0x006D => 0x004D,
			0x016B => 0x016A,
			0x0171 => 0x0170,
			0x0444 => 0x0424,
			0x00EC => 0x00CC,
			0x0169 => 0x0168,
			0x03BF => 0x039F,
			0x006B => 0x004B,
			0x00F2 => 0x00D2,
			0x00E0 => 0x00C0,
			0x0434 => 0x0414,
			0x03C9 => 0x03A9,
			0x1E6B => 0x1E6A,
			0x00E3 => 0x00C3,
			0x044D => 0x042D,
			0x0436 => 0x0416,
			0x01A1 => 0x01A0,
			0x010D => 0x010C,
			0x011D => 0x011C,
			0x00F0 => 0x00D0,
			0x013C => 0x013B,
			0x045F => 0x040F,
			0x045A => 0x040A,
			0x00E8 => 0x00C8,
			0x03C5 => 0x03A5,
			0x0066 => 0x0046,
			0x00FD => 0x00DD,
			0x0063 => 0x0043,
			0x021B => 0x021A,
			0x00EA => 0x00CA,
			0x03B9 => 0x0399,
			0x017A => 0x0179,
			0x00EF => 0x00CF,
			0x01B0 => 0x01AF,
			0x0065 => 0x0045,
			0x03BB => 0x039B,
			0x03B8 => 0x0398,
			0x03BC => 0x039C,
			0x045C => 0x040C,
			0x043F => 0x041F,
			0x044C => 0x042C,
			0x1EF3 => 0x1EF2,
			0x0068 => 0x0048,
			0x00EB => 0x00CB,
			0x0111 => 0x0110,
			0x0433 => 0x0413,
			0x012F => 0x012E,
			0x00E6 => 0x00C6,
			0x0078 => 0x0058,
			0x0161 => 0x0160,
			0x016F => 0x016E,
			0x03B1 => 0x0391,
			0x0457 => 0x0407,
			0x0173 => 0x0172,
			0x00FF => 0x0178,
			0x006F => 0x004F,
			0x043B => 0x041B,
			0x03B5 => 0x0395,
			0x0445 => 0x0425,
			0x0121 => 0x0120,
			0x017E => 0x017D,
			0x017C => 0x017B,
			0x03B6 => 0x0396,
			0x03B2 => 0x0392,
			0x03AD => 0x0388,
			0x1E85 => 0x1E84,
			0x0175 => 0x0174,
			0x0071 => 0x0051,
			0x0437 => 0x0417,
			0x1E0B => 0x1E0A,
			0x0148 => 0x0147,
			0x0105 => 0x0104,
			0x0458 => 0x0408,
			0x014D => 0x014C,
			0x00ED => 0x00CD,
			0x0079 => 0x0059,
			0x010B => 0x010A,
			0x03CE => 0x038F,
			0x0072 => 0x0052,
			0x0430 => 0x0410,
			0x0455 => 0x0405,
			0x0452 => 0x0402,
			0x0127 => 0x0126,
			0x0137 => 0x0136,
			0x012B => 0x012A,
			0x03AF => 0x038A,
			0x044B => 0x042B,
			0x006C => 0x004C,
			0x03B7 => 0x0397,
			0x0125 => 0x0124,
			0x0219 => 0x0218,
			0x00FB => 0x00DB,
			0x011F => 0x011E,
			0x043E => 0x041E,
			0x1E41 => 0x1E40,
			0x03BD => 0x039D,
			0x0107 => 0x0106,
			0x03CB => 0x03AB,
			0x0446 => 0x0426,
			0x00E7 => 0x00C7,
			0x03CA => 0x03AA,
			0x0441 => 0x0421,
			0x0432 => 0x0412,
			0x010F => 0x010E,
			0x00F8 => 0x00D8,
			0x0077 => 0x0057,
			0x011B => 0x011A,
			0x0074 => 0x0054,
			0x006A => 0x004A,
			0x045B => 0x040B,
			0x0456 => 0x0406,
			0x0103 => 0x0102,
			0x00F1 => 0x00D1,
			0x043D => 0x041D,
			0x03CC => 0x038C,
			0x00E9 => 0x00C9,
			0x0123 => 0x0122
		);
	}

	$unicode = utf8_to_unicode($string);

	// A non-empty string that produced no code points is reported as failure.
	if (!$unicode) {
		return false;
	}

	foreach ($unicode as $index => $codepoint) {
		if (isset($UTF8_LOWER_TO_UPPER[$codepoint])) {
			$unicode[$index] = $UTF8_LOWER_TO_UPPER[$codepoint];
		}
	}

	return utf8_from_unicode($unicode);
}
 /**
  * Forwards $str to utf8_to_unicode() and returns that call's result unchanged.
  *
  * NOTE(review): the one-column indent suggests this was lifted out of a class
  * body, where the unqualified call below would resolve to a global
  * utf8_to_unicode() helper rather than to this method. If it is ever declared
  * as a top-level function instead, the call resolves to itself and recurses
  * without end. Confirm against the enclosing file.
  */
 function utf8_to_unicode($str)
 {
     return utf8_to_unicode($str);
 }
예제 #18
0
/**
 * Builds a search pattern fragment for a Hangul string, expanding an
 * incomplete trailing syllable (or bare consonant) into a range of the
 * syllables it could grow into.
 *
 * Relies on helpers defined elsewhere: utf8_to_unicode(), unicode_to_utf8(),
 * hangul_to_jamo(), jamo_to_syllable() and hangul_jongseong_to_cjamo().
 * NOTE(review): their exact return shapes are not visible here; the comments
 * below describe only how this function uses the results.
 *
 * @param string $str         UTF-8 input
 * @param mixed  $lastchar    when truthy and the input has more than one code
 *                            point, only the final code point is expanded and
 *                            everything before it is emitted literally
 * @param bool   $use_unicode true: emit a bracket range of UTF-8 characters;
 *                            false: emit byte-level \xHH escapes
 * @return string the assembled pattern fragment
 */
function utf8_hangul_getSearchRule($str, $lastchar = 1, $use_unicode = true)
{
    $rule = '';
    $val = utf8_to_unicode($str);
    $len = sizeof($val);
    if ($lastchar and $len > 1) {
        // Keep everything but the last code point as literal text and expand
        // only the last one in the loop below.
        $last = array_pop($val);
        $rule = unicode_to_utf8($val);
        $val = array($last);
        $len = sizeof($val);
    }
    for ($i = 0; $i < $len; $i++) {
        $ch = $val[$i];
        $wch = array();
        $ustart = array();
        $uend = array();
        // U+AC00..U+D7A3 is the Hangul Syllables block, U+3130..U+318F the
        // Hangul Compatibility Jamo block; anything else is copied through.
        if ($ch >= 0xac00 and $ch <= 0xd7a3 or $ch >= 0x3130 and $ch <= 0x318f) {
            $wch = hangul_to_jamo(array($ch));
        } else {
            $rule .= unicode_to_utf8(array($ch));
            continue;
        }
        $wlen = sizeof($wch);
        // Closing parenthesis to append once an alternation group was opened.
        $ket = '';
        if ($wlen >= 3) {
            // Syllable with a final consonant: match either the syllable as
            // typed, or the syllable without its final consonant followed by a
            // syllable that starts with that consonant.
            // Example: 종각 => 종(각|가[가-깋])
            $mrule = array();
            $mrule[] = unicode_to_utf8(array($ch));
            $save = $wch[2];
            unset($wch[2]);
            $tmp = jamo_to_syllable($wch);
            $mrule[] = unicode_to_utf8($tmp);
            // Re-interpret the stripped final consonant and decompose it again.
            $save = hangul_jongseong_to_cjamo($save);
            $wch = hangul_to_jamo($save);
            $wlen = sizeof($wch);
            $rule .= '(' . implode('|', $mrule);
            $ket = ')';
            if ($wlen > 1) {
                // The final decomposed into several jamo: close the group
                // without adding a range.
                $rule .= ')';
                continue;
            }
        }
        if ($wlen == 1) {
            // U+1100..U+1112 are the leading-consonant jamo.
            if ($wch[0] >= 0x1100 and $wch[0] <= 0x1112) {
                // Range start: consonant + vowel U+1161, no final.
                $wch[1] = 0x1161;
                $start = jamo_to_syllable($wch);
                $ustart = unicode_to_utf8($start);
                // Range end: consonant + vowel U+1175 + final U+11C2.
                $wch[1] = 0x1175;
                $wch[2] = 0x11c2;
                $end = jamo_to_syllable($wch);
                $uend = unicode_to_utf8($end);
            } else {
                $rule .= unicode_to_utf8($wch) . $ket;
                continue;
            }
        } else {
            if ($wlen == 2) {
                if ($wch[0] >= 0x1100 and $wch[0] <= 0x1112) {
                    // Consonant + vowel: range from the open syllable to the
                    // same syllable with final U+11C2.
                    $start = jamo_to_syllable($wch);
                    $ustart = unicode_to_utf8($start);
                    $wch[2] = 0x11c2;
                    $end = jamo_to_syllable($wch);
                    $uend = unicode_to_utf8($end);
                } else {
                    $rule .= unicode_to_utf8($wch);
                    continue;
                }
            }
        }
        if ($use_unicode) {
            $crule = '[' . $ustart . '-' . $uend . ']';
        } else {
            // Byte-level form. Indexes 0..2 are read from $ustart/$uend, so
            // both are treated as three-byte sequences here; only $ustart's
            // first byte is emitted, i.e. both ends are taken to share it.
            $rule .= sprintf("\\x%02X", ord($ustart[0]));
            $crule = '';
            if ($ustart[1] == $uend[1]) {
                // Same second byte: a single range over the third byte suffices.
                $crule .= sprintf("\\x%02X", ord($ustart[1]));
                $crule .= sprintf("[\\x%02X-\\x%02X]", ord($ustart[2]), ord($uend[2]));
            } else {
                // Second byte differs: split into first partial row, any full
                // rows in between, and the last partial row.
                $sch = ord($ustart[1]);
                $ech = ord($uend[1]);
                $subrule = array();
                $subrule[] = sprintf("\\x%02X[\\x%02X-\\xBF]", $sch, ord($ustart[2]));
                if ($sch + 1 == $ech - 1) {
                    $subrule[] = sprintf("\\x%02X[\\x80-\\xBF]", $sch + 1);
                } else {
                    if ($sch + 1 != $ech) {
                        $subrule[] = sprintf("[\\x%02X-\\x%02X][\\x80-\\xBF]", $sch + 1, $ech - 1);
                    }
                }
                $subrule[] = sprintf("\\x%02X[\\x80-\\x%02X]", ord($uend[1]), ord($uend[2]));
                $crule .= '(' . implode('|', $subrule) . ')';
            }
        }
        $rule .= $crule . $ket;
    }
    return $rule;
}
예제 #19
0
파일: run.php 프로젝트: laiello/tweetspider
/**
 * Pulls the fields out of one status XML fragment and hands them to insertDB().
 *
 * Extracted fields: creation date (reformatted by dateFormat()), status id,
 * text (converted to entities), the first URL found in the text (expanded via
 * expandTinyURL()), source, reply-to status/user ids and the raw user block.
 *
 * @param string $currElement XML fragment describing a single status
 * @return void
 */
function crawlStatus($currElement)
{
    // Stays null when the text contains no URL.
    $hyperlink = null;

    $createdAt = return_between($currElement, "<created_at>", "</created_at>", EXCL);
    // Format the date as a database datetime so date-based comparisons work.
    $dtFormat = dateFormat($createdAt);

    // The status id is the first <id> element after </created_at>.
    $tempsid = split_string($currElement, "</created_at>", AFTER, EXCL);
    $tempsid = split_string($tempsid, "</id>", BEFORE, EXCL);
    $sid = split_string($tempsid, "<id>", AFTER, EXCL);

    $text = return_between($currElement, "<text>", "</text>", EXCL);
    // These two calls handle unicode characters / non-English text.
    $text = utf8_to_unicode($text);
    $text = unicode_to_entities_preserving_ascii($text);

    // Extract a URL from the text if one is present. The result of preg_match()
    // is compared instead of being overwritten by an assignment, so $matches is
    // only read when something actually matched.
    $matches = array();
    if (preg_match('@(https?://([-\\w\\.]+)+(:\\d+)?(/([\\w/_\\.]*(\\?\\S+)?)?)?)@', $text, $matches) === 1) {
        // Statuses usually contain shortened URLs, so expand them.
        $hyperlink = expandTinyURL(htmlentities($matches[0]));
    }

    $src = return_between($currElement, "<source>", "</source>", EXCL);
    $src = strip_tags($src);

    // Reply-to information, if the status is a reply.
    $rts = return_between($currElement, "<in_reply_to_status_id>", "</in_reply_to_status_id>", EXCL);
    $rtu = return_between($currElement, "<in_reply_to_user_id>", "</in_reply_to_user_id>", EXCL);

    // The user block is passed on unparsed.
    $userprofile = return_between($currElement, "<user>", "</user>", EXCL);

    insertDB($sid, $text, $hyperlink, $dtFormat, $rts, $rtu, $src, $userprofile);
}
예제 #20
0
 while (!feof($handlein)) {
     $buffer = fgets($handlein, 8192);
     $encoding_replaced = false;
     //$i = 0;
     while (strpos($buffer, '$') !== false) {
         //echo $i++ . ": strpos=". (string)strpos($buffer, '$') ."\nbuffer: $buffer\n";
         $replaced = false;
         foreach ($translation as $symbol => $character) {
             $sym_pos = strpos($buffer, $symbol);
             if ($sym_pos !== false) {
                 $sym_length = strlen($symbol);
                 $piece1 = substr($buffer, 0, $sym_pos);
                 if ($character['switch']) {
                     // the character after the special charater needs to come before it
                     $partnerchar = utf8_encode($buffer[$sym_pos + $sym_length]);
                     $piece2 = unicode_to_utf8(array_merge(utf8_to_unicode($partnerchar), $character['unicode']));
                     $piece3start = $sym_pos + $sym_length + 1;
                 } else {
                     $piece2 = unicode_to_utf8($character['unicode']);
                     $piece3start = $sym_pos + $sym_length;
                 }
                 $piece2 = utf8_decode(UtfNormal::NFKC($piece2));
                 // strip out any ? characters, which are characters not existing in ISO-8859-1
                 $piece2 = str_replace('?', '', $piece2);
                 $piece3 = substr($buffer, $piece3start);
                 $buffer = $piece1 . $piece2 . $piece3;
                 $replaced = true;
                 continue;
             }
         }
         if (!$replaced) {
예제 #21
0
 /**
  * utf8_to_unicode() in strict mode must report failure with boolean false.
  *
  * @dataProvider providerFailingUtf8ToUnicode
  */
 public function testFailingUtf8ToUnicodeReturnValue($str)
 {
     // Error output is suppressed on purpose; only the return value is under test.
     $converted = @utf8_to_unicode($str, true);
     $this->assertFalse($converted);
 }
예제 #22
0
/**
 * Encodes a UTF-8 string into an ASCII form: code points below $initial_n are
 * copied verbatim, the remaining ones are appended as variable-length digits.
 *
 * NOTE(review): the global parameter names (base, tmin, tmax, skew, damp,
 * initial_bias, initial_n) match those of RFC 3492 (Punycode), so this is
 * presumably a Punycode encoder. Confirm against encode_digit() and adapt(),
 * which are defined elsewhere.
 *
 * @param string $text UTF-8 input
 * @return string the basic characters alone when nothing needed encoding;
 *                otherwise $prefix followed by the basic characters, $delim
 *                and the encoded part (no $delim when there are no basic
 *                characters); '' for empty input
 */
function encode($text)
{
    global $base, $tmin, $tmax, $skew, $damp, $initial_bias, $initial_n, $prefix, $delim;
    $text = utf8_to_unicode($text);
    $length = sizeof($text);

    // Pass 1: copy every basic code point straight through.
    $codecount = 0;
    $basic_string = "";
    $extended_string = "";
    for ($i = 0; $i < $length; $i++) {
        if ($text[$i] < $initial_n) {
            $basic_string .= chr($text[$i]);
            $codecount++;
        }
    }

    // Pass 2: encode the remaining code points in ascending order.
    $n = $initial_n;
    $delta = 0;
    $bias = $initial_bias;
    $h = $codecount;
    while ($h < $length) {
        // Smallest code point that is still >= $n. The search starts above
        // every possible value: a fixed ceiling (previously 100000) never
        // matched higher code points, so $n could not reach them and this
        // loop did not terminate.
        $m = PHP_INT_MAX;
        for ($j = 0; $j < $length; $j++) {
            if ($text[$j] >= $n && $text[$j] <= $m) {
                $m = $text[$j];
            }
        }
        $delta = $delta + ($m - $n) * ($h + 1);
        $n = $m;
        for ($j = 0; $j < $length; $j++) {
            $c = $text[$j];
            if ($c < $n) {
                $delta++;
            } elseif ($c == $n) {
                // Emit $delta as a variable-length integer.
                $q = $delta;
                for ($k = $base; ; $k = $k + $base) {
                    if ($k <= $bias + $tmin) {
                        $t = $tmin;
                    } elseif ($k >= $bias + $tmax) {
                        $t = $tmax;
                    } else {
                        $t = $k - $bias;
                    }
                    if ($q < $t) {
                        break;
                    }
                    $extended_string .= encode_digit($t + ($q - $t) % ($base - $t));
                    $q = floor(($q - $t) / ($base - $t));
                }
                $extended_string .= encode_digit($q);
                $bias = adapt($delta, $h + 1, $h == $codecount);
                $delta = 0;
                $h++;
            }
        }
        $delta++;
        $n++;
    }

    // Assemble the result; empty input falls through to the empty string
    // instead of returning an undefined variable.
    $encoded = '';
    $has_basic = strlen($basic_string) > 0;
    $has_extended = strlen($extended_string) > 0;
    if ($has_basic && !$has_extended) {
        $encoded = $basic_string;
    } elseif ($has_basic && $has_extended) {
        $encoded = $prefix . $basic_string . $delim . $extended_string;
    } elseif ($has_extended) {
        $encoded = $prefix . $extended_string;
    }
    return $encoded;
}
예제 #23
0
 /**
  * A four-byte UTF-8 sequence must decode to a single code point.
  */
 function test_from_4byte()
 {
     $expected = array(1048577);
     $fourByteChar = "􀀁";
     $this->assertEqual(utf8_to_unicode($fourByteChar), $expected);
 }
예제 #24
0
/**
 * Splits a UTF-8 string into per-character "ux…" tokens.
 *
 * Each character other than the skipped blanks is passed to utf8_to_unicode()
 * and prefixed with 'ux'; the tokens are then joined with $depart.
 *
 * @param string $str    the string to convert
 * @param string $depart separator placed between tokens (default: one space)
 * @return string the joined tokens
 */
function str_to_unicode_word($str, $depart = ' ')
{
    $tokens = array();
    $total = mb_strlen($str, 'utf-8');
    $pos = 0;
    while ($pos < $total) {
        $char = mb_substr($str, $pos, 1, 'utf-8');
        $pos++;
        // NOTE(review): both literals render as a plain space here; the second
        // one may originally have been another blank (e.g. a full-width
        // space). Verify against the original file.
        if ($char == ' ' || $char == ' ') {
            continue;
        }
        $tokens[] = 'ux' . utf8_to_unicode($char);
    }
    return implode($depart, $tokens);
}
예제 #25
0
$total_words = 0;
$responseitem = "";
//
// Get the text from the client (POST or GET queries accepted)
//
$stxt = isset($_REQUEST['txt']) ? $_REQUEST['txt'] : FALSE;
//
//	If the user submitted text...
//
if ($stxt !== FALSE) {
    //
    // This turns all extended unicode characters into periods.
    // It makes the spellchecker ignore unicode characters without
    // upsetting the character indexes for spellchecker results.
    //
    $stxt = unicode_to_periods(utf8_to_unicode($stxt));
    //
    // Fire up ASpell and load the english dictionary.
    //
    // Possible reasons for this error:
    //  - The dictionary you're trying to load is not installed (see aspell.sourceforge.net)
    //  - Some other error beyond our control
    //       (PHP/Win32 sometimes has problems init'ing the library, trying again usually works.)
    //
    // Added 26-03-2005 - Retry feature.
    // Try loading the library up to 3 times before failing. Seems to work well,
    // except it increases the processing time for the script (the failed tries take some time.)
    //
    $psp = FALSE;
    $loaded_psp = FALSE;
    for ($i = 1; $i <= 3; $i++) {
	/**
	 * Runs obedit-generated markup through the XML-based filter and returns
	 * the resulting HTML document.
	 *
	 * If the markup cannot be parsed, or a handler aborts filtering, the
	 * submitted content is returned instead with only the unicode-to-entities
	 * conversion applied.
	 *
	 * @param string $codeContent URL-encoded markup as sent by the editor
	 * @return mixed whatever getDoc() returns for the finished document
	 */
	function filterHTML($codeContent)
	{
		global $state;

		// Start from a clean parsing state and an empty output document so the
		// function can be called several times on one page.
		$state = array();
		clearDoc();

		// Default parser state, applied in this order.
		$defaults = array(
			'abort_filtering'  => false, // set when an unsupported tag is encountered
			'current_tag'      => "",    // tag currently being processed
			'in_list'          => false, // inside a <ul> or <ol>
			'is_ordered_list'  => false, // the current list is an <ol>
			'last_tag'         => "",    // previously processed tag
			'css'              => "",    // styles to add to the next p tag
			'empty'            => true,  // whether the current element is empty
			'depth'            => 0,     // depth of the current node
			'style_depth'      => 0,     // depth of the current styling node
			'last_style_depth' => 0,     // depth of the previous styling node
			'last_depth'       => 0,     // last depth processed
			'last_font'        => "",    // last font family (avoids redundant styles)
			'last_size'        => "",    // last font size (avoids redundant styles)
			'last_color'       => "",    // last foreground color (avoids redundant styles)
		);
		foreach ($defaults as $key => $initial) {
			setState($key, $initial);
		}

		// obedit url-encodes the content before sending it.
		$codeContent = urldecode($codeContent);

		// Set up the XML parser and its handler functions.
		$xml_parser = xml_parser_create();
		xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, true);
		xml_parser_set_option($xml_parser, XML_OPTION_TARGET_ENCODING, "UTF-8");
		xml_set_element_handler($xml_parser, "startElement", "endElement");
		xml_set_character_data_handler($xml_parser, "characterData");

		// Wrap the content in a root element so the parser accepts it; the
		// wrapper is ignored by the filter and never reaches the output.
		$xml_data = "<?xml version='1.0'?><parserdocument>".$codeContent."</parserdocument>";

		// The handlers do the actual filtering while the document is parsed.
		$filterResult = @xml_parse($xml_parser, $xml_data, true);
		$filteredCleanly = $filterResult && !filterWasAborted();

		if ($filteredCleanly)
		{
			// Valid document: if it ended inside a list, close that list with
			// the matching end tag.
			if (isInsideList())
			{
				add2Doc(isOrderedList() ? "\n</ol>" : "\n</ul>");
				setInsideList(false);
			}
		}
		else
		{
			// Malformed or hand-written markup: discard the partial result and
			// emit the submitted content with only the unicode conversion.
			clearDoc();
			add2Doc(unicode_to_entities(utf8_to_unicode($codeContent)));
		}

		// Free up memory
		xml_parser_free($xml_parser);

		return getDoc();
	}
예제 #27
0
파일: utf8.php 프로젝트: reeze/dokuwiki
 /**
  * UTF-8 to UTF-16BE conversion.
  *
  * Uses mb_convert_encoding() when UTF8_MBSTRING is truthy; otherwise decodes
  * with utf8_to_unicode() and packs the code points by hand.
  *
  * @param string $str UTF-8 input (taken by reference; not modified here)
  * @param bool   $bom when true, the result starts with the UTF-16BE byte order mark
  * @return string the UTF-16BE encoded bytes
  */
 function utf8_to_utf16be(&$str, $bom = false)
 {
     // U+FEFF in big-endian order, written as escapes so the two bytes survive
     // any re-encoding of this source file.
     $out = $bom ? "\xFE\xFF" : '';
     if (UTF8_MBSTRING) {
         return $out . mb_convert_encoding($str, 'UTF-16BE', 'UTF-8');
     }
     $uni = utf8_to_unicode($str);
     if (!is_array($uni)) {
         // Nothing decodable: return what we have instead of iterating a non-array.
         return $out;
     }
     foreach ($uni as $cp) {
         if ($cp > 0xFFFF) {
             // Outside the BMP: a single 16-bit unit would truncate the value,
             // so emit a high/low surrogate pair.
             $cp -= 0x10000;
             $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
         } else {
             $out .= pack('n', $cp);
         }
     }
     return $out;
 }
예제 #28
0
/**
 * Builds a search pattern fragment for a Hangul string, expanding an
 * incomplete trailing syllable (or bare consonants) into ranges of the
 * syllables they could grow into.
 *
 * Relies on helpers defined elsewhere: utf8_to_unicode(), unicode_to_utf8(),
 * hangul_to_jamo(), jamo_to_syllable(), hangul_jongseong_to_cjamo() and
 * hangul_regex_range(). NOTE(review): their exact return shapes are not
 * visible here; the comments below describe only how this function uses the
 * results.
 *
 * @param string $str         UTF-8 input
 * @param mixed  $lastchar    when truthy and the input has more than one code
 *                            point, everything before the final code point is
 *                            emitted literally; otherwise the leading code
 *                            points get consonant expansion (see the else branch)
 * @param bool   $use_unicode passed through to hangul_regex_range()
 * @return string the assembled pattern fragment
 */
function utf8_hangul_getSearchRule($str, $lastchar = 1, $use_unicode = true)
{
    $rule = '';
    $val = utf8_to_unicode($str);
    $len = sizeof($val);
    if ($lastchar and $len > 1) {
        // Keep everything but the last code point as literal text and expand
        // only the last one in the final loop.
        $last = array_pop($val);
        $rule = unicode_to_utf8($val);
        $val = array($last);
        $len = sizeof($val);
    } else {
        // Build a pattern for consonant-only letters: each bare consonant
        // matches itself or any syllable starting with it.
        // Example: ㄱㅎ => (ㄱ|[가-깋])(ㅎ|[하-힣])
        // The last code point is set aside for the final loop.
        $last = array_pop($val);
        $len = sizeof($val);
        for ($i = 0; $i < $len; $i++) {
            $ch = $val[$i];
            // U+3130..U+318F is the Hangul Compatibility Jamo block.
            if ($ch >= 0x3130 and $ch <= 0x318f) {
                $wch = hangul_to_jamo(array($ch));
                // U+1100..U+1112 are the leading-consonant jamo.
                if ($wch[0] >= 0x1100 and $wch[0] <= 0x1112) {
                    // Range start: consonant + vowel U+1161, no final.
                    $wch[1] = 0x1161;
                    $start = jamo_to_syllable($wch);
                    $ustart = unicode_to_utf8($start);
                    // Range end: consonant + vowel U+1175 + final U+11C2.
                    $wch[1] = 0x1175;
                    $wch[2] = 0x11c2;
                    $end = jamo_to_syllable($wch);
                    $uend = unicode_to_utf8($end);
                } else {
                    $rule .= unicode_to_utf8($wch);
                    continue;
                }
                $crule = '(' . unicode_to_utf8(array($ch)) . '|';
                $crule .= hangul_regex_range($ustart, $uend, $use_unicode);
                $crule .= ')';
            } else {
                $crule = unicode_to_utf8(array($ch));
            }
            $rule .= $crule;
        }
        // Continue with the code point that was set aside.
        $val = array($last);
        $len = sizeof($val);
    }
    for ($i = 0; $i < $len; $i++) {
        $ch = $val[$i];
        $wch = array();
        $ustart = array();
        $uend = array();
        // U+AC00..U+D7A3 is the Hangul Syllables block, U+3130..U+318F the
        // Hangul Compatibility Jamo block; anything else is copied through.
        if ($ch >= 0xac00 and $ch <= 0xd7a3 or $ch >= 0x3130 and $ch <= 0x318f) {
            $wch = hangul_to_jamo(array($ch));
        } else {
            $rule .= unicode_to_utf8(array($ch));
            continue;
        }
        $wlen = sizeof($wch);
        // Closing parenthesis to append once an alternation group was opened.
        $ket = '';
        if ($wlen >= 3) {
            // Syllable with a final consonant: match either the syllable as
            // typed, or the syllable without its final consonant followed by a
            // syllable that starts with that consonant.
            // Example: 종각 => 종(각|가[가-깋])
            $mrule = array();
            $mrule[] = unicode_to_utf8(array($ch));
            $save = $wch[2];
            unset($wch[2]);
            $tmp = jamo_to_syllable($wch);
            $mrule[] = unicode_to_utf8($tmp);
            // Re-interpret the stripped final consonant and decompose it again.
            $save = hangul_jongseong_to_cjamo($save);
            $wch = hangul_to_jamo($save);
            $wlen = sizeof($wch);
            $rule .= '(' . implode('|', $mrule);
            $ket = ')';
            if ($wlen > 1) {
                // The final decomposed into several jamo: close the group
                // without adding a range.
                $rule .= ')';
                continue;
            }
        }
        if ($wlen == 1) {
            if ($wch[0] >= 0x1100 and $wch[0] <= 0x1112) {
                // Range start: consonant + vowel U+1161, no final.
                $wch[1] = 0x1161;
                $start = jamo_to_syllable($wch);
                $ustart = unicode_to_utf8($start);
                // Range end: consonant + vowel U+1175 + final U+11C2.
                $wch[1] = 0x1175;
                $wch[2] = 0x11c2;
                $end = jamo_to_syllable($wch);
                $uend = unicode_to_utf8($end);
            } else {
                $rule .= unicode_to_utf8($wch) . $ket;
                continue;
            }
        } else {
            if ($wlen == 2) {
                if ($wch[0] >= 0x1100 and $wch[0] <= 0x1112) {
                    // Consonant + vowel: range from the open syllable to the
                    // same syllable with final U+11C2.
                    $start = jamo_to_syllable($wch);
                    $ustart = unicode_to_utf8($start);
                    $wch[2] = 0x11c2;
                    $end = jamo_to_syllable($wch);
                    $uend = unicode_to_utf8($end);
                } else {
                    $rule .= unicode_to_utf8($wch);
                    continue;
                }
            }
        }
        // Turn the start/end pair into a range expression and close any open group.
        $crule = hangul_regex_range($ustart, $uend, $use_unicode);
        $rule .= $crule . $ket;
    }
    return $rule;
}