示例#1
0
 /**
  * Converts a string of characters to code points, separated by spaces.
  *
  * Characters covered by $preserve are copied through unchanged; every other
  * character is replaced by its numeric code point in the notation selected
  * by $type. A single space is inserted between two consecutive escapes so
  * that adjacent numbers cannot run together.
  *
  * NOTE(review): the whole result is passed through strtoupper(), so preserved
  * characters and the "0x" prefix are upper-cased too. Kept as-is for
  * backward compatibility.
  *
  * NOTE(review): a high surrogate that is the very last unit of the input is
  * silently dropped (no exception), as before.
  *
  * @param string $textString The string to convert.
  * @param string $preserve   'ascii' or 'latin1': the set of characters that is
  *                           NOT converted. Any other value converts everything.
  * @param bool   $pad        If true, hex numbers shorter than 4 digits are
  *                           left-padded with zeros.
  * @param string $type       'hex', 'unicode' (U+ form) or 'zerox' (0x form);
  *                           any other value yields decimal output.
  *
  * @return string The converted, upper-cased string.
  *
  * @throws Exception If a code unit is outside 0..0xFFFF, or a high surrogate
  *                   is not followed by a low surrogate.
  */
 public static function convertCharStr2CP($textString, $preserve, $pad, $type)
 {
     $haut = 0; // pending high surrogate, 0 when none is waiting
     $CPString = '';
     $afterEscape = false;
     // The input never changes inside the loop, so measure it once.
     $length = mb_strlen($textString);
     for ($i = 0; $i < $length; $i++) {
         // assumes Z_Unicode::charCodeAt yields UTF-16 code units — TODO confirm
         $b = Z_Unicode::charCodeAt($textString, $i);
         if ($b < 0 || $b > 0xffff) {
             throw new Exception('Error in convertChar2CP: byte out of range ' . dechex($b) . '!');
         }
         if ($haut !== 0) {
             if ($b < 0xdc00 || $b > 0xdfff) {
                 throw new Exception('Error in convertChar2CP: surrogate out of range ' . dechex($haut) . '!');
             }
             // Combine the surrogate pair into one supplementary code point.
             $codePoint = 0x10000 + (($haut - 0xd800) << 10) + ($b - 0xdc00);
             if ($afterEscape) {
                 $CPString .= ' ';
             }
             $CPString .= self::formatCodePoint($codePoint, $pad, $type);
             // A following escape must be separated from this one.
             $afterEscape = true;
             $haut = 0;
             continue;
         }
         if (0xd800 <= $b && $b <= 0xdbff) {
             // High surrogate: remember it and wait for the low half.
             $haut = $b;
             continue;
         }
         if (($b <= 127 && $preserve === 'ascii') || ($b <= 255 && $preserve === 'latin1')) {
             $CPString .= Z_Unicode::charAt($textString, $i);
             $afterEscape = false;
             continue;
         }
         if ($afterEscape) {
             $CPString .= ' ';
         }
         $CPString .= self::formatCodePoint($b, $pad, $type);
         $afterEscape = true;
     }
     return strtoupper($CPString);
 }

 /**
  * Renders one code point in the requested notation.
  *
  * @param int    $codePoint The numeric code point.
  * @param bool   $pad       Left-pad hex output with zeros to at least 4 digits.
  * @param string $type      'hex', 'unicode' or 'zerox'; anything else is decimal.
  *
  * @return string The formatted code point (case is normalised by the caller).
  */
 private static function formatCodePoint($codePoint, $pad, $type)
 {
     if ($type !== 'hex' && $type !== 'unicode' && $type !== 'zerox') {
         // Decimal output is never padded.
         return (string) $codePoint;
     }
     $hex = dechex($codePoint);
     if ($pad) {
         $hex = str_pad($hex, 4, '0', STR_PAD_LEFT);
     }
     if ($type === 'unicode') {
         return 'U+' . $hex;
     }
     if ($type === 'zerox') {
         return '0x' . $hex;
     }
     return $hex;
 }
示例#2
0
 /**
  * Reassembles characters from a string whose units hold UTF-8 byte values.
  *
  * Each unit is read with Z_Unicode::charCodeAt():
  *  - a value below 128 is emitted unchanged;
  *  - a value in 192..223 is combined with the next unit (two-unit sequence);
  *  - every other value is combined with the next two units (three-unit
  *    sequence).
  *
  * NOTE(review): lead values of 240 and above (four-byte UTF-8 sequences) and
  * stray continuation values (128..191) also take the three-unit branch, and
  * truncated sequences read past the end of the input. Left unchanged because
  * the behaviour of Z_Unicode::charCodeAt/fromCharCode for those cases is not
  * visible here — confirm before extending.
  *
  * @param string $utftext Input whose units carry UTF-8 byte values
  *                        (presumably one byte per character — verify against caller).
  *
  * @return string The decoded string built via Z_Unicode::fromCharCode().
  */
 private static function utf8_decode($utftext)
 {
     $string = "";
     // The input is never modified, so its length is computed once rather
     // than on every loop iteration.
     $length = mb_strlen($utftext);
     $i = 0;
     while ($i < $length) {
         $c = Z_Unicode::charCodeAt($utftext, $i);
         if ($c < 128) {
             // Single unit: plain ASCII.
             $string .= Z_Unicode::fromCharCode($c);
             $i++;
         } else {
             if ($c > 191 && $c < 224) {
                 // Two units: 5 payload bits from the lead, 6 from the trail.
                 $c2 = Z_Unicode::charCodeAt($utftext, $i + 1);
                 $string .= Z_Unicode::fromCharCode((($c & 31) << 6) | ($c2 & 63));
                 $i += 2;
             } else {
                 // Three units: 4 payload bits from the lead, 6 from each trail.
                 $c2 = Z_Unicode::charCodeAt($utftext, $i + 1);
                 $c3 = Z_Unicode::charCodeAt($utftext, $i + 2);
                 $string .= Z_Unicode::fromCharCode((($c & 15) << 12) | (($c2 & 63) << 6) | ($c3 & 63));
                 $i += 3;
             }
         }
     }
     return $string;
 }