/**
 * Converts a string of characters to code points, separated by spaces.
 *
 * Characters selected by $preserve are copied to the output unchanged; every
 * other character is replaced by its numeric code point. Consecutive code
 * points are separated by a single space; no space is inserted between a
 * preserved character and a neighbouring code point.
 *
 * NOTE(review): the surrogate handling assumes Z_Unicode::charCodeAt() can
 * return UTF-16 surrogate halves (0xD800-0xDFFF) for characters outside the
 * BMP. If it returns full code points instead, such characters hit the
 * "byte out of range" exception below. Confirm against Z_Unicode.
 *
 * NOTE(review): the whole result is passed through strtoupper(), so preserved
 * characters and the "0x" prefix are upper-cased as well as the hex digits.
 * That is kept as-is for backward compatibility.
 *
 * @param string $textString The string to convert.
 * @param string $preserve   'ascii' keeps characters with code <= 127 as-is,
 *                           'latin1' keeps characters with code <= 255 as-is;
 *                           any other value converts every character.
 * @param bool   $pad        If true, hex values are left-padded with zeros
 *                           to at least four digits.
 * @param string $type       'hex' for bare hex, 'unicode' for U+ notation,
 *                           'zerox' for 0x notation; any other value yields
 *                           decimal numbers.
 * @return string The converted, upper-cased string.
 * @throws Exception If a character code is outside 0..0xFFFF, or a high
 *                   surrogate is not followed by a low surrogate.
 */
public static function convertCharStr2CP($textString, $preserve, $pad, $type) {
    // Resolve the output notation once; $type does not change during the loop.
    // The comparison order matches the original if/else chain.
    if ($type == 'hex') {
        $prefix = '';
        $useHex = true;
    } elseif ($type == 'unicode') {
        $prefix = 'U+';
        $useHex = true;
    } elseif ($type == 'zerox') {
        $prefix = '0x';
        $useHex = true;
    } else {
        $prefix = '';
        $useHex = false;
    }

    $haut = 0;            // pending high surrogate, 0 when there is none
    $CPString = '';
    $afterEscape = false; // true when the last thing appended was a code point
    $length = mb_strlen($textString);

    for ($i = 0; $i < $length; $i++) {
        $b = Z_Unicode::charCodeAt($textString, $i);
        if ($b < 0 || $b > 0xffff) {
            throw new Exception('Error in convertChar2CP: byte out of range ' . dechex($b) . '!');
        }

        if ($haut != 0) {
            // A high surrogate is pending: this unit must be its low half.
            if ($b < 0xdc00 || $b > 0xdfff) {
                throw new Exception('Error in convertChar2CP: surrogate out of range ' . dechex($haut) . '!');
            }
            // Combine the pair arithmetically (the previous code concatenated
            // the three terms as strings, producing a wrong value).
            $codePoint = 0x10000 + (($haut - 0xd800) << 10) + ($b - 0xdc00);
            $haut = 0;
        } elseif (0xd800 <= $b && $b <= 0xdbff) {
            // Remember the high surrogate and wait for its partner.
            // A high surrogate at the very end of the input is dropped
            // silently, as before.
            $haut = $b;
            continue;
        } elseif (($b <= 127 && $preserve == 'ascii') || ($b <= 255 && $preserve == 'latin1')) {
            $CPString .= Z_Unicode::charAt($textString, $i);
            $afterEscape = false;
            continue;
        } else {
            $codePoint = $b;
        }

        // Separate consecutive code points with a single space.
        if ($afterEscape) {
            $CPString .= ' ';
        }

        if ($useHex) {
            $cp = dechex($codePoint);
            if ($pad) {
                $cp = str_pad($cp, 4, '0', STR_PAD_LEFT);
            }
        } else {
            $cp = (string) $codePoint;
        }

        $CPString .= $prefix . $cp;
        // Also set after a surrogate pair, so the next code point is not
        // glued onto this one without a separator.
        $afterEscape = true;
    }

    return strtoupper($CPString);
}
/**
 * Rebuilds a string from a sequence of UTF-8 byte values stored one per
 * character in $utftext.
 *
 * Each character code is read with Z_Unicode::charCodeAt():
 *  - codes below 128 are emitted unchanged;
 *  - codes 192..223 are combined with the following code into one value
 *    (two-unit sequence);
 *  - every other code is combined with the following two codes
 *    (three-unit sequence).
 *
 * NOTE(review): there is no separate branch for four-unit sequences or for
 * stray continuation values (128..191); both fall into the three-unit case.
 * Reads of $pos + 1 / $pos + 2 are not bounds-checked here, so the result
 * for truncated input depends on Z_Unicode::charCodeAt() — confirm.
 *
 * @param string $utftext Input whose characters carry UTF-8 byte values.
 * @return string The decoded string built with Z_Unicode::fromCharCode().
 */
private static function utf8_decode($utftext) {
    $decoded = '';
    $total = mb_strlen($utftext);
    $pos = 0;

    while ($pos < $total) {
        $lead = Z_Unicode::charCodeAt($utftext, $pos);

        // Single-unit (ASCII) value: copy through.
        if ($lead < 128) {
            $decoded .= Z_Unicode::fromCharCode($lead);
            $pos += 1;
            continue;
        }

        // Both multi-unit forms need the second unit.
        $second = Z_Unicode::charCodeAt($utftext, $pos + 1);

        if ($lead > 191 && $lead < 224) {
            // Two units: 5 payload bits from the lead, 6 from the second.
            $value = (($lead & 31) << 6) | ($second & 63);
            $pos += 2;
        } else {
            // Three units: 4 payload bits from the lead, 6 from each follower.
            $third = Z_Unicode::charCodeAt($utftext, $pos + 2);
            $value = (($lead & 15) << 12) | (($second & 63) << 6) | ($third & 63);
            $pos += 3;
        }

        $decoded .= Z_Unicode::fromCharCode($value);
    }

    return $decoded;
}