/**
 * Checks that utf8::utf8_to_numeric() returns the expected numeric value
 * for a two-byte character and for an ASCII digit.
 */
public function testUtf8_to_numeric()
{
    // Each case: input character, expected numeric value, assertion message.
    $cases = array(
        array('é', 0xe9, '1 - '),
        array('5', 0x35, '2 - '),
    );

    foreach ($cases as $case) {
        list($input, $expected, $label) = $case;
        $actual = utf8::utf8_to_numeric($input);
        $this->assertEquals($expected, $actual, $label);
    }
}
/**
 * Rewrites a UTF-8 string, replacing every character whose numeric code is a key of
 * self::$ais_in_unicode by the character built from that entry's 'UCnoaipaitai' value.
 * All other characters are copied unchanged.
 *
 * @param string $text UTF-8 text to rewrite.
 * @return string The rewritten text.
 */
public static function iciUnicodeToNoAipaitai($text)
{
    $result = "";
    $length = mb_strlen($text, 'UTF-8');
    $pos = 0;

    while ($pos < $length) {
        $char = mb_substr($text, $pos, 1, 'UTF-8');
        $pos++;

        $code = utf8::utf8_to_numeric($char);
        if (!array_key_exists($code, self::$ais_in_unicode)) {
            // Not listed in the table: keep the character as it is.
            $result .= $char;
            continue;
        }

        $replacement = self::$ais_in_unicode[$code]['UCnoaipaitai'];
        $result .= utf8::numeric_to_utf8($replacement);
    }

    return $result;
}
/**
 * Transcodes text typed with a legacy font encoding into Unicode syllabics.
 *
 * The text is read one UTF-8 character at a time. Each character is looked up by its
 * numeric code (utf8::utf8_to_numeric) and the resulting code is pushed on an output
 * stack; the stack is finally rendered with utf8::numeric_to_utf8. Four kinds of input
 * characters are recognised:
 *
 *  - "dotted" syllabics: a plain mapping, except that when the previous character was a
 *    dot mark, the dot mark is removed from the stack and the code is incremented by one;
 *  - "contextual" syllabics: may additionally merge with a final already on the stack
 *    (r/q + k-series, ng/nng + g-series, n/ng + ng-series, q + r-series) and, for the
 *    a-column, may absorb a following 'w';
 *  - "plain" characters: a direct mapping that neither reads nor changes the dot state;
 *  - dot marks: copied to the stack and remembered for the next character.
 *
 * Any other character is copied through unchanged and clears the dot state.
 *
 * Fixes compared with the previous if/elseif implementation:
 *  - the look-ahead for a following 'w' compared the character with ord('w') (an
 *    integer), which never matched; it now compares with the string 'w' in every
 *    a-column branch, as the "qa" branch already did;
 *  - for "ga", merging with a preceding ng/nng was only applied when "ga" was the very
 *    last character of the text; it is now applied like for gi/gu/gii/guu;
 *  - the encoding is passed explicitly to the mb_* functions;
 *  - the unused local $text_transcode is gone.
 *
 * @param string $text     Text in the legacy encoding (UTF-8 string).
 * @param mixed  $aipaitai When truthy, an a-column syllabic followed by 'w' becomes the
 *                         single ai-column character; otherwise the a-column character
 *                         followed by i (0x1403) is emitted.
 * @return string The transcoded UTF-8 text.
 */
public static function legacyToUnicode($text, $aipaitai)
{
    // Finals the contextual rules look for on top of the output stack.
    $finalN   = 0x14d0;
    $finalR   = 0x1550;
    $finalQ   = 0x1585;
    $finalNg  = 0x1595;
    $finalNng = 0x1596;

    // Syllabics with no contextual rule; a preceding dot mark turns base into base + 1.
    $dotted = array(
        ord('w') => 0x1403, ord('W') => 0x1431, ord('t') => 0x144e, ord('u') => 0x14a5,
        ord('i') => 0x14c2, ord('y') => 0x14ef, ord('o') => 0x14d5, ord('p') => 0x1528,
        ord('F') => 0x1555, ord('T') => 0x1671, ord('O') => 0x15a0,
        ord('s') => 0x1405, ord('S') => 0x1433, ord('g') => 0x1450, ord('j') => 0x14a7,
        ord('k') => 0x14c4, ord('h') => 0x14f1, ord('l') => 0x14d7, ord('J') => 0x152a,
        ord('K') => 0x1557, ord('Y') => 0x1673, ord('L') => 0x15a2, ord('I') => 0x15a4,
    );

    // Contextual syllabics.
    // Row: array(base, reacts to dot mark, rule, alternate, second alternate, absorbs a following 'w').
    // Missing trailing fields are filled from $rowDefaults.
    $rowDefaults = array(0, false, '', 0, 0, false);
    $contextual = array(
        // k-series: a preceding r turns k into q; r+r or q+r before k collapses to q + k.
        ord('r') => array(0x146d, true, 'k', 0x157f),               // ki
        ord('f') => array(0x146f, true, 'k', 0x1581),               // ku
        ord('v') => array(0x1472, true, 'k', 0x1583, 0, true),      // ka
        0xae     => array(0x146e, false, 'k', 0x1580),              // kii
        0x192    => array(0x1470, false, 'k', 0x1582),              // kuu
        0x83     => array(0x1470, false, 'k', 0x1582),              // kuu
        0x221a   => array(0x1473, false, 'k', 0x1584),              // kaa

        // g-series: a preceding ng gives ng_, a preceding nng gives nng_.
        ord('Q') => array(0x148b, true, 'g', 0x158f, 0x1671),       // gi
        ord('A') => array(0x148d, true, 'g', 0x1591, 0x1673),       // gu
        ord('Z') => array(0x1490, true, 'g', 0x1593, 0x1675, true), // ga
        0x152    => array(0x148c, false, 'g', 0x1590, 0x1672),      // gii
        0x8c     => array(0x148c, false, 'g', 0x1590, 0x1672),      // gii
        0xc5     => array(0x148e, true, 'g', 0x1592, 0x1674),       // guu

        // q-series: after r or q, the final becomes q and the syllabic becomes k_.
        ord('e') => array(0x157f, true, 'q', 0x146d),               // qi
        ord('d') => array(0x1581, true, 'q', 0x146f),               // qu
        ord('c') => array(0x1583, true, 'q', 0x1472, 0, true),      // qa
        0xe9     => array(0x1580, false, 'q', 0x146e),              // qii
        0x2202   => array(0x1582, false, 'q', 0x1470),              // quu
        0xe7     => array(0x1584, false, 'q', 0x1473),              // qaa

        // ng-series: a preceding n or ng is absorbed, giving nng_.
        ord('q') => array(0x158f, true, 'ng', 0x1671),              // ngi
        ord('a') => array(0x1591, true, 'ng', 0x1673),              // ngu
        ord('z') => array(0x1593, true, 'ng', 0x1675, 0, true),     // nga
        ord('1') => array(0x1595, false, 'ng', 0x1596),             // ng (final)
        0x153    => array(0x1590, false, 'ng', 0x1672),             // ngii
        0x9c     => array(0x1590, false, 'ng', 0x1672),             // ngii
        0xe5     => array(0x1592, true, 'ng', 0x1674),              // nguu
        0x3a9    => array(0x1594, false, 'ng', 0x1676),             // ngaa

        // r-series: a preceding final q is turned into final r.
        ord('E') => array(0x1546, true, 'r'),                       // ri
        ord('D') => array(0x1548, true, 'r'),                       // ru
        ord('C') => array(0x154b, true, 'r', 0, 0, true),           // ra
        0xb4     => array(0x1547, false, 'r'),                      // rii
        0xce     => array(0x1549, false, 'r'),                      // ruu
        0xc7     => array(0x154c, false, 'r'),                      // raa

        // Remaining a-column syllabics: no merge rule, only the 'w' look-ahead.
        ord('x') => array(0x140a, true, '', 0, 0, true),            // a
        ord('X') => array(0x1438, true, '', 0, 0, true),            // pa
        ord('b') => array(0x1455, true, '', 0, 0, true),            // ta
        ord('m') => array(0x14aa, true, '', 0, 0, true),            // ma
        ord('N') => array(0x14c7, true, '', 0, 0, true),            // na
        ord('n') => array(0x14f4, true, '', 0, 0, true),            // sa
        ord('M') => array(0x14da, true, '', 0, 0, true),            // la
        ord('/') => array(0x152d, true, '', 0, 0, true),            // ja
        ord('?') => array(0x1559, true, '', 0, 0, true),            // va
        ord('U') => array(0x1675, true, '', 0, 0, true),            // nnga
    );

    // a-column code => ai-column code, used when $aipaitai is truthy.
    // nnga (0x1675) is handled separately: it becomes final nng + gai.
    $aiColumn = array(
        0x140a => 0x1401, 0x1438 => 0x142f, 0x1455 => 0x144c, 0x1472 => 0x146b,
        0x1583 => 0x166f, 0x1490 => 0x1489, 0x1593 => 0x1670, 0x14aa => 0x14a3,
        0x14c7 => 0x14c0, 0x14f4 => 0x14ed, 0x14da => 0x14d3, 0x152d => 0x1526,
        0x1559 => 0x1553, 0x154b => 0x1542,
    );

    // Direct mappings. These leave the dot state untouched, exactly as the original chain did.
    // NOTE(review): a dot mark followed by one of these stays pending for the next syllabic — confirm this is intended.
    $plain = array(
        0x2211 => 0x1404, 0x201e => 0x1432, 0x84 => 0x1432, 0x2020 => 0x144f, 0x86 => 0x144f,
        0xa8 => 0x14a6, 0xee => 0x14c3, 0xa5 => 0x14f0, 0xf8 => 0x14d6, 0x3c0 => 0x1529,
        0xcf => 0x1556,
        ord('3') => 0x1550, ord('R') => 0x1596, 0x2c7 => 0x1672, 0xd8 => 0x15a1,
        0xdf => 0x1406, 0xcd => 0x1434, 0xa9 => 0x1451,
        0x394 => 0x14a8, 0x2da => 0x14c5, 0x2d9 => 0x14f2, 0xac => 0x14d8, 0xd4 => 0x152b,
        0xf000 => 0x1558, 0xc1 => 0x1674, 0xd2 => 0x15a3,
        0x2db => 0x1439, 0x222b => 0x1456, 0xb8 => 0x1491, 0x3bc => 0x14ab,
        0x2dc => 0x14c8, 0x98 => 0x14c8, 0xf1 => 0x14f5, 0xc2 => 0x14db, 0xf7 => 0x152e,
        0xbf => 0x155a, 0xdc => 0x1676, 0x2c6 => 0x15a5, 0x88 => 0x15a5,
        // Finals typed on the digit row and a few other keys.
        ord('2') => 0x1449, ord('5') => 0x1466, ord('4') => 0x1483, ord('[') => 0x14a1,
        ord('7') => 0x14bb, ord('8') => 0x14d0, ord('{') => 0x1505, ord('9') => 0x14ea,
        ord('0') => 0x153e, ord('=') => 0x155d, ord('6') => 0x1585, ord('P') => 0x15a6,
        ord('B') => 0x157c,
        // Digits and punctuation relocated by the legacy layout.
        ord('!') => ord('1'), ord('@') => ord('2'), ord('#') => ord('3'), ord('$') => ord('4'),
        ord('%') => ord('5'), ord('^') => ord('6'), ord('&') => ord('7'), ord('*') => ord('8'),
        ord('(') => ord('9'), ord(')') => ord('0'), ord('G') => ord('('), ord('H') => ord(')'),
        ord('V') => ord('?'), ord('\\') => ord('/'),
        0xa1 => ord('!'), 0xa2 => ord('$'), 0xa3 => ord('#'), 0xa4 => 0xae, 0xa7 => ord('*'),
        0xaa => ord('['), 0xb0 => 0xa9, 0xb6 => ord('&'), 0xb7 => 0xf7, 0xba => ord(']'),
        0x131 => ord('}'), 0x2013 => 0xd7, 0x96 => 0xd7, 0x201a => ord('+'), 0x82 => ord('+'),
        0x2021 => 0x2154, 0x87 => 0x2154, 0x2039 => 0xbc, 0x8b => 0xbc, 0x203a => 0xbd,
        0x9b => 0xbd, 0x2044 => 0xa2, 0x2260 => ord('='), 0x25ca => ord('{'),
        0xf001 => 0xbe, 0xf002 => 0x2153,
        0x85 => 0x2026, 0x91 => 0x2018, 0x92 => 0x2019, 0x93 => 0x201c, 0x94 => 0x201d,
        0x95 => 0x2022, 0x97 => 0x2014, 0x99 => 0x2122,
    );

    // Characters that act as a dot mark for the following syllabic.
    $dotMarks = array(
        ord('|') => true, ord('}') => true, ord('`') => true,
        ord('+') => true, ord(']') => true, ord('~') => true,
    );

    $cs = array();   // output stack of numeric codes
    $dot = FALSE;    // TRUE while a dot mark is pending on top of $cs
    $l = mb_strlen($text, 'UTF-8');
    $i = 0;

    while ($i < $l) {
        $c = mb_substr($text, $i, 1, 'UTF-8');
        $nc = utf8::utf8_to_numeric($c);

        if (isset($dotted[$nc])) {
            $d = $dotted[$nc];
            if ($dot) {
                $dot = FALSE;
                array_pop($cs);   // remove the dot mark itself
                $d++;
            }
        } elseif (isset($contextual[$nc])) {
            list($d, $dottable, $rule, $alt, $alt2, $lookahead) = $contextual[$nc] + $rowDefaults;

            $lengthen = $dottable && $dot;
            if ($lengthen) {
                array_pop($cs);   // remove the dot mark before inspecting the stack
            }

            $n = count($cs);
            $top = $n > 0 ? $cs[$n - 1] : null;

            switch ($rule) {
                case 'k':
                    if ($top == $finalR) {
                        $below = $n > 1 ? $cs[$n - 2] : null;
                        array_pop($cs);
                        if ($below == $finalR) {
                            // r + r + k_  =>  q + k_
                            array_pop($cs);
                            array_push($cs, $finalQ);
                        } elseif ($below != $finalQ) {
                            // r + k_  =>  q_   (q + r + k_ just drops the r)
                            $d = $alt;
                        }
                    }
                    break;

                case 'g':
                    if ($top == $finalNg) {
                        array_pop($cs);
                        $d = $alt;
                    } elseif ($top == $finalNng) {
                        array_pop($cs);
                        $d = $alt2;
                    }
                    break;

                case 'q':
                    if ($top == $finalR || $top == $finalQ) {
                        // r + q_  =>  q + k_
                        array_pop($cs);
                        array_push($cs, $finalQ);
                        $d = $alt;
                    }
                    break;

                case 'ng':
                    if ($top == $finalN || $top == $finalNg) {
                        array_pop($cs);
                        $d = $alt;
                    }
                    break;

                case 'r':
                    if ($top == $finalQ) {
                        // q + r_  =>  r + r_
                        array_pop($cs);
                        array_push($cs, $finalR);
                    }
                    break;
            }

            if ($lookahead) {
                $j = $i + 1;
                // Compare with the character 'w', not with ord('w').
                if ($j < $l && mb_substr($text, $j, 1, 'UTF-8') === 'w') {
                    if (!$aipaitai) {
                        // a-column syllabic followed by a separate i
                        array_push($cs, $d);
                        $d = 0x1403;
                    } elseif ($d == 0x1675) {
                        // nnga + i  =>  final nng + gai
                        array_push($cs, $finalNng);
                        $d = 0x1489;
                    } else {
                        $d = $aiColumn[$d];
                    }
                    $i = $j;   // the 'w' has been consumed
                }
            }

            if ($lengthen) {
                $dot = FALSE;
                $d++;
            }
        } elseif (isset($plain[$nc])) {
            $d = $plain[$nc];
        } elseif (isset($dotMarks[$nc])) {
            $d = ord($c);
            $dot = TRUE;
        } else {
            $dot = FALSE;
            $d = $nc;
        }

        $i++;
        array_push($cs, $d);
    }

    $out = '';
    foreach ($cs as $n) {
        $out .= utf8::numeric_to_utf8($n);
    }
    return $out;
}
/**
 * Transcodes text typed with a legacy font encoding into Unicode syllabics.
 *
 * The text is read one UTF-8 character at a time. Each character is looked up by its
 * numeric code (utf8::utf8_to_numeric) and the resulting code is pushed on an output
 * stack; the stack is finally rendered with utf8::numeric_to_utf8. Four kinds of input
 * characters are recognised:
 *
 *  - "dotted" syllabics: a plain mapping, except that when the previous character was a
 *    dot mark, the dot mark is removed from the stack and the code is incremented by one;
 *  - "contextual" syllabics: may additionally merge with a final already on the stack
 *    (r/q + k-series, ng/nng + g-series, n/ng + ng-series, q + r-series) and, for the
 *    a-column, may absorb a following 'w';
 *  - "plain" characters: a direct mapping that neither reads nor changes the dot state;
 *  - dot marks: copied to the stack and remembered for the next character.
 *
 * Any other character is copied through unchanged and clears the dot state.
 *
 * Fixes compared with the previous if/elseif implementation:
 *  - most a-column branches tested the look-ahead character against ord('w') (an
 *    integer), which never matched; all of them now compare with the string 'w', as
 *    the a/pa/ta/nga branches already did;
 *  - for "ga", merging with a preceding ng/nng was only applied when "ga" was the very
 *    last character of the text; it is now applied like for gi/gu;
 *  - the encoding is passed explicitly to the mb_* functions.
 *
 * The original chain also mapped '=' to 0x155d further down, but that branch could
 * never be reached because '=' is matched earlier as a dotted syllabic (0x1555);
 * the effective (first) mapping is kept.
 *
 * @param string $text     Text in the legacy encoding (UTF-8 string).
 * @param mixed  $aipaitai When truthy, an a-column syllabic followed by 'w' becomes the
 *                         single ai-column character; otherwise the a-column character
 *                         followed by i (0x1403) is emitted.
 * @return string The transcoded UTF-8 text.
 */
public static function legacyToUnicode($text, $aipaitai)
{
    // Finals the contextual rules look for on top of the output stack.
    $finalN   = 0x14d0;
    $finalR   = 0x1550;
    $finalQ   = 0x1585;
    $finalNg  = 0x1595;
    $finalNng = 0x1596;

    // Syllabics with no contextual rule; a preceding dot mark turns base into base + 1.
    $dotted = array(
        ord('w') => 0x1403, ord('W') => 0x1431, ord('t') => 0x144e, ord('u') => 0x14a5,
        ord('i') => 0x14c2, ord('y') => 0x14ef, ord('o') => 0x14d5, ord('p') => 0x1528,
        ord('=') => 0x1555, ord('O') => 0x15a0,
        ord('s') => 0x1405, ord('S') => 0x1433, ord('g') => 0x1450, ord('j') => 0x14a7,
        ord('k') => 0x14c4, ord('h') => 0x14f1, ord('l') => 0x14d7, ord('J') => 0x152a,
        ord('K') => 0x1557, ord('L') => 0x15a2, ord('I') => 0x15a4,
    );

    // Contextual syllabics.
    // Row: array(base, reacts to dot mark, rule, alternate, second alternate, absorbs a following 'w').
    // Missing trailing fields are filled from $rowDefaults.
    $rowDefaults = array(0, false, '', 0, 0, false);
    $contextual = array(
        // k-series: a preceding r turns k into q; r+r or q+r before k collapses to q + k.
        ord('r') => array(0x146d, true, 'k', 0x157f),               // ki
        ord('f') => array(0x146f, true, 'k', 0x1581),               // ku
        ord('v') => array(0x1472, true, 'k', 0x1583, 0, true),      // ka

        // g-series: a preceding ng gives ng_, a preceding nng gives nng_.
        ord('Q') => array(0x148b, true, 'g', 0x158f, 0x1671),       // gi
        ord('A') => array(0x148d, true, 'g', 0x1591, 0x1673),       // gu
        ord('Z') => array(0x1490, true, 'g', 0x1593, 0x1675, true), // ga

        // q-series: after r or q, the final becomes q and the syllabic becomes k_.
        ord('e') => array(0x157f, true, 'q', 0x146d),               // qi
        ord('d') => array(0x1581, true, 'q', 0x146f),               // qu
        ord('c') => array(0x1583, true, 'q', 0x1472, 0, true),      // qa

        // ng-series: a preceding n or ng is absorbed, giving nng_.
        ord('q') => array(0x158f, true, 'ng', 0x1671),              // ngi
        ord('a') => array(0x1591, true, 'ng', 0x1673),              // ngu
        ord('z') => array(0x1593, true, 'ng', 0x1675, 0, true),     // nga
        ord('1') => array(0x1595, false, 'ng', 0x1596),             // ng (final)

        // r-series: a preceding final q is turned into final r.
        ord('E') => array(0x1546, true, 'r'),                       // ri
        ord('D') => array(0x1548, true, 'r'),                       // ru
        ord('C') => array(0x154b, true, 'r', 0, 0, true),           // ra

        // Remaining a-column syllabics: no merge rule, only the 'w' look-ahead.
        ord('x') => array(0x140a, true, '', 0, 0, true),            // a
        ord('X') => array(0x1438, true, '', 0, 0, true),            // pa
        ord('b') => array(0x1455, true, '', 0, 0, true),            // ta
        ord('m') => array(0x14aa, true, '', 0, 0, true),            // ma
        ord('N') => array(0x14c7, true, '', 0, 0, true),            // na
        ord('n') => array(0x14f4, true, '', 0, 0, true),            // sa
        ord('M') => array(0x14da, true, '', 0, 0, true),            // la
        ord('/') => array(0x152d, true, '', 0, 0, true),            // ja
        ord('?') => array(0x1559, true, '', 0, 0, true),            // va
    );

    // a-column code => ai-column code, used when $aipaitai is truthy.
    // nnga (0x1675) is handled separately: it becomes final nng + gai.
    $aiColumn = array(
        0x140a => 0x1401, 0x1438 => 0x142f, 0x1455 => 0x144c, 0x1472 => 0x146b,
        0x1583 => 0x166f, 0x1490 => 0x1489, 0x1593 => 0x1670, 0x14aa => 0x14a3,
        0x14c7 => 0x14c0, 0x14f4 => 0x14ed, 0x14da => 0x14d3, 0x152d => 0x1526,
        0x1559 => 0x1553, 0x154b => 0x1542,
    );

    // Direct mappings. These leave the dot state untouched, exactly as the original chain did.
    // NOTE(review): a dot mark followed by one of these stays pending for the next syllabic — confirm this is intended.
    $plain = array(
        // Finals.
        ord('2') => 0x1449, ord('5') => 0x1466, ord('4') => 0x1483, ord('[') => 0x14a1,
        ord('7') => 0x14bb, ord('8') => 0x14d0, ord('{') => 0x1505, ord('9') => 0x14ea,
        ord('0') => 0x153e, ord('3') => 0x1550, ord('6') => 0x1585, ord('P') => 0x15a6,
        ord('B') => 0x157c,
        // Digits and punctuation relocated by the legacy layout.
        ord('!') => ord('1'), ord('@') => ord('2'), ord('#') => ord('3'), ord('$') => ord('4'),
        ord('%') => ord('5'), ord('^') => ord('6'), ord('&') => ord('7'), ord('*') => ord('8'),
        ord('(') => ord('9'), ord(')') => ord('0'), ord('+') => ord('='), ord('-') => ord('%'),
        ord('F') => ord('/'), ord('G') => ord('('), ord('H') => ord(')'), ord('R') => ord('$'),
        ord('T') => ord('+'), ord('U') => ord('!'), ord('V') => ord('?'), ord('Y') => ord('_'),
        ord('\\') => ord('}'), ord('_') => ord('-'), ord('|') => ord('{'),
        // Typographic quotes become their ASCII counterparts.
        0x2018 => ord('\''), 0x2019 => ord('\''), 0x201c => ord('"'), 0x201d => ord('"'),
    );

    // Characters that act as a dot mark for the following syllabic.
    $dotMarks = array(
        ord('<') => true, ord('>') => true, ord('`') => true,
        ord(']') => true, ord('~') => true,
    );

    $cs = array();   // output stack of numeric codes
    $dot = FALSE;    // TRUE while a dot mark is pending on top of $cs
    $l = mb_strlen($text, 'UTF-8');
    $i = 0;

    while ($i < $l) {
        $c = mb_substr($text, $i, 1, 'UTF-8');
        $nc = utf8::utf8_to_numeric($c);

        if (isset($dotted[$nc])) {
            $d = $dotted[$nc];
            if ($dot) {
                $dot = FALSE;
                array_pop($cs);   // remove the dot mark itself
                $d++;
            }
        } elseif (isset($contextual[$nc])) {
            list($d, $dottable, $rule, $alt, $alt2, $lookahead) = $contextual[$nc] + $rowDefaults;

            $lengthen = $dottable && $dot;
            if ($lengthen) {
                array_pop($cs);   // remove the dot mark before inspecting the stack
            }

            $n = count($cs);
            $top = $n > 0 ? $cs[$n - 1] : null;

            switch ($rule) {
                case 'k':
                    if ($top == $finalR) {
                        $below = $n > 1 ? $cs[$n - 2] : null;
                        array_pop($cs);
                        if ($below == $finalR) {
                            // r + r + k_  =>  q + k_
                            array_pop($cs);
                            array_push($cs, $finalQ);
                        } elseif ($below != $finalQ) {
                            // r + k_  =>  q_   (q + r + k_ just drops the r)
                            $d = $alt;
                        }
                    }
                    break;

                case 'g':
                    if ($top == $finalNg) {
                        array_pop($cs);
                        $d = $alt;
                    } elseif ($top == $finalNng) {
                        array_pop($cs);
                        $d = $alt2;
                    }
                    break;

                case 'q':
                    if ($top == $finalR || $top == $finalQ) {
                        // r + q_  =>  q + k_
                        array_pop($cs);
                        array_push($cs, $finalQ);
                        $d = $alt;
                    }
                    break;

                case 'ng':
                    if ($top == $finalN || $top == $finalNg) {
                        // n or ng + ng_  =>  nng_
                        array_pop($cs);
                        $d = $alt;
                    }
                    break;

                case 'r':
                    if ($top == $finalQ) {
                        // q + r_  =>  r + r_
                        array_pop($cs);
                        array_push($cs, $finalR);
                    }
                    break;
            }

            if ($lookahead) {
                $j = $i + 1;
                // Compare with the character 'w', not with ord('w').
                if ($j < $l && mb_substr($text, $j, 1, 'UTF-8') === 'w') {
                    if (!$aipaitai) {
                        // a-column syllabic followed by a separate i
                        array_push($cs, $d);
                        $d = 0x1403;
                    } elseif ($d == 0x1675) {
                        // nnga + i  =>  final nng + gai
                        array_push($cs, $finalNng);
                        $d = 0x1489;
                    } else {
                        $d = $aiColumn[$d];
                    }
                    $i = $j;   // the 'w' has been consumed
                }
            }

            if ($lengthen) {
                $dot = FALSE;
                $d++;
            }
        } elseif (isset($plain[$nc])) {
            $d = $plain[$nc];
        } elseif (isset($dotMarks[$nc])) {
            $d = ord($c);
            $dot = TRUE;
        } else {
            $dot = FALSE;
            $d = $nc;
        }

        $i++;
        array_push($cs, $d);
    }

    $out = '';
    foreach ($cs as $n) {
        $out .= utf8::numeric_to_utf8($n);
    }
    return $out;
}
/**
 * Renders every character of $text as a "\uXXXX" escape, where XXXX is the value
 * returned by utf8::utf8_to_numeric() for that character, in lowercase hexadecimal
 * padded to at least four digits.
 *
 * @param string $text Text to escape.
 * @return string Concatenation of the escapes, or '' for empty input.
 */
function unicodeToSlashUUnicodeString($text)
{
    $escapes = array();
    $length = mb_strlen($text);

    for ($pos = 0; $pos < $length; $pos++) {
        $code = utf8::utf8_to_numeric(mb_substr($text, $pos, 1));
        $escapes[] = sprintf("\\u%04x", $code);
    }

    return implode('', $escapes);
}