if ($k < 0x800) { $retval = pack('C2', 0xc0 | $k >> 6, 0x80 | $k & 0x3f); } else { if ($k < 0x10000) { $retval = pack('C3', 0xe0 | $k >> 12, 0x80 | $k >> 6 & 0x3f, 0x80 | $k & 0x3f); } else { $retval = pack('C4', 0xf0 | $k >> 18, 0x80 | $k >> 12 & 0x3f, 0x80 | $k >> 6 & 0x3f, 0x80 | $k & 0x3f); } } } return $retval; }

/*
 * Pass 1: single code points.
 *
 * Walk every code point below 0x1dfff, encode it with utf32_utf8(), run the
 * result through htmlentities() with ENT_QUOTES | ENT_HTML5, and print
 * "<output>\tU+XXXXX" for each code point whose output differs from its
 * input.
 *
 * Only block comments are used in this script so that a comment can never
 * swallow the code that follows it if line breaks are lost again.
 */
for ($i = 0; $i < 0x1dfff; $i++) {
    /* surrogates */
    if ($i >= 0xd800 && $i < 0xe000) {
        continue;
    }

    $str = utf32_utf8($i);
    $result = htmlentities($str, ENT_QUOTES | ENT_HTML5, 'UTF-8');

    /* Both operands are strings; compare them strictly so no numeric-string
       juggling can ever take part in the decision. */
    if ($str !== $result) {
        printf("%s\tU+%05X\n", $result, $i);
    }
}

/* multicodepoint entities */
$mpcent = [
    [0x3c, 0x20d2],
    [0x3d, 0x20e5],
    [0x3e, 0x20d2],
    [0x66, 0x6a],
    [0x205f, 0x200a],
    [0x219d, 0x338],
    [0x2202, 0x338],
    [0x2220, 0x20d2],
    [0x2229, 0xfe00],
    [0x222a, 0xfe00],
    [0x223c, 0x20d2],
    [0x223d, 0x331],
    [0x223e, 0x333],
    [0x2242, 0x338],
    [0x224b, 0x338],
    [0x224d, 0x20d2],
    [0x224e, 0x338],
    [0x224f, 0x338],
    [0x2250, 0x338],
    [0x2261, 0x20e5],
    [0x2264, 0x20d2],
    [0x2265, 0x20d2],
    [0x2266, 0x338],
    [0x2267, 0x338],
    [0x2268, 0xfe00],
    [0x2269, 0xfe00],
    [0x226a, 0x338],
    [0x226a, 0x20d2],
    [0x226b, 0x338],
    [0x226b, 0x20d2],
    [0x227f, 0x338],
    [0x2282, 0x20d2],
    [0x2283, 0x20d2],
    [0x228a, 0xfe00],
    [0x228b, 0xfe00],
    [0x228f, 0x338],
    [0x2290, 0x338],
    [0x2293, 0xfe00],
    [0x2294, 0xfe00],
    [0x22b4, 0x20d2],
    [0x22b5, 0x20d2],
    [0x22d8, 0x338],
    [0x22d9, 0x338],
    [0x22da, 0xfe00],
    [0x22db, 0xfe00],
    [0x22f5, 0x338],
    [0x22f9, 0x338],
    [0x2933, 0x338],
    [0x29cf, 0x338],
    [0x29d0, 0x338],
    [0x2a6d, 0x338],
    [0x2a70, 0x338],
    [0x2a7d, 0x338],
    [0x2a7e, 0x338],
    [0x2aa1, 0x338],
    [0x2aa2, 0x338],
    [0x2aac, 0xfe00],
    [0x2aad, 0xfe00],
    [0x2aaf, 0x338],
    [0x2ab0, 0x338],
    [0x2ac5, 0x338],
    [0x2ac6, 0x338],
    [0x2acb, 0xfe00],
    [0x2acc, 0xfe00],
    /* U+2AFD combines with U+20E5 (&nparsl;) in the HTML5 named character
       reference list; the former second code point 0xfe00 did not form a
       listed pair. */
    [0x2afd, 0x20e5],
];

/*
 * Pass 2: two-code-point sequences.
 *
 * Encode both code points of each pair, concatenate them, and print the
 * htmlentities() output followed by the two code points. Unlike pass 1 the
 * line is printed unconditionally.
 */
foreach ($mpcent as $pair) {
    list($first, $second) = $pair;

    $str = utf32_utf8($first) . utf32_utf8($second);
    $result = htmlentities($str, ENT_QUOTES | ENT_HTML5, 'UTF-8');

    printf("%s\tU+%05X U+%05X\n", $result, $first, $second);
}
$retval = pack('C3', 0xe0 | $k >> 12, 0x80 | $k >> 6 & 0x3f, 0x80 | $k & 0x3f); } else { if ($k < 0x200000) { $retval = pack('C4', 0xf0 | $k >> 18, 0x80 | $k >> 12 & 0x3f, 0x80 | $k >> 6 & 0x3f, 0x80 | $k & 0x3f); } else { if ($k < 0x4000000) { $retval = pack('C5', 0xf8 | $k >> 24, 0x80 | $k >> 18 & 0x3f, 0x80 | $k >> 12 & 0x3f, 0x80 | $k >> 6 & 0x3f, 0x80 | $k & 0x3f); } else { $retval = pack('C6', 0xfc | $k >> 30, 0x80 | $k >> 24 & 0x3f, 0x80 | $k >> 18 & 0x3f, 0x80 | $k >> 12 & 0x3f, 0x80 | $k >> 6 & 0x3f, 0x80 | $k & 0x3f); } } } } } return $retval; }

/*
 * Dump the HTML_ENTITIES translation table (ENT_QUOTES, UTF-8) ordered by
 * code point.
 *
 * Every code point below 0x2710 is encoded with utf32_utf8() and looked up
 * as a key of the table. A hit prints "<entity>\tU+XXXXX" and removes the
 * key, so whatever is still in the table afterwards was never matched and is
 * reported at the end.
 */
$table = get_html_translation_table(HTML_ENTITIES, ENT_QUOTES, 'UTF-8');

for ($codePoint = 0; $codePoint < 0x2710; $codePoint++) {
    /* Surrogate guard. With the current upper bound (0x2710 < 0xd800) it
       never fires; it only matters if the bound is raised. */
    $isSurrogate = $codePoint >= 0xd800 && $codePoint < 0xe000;
    if ($isSurrogate) {
        continue;
    }

    $char = utf32_utf8($codePoint);
    if (!isset($table[$char])) {
        continue;
    }

    printf("%s\tU+%05X\n", $table[$char], $codePoint);
    unset($table[$char]);
}

/* Anything left over is a table key that no scanned code point produced. */
if (count($table) > 0) {
    echo "Not matched entities: ";
    var_dump($table);
}