if ($k < 0x800) {
            $retval = pack('C2', 0xc0 | $k >> 6, 0x80 | $k & 0x3f);
        } else {
            if ($k < 0x10000) {
                $retval = pack('C3', 0xe0 | $k >> 12, 0x80 | $k >> 6 & 0x3f, 0x80 | $k & 0x3f);
            } else {
                $retval = pack('C4', 0xf0 | $k >> 18, 0x80 | $k >> 12 & 0x3f, 0x80 | $k >> 6 & 0x3f, 0x80 | $k & 0x3f);
            }
        }
    }
    return $retval;
}
// Print every single code point that htmlentities() rewrites under HTML5
// rules, one per line as "<entity>\tU+XXXXX". The scan covers code points
// 0 up to (but not including) 0x1DFFF.
for ($i = 0; $i < 0x1dfff; $i++) {
    // Skip the surrogate range U+D800..U+DFFF.
    if ($i >= 0xd800 && $i < 0xe000) {
        continue;
    }
    // utf32_utf8() is defined elsewhere in this file; it is expected to
    // return the UTF-8 byte string for the code point (confirm against
    // its definition).
    $str = utf32_utf8($i);
    $result = htmlentities($str, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    // Both operands are strings: compare strictly so PHP never applies
    // numeric-string juggling when deciding whether the text was changed.
    if ($str !== $result) {
        printf("%s\tU+%05X\n", $result, $i);
    }
}
/*
 * Multi-code-point entities: each row is a pair of code points that is
 * encoded back to back and passed through htmlentities() as one string.
 */
$mpcent = [
    [0x3c, 0x20d2], [0x3d, 0x20e5], [0x3e, 0x20d2], [0x66, 0x6a],
    [0x205f, 0x200a], [0x219d, 0x338], [0x2202, 0x338], [0x2220, 0x20d2],
    [0x2229, 0xfe00], [0x222a, 0xfe00], [0x223c, 0x20d2], [0x223d, 0x331],
    [0x223e, 0x333], [0x2242, 0x338], [0x224b, 0x338], [0x224d, 0x20d2],
    [0x224e, 0x338], [0x224f, 0x338], [0x2250, 0x338], [0x2261, 0x20e5],
    [0x2264, 0x20d2], [0x2265, 0x20d2], [0x2266, 0x338], [0x2267, 0x338],
    [0x2268, 0xfe00], [0x2269, 0xfe00], [0x226a, 0x338], [0x226a, 0x20d2],
    [0x226b, 0x338], [0x226b, 0x20d2], [0x227f, 0x338], [0x2282, 0x20d2],
    [0x2283, 0x20d2], [0x228a, 0xfe00], [0x228b, 0xfe00], [0x228f, 0x338],
    [0x2290, 0x338], [0x2293, 0xfe00], [0x2294, 0xfe00], [0x22b4, 0x20d2],
    [0x22b5, 0x20d2], [0x22d8, 0x338], [0x22d9, 0x338], [0x22da, 0xfe00],
    [0x22db, 0xfe00], [0x22f5, 0x338], [0x22f9, 0x338], [0x2933, 0x338],
    [0x29cf, 0x338], [0x29d0, 0x338], [0x2a6d, 0x338], [0x2a70, 0x338],
    [0x2a7d, 0x338], [0x2a7e, 0x338], [0x2aa1, 0x338], [0x2aa2, 0x338],
    [0x2aac, 0xfe00], [0x2aad, 0xfe00], [0x2aaf, 0x338], [0x2ab0, 0x338],
    [0x2ac5, 0x338], [0x2ac6, 0x338], [0x2acb, 0xfe00], [0x2acc, 0xfe00],
    [0x2afd, 0xfe00],
];
foreach ($mpcent as $i) {
    // Build the two-code-point string in one expression (first code point,
    // then second), convert it, and always print the result with both
    // code points for reference.
    $str = utf32_utf8($i[0]) . utf32_utf8($i[1]);
    $result = htmlentities($str, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    printf("%s\tU+%05X U+%05X\n", $result, $i[0], $i[1]);
}
Exemple #2
0
                $retval = pack('C3', 0xe0 | $k >> 12, 0x80 | $k >> 6 & 0x3f, 0x80 | $k & 0x3f);
            } else {
                if ($k < 0x200000) {
                    $retval = pack('C4', 0xf0 | $k >> 18, 0x80 | $k >> 12 & 0x3f, 0x80 | $k >> 6 & 0x3f, 0x80 | $k & 0x3f);
                } else {
                    if ($k < 0x4000000) {
                        $retval = pack('C5', 0xf8 | $k >> 24, 0x80 | $k >> 18 & 0x3f, 0x80 | $k >> 12 & 0x3f, 0x80 | $k >> 6 & 0x3f, 0x80 | $k & 0x3f);
                    } else {
                        $retval = pack('C6', 0xfc | $k >> 30, 0x80 | $k >> 24 & 0x3f, 0x80 | $k >> 18 & 0x3f, 0x80 | $k >> 12 & 0x3f, 0x80 | $k >> 6 & 0x3f, 0x80 | $k & 0x3f);
                    }
                }
            }
        }
    }
    return $retval;
}
// Load PHP's entity translation table (HTML_ENTITIES, quotes included),
// then print each entry next to the code point that produces its key.
$table = get_html_translation_table(HTML_ENTITIES, ENT_QUOTES, 'UTF-8');

// Scan code points 0 .. 0x270F.
for ($i = 0; $i < 0x2710; ++$i) {
    // Only look up code points outside the surrogate range
    // U+D800..U+DFFF. With the current upper bound that range is never
    // reached, so this only matters if the bound is raised.
    if ($i < 0xd800 || $i >= 0xe000) {
        $str = utf32_utf8($i);
        if (isset($table[$str])) {
            printf("%s\tU+%05X\n", $table[$str], $i);
            // Drop matched entries so that only unmatched ones remain for
            // the report after the loop.
            unset($table[$str]);
        }
    }
}

// Whatever is still in the table was not matched by any scanned code point.
if (count($table) > 0) {
    echo 'Not matched entities: ';
    var_dump($table);
}