Beispiel #1
0
/**
 * Encode a single Unicode code point as its UTF-8 byte sequence.
 *
 * Backward-compatibility shim: the argument is forwarded unchanged to
 * Utils::codepointToUtf8() and that call's result is returned as-is.
 *
 * @param int $codepoint Unicode code point to encode
 * @return string UTF-8 sequence produced by Utils::codepointToUtf8()
 * @throws InvalidArgumentException if fed out of range data.
 * @public
 * @deprecated since 1.25, use UtfNormal\Utils directly
 */
function codepointToUtf8($codepoint)
{
    // No logic lives here; the real implementation is in Utils.
    $utf8Sequence = Utils::codepointToUtf8($codepoint);

    return $utf8Sequence;
}
Beispiel #2
0
// Obtain a readable handle on the Unicode character database. Both the local
// path and the upstream URL are handed to getFilePointer().
// NOTE(review): presumably the URL is used when the local copy is missing —
// confirm against getFilePointer().
$in = getFilePointer(
    __DIR__ . "/data/UnicodeData.txt",
    'http://www.unicode.org/Public/UNIDATA/UnicodeData.txt'
);

// Lookup tables populated while the data file is parsed.
$combiningClass = array();       // UTF-8 character => non-zero canonical combining class
$canonicalComp = array();
$canonicalDecomp = array();
$compatibilityDecomp = array();

// Running tallies: $compat / $canon count compatibility vs. canonical
// decomposition mappings seen so far.
$total = $compat = $canon = 0;

echo "Reading character definitions...\n";
while (false !== ($line = fgets($in))) {
    $columns = explode(';', $line);
    $codepoint = $columns[0];
    $name = $columns[1];
    $canonicalCombiningClass = $columns[3];
    $decompositionMapping = $columns[5];
    $source = Utils::codepointToUtf8(hexdec($codepoint));
    if ($canonicalCombiningClass != 0) {
        $combiningClass[$source] = intval($canonicalCombiningClass);
    }
    if ($decompositionMapping === '') {
        continue;
    }
    if (preg_match('/^<(.+)> (.*)$/', $decompositionMapping, $matches)) {
        # Compatibility decomposition
        $canonical = false;
        $decompositionMapping = $matches[2];
        $compat++;
    } else {
        $canonical = true;
        $canon++;
    }