/**
 * Encode one Unicode code point as a UTF-8 string.
 *
 * Backward-compatibility shim: the conversion itself is performed by
 * Utils::codepointToUtf8(), and its result is returned unchanged.
 *
 * @param int $codepoint Code point to encode
 * @return string UTF-8 sequence for the code point
 * @throws InvalidArgumentException if fed out of range data.
 * @public
 * @deprecated since 1.25, use UtfNormal\Utils directly
 */
function codepointToUtf8($codepoint) {
	$utf8Sequence = Utils::codepointToUtf8($codepoint);

	return $utf8Sequence;
}
# Obtain a readable handle on the Unicode character database.
# NOTE(review): getFilePointer() is defined outside this section; presumably it
# opens the local file and uses the URL as a fallback source -- confirm.
$in = getFilePointer(__DIR__ . "/data/UnicodeData.txt", 'http://www.unicode.org/Public/UNIDATA/UnicodeData.txt');

# Lookup tables and counters. In the part of the loop visible in this section
# only $combiningClass, $compat and $canon are written; the others are
# initialised here for use further on.
$compatibilityDecomp = array();
$canonicalDecomp = array();
$canonicalComp = array();
$combiningClass = array();
$total = 0;
$compat = 0;
$canon = 0;

print "Reading character definitions...\n";

# Process the file one line at a time until fgets() reports end of input.
while (false !== ($line = fgets($in))) {
	# Each record is a list of ';'-separated fields; fields 0, 1, 3 and 5 are
	# the ones read here.
	$columns = explode(';', $line);
	$codepoint = $columns[0];
	$name = $columns[1];
	$canonicalCombiningClass = $columns[3];
	$decompositionMapping = $columns[5];

	# The code point field is hexadecimal text; convert it to the UTF-8
	# string that is used as the key of the lookup tables.
	$source = Utils::codepointToUtf8(hexdec($codepoint));

	# Only characters whose combining class field is not (loosely) equal to 0
	# get an entry, stored as an integer.
	if ($canonicalCombiningClass != 0) {
		$combiningClass[$source] = intval($canonicalCombiningClass);
	}

	# No decomposition for this character: nothing more to do for this line.
	if ($decompositionMapping === '') {
		continue;
	}

	# A mapping of the form "<tag> rest" is counted as a compatibility
	# decomposition and reduced to "rest"; any other non-empty mapping is
	# counted as canonical.
	if (preg_match('/^<(.+)> (.*)$/', $decompositionMapping, $matches)) {
		# Compatibility decomposition
		$canonical = false;
		$decompositionMapping = $matches[2];
		$compat++;
	} else {
		$canonical = true;
		$canon++;
	}