/**
 * Produce the NFKC form (compatibility composition) of a UTF-8 string.
 *
 * Compatibility normalization may discard distinctions between characters
 * irreversibly, so apply it judiciously.
 *
 * The backend is picked in this order:
 *  - if NORMALIZE_INTL is truthy, normalizer_normalize() is used;
 *  - otherwise, if NORMALIZE_ICU is truthy, utf8_normalize() is used;
 *  - otherwise UtfNormal::NFKC() is used, but only when the string contains
 *    at least one byte in the 0x80-0xFF range; a string without such bytes
 *    is handed back untouched (the fast path for pure ASCII).
 *
 * @param $string String: a valid UTF-8 string. Input is not validated.
 * @return string a UTF-8 string in normal form KC
 */
static function toNFKC($string) {
    // Extension-backed implementations take priority when they are enabled.
    if (NORMALIZE_INTL) {
        return normalizer_normalize($string, Normalizer::FORM_KC);
    }

    if (NORMALIZE_ICU) {
        return utf8_normalize($string, UNORM_NFKC);
    }

    // Fallback path: only strings carrying high-bit bytes need normalizing.
    $hasHighBitBytes = preg_match('/[\\x80-\\xff]/', $string);

    return $hasHighBitBytes ? UtfNormal::NFKC($string) : $string;
}
$replaced = false; foreach ($translation as $symbol => $character) { $sym_pos = strpos($buffer, $symbol); if ($sym_pos !== false) { $sym_length = strlen($symbol); $piece1 = substr($buffer, 0, $sym_pos); if ($character['switch']) { // the character after the special charater needs to come before it $partnerchar = utf8_encode($buffer[$sym_pos + $sym_length]); $piece2 = unicode_to_utf8(array_merge(utf8_to_unicode($partnerchar), $character['unicode'])); $piece3start = $sym_pos + $sym_length + 1; } else { $piece2 = unicode_to_utf8($character['unicode']); $piece3start = $sym_pos + $sym_length; } $piece2 = utf8_decode(UtfNormal::NFKC($piece2)); // strip out any ? characters, which are characters not existing in ISO-8859-1 $piece2 = str_replace('?', '', $piece2); $piece3 = substr($buffer, $piece3start); $buffer = $piece1 . $piece2 . $piece3; $replaced = true; continue; } } if (!$replaced) { // we've encountered some character that we have no translation for echo "unable to find a translation to transform this buffer, the untranslatable code will be stripped out:\n{$buffer}\n"; $pieces = preg_split('/\\$\\d*/', $buffer, 2); $buffer = implode('', $pieces); } }