示例#1
0
 /**
  * Normalize a UTF-8 string to Unicode normal form KC (compatibility composition).
  *
  * Compatibility normalization can discard distinctions between characters and
  * is therefore not reversible; use it only where that loss is acceptable.
  *
  * The backend is selected in this order:
  *  - NORMALIZE_INTL truthy: normalizer_normalize() with Normalizer::FORM_KC.
  *  - NORMALIZE_ICU truthy: utf8_normalize() with UNORM_NFKC.
  *  - otherwise: UtfNormal::NFKC(), skipped entirely when the string contains
  *    no byte in the range 0x80-0xFF (pure ASCII is returned as-is).
  *
  * Note that the pure-ASCII shortcut only applies to the last path; the two
  * extension-backed paths always hand the string to the extension.
  *
  * @param $string String: a valid UTF-8 string. Input is not validated.
  * @return string a UTF-8 string in normal form KC
  */
 static function toNFKC($string)
 {
     // Prefer the intl-backed implementation when it has been enabled.
     if (NORMALIZE_INTL) {
         return normalizer_normalize($string, Normalizer::FORM_KC);
     }

     // Next choice: the ICU-backed utf8_normalize() implementation.
     if (NORMALIZE_ICU) {
         return utf8_normalize($string, UNORM_NFKC);
     }

     // Fallback: only strings with at least one high byte need the
     // (comparatively slow) UtfNormal::NFKC() implementation.
     $hasHighBytes = preg_match('/[\\x80-\\xff]/', $string);

     return $hasHighBytes ? UtfNormal::NFKC($string) : $string;
 }
示例#2
0
     $replaced = false;
     foreach ($translation as $symbol => $character) {
         $sym_pos = strpos($buffer, $symbol);
         if ($sym_pos !== false) {
             $sym_length = strlen($symbol);
             $piece1 = substr($buffer, 0, $sym_pos);
             if ($character['switch']) {
                 // the character after the special character needs to come before it
                 $partnerchar = utf8_encode($buffer[$sym_pos + $sym_length]);
                 $piece2 = unicode_to_utf8(array_merge(utf8_to_unicode($partnerchar), $character['unicode']));
                 $piece3start = $sym_pos + $sym_length + 1;
             } else {
                 $piece2 = unicode_to_utf8($character['unicode']);
                 $piece3start = $sym_pos + $sym_length;
             }
             $piece2 = utf8_decode(UtfNormal::NFKC($piece2));
             // strip out any ? characters, which are characters not existing in ISO-8859-1
             $piece2 = str_replace('?', '', $piece2);
             $piece3 = substr($buffer, $piece3start);
             $buffer = $piece1 . $piece2 . $piece3;
             $replaced = true;
             continue;
         }
     }
     if (!$replaced) {
         // we've encountered some character that we have no translation for
         echo "unable to find a translation to transform this buffer, the untranslatable code will be stripped out:\n{$buffer}\n";
         $pieces = preg_split('/\\$\\d*/', $buffer, 2);
         $buffer = implode('', $pieces);
     }
 }