コード例 #1
0
ファイル: UtfNormal.php プロジェクト: GodelDesign/Godel
 /**
  * Normalize a UTF-8 string to Unicode normal form C (canonical composition).
  *
  * Three strategies are tried in order:
  *  1. if NORMALIZE_INTL is truthy, delegate to normalizer_normalize();
  *  2. otherwise, if NORMALIZE_ICU is truthy, delegate to utf8_normalize();
  *  3. otherwise use the UtfNormal implementation, returning the input
  *     untouched when UtfNormal::quickIsNFC() accepts it.
  *
  * @param $string String: a valid UTF-8 string. Input is not validated.
  * @return string a UTF-8 string in normal form C
  */
 static function toNFC($string)
 {
     // An extension-backed normalizer takes precedence when one is flagged.
     if (NORMALIZE_INTL) {
         return normalizer_normalize($string, Normalizer::FORM_C);
     }

     if (NORMALIZE_ICU) {
         return utf8_normalize($string, UNORM_NFC);
     }

     // UtfNormal fallback: skip the full pass when the quick check accepts the string.
     return UtfNormal::quickIsNFC($string)
         ? $string
         : UtfNormal::NFC($string);
 }
コード例 #2
0
 /**
  * Sweep three-byte sequences through UtfNormal::cleanUp() and verify the output.
  *
  * Every first byte in 0xc0..0xff is combined with every second byte in
  * 0x80..0xff and a third byte of 0x80, then wrapped between $head and $tail.
  * Depending on the byte values, the cleaned string is expected to equal either
  * the NFC form of the input, or $head and $tail around one, two or three
  * UTF8_REPLACEMENT markers (with a surviving two-byte character where one of
  * the byte pairs passes the two-byte range checks below).
  *
  * @param string $head Bytes placed before the generated triplet.
  * @param string $tail Bytes placed after the generated triplet.
  */
 function doTestTripleBytes($head, $tail)
 {
     // Expected hex dumps that do not depend on the bytes under test.
     $oneMarker = bin2hex($head . UTF8_REPLACEMENT . $tail);
     $twoMarkers = bin2hex($head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail);
     $threeMarkers = bin2hex($head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail);

     for ($first = 0xc0; $first <= 0xff; $first++) {
         for ($second = 0x80; $second <= 0xff; $second++) {
             // Only 0x80 is exercised as the third byte; the full 0x80..0xff
             // sweep was commented out in favour of this single value.
             for ($third = 0x80; $third <= 0x80; $third++) {
                 $triplet = chr($first) . chr($second) . chr($third);
                 $char = $head . $triplet . $tail;
                 $actual = bin2hex(UtfNormal::cleanUp($char));
                 $x = sprintf("%02X,%02X,%02X", $first, $second, $third);

                 // True when both trailing bytes are below 0xc0.
                 $trailingBelowC0 = $second < 0xc0 && $third < 0xc0;

                 // The branch order below is significant: the first matching
                 // rule decides the expected output.
                 if ($first >= 0xe0 && $first < 0xf0 && $trailingBelowC0) {
                     if ($first == 0xe0 && $second < 0xa0) {
                         $expected = $oneMarker;
                         $message = "Overlong triplet {$x} should be rejected";
                     } elseif ($first == 0xed && $triplet >= UTF8_SURROGATE_FIRST) {
                         $expected = $oneMarker;
                         $message = "Surrogate triplet {$x} should be rejected";
                     } else {
                         $expected = bin2hex(UtfNormal::NFC($char));
                         $message = "Triplet {$x} should be intact";
                     }
                 } elseif ($first > 0xc1 && $first < 0xe0 && $second < 0xc0) {
                     // First two bytes form a two-byte character; the third byte is stray.
                     $expected = bin2hex(
                         UtfNormal::NFC($head . chr($first) . chr($second)) . UTF8_REPLACEMENT . $tail
                     );
                     $message = "Valid 2-byte {$x} + broken tail";
                 } elseif ($second > 0xc1 && $second < 0xe0 && $third < 0xc0) {
                     // First byte is stray; the last two bytes form a two-byte character.
                     $expected = bin2hex(
                         $head . UTF8_REPLACEMENT . UtfNormal::NFC(chr($second) . chr($third) . $tail)
                     );
                     $message = "Broken head + valid 2-byte {$x}";
                 } elseif (
                     ($first > 0xfd || $second > 0xfd)
                     && (
                         ($second > 0xbf && $third > 0xbf)
                         || $trailingBelowC0
                         || $second > 0xfd
                         || $third > 0xfd
                     )
                 ) {
                     // fe and ff are not legal head bytes -- expect three replacement chars
                     $expected = $threeMarkers;
                     $message = "Forbidden triplet {$x} should be rejected";
                 } elseif ($first > 0xc2 && $trailingBelowC0) {
                     $expected = $oneMarker;
                     $message = "Forbidden triplet {$x} should be rejected";
                 } else {
                     $expected = $twoMarkers;
                     $message = "Forbidden triplet {$x} should be rejected";
                 }

                 $this->assertEquals($expected, $actual, $message);
             }
         }
     }
 }