/**
 * Invariant test (fragment: the loop body continues past this excerpt).
 *
 * Reads develop/UnicodeData.txt line by line. For every entry whose first
 * field is not already a key of $tested_chars, the corresponding UTF string
 * is passed through each of the four normalization forms and is expected to
 * come back unchanged; any difference sets $failed.
 */
echo "\n\nTesting for invariants...\n\n";
// NOTE(review): the fopen() result is not checked before it is handed to
// feof()/fgets() below.
$fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt');
// Number of loop iterations so far, used only for the progress output
$n = 0;
while (!feof($fp)) {
    // Progress indicator: print the running count every 100 iterations
    if (++$n % 100 == 0) {
        echo $n, ' ';
    }
    // fgets() with a length of 1024 returns at most 1023 bytes of the line
    $line = fgets($fp, 1024);
    // strpos() yields false when there is no ';' and 0 when the line starts
    // with one; both are falsy, so both kinds of line are skipped
    if (!($pos = strpos($line, ';'))) {
        continue;
    }
    // Everything before the first ';' is the first field of the entry
    // (presumably the code point in hex, given it is fed to hex_to_utf())
    $hex_tested = $hex_expected = substr($line, 0, $pos);
    // $tested_chars is filled outside this excerpt; entries it already
    // contains are not tested again here
    if (isset($tested_chars[$hex_tested])) {
        continue;
    }
    $utf_expected = hex_to_utf($hex_expected);
    if ($utf_expected >= UTF8_SURROGATE_FIRST && $utf_expected <= UTF8_SURROGATE_LAST) {
        /**
         * Surrogates are illegal on their own, we expect the normalizer
         * to return a replacement char
         */
        $utf_expected = UTF8_REPLACEMENT;
        $hex_expected = utf_to_hexseq($utf_expected);
    }
    foreach (array('nfc', 'nfkc', 'nfd', 'nfkd') as $form) {
        // Start from the expected string; the static utf_normalizer method
        // named by $form is called with it and its return value is ignored.
        // The result is read back from $utf_result, so the method presumably
        // takes its argument by reference - confirm against utf_normalizer.
        $utf_result = $utf_expected;
        utf_normalizer::$form($utf_result);
        $hex_result = utf_to_hexseq($utf_result);
        // Debugging aid, disabled:
        //		echo "$form($utf_expected) == $utf_result\n";
        // strcmp() is non-zero when the two strings differ, i.e. the
        // normalizer changed a string that should have been invariant
        if (strcmp($utf_expected, $utf_result)) {
            $failed = 1;
Example #2
0
     // Tail of a switch case whose label and subject are outside this excerpt.
     // $m[13] is used when it has at least one character, otherwise $m[0].
     // NOTE(review): $map_to_hex is not read anywhere in the visible lines;
     // check whether it is still used further down.
     $map_to_hex = isset($m[13][0]) ? $m[13] : $m[0];
     // Matches names that start with "LATIN" and end with "LETTER " or
     // "LIGATURE " followed by exactly two uppercase letters; those two
     // letters are captured in $capture[1]
     if (preg_match('#^LATIN.*(?:LETTER|LIGATURE) ([A-Z]{2}(?![A-Z]))$#', $m[1], $capture)) {
         /**
          * Special hack for some latin ligatures. Using the name of a character
          * is bad practice, but for now it works well enough.
          *
          * @todo Note that ligatures with combining marks such as U+01E2 are
          * not supported at this time
          */
         // The code point is mapped to the two captured letters in lowercase
         $map[$cp] = strtolower($capture[1]);
     } else {
         if (isset($m[13][0])) {
             /**
              * If the letter has a lowercased form, use it
              */
             $map[$cp] = hex_to_utf($m[13]);
         } else {
             /**
              * In all other cases, map the letter to itself
              */
             $map[$cp] = $utf_char;
         }
     }
     break;
 // The switch subject is outside this excerpt; 'M' is presumably the first
 // letter of the Unicode general category (marks) - confirm against the
 // switch header
 case 'M':
     /**
      * We allow all marks, they are mapped to themselves
      */
     $map[$cp] = $utf_char;
     break;
 case 'N':