echo "\n\nTesting for invariants...\n\n";

/**
 * Invariant test: read develop/UnicodeData.txt line by line, take the first
 * ';'-separated field of each line as a hex code point, and run the matching
 * UTF-8 string through every normalization form. Any result differing from
 * the expected string is flagged as a failure.
 */
$fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt');
if ($fp === false) {
    // feof() never reports EOF on an invalid handle, so the loop below would
    // spin forever (or throw on PHP 8). Stop right here instead.
    die('Cannot open ' . $phpbb_root_path . "develop/UnicodeData.txt\n");
}

$n = 0;
while (!feof($fp)) {
    // Progress indicator: print the iteration count every 100 iterations
    if (++$n % 100 == 0) {
        echo $n, ' ';
    }

    $line = fgets($fp, 1024);
    if ($line === false) {
        // End of file, or a read error that would otherwise never set EOF
        break;
    }

    // Skip lines that have no ';' separator or whose first field is empty
    $pos = strpos($line, ';');
    if ($pos === false || $pos === 0) {
        continue;
    }

    $hex_tested = $hex_expected = substr($line, 0, $pos);

    // Entries already present in $tested_chars are not tested again
    if (isset($tested_chars[$hex_tested])) {
        continue;
    }

    $utf_expected = hex_to_utf($hex_expected);

    if ($utf_expected >= UTF8_SURROGATE_FIRST && $utf_expected <= UTF8_SURROGATE_LAST) {
        /**
         * Surrogates are illegal on their own, we expect the normalizer
         * to return a replacement char
         */
        $utf_expected = UTF8_REPLACEMENT;
        $hex_expected = utf_to_hexseq($utf_expected);
    }

    foreach (array('nfc', 'nfkc', 'nfd', 'nfkd') as $form) {
        // Work on a copy: the result is read back from $utf_result after the
        // call, so the normalizer presumably modifies its argument in place
        $utf_result = $utf_expected;
        utf_normalizer::$form($utf_result);
        $hex_result = utf_to_hexseq($utf_result);

        // echo "$form($utf_expected) == $utf_result\n";

        // strcmp() is non-zero when the normalized string differs
        if (strcmp($utf_expected, $utf_result)) {
            $failed = 1;
$map_to_hex = isset($m[13][0]) ? $m[13] : $m[0];

if (preg_match('#^LATIN.*(?:LETTER|LIGATURE) ([A-Z]{2}(?![A-Z]))$#', $m[1], $capture)) {
    /**
     * Latin ligature workaround: when the name ends in exactly two
     * capital letters after LETTER/LIGATURE, map the character to those
     * two letters in lowercase. Relying on the character name is bad
     * practice, but it works well enough for now.
     *
     * @todo Ligatures carrying combining marks, such as U+01E2, are not
     *       supported at this time
     */
    $map[$cp] = strtolower($capture[1]);
} elseif (isset($m[13][0])) {
    /**
     * The letter has a lowercased form (non-empty field 13): use it
     */
    $map[$cp] = hex_to_utf($m[13]);
} else {
    /**
     * Every other letter is mapped to itself
     */
    $map[$cp] = $utf_char;
}
break;

case 'M':
    /**
     * All marks are allowed and map to themselves
     */
    $map[$cp] = $utf_char;
    break;

case 'N':