/**
 * Check whether a string consists entirely of valid UTF-8.
 *
 * The check is delegated to the mbstring extension when it is loaded;
 * otherwise the pure PHP validator, phutil_is_utf8_slowly(), is used.
 *
 * @param string String to validate; it may or may not be valid UTF-8.
 * @return bool True if the string is valid UTF-8.
 */
function phutil_is_utf8($string) {
  // Without mbstring there is no native check, so use the PHP fallback.
  if (!function_exists('mb_check_encoding')) {
    return phutil_is_utf8_slowly($string);
  }

  // The mbstring check is significantly faster than validating in PHP.
  return mb_check_encoding($string, 'UTF-8');
}
/**
 * Check UTF-8 validation and BMP-only detection against known byte
 * sequences.
 *
 * Every non-ASCII input is written with explicit "\xHH" byte escapes. Raw
 * invalid bytes do not survive editors or transcoding: once they are
 * replaced with U+FFFD, several cases end up sharing one array key (later
 * duplicates silently overwrite earlier ones), and because U+FFFD is itself
 * valid UTF-8 the "invalid" expectations would no longer hold.
 */
public function testUTF8BMP() {
  // Input bytes => array(
  //   expected result of phutil_is_utf8() and phutil_is_utf8_slowly(),
  //   expected result of phutil_is_utf8_with_only_bmp_characters(),
  //   label used in assertion messages).
  $tests = array(
    '' => array(true, true, pht('empty string')),
    'a' => array(true, true, 'a'),

    // "a" followed by two U+0360 combining marks.
    "a\xCD\xA0\xCD\xA0" => array(
      true,
      true,
      pht('%s with combining', 'a'),
    ),

    // U+2603.
    "\xE2\x98\x83" => array(true, true, pht('snowman')),

    // U+FFFF, the last character in the BMP.
    "\xEF\xBF\xBF" => array(true, true, 'U+FFFF'),

    // 0xC0 is not a valid continuation byte.
    "\xEF\xBF\xC0" => array(false, false, pht('Invalid, byte range.')),

    // Overlong (nonminimal) 4-byte encoding.
    "\xF0\x81\x80\x80" => array(
      false,
      false,
      pht('Nonminimal 4-byte character.'),
    ),

    // U+10000, the first character above the BMP.
    "\xF0\x90\x80\x80" => array(true, false, 'U+10000'),

    // U+1D11E, also above the BMP.
    "\xF0\x9D\x84\x9E" => array(true, false, 'gclef'),
    "musical \xF0\x9D\x84\x9E g-clef" => array(
      true,
      false,
      pht('gclef text'),
    ),

    // The same 4-byte character with its final byte missing.
    "\xF0\x9D\x84" => array(false, false, pht('Invalid, truncated.')),

    // Overlong (nonminimal) 3-byte encoding.
    "\xE0\x80\x80" => array(
      false,
      false,
      pht('Nonminimal 3-byte character.'),
    ),

    // Lead bytes without all of their continuation bytes.
    "\xCD" => array(false, false, pht('Partial 2-byte character.')),
    "\xE0\xA0" => array(false, false, pht('Partial BMP 0xE0 character.')),
    "\xE2\x98" => array(false, false, pht('Partial BMP cahracter.')),
  );

  foreach ($tests as $input => $test) {
    list($expect_utf8, $expect_bmp, $test_name) = $test;

    // Depending on what's installed on the system, this may use an
    // extension.
    $this->assertEqual(
      $expect_utf8,
      phutil_is_utf8($input),
      pht('is_utf(%s)', $test_name));

    // Also test this against the pure PHP implementation, explicitly.
    $this->assertEqual(
      $expect_utf8,
      phutil_is_utf8_slowly($input),
      pht('is_utf_slowly(%s)', $test_name));

    $this->assertEqual(
      $expect_bmp,
      phutil_is_utf8_with_only_bmp_characters($input),
      pht('is_utf_bmp(%s)', $test_name));
  }
}