Example #1
0
/**
 * Check whether a string consists entirely of well-formed UTF-8.
 *
 * Uses the mbstring extension when it is loaded and otherwise defers to the
 * pure PHP validator.
 *
 * @param string  String which may or may not be valid UTF-8.
 * @return bool   True when the string is valid UTF-8.
 */
function phutil_is_utf8($string)
{
    // Without mbstring, the pure PHP implementation is the only option.
    if (!function_exists('mb_check_encoding')) {
        return phutil_is_utf8_slowly($string);
    }

    // The extension is significantly faster than validating in PHP.
    $is_valid = mb_check_encoding($string, 'UTF-8');

    return $is_valid;
}
 /**
  * Exercise the UTF-8 validators against valid, invalid, and non-BMP input.
  *
  * Each fixture maps an input string to a list of
  * (expected UTF-8 validity, expected "valid UTF-8 using only BMP
  * characters", human-readable case name).
  *
  * Every non-ASCII input is written with hex escapes rather than as raw
  * bytes. Malformed sequences cannot survive an editor or tool that
  * re-encodes the file: they get replaced with U+FFFD, which is itself
  * valid UTF-8, and several distinct cases then collapse into one
  * duplicate array key.
  *
  * @return void
  */
 public function testUTF8BMP()
 {
     $tests = array(
         '' => array(true, true, pht('empty string')),
         'a' => array(true, true, 'a'),

         // "a" followed by two U+0360 combining characters.
         "a\xCD\xA0\xCD\xA0" => array(true, true, pht('%s with combining', 'a')),

         // U+2603.
         "\xE2\x98\x83" => array(true, true, pht('snowman')),

         // U+FFFF, the last code point in the BMP.
         "\xEF\xBF\xBF" => array(true, true, 'U+FFFF'),

         // 0xC0 is not a legal continuation byte.
         "\xEF\xBF\xC0" => array(false, false, pht('Invalid, byte range.')),

         // Overlong 4-byte encoding.
         "\xF0\x81\x80\x80" => array(false, false, pht('Nonminimal 4-byte character.')),

         // U+10000, the first code point above the BMP.
         "\xF0\x90\x80\x80" => array(true, false, 'U+10000'),

         // U+1D11E, alone and embedded in ASCII text.
         "\xF0\x9D\x84\x9E" => array(true, false, 'gclef'),
         "musical \xF0\x9D\x84\x9E g-clef" => array(true, false, pht('gclef text')),

         // First three bytes of a 4-byte sequence.
         "\xF0\x9D\x84" => array(false, false, pht('Invalid, truncated.')),

         // Overlong 3-byte encoding.
         "\xE0\x80\x80" => array(false, false, pht('Nonminimal 3-byte character.')),

         // Lead bytes with missing continuation bytes.
         "\xCD" => array(false, false, pht('Partial 2-byte character.')),
         "\xE0\xA0" => array(false, false, pht('Partial BMP 0xE0 character.')),
         "\xE2\x98" => array(false, false, pht('Partial BMP cahracter.')),
     );

     foreach ($tests as $input => $test) {
         list($expect_utf8, $expect_bmp, $test_name) = $test;

         // Depending on what's installed on the system, this may use an
         // extension.
         $this->assertEqual($expect_utf8, phutil_is_utf8($input), pht('is_utf(%s)', $test_name));

         // Also test this against the pure PHP implementation, explicitly.
         $this->assertEqual($expect_utf8, phutil_is_utf8_slowly($input), pht('is_utf_slowly(%s)', $test_name));

         $this->assertEqual($expect_bmp, phutil_is_utf8_with_only_bmp_characters($input), pht('is_utf_bmp(%s)', $test_name));
     }
 }