According to RFC 3629, a UTF-8 character can have at most 4 bytes.
However, this implementation supports UTF-8 characters containing up to 6
bytes.
public static getCharLength ( string $byte ) : integer | ||
$byte | string | The byte to be analyzed. |
Returns | integer |
/**
 * Checks that UtfString::getCharLength() reports the sequence length
 * announced by a leading byte.
 *
 * Each length class from 1 to 6 bytes is probed at its lowest and highest
 * leading-byte value. assertSame() is used so that the result must be an
 * integer, not merely something loosely equal to one.
 */
public function testGetCharLength()
{
    // Leading byte => expected number of bytes in the sequence.
    $expectedLengths = [
        0x00 => 1, // 00000000
        0x7f => 1, // 01111111
        0xc0 => 2, // 11000000
        0xdf => 2, // 11011111
        0xe0 => 3, // 11100000
        0xef => 3, // 11101111
        0xf0 => 4, // 11110000
        0xf7 => 4, // 11110111
        0xf8 => 5, // 11111000
        0xfb => 5, // 11111011
        0xfc => 6, // 11111100
        0xfd => 6, // 11111101
    ];

    foreach ($expectedLengths as $leadByte => $length) {
        $this->assertSame(
            $length,
            UtfString::getCharLength(chr($leadByte)),
            // Name the offending byte, since all cases share one assertion line.
            sprintf('Unexpected length for leading byte 0x%02x', $leadByte)
        );
    }
}