/**
 * Takes an array of integers representing Unicode code points and returns
 * the corresponding UTF-8 string.
 *
 * Code points for which utf8_is_ascii() is true are appended as a single
 * chr() byte. Every other code point is checked with unicode_is_valid();
 * valid ones are appended as the byte from utf8_encode_identifier() followed
 * by the bytes from utf8_encode_trail(). Invalid code points are silently
 * skipped unless $strict is set.
 *
 * The array is read in iteration order regardless of its keys, so an array
 * whose keys are not exactly 0..n-1 (for example the result of
 * array_filter(), which preserves keys) is encoded completely.
 *
 * @param array $unicode The array of Unicode code points
 * @param bool $strict Should the function trigger an error and abort when an
 *                     invalid code point is found
 * @return string|false The UTF-8 encoded string, or false when $strict is
 *                      set and an invalid code point was found
 */
function utf8_from_unicode($unicode, $strict = false)
{
    $rv = '';

    // Zero-based position of the current element. For a plain list this is
    // the same value as the array key, so the index reported in the error
    // message is unchanged for such input.
    $i = -1;

    foreach ($unicode as $cp) {
        $i++;

        if (utf8_is_ascii($cp)) {
            $rv .= chr($cp);
            continue;
        }

        if (!unicode_is_valid($cp)) {
            if ($strict) {
                $msg = unicode_is_surrogate($cp)
                    ? 'Illegal surrogate in UTF-8'
                    : 'Codepoint out of Unicode range';
                trigger_error(sprintf('utf8_from_unicode: %s at index: %d, value: 0x%X', $msg, $i, $cp));

                return false;
            }

            // Non-strict mode: drop the invalid code point and carry on.
            continue;
        }

        $seqlen = utf8_sequence_length($cp);
        $rv .= chr(utf8_encode_identifier($cp, $seqlen));

        // One utf8_encode_trail() call per remaining byte, with the offset
        // counting down from $seqlen - 2 to 0 (assumes $seqlen >= 2 for a
        // valid non-ASCII code point).
        $offset = $seqlen - 2;
        do {
            $rv .= chr(utf8_encode_trail($cp, $offset));
        } while ($offset--);
    }

    return $rv;
}
/**
 * Checks that utf8_encode_identifier() returns false for every
 * ($cp, $seqlen) pair supplied by the data provider.
 *
 * @dataProvider providerFailingUtf8EncodeIdentifier
 */
public function testFailingUtf8EncodeIdentifierReturnValue($cp, $seqlen)
{
    // The "@" operator silences any diagnostic the call may emit, so only
    // the return value is examined here.
    $returned = @utf8_encode_identifier($cp, $seqlen);

    $this->assertFalse($returned);
}
/**
 * UTF-8 aware replacement for chr().
 *
 * Returns the UTF-8 encoded character for the given Unicode code point.
 *
 * @param int $cp The Unicode code point
 * @param bool $strict When set, an invalid code point triggers an error and
 *                     false is returned; otherwise an empty string is returned
 * @return string|false The encoded character, an empty string for an invalid
 *                      code point in non-strict mode, or false for an invalid
 *                      code point in strict mode
 */
function utf8_chr($cp, $strict = false)
{
    // Single-byte case: hand straight over to chr().
    if (utf8_is_ascii($cp)) {
        return chr($cp);
    }

    if (unicode_is_valid($cp)) {
        $length = utf8_sequence_length($cp);

        // First byte comes from utf8_encode_identifier(); the remaining
        // bytes come from utf8_encode_trail(), called with the offset
        // counting down from $length - 2 to 0 (assumes $length >= 2 here).
        $encoded = chr(utf8_encode_identifier($cp, $length));
        $remaining = $length - 2;
        do {
            $encoded .= chr(utf8_encode_trail($cp, $remaining));
        } while ($remaining--);

        return $encoded;
    }

    // Invalid code point from here on.
    if (!$strict) {
        return '';
    }

    if (unicode_is_surrogate($cp)) {
        $reason = 'Illegal surrogate';
    } else {
        $reason = 'Codepoint out of Unicode range';
    }
    trigger_error(sprintf('utf8_chr: %s, value: 0x%X', $reason, $cp));

    return false;
}