/**
 * Takes an array of integers representing the Unicode characters and returns
 * a UTF-8 string.
 *
 * Code points are encoded in iteration order; the keys of $unicode are
 * ignored, so sparse or re-keyed arrays (e.g. the result of array_filter())
 * are handled the same way as plain lists.
 *
 * Code points rejected by unicode_is_valid() are skipped silently unless
 * $strict is true, in which case an error is raised with trigger_error()
 * (at its default level) and the function returns false.
 *
 * @param array $unicode The array of Unicode code points
 * @param bool $strict Should the function trigger an error if something goes wrong
 * @return string|false The UTF-8 encoded string or false on failure (strict mode only)
 */
function utf8_from_unicode($unicode, $strict = false)
{
    $rv = '';

    // Positional counter used only for the error message; incremented at the
    // top of the loop so the early "continue" statements cannot skip it.
    $index = -1;

    foreach ($unicode as $cp) {
        $index++;

        // Single-octet fast path.
        if (utf8_is_ascii($cp)) {
            $rv .= chr($cp);
            continue;
        }

        if (!unicode_is_valid($cp)) {
            if ($strict) {
                $msg = unicode_is_surrogate($cp)
                    ? 'Illegal surrogate in UTF-8'
                    : 'Codepoint out of Unicode range';
                trigger_error(sprintf('utf8_from_unicode: %s at index: %d, value: 0x%X', $msg, $index, $cp));

                return false;
            }

            // Non-strict mode: drop the invalid code point and carry on.
            continue;
        }

        // Multi-octet sequence: one identifier (lead) octet followed by
        // $seqlen - 1 trail octets.
        $seqlen = utf8_sequence_length($cp);
        $rv .= chr(utf8_encode_identifier($cp, $seqlen));

        // Trail offsets count down from $seqlen - 2 to 0. The post-decrement
        // in the loop condition lets the body run for offset 0 as well.
        $offset = $seqlen - 2;
        do {
            $rv .= chr(utf8_encode_trail($cp, $offset));
        } while ($offset--);
    }

    return $rv;
}
/**
 * Checks that unicode_is_valid() returns the expected result for each
 * data set supplied by the provider.
 *
 * @dataProvider providerUnicodeIsValid
 *
 * @param mixed $cp The value passed to unicode_is_valid()
 * @param mixed $rv The result unicode_is_valid() is expected to return
 */
public function testUnicodeIsValid($cp, $rv)
{
    // PHPUnit's signature is assertEquals($expected, $actual); passing the
    // expected value first keeps failure messages the right way round.
    $this->assertEquals($rv, unicode_is_valid($cp));
}
/**
 * Scans backwards through a UTF-8 string and returns the last well-formed
 * character found at or before position $i.
 *
 * Octets that do not start a complete, valid sequence are skipped. On return
 * $i has been moved to the octet preceding the first octet of the returned
 * character, so the function can be called repeatedly to walk the string
 * from its end to its start.
 *
 * This function may return boolean false, but may also return a non-boolean
 * value which evaluates to false (for example the character "0"). Use the
 * === operator for testing the return value of this function.
 *
 * @param string $str The UTF-8 encoded string
 * @param int $i The current position in the string (octet offset); when null
 *               it is initialised to the last octet of the string
 * @param int $rcp If passed, this will be set to the Unicode code point
 *                 for the character
 * @return string|false The character or false on the start of the string
 */
function utf8_get_last_char($str, &$i = null, &$rcp = null)
{
    if ($i === null) {
        $i = strlen($str) - 1;
    }

    // The explicit lower bound matters: since PHP 7.1 negative string offsets
    // are valid, so isset($str[-1]) is true for any non-empty string and the
    // scan would otherwise wrap around to the end of the string.
    while ($i >= 0 && isset($str[$i])) {
        $n = $i--; // Save current position and move to the previous octet
        $char = $str[$n++]; // $n now points at the octet after the current one
        $ord = ord($char);

        if (utf8_is_ascii($ord)) {
            $rcp = $ord;

            return $char;
        }

        // Anything that is not a sequence identifier (e.g. a trail octet) is
        // skipped; the loop then retries from the previous octet.
        if (utf8_is_identifier($ord, $seqlen)) {
            $cp = utf8_decode_identifier($ord, $seqlen);
            $offset = $seqlen - 2;

            // Check up front that the last octet of the sequence exists
            if (!isset($str[$n + $offset])) {
                continue;
            }

            // Consume the trail octets; offsets count down from $seqlen - 2
            // to 0 (the post-decrement lets the body run for offset 0 too).
            do {
                $ord = ord($str[$n]);
                if (!utf8_is_trail($ord)) {
                    continue 2; // Skip this sequence
                }
                $cp |= utf8_decode_trail($ord, $offset);
                $char .= $str[$n++];
            } while ($offset--);

            // Accept only valid code points whose sequence length matches the
            // one announced by the identifier octet.
            if (unicode_is_valid($cp) && utf8_sequence_length($cp) == $seqlen) {
                $rcp = $cp;

                return $char;
            }
        }
    }

    return false;
}