Exemplo n.º 1
0
/**
 * Encodes a sequence of Unicode code points as a UTF-8 byte string.
 *
 * Code points accepted by utf8_is_ascii() are emitted as a single octet. Every
 * other code point has to pass unicode_is_valid(); one that does not is
 * silently dropped by default, or - in strict mode - reported through
 * trigger_error() before the conversion is aborted.
 *
 * @param  array $unicode The code points to encode, read by index 0..count-1
 * @param  bool  $strict  Whether an invalid code point aborts the conversion
 * @return string|false The UTF-8 encoded string, or false when strict mode
 *                      encounters an invalid code point
 */
function utf8_from_unicode($unicode, $strict = false)
{
    $encoded = '';

    for ($index = 0, $total = count($unicode); $index < $total; $index++) {
        $codePoint = $unicode[$index];

        if (utf8_is_ascii($codePoint)) {
            // Single-octet character: the code point is the octet value
            $encoded .= chr($codePoint);
        } elseif (unicode_is_valid($codePoint)) {
            // Multi-octet character: one lead octet, then the trail octets
            $length = utf8_sequence_length($codePoint);
            $encoded .= chr(utf8_encode_identifier($codePoint, $length));

            // Trail octets are requested with a descending offset, down to 0
            $remaining = $length - 2;
            do {
                $encoded .= chr(utf8_encode_trail($codePoint, $remaining));
            } while ($remaining--);
        } elseif ($strict) {
            $reason = 'Codepoint out of Unicode range';
            if (unicode_is_surrogate($codePoint)) {
                $reason = 'Illegal surrogate in UTF-8';
            }

            trigger_error(sprintf('utf8_from_unicode: %s at index: %d, value: 0x%X', $reason, $index, $codePoint));

            return false;
        }
        // Not strict: the invalid code point is skipped without any output
    }

    return $encoded;
}
Exemplo n.º 2
0
 /**
  * Checks the result of unicode_is_valid() for each data set supplied by the
  * data provider.
  *
  * @dataProvider providerUnicodeIsValid
  *
  * @param mixed $cp The value handed to unicode_is_valid()
  * @param mixed $rv The result unicode_is_valid() is expected to produce
  */
 public function testUnicodeIsValid($cp, $rv)
 {
     // assertEquals() takes ($expected, $actual); passing the expected value
     // first keeps PHPUnit's failure message from reporting them backwards.
     $this->assertEquals($rv, unicode_is_valid($cp));
 }
Exemplo n.º 3
0
/**
 * Scans a UTF-8 string backwards and returns the first well-formed character
 * that is found.
 *
 * The scan starts at octet $i (the last octet of the string when $i is null)
 * and walks towards the start of the string. Octets that are neither ASCII
 * nor the lead octet of a complete, valid sequence are skipped. Because $i is
 * taken by reference and is left pointing at the octet in front of the
 * returned character, calling the function repeatedly with the same variable
 * steps through the string backwards one character at a time.
 *
 * This function may return boolean false, but may also return a non-boolean
 * value which evaluates to false (the character "0"). Use the === operator
 * for testing the return value of this function.
 *
 * @param  string   $str The UTF-8 encoded string
 * @param  int|null $i   The octet position to start scanning from; updated to
 *                       the position preceding the last octet examined
 * @param  int      $rcp If passed, this will be set to the Unicode code point
 *                       for the character; left untouched when false is returned
 * @return string|false The character, or false when the start of the string
 *                      is reached without finding one
 */
function utf8_get_last_char($str, &$i = null, &$rcp = null)
{
    if ($i === null) {
        $i = strlen($str) - 1;
    }
    // Since PHP 7.1 a negative string offset counts from the end of the
    // string, so isset($str[-1]) is true for any non-empty string. The
    // explicit lower bound stops the scan at the start of the string instead
    // of letting it wrap around and return characters a second time.
    while ($i >= 0 && isset($str[$i])) {
        // Save current position and move to the previous octet
        $n = $i--;
        // Read the octet and move $n on to the one following it
        $char = $str[$n++];
        $ord = ord($char);
        if (utf8_is_ascii($ord)) {
            $rcp = $ord;
            return $char;
        }
        // NOTE(review): utf8_is_identifier() presumably reports the sequence
        // length through its second argument - confirm against its definition.
        if (utf8_is_identifier($ord, $seqlen)) {
            $cp = utf8_decode_identifier($ord, $seqlen);
            $offset = $seqlen - 2;
            // Make sure the last octet of the sequence exists before reading
            // any of the trail octets
            if (!isset($str[$n + $offset])) {
                continue;
            }
            do {
                $ord = ord($str[$n]);
                if (!utf8_is_trail($ord)) {
                    // Broken sequence: skip it and keep scanning backwards
                    continue 2;
                }
                $cp |= utf8_decode_trail($ord, $offset);
                $char .= $str[$n++];
            } while ($offset--);
            // Accept the character only if the code point is valid and was
            // encoded with the sequence length expected for it
            if (unicode_is_valid($cp) && utf8_sequence_length($cp) == $seqlen) {
                $rcp = $cp;
                return $char;
            }
        }
    }
    return false;
}