/**
 * Encodes an arbitrary variable into JSON format.
 *
 * Strings are walked byte by byte. Printable ASCII is copied through,
 * the characters JSON requires to be escaped are escaped, and every
 * well-formed UTF-8 multi-byte sequence is written as a \uXXXX escape
 * (a surrogate pair for code points above U+FFFF), so the resulting
 * JSON text is pure ASCII.
 *
 * Malformed input is handled defensively: stray continuation bytes,
 * truncated sequences, lead bytes whose continuation bytes are invalid,
 * the obsolete 5/6-byte forms and code points above U+10FFFF are
 * dropped instead of being read past the end of the string or allowed
 * to swallow the bytes that follow them.
 *
 * @param    mixed   $var    any number, boolean, string, array, or object to be encoded.
 *                           see argument 1 to JSON() above for array-parsing behavior.
 *                           if var is a string, note that encode() always expects it
 *                           to be in ASCII or UTF-8 format!
 *
 * @return   string  JSON string representation of input var
 *                   (an empty string for unsupported types such as resources)
 * @access   public
 */
function encode($var)
{
    switch (gettype($var)) {
        case 'boolean':
            return $var ? 'true' : 'false';

        case 'NULL':
            return 'null';

        case 'integer':
            return sprintf('%d', $var);

        case 'double':
        case 'float':
            // JSON has no representation for INF/NAN; mirror what
            // JavaScript's JSON.stringify does and emit null.
            if (!is_finite($var)) {
                return 'null';
            }

            // %F (unlike %f) ignores the current locale, so the decimal
            // separator is always "." as JSON requires.
            return sprintf('%F', $var);

        case 'string':
            // STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT
            $ascii = '';
            $strlen_var = strlen($var);

            /*
             * Iterate over every byte in the string, escaping with a
             * backslash or a \uXXXX sequence where necessary.
             */
            for ($c = 0; $c < $strlen_var; ++$c) {
                $chr = $var[$c];
                $ord_var_c = ord($chr);

                // Single-byte (ASCII) characters, U+0000 - U+007F.
                if ($ord_var_c < 0x80) {
                    switch ($ord_var_c) {
                        case 0x08:
                            $ascii .= '\\b';
                            break;
                        case 0x09:
                            $ascii .= '\\t';
                            break;
                        case 0x0a:
                            $ascii .= '\\n';
                            break;
                        case 0x0c:
                            $ascii .= '\\f';
                            break;
                        case 0x0d:
                            $ascii .= '\\r';
                            break;
                        case 0x22:
                        case 0x2f:
                        case 0x5c:
                            // double quote, slash, backslash
                            $ascii .= '\\' . $chr;
                            break;
                        default:
                            // Remaining control characters must not appear
                            // raw inside a JSON string; everything else
                            // in the ASCII range is copied verbatim.
                            $ascii .= ($ord_var_c < 0x20)
                                ? sprintf('\\u%04x', $ord_var_c)
                                : $chr;
                    }
                    continue;
                }

                // Lead byte of a multi-byte sequence: work out how many
                // continuation bytes follow and seed the code point with
                // the payload bits of the lead byte.
                // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                if (($ord_var_c & 0xe0) == 0xc0) {
                    // U+0080 - U+07FF, mask 110XXXXX
                    $trail = 1;
                    $codepoint = $ord_var_c & 0x1f;
                } elseif (($ord_var_c & 0xf0) == 0xe0) {
                    // U+0800 - U+FFFF, mask 1110XXXX
                    $trail = 2;
                    $codepoint = $ord_var_c & 0x0f;
                } elseif (($ord_var_c & 0xf8) == 0xf0) {
                    // U+10000 - U+10FFFF, mask 11110XXX
                    $trail = 3;
                    $codepoint = $ord_var_c & 0x07;
                } else {
                    // Stray continuation byte or a lead byte that is not
                    // legal UTF-8 (0xF8 - 0xFF): drop it.
                    continue;
                }

                // Truncated sequence at the end of the string: drop the
                // lead byte rather than reading past the end.
                if ($c + $trail >= $strlen_var) {
                    continue;
                }

                $valid = true;
                for ($i = 1; $i <= $trail; ++$i) {
                    $next = ord($var[$c + $i]);
                    if (($next & 0xc0) != 0x80) {
                        // Not a continuation byte. Leave it in place so it
                        // is processed (and escaped) on its own.
                        $valid = false;
                        break;
                    }
                    $codepoint = ($codepoint << 6) | ($next & 0x3f);
                }

                if (!$valid || $codepoint > 0x10ffff) {
                    continue;
                }

                $c += $trail;

                if ($codepoint > 0xffff) {
                    // Outside the BMP: JSON requires a UTF-16 surrogate pair.
                    $codepoint -= 0x10000;
                    $ascii .= sprintf(
                        '\\u%04x\\u%04x',
                        0xd800 | ($codepoint >> 10),
                        0xdc00 | ($codepoint & 0x3ff)
                    );
                } else {
                    $ascii .= sprintf('\\u%04x', $codepoint);
                }
            }

            return '"' . $ascii . '"';

        case 'array':
            /*
             * As per JSON spec if any array key is not an integer
             * we must treat the whole array as an object. We
             * also try to catch a sparsely populated associative
             * array with numeric keys here because some JS engines
             * will create an array with empty indexes up to
             * max_index which can cause memory issues and because
             * the keys, which may be relevant, will be remapped
             * otherwise.
             *
             * As per the ECMA and JSON specification an object may
             * have any string as a property. Unfortunately due to
             * a hole in the ECMA specification if the key is a
             * ECMA reserved word or starts with a digit the
             * parameter is only accessible using ECMAScript's
             * bracket notation.
             */

            // treat as a JSON object
            if (is_array($var) && count($var) && array_keys($var) !== range(0, count($var) - 1)) {
                return sprintf(
                    '{%s}',
                    join(',', array_map(
                        array($this, 'name_value'),
                        array_keys($var),
                        array_values($var)
                    ))
                );
            }

            // treat it like a regular array
            return sprintf('[%s]', join(',', array_map(array($this, 'encode'), $var)));

        case 'object':
            // Only the properties accessible from this scope are encoded.
            $vars = get_object_vars($var);

            return sprintf(
                '{%s}',
                join(',', array_map(
                    array($this, 'name_value'),
                    array_keys($vars),
                    array_values($vars)
                ))
            );

        default:
            // Unsupported type (e.g. resource): nothing to encode.
            return '';
    }
}
/**
 * Checks that utf8_to_utf16be() turns the UTF-8 fixture string into
 * the expected UTF-16BE fixture string.
 */
function test_to16be()
{
    $converted = utf8_to_utf16be($this->utf8);

    $this->assertEqual($converted, $this->utf16);
}