/**
 * Decodes a JSON string into the appropriate PHP value.
 *
 * The input is first normalised with $this->reduce_string(). It is then
 * classified as a literal (true/false/null), a number, a quoted string, or
 * an array/object. Arrays and objects are split into top-level slices by a
 * small stack-based scanner, and every slice is decoded recursively.
 *
 * Objects become associative arrays when $this->use equals JSON_LOOSE_TYPE
 * and stdClass instances otherwise.
 * See argument 1 to JSON() above for object-output behavior.
 * Note that decode() always returns strings in ASCII or UTF-8 format!
 *
 * @param string $str JSON-formatted string
 *
 * @return mixed number, boolean, string, array, or object corresponding to
 *               the given JSON input string; null when the input is the
 *               literal "null" or is not recognised as any JSON value.
 * @access public
 */
function decode($str)
{
    $str = $this->reduce_string($str);

    switch (strtolower($str)) {
        case 'true':
            return true;

        case 'false':
            return false;

        case 'null':
            return null;

        default:
            if (is_numeric($str)) {
                // Return an int when the value is integral, a float otherwise.
                return (float) $str == (int) $str ? (int) $str : (float) $str;
            } elseif (preg_match('/^("|\').*("|\')$/s', $str, $m) && $m[1] == $m[2]) {
                // STRINGS RETURNED IN UTF-8 FORMAT
                // The pattern uses ".*" (not ".+") so that the empty string
                // literal "" decodes to '' instead of falling through to null.
                $delim = substr($str, 0, 1);
                // Cast: older PHP versions return false for an empty slice.
                $chrs = (string) substr($str, 1, -1);
                $utf8 = '';
                $strlen_chrs = strlen($chrs);

                for ($c = 0; $c < $strlen_chrs; ++$c) {
                    $pair = substr($chrs, $c, 2);
                    $ord = ord($chrs[$c]);

                    switch ($pair) {
                        case '\\b':
                            $utf8 .= chr(0x8);
                            $c += 1;
                            break;

                        case '\\t':
                            $utf8 .= chr(0x9);
                            $c += 1;
                            break;

                        case '\\n':
                            $utf8 .= chr(0xa);
                            $c += 1;
                            break;

                        case '\\f':
                            $utf8 .= chr(0xc);
                            $c += 1;
                            break;

                        case '\\r':
                            $utf8 .= chr(0xd);
                            $c += 1;
                            break;

                        case '\\"':
                        case '\\\'':
                        case '\\\\':
                        case '\\/':
                            // Unescape, except for an escaped quote of the
                            // *other* kind than the delimiter: in that case
                            // only the backslash is skipped here and the quote
                            // itself is copied by the next iteration.
                            if (($delim == '"' && $pair != '\\\'')
                                || ($delim == "'" && $pair != '\\"')
                            ) {
                                $utf8 .= $chrs[++$c];
                            }
                            break;

                        default:
                            if (preg_match('/\\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6))) {
                                // single, escaped unicode character
                                // NOTE(review): each \uXXXX is converted on its
                                // own, so a surrogate pair is handed over one
                                // half at a time; confirm utf16be_to_utf8()
                                // copes with that.
                                $utf16 = chr(hexdec(substr($chrs, $c + 2, 2)))
                                       . chr(hexdec(substr($chrs, $c + 4, 2)));
                                $utf8 .= utf16be_to_utf8($utf16);
                                $c += 5;
                            } elseif ($ord >= 0x20 && $ord <= 0x7f) {
                                // printable ASCII is copied verbatim
                                $utf8 .= $chrs[$c];
                            } elseif (($ord & 0xe0) == 0xc0) {
                                // characters U-00000080 - U-000007FF, mask 110XXXXX
                                // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                                $utf8 .= substr($chrs, $c, 2);
                                $c += 1;
                            } elseif (($ord & 0xf0) == 0xe0) {
                                // characters U-00000800 - U-0000FFFF, mask 1110XXXX
                                $utf8 .= substr($chrs, $c, 3);
                                $c += 2;
                            } elseif (($ord & 0xf8) == 0xf0) {
                                // characters U-00010000 - U-001FFFFF, mask 11110XXX
                                $utf8 .= substr($chrs, $c, 4);
                                $c += 3;
                            } elseif (($ord & 0xfc) == 0xf8) {
                                // characters U-00200000 - U-03FFFFFF, mask 111110XX
                                $utf8 .= substr($chrs, $c, 5);
                                $c += 4;
                            } elseif (($ord & 0xfe) == 0xfc) {
                                // characters U-04000000 - U-7FFFFFFF, mask 1111110X
                                $utf8 .= substr($chrs, $c, 6);
                                $c += 5;
                            }
                            // Any other byte (e.g. a raw control character)
                            // is dropped.
                            break;
                    }
                }

                return $utf8;
            } elseif (preg_match('/^\\[.*\\]$/s', $str) || preg_match('/^\\{.*\\}$/s', $str)) {
                // array, or object notation
                if ($str[0] == '[') {
                    $stk = array(JSON_IN_ARR);
                    $arr = array();
                } else {
                    $stk = array(JSON_IN_OBJ);
                    if ($this->use == JSON_LOOSE_TYPE) {
                        $obj = array();
                    } else {
                        $obj = new stdClass();
                    }
                }

                // $stk[0] records the container type; every later entry
                // describes the scanner state (slice, string, nested
                // array/object, comment) and where that state began.
                array_push($stk, array('what' => JSON_SLICE, 'where' => 0, 'delim' => false));

                $chrs = substr($str, 1, -1);
                $chrs = $this->reduce_string($chrs);

                if ($chrs == '') {
                    // empty container
                    if (reset($stk) == JSON_IN_ARR) {
                        return $arr;
                    } else {
                        return $obj;
                    }
                }

                $strlen_chrs = strlen($chrs);
                // States in which a new string/array/object/comment may open.
                $openers = array(JSON_SLICE, JSON_IN_ARR, JSON_IN_OBJ);

                // Note "<=": the extra iteration at the end flushes the last slice.
                for ($c = 0; $c <= $strlen_chrs; ++$c) {
                    $top = end($stk);
                    $pair = substr($chrs, $c, 2);

                    // Does this character close the string we are inside?
                    // A quote is escaped only when preceded by an ODD number
                    // of backslashes, so count the whole run rather than
                    // peeking at just the two previous characters.
                    $closes_string = false;
                    if ($c < $strlen_chrs
                        && $top['what'] == JSON_IN_STR
                        && $chrs[$c] == $top['delim']
                    ) {
                        $slashes = 0;
                        for ($i = $c - 1; $i >= 0 && $chrs[$i] == '\\'; --$i) {
                            ++$slashes;
                        }
                        $closes_string = ($slashes % 2 == 0);
                    }

                    if ($c == $strlen_chrs || ($chrs[$c] == ',' && $top['what'] == JSON_SLICE)) {
                        // found a comma that is not inside a string, array, etc.,
                        // OR we've reached the end of the character list
                        $slice = substr($chrs, $top['where'], $c - $top['where']);
                        array_push($stk, array('what' => JSON_SLICE, 'where' => $c + 1, 'delim' => false));

                        if (reset($stk) == JSON_IN_ARR) {
                            // we are in an array, so just push an element onto the stack
                            array_push($arr, $this->decode($slice));
                        } elseif (reset($stk) == JSON_IN_OBJ) {
                            // we are in an object, so figure out the property
                            // name and set the member
                            if (preg_match('/^\\s*(["\'].*[^\\\\]["\'])\\s*:\\s*(\\S.*),?$/Uis', $slice, $parts)) {
                                // "name":value pair
                                $key = $this->decode($parts[1]);
                                $val = $this->decode($parts[2]);

                                if ($this->use == JSON_LOOSE_TYPE) {
                                    $obj[$key] = $val;
                                } else {
                                    $obj->{$key} = $val;
                                }
                            } elseif (preg_match('/^\\s*(\\w+)\\s*:\\s*(\\S.*),?$/Uis', $slice, $parts)) {
                                // name:value pair, where name is unquoted
                                $key = $parts[1];
                                $val = $this->decode($parts[2]);

                                if ($this->use == JSON_LOOSE_TYPE) {
                                    $obj[$key] = $val;
                                } else {
                                    $obj->{$key} = $val;
                                }
                            }
                        }
                    } elseif (($chrs[$c] == '"' || $chrs[$c] == "'") && in_array($top['what'], $openers, true)) {
                        // found a quote, and we are not inside a string
                        array_push($stk, array('what' => JSON_IN_STR, 'where' => $c, 'delim' => $chrs[$c]));
                    } elseif ($closes_string) {
                        // found a quote, we're in a string, and it's not escaped
                        array_pop($stk);
                    } elseif ($chrs[$c] == '[' && in_array($top['what'], $openers, true)) {
                        // found a left-bracket, and we are in an array, object, or slice
                        array_push($stk, array('what' => JSON_IN_ARR, 'where' => $c, 'delim' => false));
                    } elseif ($chrs[$c] == ']' && $top['what'] == JSON_IN_ARR) {
                        // found a right-bracket, and we're in an array
                        array_pop($stk);
                    } elseif ($chrs[$c] == '{' && in_array($top['what'], $openers, true)) {
                        // found a left-brace, and we are in an array, object, or slice
                        array_push($stk, array('what' => JSON_IN_OBJ, 'where' => $c, 'delim' => false));
                    } elseif ($chrs[$c] == '}' && $top['what'] == JSON_IN_OBJ) {
                        // found a right-brace, and we're in an object
                        array_pop($stk);
                    } elseif ($pair == '/*' && in_array($top['what'], $openers, true)) {
                        // found a comment start, and we are in an array, object, or slice
                        array_push($stk, array('what' => JSON_IN_CMT, 'where' => $c, 'delim' => false));
                        $c++;
                    } elseif ($pair == '*/' && $top['what'] == JSON_IN_CMT) {
                        // found a comment end, and we're in one now:
                        // blank the whole comment out (same length, so all
                        // recorded offsets stay valid)
                        array_pop($stk);
                        $c++;
                        $span = $c - $top['where'] + 1;
                        $chrs = substr_replace($chrs, str_repeat(' ', $span), $top['where'], $span);
                    }
                }

                if (reset($stk) == JSON_IN_ARR) {
                    return $arr;
                } elseif (reset($stk) == JSON_IN_OBJ) {
                    return $obj;
                }
            }
    }

    // Input was not recognised as any JSON value.
    return null;
}
/**
 * UTF-16BE to UTF-8: running utf16be_to_utf8() over the sample held in
 * $this->utf16 must yield the sample held in $this->utf8.
 */
function test_from16be()
{
    $converted = utf16be_to_utf8($this->utf16);
    $wanted = $this->utf8;

    $this->assertEqual($converted, $wanted);
}