/**
 * Convert the multi-byte UTF-8 sequences in a string to numeric entities.
 *
 * Every well-formed 2-, 3- and 4-byte sequence is replaced with a decimal
 * entity of the form "&#NNN;". Any byte >= 0x80 still present afterwards
 * (i.e. not part of a sequence matched above) is replaced with "?".
 *
 * All byte ranges are written as PCRE "\xHH" escapes rather than literal
 * characters, so the patterns do not depend on the encoding this source
 * file happens to be saved in. The patterns deliberately omit the "u"
 * modifier: they must match individual bytes, not code points.
 *
 * @param string $string UTF-8 encoded input
 * @return string Input with multi-byte sequences rewritten as "&#NNN;" entities
 */
protected static function _decodeUTF8($string)
{
    // don't do decoding when there are no 8bit symbols
    if (!QuickBooks_Cast::_is8Bit($string, 'utf-8')) {
        return $string;
    }

    // Longest sequences first, so the continuation bytes of a long sequence
    // are consumed before the shorter patterns get a chance to see them.

    // decode four byte unicode characters: lead 0xF0-0xF7 + 3 continuation bytes
    $string = preg_replace_callback(
        '/([\xF0-\xF7])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])/',
        function ($arr) {
            // Strip the marker bits from each byte and weight by 64^3, 64^2, 64, 1.
            $val = (ord($arr[1]) - 240) * 262144
                + (ord($arr[2]) - 128) * 4096
                + (ord($arr[3]) - 128) * 64
                + (ord($arr[4]) - 128);

            return "&#" . $val . ";";
        },
        $string
    );

    // decode three byte unicode characters: lead 0xE0-0xEF + 2 continuation bytes
    $string = preg_replace_callback(
        '/([\xE0-\xEF])([\x80-\xBF])([\x80-\xBF])/',
        function ($arr) {
            $val = (ord($arr[1]) - 224) * 4096
                + (ord($arr[2]) - 128) * 64
                + (ord($arr[3]) - 128);

            return "&#" . $val . ";";
        },
        $string
    );

    // decode two byte unicode characters: lead 0xC0-0xDF + 1 continuation byte
    $string = preg_replace_callback(
        '/([\xC0-\xDF])([\x80-\xBF])/',
        function ($arr) {
            $val = (ord($arr[1]) - 192) * 64
                + (ord($arr[2]) - 128);

            return "&#" . $val . ";";
        },
        $string
    );

    // remove broken unicode: any high byte left over was not part of a
    // sequence matched above. A single explicit class is used here so that
    // a plain ASCII space can never be caught by this replacement.
    $string = preg_replace('/[\x80-\xFF]/', '?', $string);

    return $string;
}