/**
 * Converts the encoding of a file with the detected conversion engine.
 *
 * The converted content is written to a newly created temporary file. On
 * success the old file is unlinked and the name of the new file is returned.
 * When no recoding engine is selected, or when one of the files cannot be
 * opened, the original file is left untouched and its own name is returned.
 *
 * @param string $src_charset  source charset
 * @param string $dest_charset target charset
 * @param string $file         file to convert
 *
 * @return string name of the new temporary file, or $file when nothing was converted
 *
 * @access public
 *
 * @author nijel
 */
function PMA_convert_file($src_charset, $dest_charset, $file)
{
    $engine = $GLOBALS['PMA_recoding_engine'];

    switch ($engine) {
        case PMA_CHARSET_RECODE:
        case PMA_CHARSET_ICONV:
        case PMA_CHARSET_LIBICONV:
        // The AIX wrapper is dispatched in the loop below, so it needs a
        // label here as well; without it that branch can never be reached.
        case PMA_CHARSET_ICONV_AIX:
            break;
        default:
            // No usable engine: hand the file back unchanged.
            return $file;
    }

    $tmpfname = tempnam('', 'PMA_convert_file');
    if ($tmpfname === false) {
        return $file;
    }

    $fin = fopen($file, 'r');
    if ($fin === false) {
        unlink($tmpfname);
        return $file;
    }

    $fout = fopen($tmpfname, 'w');
    if ($fout === false) {
        fclose($fin);
        unlink($tmpfname);
        return $file;
    }

    if ($engine == PMA_CHARSET_RECODE) {
        recode_file($src_charset . '..' . $dest_charset, $fin, $fout);
    } else {
        // Loop invariant: the target charset including the configured suffix.
        $target = $dest_charset . $GLOBALS['cfg']['IconvExtraParams'];

        while (!feof($fin)) {
            $line = fgets($fin, 4096);
            if ($line === false) {
                // fgets() reports EOF (or a read error) with false.
                break;
            }

            if ($engine == PMA_CHARSET_ICONV) {
                $dist = iconv($src_charset, $target, $line);
            } elseif ($engine == PMA_CHARSET_ICONV_AIX) {
                $dist = PMA_aix_iconv_wrapper($src_charset, $target, $line);
            } else {
                $dist = libiconv($src_charset, $target, $line);
            }

            // A failed conversion yields false; there is nothing to write then.
            if ($dist !== false) {
                fputs($fout, $dist);
            }
        } // end while
    }

    fclose($fin);
    fclose($fout);
    unlink($file);

    return $tmpfname;
}
/**
 * Converts the charset of a MySQL message, usually coming from mysql_error(),
 * into the PMA charset, usually UTF-8.
 *
 * Uses the language to charset mapping from mysql/share/errmsg.txt
 * and charset names to ISO charset from information_schema.CHARACTER_SETS.
 * When the server language is unknown, or the conversion itself fails, the
 * message is returned unchanged so that the error text is never lost.
 *
 * @uses    $GLOBALS['cfg']['IconvExtraParams']
 * @uses    $GLOBALS['charset'] as target charset
 * @uses    PMA_DBI_fetch_value() to get server_language
 * @uses    preg_match() to filter server_language
 * @uses    in_array()
 * @uses    function_exists() to check for a convert function
 * @uses    iconv() to convert message
 * @uses    libiconv() to convert message
 * @uses    recode_string() to convert message
 * @uses    mb_convert_encoding() to convert message
 * @param   string  $message
 * @return  string  $message
 */
function PMA_DBI_convert_message($message)
{
    // latin always last!
    $encodings = array(
        'japanese'      => 'EUC-JP',
        'japanese-sjis' => 'Shift-JIS',
        'korean'        => 'EUC-KR',
        'russian'       => 'KOI8-R',
        'ukrainian'     => 'KOI8-U',
        'greek'         => 'ISO-8859-7',
        'serbian'       => 'CP1250',
        'estonian'      => 'ISO-8859-13',
        'slovak'        => 'ISO-8859-2',
        'czech'         => 'ISO-8859-2',
        'hungarian'     => 'ISO-8859-2',
        'polish'        => 'ISO-8859-2',
        'romanian'      => 'ISO-8859-2',
        'spanish'       => 'CP1252',
        'swedish'       => 'CP1252',
        'italian'       => 'CP1252',
        'norwegian-ny'  => 'CP1252',
        'norwegian'     => 'CP1252',
        'portuguese'    => 'CP1252',
        'danish'        => 'CP1252',
        'dutch'         => 'CP1252',
        'english'       => 'CP1252',
        'french'        => 'CP1252',
        'german'        => 'CP1252',
    );

    if ($server_language = PMA_DBI_fetch_value('SHOW VARIABLES LIKE \'language\';', 0, 1)) {
        // The variable holds a path; keep only its last directory component.
        $found = array();
        if (preg_match('&(?:\\\\|\\/)([^\\\\\\/]*)(?:\\\\|\\/)$&i', $server_language, $found)) {
            $server_language = $found[1];
        }
    }

    if (empty($server_language) || !isset($encodings[$server_language])) {
        /**
         * @todo lang not found, try all, what TODO ?
         */
        return $message;
    }

    $source_charset = $encodings[$server_language];
    $converted      = $message;

    if (function_exists('iconv')) {
        if (@stristr(PHP_OS, 'AIX')
            && @strcasecmp(ICONV_IMPL, 'unknown') == 0
            && @strcasecmp(ICONV_VERSION, 'unknown') == 0
        ) {
            require_once './libraries/iconv_wrapper.lib.php';
            $converted = PMA_aix_iconv_wrapper(
                $source_charset,
                $GLOBALS['charset'] . $GLOBALS['cfg']['IconvExtraParams'],
                $message
            );
        } else {
            $converted = iconv(
                $source_charset,
                $GLOBALS['charset'] . $GLOBALS['cfg']['IconvExtraParams'],
                $message
            );
        }
    } elseif (function_exists('recode_string')) {
        $converted = recode_string($source_charset . '..' . $GLOBALS['charset'], $message);
    } elseif (function_exists('libiconv')) {
        $converted = libiconv($source_charset, $GLOBALS['charset'], $message);
    } elseif (function_exists('mb_convert_encoding')) {
        // do not try unsupported charsets
        if (!in_array($server_language, array('ukrainian', 'greek', 'serbian'))) {
            $converted = mb_convert_encoding($message, $GLOBALS['charset'], $source_charset);
        }
    }

    // The converters signal failure with false; fall back to the raw message
    // instead of handing a non-string back to the caller.
    if ($converted === false || $converted === null) {
        return $message;
    }

    return $converted;
}
/**
 * Convert a string from UTF-8 to any of various encodings.
 *
 * UTF-8 targets are returned untouched, ISO-8859-1 and WINDOWS-1252 are
 * handled with utf8_decode(); every other encoding is tried with mbstring,
 * iconv and libiconv, in that order, using the first converter that succeeds.
 *
 * @param  string $string    String to decode
 * @param  string $encoding  Target encoding; default: ISO-8859-1
 * @param  bool   $safe_mode If set to TRUE, the original string is returned on errors
 * @return string|false The decoded string, or false on failure (the original
 *                      string when $safe_mode is set)
 * @since  0.0.1
 */
function decode_utf8($string = '', $encoding = 'iso-8859-1', $safe_mode = false)
{
    $safe = $safe_mode ? $string : false;

    if (!$encoding) {
        $encoding = 'ISO-8859-1';
    }
    $target = strtoupper($encoding);

    if ($target == 'UTF-8' || $target == 'UTF8') {
        return $string;
    }
    if ($target == 'ISO-8859-1') {
        return utf8_decode($string);
    }
    if ($target == 'WINDOWS-1252') {
        return map_iso8859_1_w1252(utf8_decode($string));
    }
    if ($target == 'UNICODE-1-1-UTF-7') {
        $target = 'UTF-7';
    }

    // Results are compared against false explicitly: a converted "0" or an
    // empty string is a valid result, not a failure.
    if (function_exists('mb_convert_encoding')) {
        try {
            $conv = @mb_convert_encoding($string, $target, 'UTF-8');
        } catch (\ValueError $e) {
            // PHP 8 throws for encodings mbstring does not know; let the
            // remaining converters have a go.
            $conv = false;
        }
        if ($conv !== false && $conv !== null) {
            return $conv;
        }
    }

    if (function_exists('iconv')) {
        $conv = @iconv('UTF-8', $target, $string);
        if ($conv !== false) {
            return $conv;
        }
    }

    if (function_exists('libiconv')) {
        $conv = @libiconv('UTF-8', $target, $string);
        if ($conv !== false) {
            return $conv;
        }
    }

    return $safe;
}
/**
 * Converts a string from one character set to another.
 *
 * The converters are tried in this order: mbstring, then iconv/libiconv
 * (unless BX_ICONV_DISABLE is defined as true), then the converter object
 * returned by self::GetInstance().
 *
 * For "UTF-16" input the byte order mark decides the byte order; input
 * without a BOM is treated as little endian.
 *
 * @param mixed  $string       Value to convert; it is cast to string first.
 * @param string $charset_in   Source charset.
 * @param string $charset_out  Target charset.
 * @param string $errorMessage Receives a description of the failure, if any.
 *
 * @return string|false The converted string, or the converter's failure value.
 */
public static function ConvertCharset($string, $charset_in, $charset_out, &$errorMessage = "")
{
    $string = strval($string);

    if (strcasecmp($charset_in, $charset_out) == 0)
        return $string;

    $errorMessage = '';

    if ($string == '')
        return '';

    $isUtf16 = (strtoupper($charset_in) == "UTF-16");
    // First two bytes of the input, compared against the UTF-16 byte order marks
    $bom = substr($string, 0, 2);

    if (extension_loaded("mbstring"))
    {
        if ($isUtf16)
        {
            // The mbstring default for UTF-16 is big endian,
            // so little endian has to be requested explicitly
            if ($bom === "\xFF\xFE")
                return mb_convert_encoding(substr($string, 2), $charset_out, "UTF-16LE");
            // Big endian: just remove the BOM bytes
            elseif ($bom === "\xFE\xFF")
                return mb_convert_encoding(substr($string, 2), $charset_out, $charset_in);
            // Otherwise assume little endian without BOM
            else
                return mb_convert_encoding($string, $charset_out, "UTF-16LE");
        }
        else
        {
            $res = mb_convert_encoding($string, $charset_out, $charset_in);
            if (strlen($res) > 0)
                return $res;
        }
    }

    if (!defined("BX_ICONV_DISABLE") || BX_ICONV_DISABLE !== true)
    {
        $input = $string;
        // Prepend a little endian BOM only when the input carries no BOM at
        // all. The previous test ($ch != "\xFF" || $ch != "\xFE") was always
        // true and added a second BOM in front of an existing one.
        if ($isUtf16 && $bom !== "\xFF\xFE" && $bom !== "\xFE\xFF")
            $input = "\xFF\xFE".$string;

        if (function_exists('iconv'))
        {
            $res = iconv($charset_in, $charset_out."//IGNORE", $input);
            if (!$res)
                $errorMessage .= "Iconv reported failure while converting string to requested character encoding. ";
            return $res;
        }
        elseif (function_exists('libiconv'))
        {
            $res = libiconv($charset_in, $charset_out, $input);
            // NOTE(review): the line break inside this message is present in
            // the source as found and is kept byte-for-byte.
            if (!$res)
                $errorMessage .= "Libiconv reported failure while converting string to requested character encoding. 
";
            return $res;
        }
    }

    $cvt = self::GetInstance();
    $res = $cvt->Convert($string, $charset_in, $charset_out);
    if (!$res)
    {
        $arErrors = $cvt->GetErrors();
        if (count($arErrors) > 0)
            $errorMessage = implode("\n", $arErrors);
    }

    return $res;
}
/**
 * Stand-in for iconv() that hands the conversion over to libiconv().
 *
 * @param string $input_encoding  Charset of $string.
 * @param string $output_encoding Charset to convert to.
 * @param string $string          Text to convert.
 *
 * @return mixed Whatever libiconv() returns for these arguments.
 */
function iconv($input_encoding, $output_encoding, $string)
{
    $converted = libiconv($input_encoding, $output_encoding, $string);

    return $converted;
}
/**
 * Converts a string from one character set to another.
 *
 * The converters are tried in this order: mbstring, then iconv/libiconv
 * (unless FX_ICONV_DISABLE is defined as true), then the converter object
 * returned by self::getInstance().
 *
 * For "UTF-16" input the byte order mark decides the byte order; input
 * without a BOM is treated as little endian. The BOM bytes are written as
 * "\xFF"/"\xFE" escapes so that detection does not depend on the encoding
 * this source file happens to be saved in.
 *
 * @param mixed  $string       Value to convert; it is cast to string first.
 * @param string $charsetFrom  Source charset.
 * @param string $charsetTo    Target charset.
 * @param string $errorMessage Receives a description of the failure, if any.
 *
 * @return string|false The converted string, or the converter's failure value.
 */
public static function convertEncoding($string, $charsetFrom, $charsetTo, &$errorMessage = "")
{
    $string = strval($string);

    if (strcasecmp($charsetFrom, $charsetTo) == 0) {
        return $string;
    }

    $errorMessage = '';

    if ($string == '') {
        return '';
    }

    $isUtf16 = (strtoupper($charsetFrom) == "UTF-16");
    // First two bytes of the input, compared against the UTF-16 byte order marks
    $bom = substr($string, 0, 2);

    if (extension_loaded("mbstring")) {
        if ($isUtf16) {
            // The mbstring default for UTF-16 is big endian,
            // so little endian has to be requested explicitly
            if ($bom === "\xFF\xFE") {
                return mb_convert_encoding(substr($string, 2), $charsetTo, "UTF-16LE");
            } elseif ($bom === "\xFE\xFF") {
                // Big endian: just remove the BOM bytes
                return mb_convert_encoding(substr($string, 2), $charsetTo, $charsetFrom);
            } else {
                // No BOM: assume little endian
                return mb_convert_encoding($string, $charsetTo, "UTF-16LE");
            }
        } else {
            $res = mb_convert_encoding($string, $charsetTo, $charsetFrom);
            if (strlen($res) > 0) {
                return $res;
            }
        }
    }

    if (!defined("FX_ICONV_DISABLE") || FX_ICONV_DISABLE !== true) {
        $input = $string;
        // Prepend a little endian BOM only when the input carries no BOM at
        // all. The previous "!= A || != B" test was always true and added a
        // second BOM in front of an existing one.
        if ($isUtf16 && $bom !== "\xFF\xFE" && $bom !== "\xFE\xFF") {
            $input = "\xFF\xFE" . $string;
        }

        if (function_exists('iconv')) {
            $res = iconv($charsetFrom, $charsetTo . "//IGNORE", $input);
            if (!$res) {
                $errorMessage .= "Iconv reported failure while converting string to requested character encoding. ";
            }
            return $res;
        } elseif (function_exists('libiconv')) {
            $res = libiconv($charsetFrom, $charsetTo, $input);
            // NOTE(review): the line break inside this message is present in
            // the source as found and is kept byte-for-byte.
            if (!$res) {
                $errorMessage .= "Libiconv reported failure while converting string to requested character encoding. 
";
            }
            return $res;
        }
    }

    $cvt = self::getInstance();
    $res = $cvt->convert($string, $charsetFrom, $charsetTo);
    if (!$res) {
        $arErrors = $cvt->getErrors();
        if (count($arErrors) > 0) {
            $errorMessage = implode("\n", $arErrors);
        }
    }

    return $res;
}
/**
 * Converts UTF-8 text into the charset stored in $this->charSet.
 *
 * Text is returned as is when the charset is 'utf-8'. Otherwise the engine
 * reported by getEngine() picks the converter: iconv (with //TRANSLIT),
 * libiconv or recode; any other engine value falls back to utf8_decode().
 *
 * @param string $text UTF-8 encoded text.
 *
 * @return mixed Result of the converter that was selected.
 */
function fromUTF8($text)
{
    if ($this->charSet == 'utf-8') {
        return $text;
    }

    $engine = $this->getEngine();

    if ($engine == SB_CHARSET_ICONV) {
        $target = $this->charSet . "//TRANSLIT";
        return iconv('utf-8', $target, $text);
    }

    if ($engine == SB_CHARSET_LIBICONV) {
        return libiconv('utf-8', $this->charSet, $text);
    }

    if ($engine == SB_CHARSET_RECODE) {
        $request = 'utf-8' . '..' . $this->charSet;
        return recode_string($request, $text);
    }

    // No dedicated engine matched
    return utf8_decode($text);
}
/**
 * Converts a string between two charsets, choosing the converter from the
 * normalized charset names.
 *
 * Blank input is returned untouched, and so is input whose source and target
 * charsets are equal (unless both are UTF-8). Dedicated routines cover
 * ISO-8859-1 <-> UTF-8 and the modified (IMAP) UTF-7 variants; charsets listed
 * in Utils::$SuppostedCharsets go through iconv, mbstring or libiconv,
 * whichever is reported as available first. Anything else is passed to
 * mb_convert_encoding() without a source charset when mbstring is available.
 *
 * @param string $sInputString
 * @param string $sInputFromEncoding
 * @param string $sInputToEncoding
 *
 * @return string
 */
public static function ConvertEncoding($sInputString, $sInputFromEncoding, $sInputToEncoding)
{
    $sFrom = \MailSo\Base\Utils::normalizeCharset($sInputFromEncoding);
    $sTo = \MailSo\Base\Utils::normalizeCharset($sInputToEncoding);

    if ('' === \trim($sInputString))
    {
        return $sInputString;
    }

    if ($sFrom === $sTo && \MailSo\Base\Enumerations\Charset::UTF_8 !== $sFrom)
    {
        return $sInputString;
    }

    // ISO-8859-1 -> UTF-8
    if ($sFrom === \MailSo\Base\Enumerations\Charset::ISO_8859_1 &&
        $sTo === \MailSo\Base\Enumerations\Charset::UTF_8 &&
        \function_exists('utf8_encode'))
    {
        return \utf8_encode($sInputString);
    }

    // UTF-8 -> ISO-8859-1
    if ($sFrom === \MailSo\Base\Enumerations\Charset::UTF_8 &&
        $sTo === \MailSo\Base\Enumerations\Charset::ISO_8859_1 &&
        \function_exists('utf8_decode'))
    {
        return \utf8_decode($sInputString);
    }

    // Modified UTF-7 -> UTF-8
    if ($sFrom === \MailSo\Base\Enumerations\Charset::UTF_7_IMAP &&
        $sTo === \MailSo\Base\Enumerations\Charset::UTF_8)
    {
        $sDecoded = \MailSo\Base\Utils::Utf7ModifiedToUtf8($sInputString);
        return false === $sDecoded ? $sInputString : $sDecoded;
    }

    // UTF-8 -> modified UTF-7
    if ($sFrom === \MailSo\Base\Enumerations\Charset::UTF_8 &&
        $sTo === \MailSo\Base\Enumerations\Charset::UTF_7_IMAP)
    {
        $sEncoded = \MailSo\Base\Utils::Utf8ToUtf7Modified($sInputString);
        return false === $sEncoded ? $sInputString : $sEncoded;
    }

    // Modified UTF-7 -> anything else: go through plain UTF-7
    if ($sFrom === \MailSo\Base\Enumerations\Charset::UTF_7_IMAP)
    {
        return \MailSo\Base\Utils::ConvertEncoding(
            \MailSo\Base\Utils::ModifiedToPlainUtf7($sInputString),
            \MailSo\Base\Enumerations\Charset::UTF_7,
            $sTo
        );
    }

    // Charsets known to the generic converters
    if (\in_array(\strtolower($sFrom), \MailSo\Base\Utils::$SuppostedCharsets))
    {
        $mConverted = $sInputString;
        if (\MailSo\Base\Utils::IsIconvSupported())
        {
            $mConverted = @\iconv(\strtoupper($sFrom), \strtoupper($sTo) . '//IGNORE', $sInputString);
        }
        elseif (\MailSo\Base\Utils::IsMbStringSupported())
        {
            $mConverted = @\mb_convert_encoding($sInputString, \strtoupper($sTo), \strtoupper($sFrom));
        }
        elseif (\MailSo\Base\Utils::IsLibIconvSupported())
        {
            $mConverted = @\libiconv(\strtoupper($sFrom), \strtoupper($sTo), $sInputString);
        }

        return false !== $mConverted ? $mConverted : $sInputString;
    }

    // Unknown source charset
    if (\MailSo\Base\Utils::IsMbStringSupported())
    {
        return @\mb_convert_encoding($sInputString, $sTo);
    }

    return $sInputString;
}
/**
 * Converts a string from one charset to another with the first available
 * converter: iconv, then libiconv, then recode_string.
 *
 * Only the first converter that exists is tried. When it reports failure, or
 * when none is available, the input string is returned unchanged.
 *
 * @param string $srcCharset  Charset of $str.
 * @param string $destCharset Charset to convert to.
 * @param string $str         Text to convert.
 *
 * @return string The converted text, or $str when no conversion took place.
 */
function confCharset($srcCharset, $destCharset, $str)
{
    $ret = false;

    if (function_exists('iconv')) {
        $ret = iconv($srcCharset, $destCharset, $str);
    } elseif (function_exists('libiconv')) {
        $ret = libiconv($srcCharset, $destCharset, $str);
    } elseif (function_exists('recode_string')) {
        $ret = recode_string($srcCharset . '..' . $destCharset, $str);
    }

    // Compare against false explicitly: a converted "0" or "" is a valid
    // result and must not be mistaken for a failed conversion.
    if ($ret === false || $ret === null) {
        return $str;
    }

    return $ret;
}