/**
 * Decode euc-kr encoded string
 *
 * Strings without 8bit symbols are returned untouched. Otherwise the first
 * available backend is used, in this order: recode, iconv (combined with
 * the utf-8 decoder from functions/decode/utf_8.php) and mbstring. When no
 * backend is usable the string is returned unchanged.
 *
 * @param string $string euc-kr string
 * @param boolean $save_html don't html encode special characters if true
 *  (only the recode backend looks at this flag)
 * @return string $string decoded string
 */
function charset_decode_euc_kr($string, $save_html = false) {
    // global $aggressive_decoding;

    // don't do decoding when there are no 8bit symbols
    if (!sq_is8bit($string, 'euc-kr')) {
        return $string;
    }

    // Entity forms of the html special characters and their raw equivalents.
    // '&amp;' is replaced last, so '&amp;lt;' turns into '&lt;' and not '<'.
    $aEntities = array('&quot;', '&lt;', '&gt;', '&amp;');
    $aRawChars = array('"', '<', '>', '&');

    // this is CPU intensive task. Use recode functions if they are available.
    if (function_exists('recode_string')) {
        // if string is already sanitized, undo htmlspecial chars
        // so that recode does not encode them a second time
        if (!$save_html) {
            $string = str_replace($aEntities, $aRawChars, $string);
        }

        $string = recode_string("euc-kr..html", $string);

        // if string sanitizing is not needed, undo htmlspecialchars applied by recode.
        if ($save_html) {
            $string = str_replace($aEntities, $aRawChars, $string);
        }
        return $string;
    }

    /*
     * iconv does not support html target, but internal utf-8 decoding is faster
     * than pure php implementation.
     */
    if (function_exists('iconv') && file_exists(SM_PATH . 'functions/decode/utf_8.php')) {
        include_once SM_PATH . 'functions/decode/utf_8.php';
        $sUtf8 = iconv('euc-kr', 'utf-8', $string);
        // iconv() returns false on failure; in that case try the next
        // backend instead of handing false to the utf-8 decoder.
        if ($sUtf8 !== false) {
            return charset_decode_utf_8($sUtf8);
        }
    }

    // try mbstring
    if (function_exists('mb_convert_encoding')
        && function_exists('sq_mb_list_encodings')
        && check_php_version(4, 3, 0)
        && in_array('euc-kr', sq_mb_list_encodings())) {
        return mb_convert_encoding($string, 'HTML-ENTITIES', 'EUC-KR');
    }

    // no usable backend: hand the string back as it came in
    return $string;
}
/**
 * Guarded wrapper around mb_convert_encoding().
 *
 * The conversion is attempted only when the lowercased $default_charset
 * is present in the list returned by sq_mb_list_encodings() and the
 * mb_convert_encoding() function exists. In every other case an empty
 * string is returned.
 *
 * @param string $str string that should be converted
 * @param string $to_encoding target encoding, passed to mb_convert_encoding()
 * @param string $from_encoding source encoding, passed to mb_convert_encoding()
 * @param string $default_charset charset that is looked up in the list of
 *  supported encodings
 * @return string converted string, or empty string when conversion is not possible
 */
function sqimap_mb_convert_encoding($str, $to_encoding, $from_encoding, $default_charset) {
    $aAvailable = sq_mb_list_encodings();
    $sWanted = strtolower($default_charset);

    $bCanConvert = in_array($sWanted, $aAvailable)
        && function_exists('mb_convert_encoding');

    if (!$bCanConvert) {
        return '';
    }

    return mb_convert_encoding($str, $to_encoding, $from_encoding);
}
/**
 * Function returns number of characters in string.
 *
 * Returned number might be different from number of bytes in string,
 * if $charset is multibyte charset. Detection depends on mbstring
 * functions. If mbstring does not support tested multibyte charset,
 * vanilla string length function is used.
 *
 * The charset name is compared case-insensitively. The special value
 * 'auto' is replaced by the global $default_charset after calling
 * set_my_charset().
 *
 * @param string $str string
 * @param string $charset charset; null (default) means plain byte length
 * @since 1.5.1 and 1.4.6
 * @return integer number of characters in string
 */
function sq_strlen($str, $charset = null) {
    // default option
    if (is_null($charset)) {
        return strlen($str);
    }

    // lowercase charset name
    $charset = strtolower($charset);

    // use automatic charset detection, if function call asks for it
    if ($charset == 'auto') {
        global $default_charset;
        set_my_charset();
        // The list below is lowercase, so the resolved name has to be
        // lowercased as well; otherwise a value such as 'UTF-8' would
        // silently fall back to the byte based strlen().
        $charset = strtolower($default_charset);
    }

    // Use mbstring only with listed charsets
    $aList_of_mb_charsets = array('utf-8', 'big5', 'gb2312', 'gb18030', 'euc-jp', 'euc-cn', 'euc-tw', 'euc-kr');

    // calculate string length according to charset
    if (in_array($charset, $aList_of_mb_charsets)
        && function_exists('mb_strlen')
        && in_array($charset, sq_mb_list_encodings())) {
        return mb_strlen($str, $charset);
    }

    // own strlen detection code is removed because missing strpos,
    // strtoupper and substr implementations break string wrapping.
    return strlen($str);
}
/**
 * Encodes header as quoted-printable
 *
 * Encode a string according to RFC 1522 for use in headers if it
 * contains 8-bit characters or anything that looks like it should
 * be encoded.
 *
 * Processing order:
 *  1. If the active language defines an XTRA_CODE function, the work is
 *     delegated to it (called with 'encodeheader' and the string).
 *  2. If $default_charset is one of the listed multibyte charsets, is
 *     supported according to sq_mb_list_encodings() and the string holds
 *     8-bit characters, encodeHeaderBase64() is used.
 *  3. Otherwise the string is Q encoded below. Only bytes above 126 switch
 *     encoding on; a string without such bytes is returned unchanged.
 *
 * @param string $string header value
 * @return string encoded header value
 */
function encodeHeader($string) {
    global $default_charset, $languages, $squirrelmail_language;

    if (isset($languages[$squirrelmail_language]['XTRA_CODE'])
        && function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
        return $languages[$squirrelmail_language]['XTRA_CODE']('encodeheader', $string);
    }

    // Use B encoding for multibyte charsets
    $mb_charsets = array('utf-8', 'big5', 'gb2312', 'euc-kr');
    if (in_array($default_charset, $mb_charsets)
        && in_array($default_charset, sq_mb_list_encodings())
        && sq_is8bit($string)) {
        return encodeHeaderBase64($string, $default_charset);
    }
    // A multibyte charset that is not supported by mbstring falls through
    // to Q encoding. No notice is raised here on purpose: it can cause a
    // 'Cannot add header information' warning in compose.php.

    // Encode only if the string contains 8-bit characters or =?
    $j = strlen($string);
    // 75 is the length limit applied to one encoded word; 7 is the length
    // of the fixed parts '=?', '?Q?' and '?=' around charset and text.
    $max_l = 75 - strlen($default_charset) - 7;
    $aRet = array();        // finished pieces, plain and encoded
    $ret = '';              // text of the encoded word that is being built
    $iEncStart = $enc_init = false; // start offset of current encoded word / any 8-bit byte seen
    $cur_l = $iOffset = 0;  // length of current encoded text / start of pending plain part
    for ($i = 0; $i < $j; ++$i) {
        switch ($string[$i]) {
            case '=':
            case '<':
            case '>':
            case ',':
            case '?':
            case '_':
                // characters that have to be written as =XX
                if ($iEncStart === false) {
                    $iEncStart = $i;
                }
                $cur_l += 3;
                if ($cur_l > $max_l - 2) {
                    /* if there is an stringpart that doesn't need encoding, add it */
                    $aRet[] = substr($string, $iOffset, $iEncStart - $iOffset);
                    $aRet[] = "=?{$default_charset}?Q?{$ret}?=";
                    $iOffset = $i;
                    $cur_l = 0;
                    $ret = '';
                    $iEncStart = false;
                } else {
                    $ret .= sprintf("=%02X", ord($string[$i]));
                }
                break;
            case '(':
            case ')':
                // parentheses close the current encoded word and stay literal
                if ($iEncStart !== false) {
                    $aRet[] = substr($string, $iOffset, $iEncStart - $iOffset);
                    $aRet[] = "=?{$default_charset}?Q?{$ret}?=";
                    $iOffset = $i;
                    $cur_l = 0;
                    $ret = '';
                    $iEncStart = false;
                }
                break;
            case ' ':
                // inside an encoded word a space is written as '_'
                if ($iEncStart !== false) {
                    $cur_l++;
                    if ($cur_l > $max_l) {
                        $aRet[] = substr($string, $iOffset, $iEncStart - $iOffset);
                        $aRet[] = "=?{$default_charset}?Q?{$ret}?=";
                        $iOffset = $i;
                        $cur_l = 0;
                        $ret = '';
                        $iEncStart = false;
                    } else {
                        $ret .= '_';
                    }
                }
                break;
            default:
                $k = ord($string[$i]);
                if ($k > 126) {
                    if ($iEncStart === false) {
                        // do not start encoding in the middle of a string, also take the rest of the word.
                        $sLeadString = substr($string, 0, $i);
                        $aLeadString = explode(' ', $sLeadString);
                        $sToBeEncoded = array_pop($aLeadString);
                        $iEncStart = $i - strlen($sToBeEncoded);
                        $ret .= $sToBeEncoded;
                        $cur_l += strlen($sToBeEncoded);
                    }
                    $cur_l += 3;
                    /* first we add the encoded string that reached it's max size */
                    if ($cur_l > $max_l - 2) {
                        $aRet[] = substr($string, $iOffset, $iEncStart - $iOffset);
                        /* the next part is also encoded => separate by space */
                        $aRet[] = "=?{$default_charset}?Q?{$ret}?= ";
                        $cur_l = 3;
                        $ret = '';
                        $iOffset = $i;
                        $iEncStart = $i;
                    }
                    $enc_init = true;
                    $ret .= sprintf("=%02X", $k);
                } else {
                    // ordinary character: copied as is while an encoded word is open
                    if ($iEncStart !== false) {
                        $cur_l++;
                        if ($cur_l > $max_l) {
                            $aRet[] = substr($string, $iOffset, $iEncStart - $iOffset);
                            $aRet[] = "=?{$default_charset}?Q?{$ret}?=";
                            $iEncStart = false;
                            $iOffset = $i;
                            $cur_l = 0;
                            $ret = '';
                        } else {
                            $ret .= $string[$i];
                        }
                    }
                }
                break;
        }
    }

    // The collected pieces replace the input only if an 8-bit byte was seen.
    if ($enc_init) {
        if ($iEncStart !== false) {
            $aRet[] = substr($string, $iOffset, $iEncStart - $iOffset);
            $aRet[] = "=?{$default_charset}?Q?{$ret}?=";
        } else {
            $aRet[] = substr($string, $iOffset);
        }
        $string = implode('', $aRet);
    }
    return $string;
}
/**
 * Wrapper that is used to switch between vanilla and multibyte strtoupper
 * functions.
 *
 * With $charset set to 'auto' the charset is taken from the global
 * $default_charset after calling set_my_charset(); for the 'ja_JP'
 * language 'euc-jp' is used instead. The resolved value is remembered
 * for later 'auto' calls.
 *
 * mb_strtoupper() is used when it exists and the lowercased charset is
 * listed by sq_mb_list_encodings(); otherwise strtoupper() is used.
 *
 * @param string $string
 * @param string $charset charset name or 'auto'
 * @return string
 * @since 1.5.1
 * @link http://www.php.net/strtoupper
 * @link http://www.php.net/mb_strtoupper
 */
function sq_strtoupper($string, $charset = 'auto') {
    // $charset_auto: charset resolved for 'auto' on the first such call
    // $aUse_mb: per charset flag, true when mb_strtoupper() can be used
    static $charset_auto;
    static $aUse_mb = array();

    // use automatic charset detection, if function call asks for it
    if ($charset == 'auto') {
        if (!isset($charset_auto)) {
            global $default_charset, $squirrelmail_language;
            set_my_charset();
            $charset = $default_charset;
            if ($squirrelmail_language == 'ja_JP') {
                $charset = 'euc-jp';
            }
            $charset_auto = $charset;
        } else {
            $charset = $charset_auto;
        }
    }
    $charset = strtolower($charset);

    // in_array call is expensive => do it once per charset and use a static
    // var for storing the results. The result has to be stored per charset:
    // a single shared flag would apply the answer found for the first
    // charset to every charset used afterwards.
    if (!isset($aUse_mb[$charset])) {
        $aUse_mb[$charset] = function_exists('mb_strtoupper')
            && in_array($charset, sq_mb_list_encodings());
    }

    if ($aUse_mb[$charset]) {
        return mb_strtoupper($string, $charset);
    }

    // TODO: add mbstring independent code

    // use vanilla string functions as last option
    return strtoupper($string);
}