/**
 * Transliterate a string that contains non-latin characters.
 *
 * Pure ASCII input is handed back untouched. Anything else is run through
 * strtr() with the replacement pairs returned by romanizeFile(null).
 *
 * @author Andreas Gohr <*****@*****.**>
 *
 * @param string $string text to romanize
 * @return string the input with all known replacement pairs applied
 */
function utf8Romanize($string)
{
    // only strings carrying non-ASCII bytes need the replacement table
    if (!utf8_isASCII($string)) {
        return strtr($string, romanizeFile(null));
    }

    // nothing to do
    return $string;
}
/**
 * Romanize a string using the global $UTF8_ROMANIZATION replacement table.
 *
 * @param string $string text to romanize
 * @return string the unchanged input if it is plain ASCII, otherwise the
 *                result of applying the table with strtr()
 */
function utf8_romanize($string)
{
    if (!utf8_isASCII($string)) {
        global $UTF8_ROMANIZATION;

        // see: http://php.net/manual/de/function.strtr.php
        return strtr($string, $UTF8_ROMANIZATION);
    }

    // nothing to do
    return $string;
}
/**
 * Replace non-latin characters of a string by their romanized form.
 *
 * The replacement pairs are read from the global $UTF8_ROMANIZATION and
 * applied with strtr(). Strings that are already plain ASCII are returned
 * as they are.
 *
 * @author Andreas Gohr <*****@*****.**>
 *
 * @param string $string text to romanize
 * @return string the romanized text
 */
function utf8_romanize($string)
{
    $isPlainAscii = utf8_isASCII($string);
    if ($isPlainAscii) {
        // nothing to do
        return $string;
    }

    global $UTF8_ROMANIZATION;
    $romanized = strtr($string, $UTF8_ROMANIZATION);

    return $romanized;
}
/**
 * Convert a string from the given charset to UTF-8.
 *
 * Conversion strategy, in order:
 *  1. UTF-8 or plain ASCII input needs no conversion.
 *  2. ISO-8859-1 is handled by utf8_encode(), ISO-8859-2 .. ISO-8859-11 by
 *     the global strtr() tables.
 *  3. Everything else goes through mb_convert_encoding() when UTF8_MBSTRING
 *     is set (except GB2312), otherwise through iconv() when available.
 *
 * If no strategy is available, or the library conversion itself fails, an
 * E_USER_WARNING is raised and the input is returned unconverted.
 *
 * @param string $str             text to convert
 * @param string $charset_in      name of the source charset (case-insensitive);
 *                                an empty name is treated as UTF-8
 * @param bool   $decode_entities when true, HTML entities found in the UTF-8
 *                                result are replaced via utf8_fast_entities_to_umlauts()
 * @return string the UTF-8 text, or the original $str when it could not be converted
 */
function charset_to_utf8($str, $charset_in = DEFAULT_CHARSET, $decode_entities = true)
{
    global $iso_8859_2_to_utf8, $iso_8859_3_to_utf8, $iso_8859_4_to_utf8, $iso_8859_5_to_utf8,
           $iso_8859_6_to_utf8, $iso_8859_7_to_utf8, $iso_8859_8_to_utf8, $iso_8859_9_to_utf8,
           $iso_8859_10_to_utf8, $iso_8859_11_to_utf8;

    $charset_in = strtoupper($charset_in);
    if ($charset_in == "") {
        $charset_in = 'UTF-8';
    }

    $converted = false;

    if (!function_exists('iconv') && !UTF8_MBSTRING && ($charset_in == 'BIG5' || $charset_in == 'ISO-2022-JP' || $charset_in == 'ISO-2022-KR') || !function_exists('iconv') && $charset_in == 'GB2312') {
        // Nothing we can do here :-(
        // Charset is one of those obscure ISO-2022... or BIG5, GB2312 or something
        // and we can't use mb_convert_encoding() or iconv();
        // Emit an error-message.
        trigger_error("Can't convert from {$charset_in} without mb_convert_encoding() or iconv(). Use UTF-8 instead.", E_USER_WARNING);
        return $str;
    }

    // check if we have UTF-8 or a plain ASCII string
    if ($charset_in == 'UTF-8' || utf8_isASCII($str)) {
        // we have utf-8. Just replace HTML-entities and return
        if ($decode_entities && preg_match('/&[#0-9a-zA-Z]+;/', $str)) {
            return utf8_fast_entities_to_umlauts($str);
        }

        // nothing to do
        return $str;
    }

    // Convert $str to utf8
    if (substr($charset_in, 0, 8) == 'ISO-8859') {
        // assume a known ISO-8859 variant; the default case revokes this
        $converted = true;
        switch ($charset_in) {
            case 'ISO-8859-1':
                $str = utf8_encode($str);
                break;
            case 'ISO-8859-2':
                $str = strtr($str, $iso_8859_2_to_utf8);
                break;
            case 'ISO-8859-3':
                $str = strtr($str, $iso_8859_3_to_utf8);
                break;
            case 'ISO-8859-4':
                $str = strtr($str, $iso_8859_4_to_utf8);
                break;
            case 'ISO-8859-5':
                $str = strtr($str, $iso_8859_5_to_utf8);
                break;
            case 'ISO-8859-6':
                $str = strtr($str, $iso_8859_6_to_utf8);
                break;
            case 'ISO-8859-7':
                $str = strtr($str, $iso_8859_7_to_utf8);
                break;
            case 'ISO-8859-8':
                $str = strtr($str, $iso_8859_8_to_utf8);
                break;
            case 'ISO-8859-9':
                $str = strtr($str, $iso_8859_9_to_utf8);
                break;
            case 'ISO-8859-10':
                $str = strtr($str, $iso_8859_10_to_utf8);
                break;
            case 'ISO-8859-11':
                $str = strtr($str, $iso_8859_11_to_utf8);
                break;
            default:
                // unknown ISO-8859 variant, leave it to mbstring/iconv below
                $converted = false;
        }
    }

    // null = no library conversion attempted, false = the attempt failed
    $result = null;
    if (!$converted && UTF8_MBSTRING && $charset_in != 'GB2312') {
        // $charset is neither UTF-8 nor a known ISO-8859...
        // Try mb_convert_encoding() - but there's no GB2312 encoding in php's mb_* functions
        $result = mb_convert_encoding($str, 'UTF-8', $charset_in);
    } elseif (!$converted && function_exists('iconv')) {
        // Try iconv
        $result = iconv($charset_in, 'UTF-8', $str);
    }

    if ($result === false) {
        // Both library functions signal failure with false; never hand that
        // back to callers that expect a string.
        trigger_error("Conversion from {$charset_in} to UTF-8 failed.", E_USER_WARNING);
        return $str;
    }
    if ($result !== null) {
        $str = $result;
        $converted = true;
    }

    if ($converted) {
        // we have utf-8, now replace HTML-entities and return
        if ($decode_entities && preg_match('/&[#0-9a-zA-Z]+;/', $str)) {
            $str = utf8_fast_entities_to_umlauts($str);
        }
        return $str;
    }

    // Nothing we can do here :-(
    // Charset is one of those obscure ISO-2022... or BIG5, GB2312 or something
    // and we can't use mb_convert_encoding() or iconv();
    // Emit an error-message.
    trigger_error("Can't convert from {$charset_in} without mb_convert_encoding() or iconv(). Use UTF-8 instead.", E_USER_WARNING);
    return $str;
}
/**
 * Encodes an email address header
 *
 * Display names containing non-ASCII characters are encoded
 * quoted_printable for headers; when MAILHEADER_ASCIIONLY is defined they
 * are deaccented and stripped first.
 * Addresses may not contain Non-ASCII data! Such addresses, as well as
 * addresses rejected by mail_isvalid(), are reported via msg() and skipped.
 *
 * Example:
 *   mail_encode_address("föö <*****@*****.**>, me@somewhere.com","TBcc");
 *
 * @param string  $string Multiple addresses separated by commas
 * @param string  $header Name of the header (To,Bcc,Cc,...)
 * @param boolean $names  Allow named Recipients?
 * @return string|null the encoded address list (prefixed with the header
 *                     name and terminated by MAILHEADER_EOL when $header is
 *                     given), or null when no usable address was found
 */
function mail_encode_address($string, $header = '', $names = true)
{
    $headers = '';
    $parts = explode(',', $string);
    foreach ($parts as $part) {
        $part = trim($part);

        // parse address
        if (preg_match('#(.*?)<(.*?)>#', $part, $matches)) {
            $text = trim($matches[1]);
            $addr = $matches[2];
        } else {
            // bare address: clear the display name so the one parsed for a
            // previous recipient is not attached to this address
            $text = '';
            $addr = $part;
        }

        // skip empty ones
        if (empty($addr)) {
            continue;
        }

        // FIXME: is there a way to encode the localpart of a emailaddress?
        if (!utf8_isASCII($addr)) {
            msg(htmlspecialchars("E-Mail address <{$addr}> is not ASCII"), -1);
            continue;
        }

        if (!mail_isvalid($addr)) {
            msg(htmlspecialchars("E-Mail address <{$addr}> is not valid"), -1);
            continue;
        }

        // text was given
        if (!empty($text) && $names) {
            // add address quotes
            $addr = "<{$addr}>";

            if (defined('MAILHEADER_ASCIIONLY')) {
                $text = utf8_deaccent($text);
                $text = utf8_strip($text);
            }

            if (!utf8_isASCII($text)) {
                // put the quotes outside as in =?UTF-8?Q?"Elan Ruusam=C3=A4e"?= vs "=?UTF-8?Q?Elan Ruusam=C3=A4e?="
                if (preg_match('/^"(.+)"$/', $text, $matches)) {
                    $text = '"=?UTF-8?Q?' . mail_quotedprintable_encode($matches[1], 0) . '?="';
                } else {
                    $text = '=?UTF-8?Q?' . mail_quotedprintable_encode($text, 0) . '?=';
                }
                // additionally the space character should be encoded as =20 (or each
                // word QP encoded separately).
                // however this is needed only in mail headers, not globally in mail_quotedprintable_encode().
                $text = str_replace(" ", "=20", $text);
            }
        } else {
            $text = '';
        }

        // add to header comma separated
        if ($headers != '') {
            $headers .= ',';
            if ($header) {
                // avoid overlong mail headers
                $headers .= MAILHEADER_EOL . ' ';
            }
        }
        $headers .= $text . ' ' . $addr;
    }

    if (empty($headers)) {
        return null;
    }

    //if headername was given add it and close correctly
    if ($header) {
        $headers = $header . ': ' . $headers . MAILHEADER_EOL;
    }

    return $headers;
}
/**
 * Normalise and encode the collected mail headers in place.
 *
 * - falls back to $conf['mailfrom'] when no From header is set
 * - passes every address header through cleanAddress()
 * - prepends the "[prefix]" to the Subject unless it already starts with it
 * - deaccents/strips the Subject when MAILHEADER_ASCIIONLY is defined and
 *   base64-encodes it when it still contains non-ASCII bytes
 *
 * @return void
 */
protected function cleanHeaders()
{
    global $conf;

    // make sure there is a sender
    if (empty($this->headers['From'])) {
        $this->from($conf['mailfrom']);
    }

    // clean up addresses
    foreach (array('To', 'From', 'Cc', 'Bcc', 'Reply-To', 'Sender') as $name) {
        if (!isset($this->headers[$name])) {
            continue;
        }
        $this->headers[$name] = $this->cleanAddress($this->headers[$name]);
    }

    // everything below deals with the subject only
    if (!isset($this->headers['Subject'])) {
        return;
    }

    // pick the prefix: explicit mailprefix wins, else the (shortened) title
    if (!empty($conf['mailprefix'])) {
        $prefix = '[' . $conf['mailprefix'] . ']';
    } elseif (utf8_strlen($conf['title']) < 20) {
        $prefix = '[' . $conf['title'] . ']';
    } else {
        $prefix = '[' . utf8_substr($conf['title'], 0, 20) . '...]';
    }

    // add prefix to subject unless it is already there
    $subject = $this->headers['Subject'];
    if (substr($subject, 0, strlen($prefix)) != $prefix) {
        $subject = $prefix . ' ' . $subject;
    }

    // encode subject
    if (defined('MAILHEADER_ASCIIONLY')) {
        $subject = utf8_strip(utf8_deaccent($subject));
    }
    if (!utf8_isASCII($subject)) {
        $subject = '=?UTF-8?B?' . base64_encode($subject) . '?=';
    }

    $this->headers['Subject'] = $subject;
}
/**
 * Encodes an email address header
 *
 * Display names containing non-ASCII characters are encoded
 * quoted_printable for headers; when MAILHEADER_ASCIIONLY is defined they
 * are deaccented and stripped first.
 * Addresses may not contain Non-ASCII data! Such addresses, as well as
 * addresses rejected by mail_isvalid(), are reported via msg() and skipped.
 *
 * Example:
 *   mail_encode_address("föö <*****@*****.**>, me@somewhere.com","TBcc");
 *
 * @param string  $string Multiple addresses separated by commas
 * @param string  $header Name of the header (To,Bcc,Cc,...)
 * @param boolean $names  Allow named Recipients?
 * @return string|null the encoded address list (prefixed with the header
 *                     name and terminated by MAILHEADER_EOL when $header is
 *                     given), or null when no usable address was found
 */
function mail_encode_address($string, $header = '', $names = true)
{
    $headers = '';
    // explode() instead of split(): split() was removed in PHP 7 and the
    // separator is a literal comma, so no regex is needed
    $parts = explode(',', $string);
    foreach ($parts as $part) {
        $part = trim($part);

        // parse address
        if (preg_match('#(.*?)<(.*?)>#', $part, $matches)) {
            $text = trim($matches[1]);
            $addr = $matches[2];
        } else {
            // bare address: clear the display name so the one parsed for a
            // previous recipient is not attached to this address
            $text = '';
            $addr = $part;
        }

        // skip empty ones
        if (empty($addr)) {
            continue;
        }

        // FIXME: is there a way to encode the localpart of a emailaddress?
        if (!utf8_isASCII($addr)) {
            msg(htmlspecialchars("E-Mail address <{$addr}> is not ASCII"), -1);
            continue;
        }

        if (!mail_isvalid($addr)) {
            msg(htmlspecialchars("E-Mail address <{$addr}> is not valid"), -1);
            continue;
        }

        // text was given
        if (!empty($text) && $names) {
            // add address quotes
            $addr = "<{$addr}>";

            if (defined('MAILHEADER_ASCIIONLY')) {
                $text = utf8_deaccent($text);
                $text = utf8_strip($text);
            }

            if (!utf8_isASCII($text)) {
                $text = '=?UTF-8?Q?' . mail_quotedprintable_encode($text, 0) . '?=';
            }
        } else {
            $text = '';
        }

        // add to header comma separated and in new line to avoid too long headers
        if ($headers != '') {
            $headers .= ',' . MAILHEADER_EOL . ' ';
        }
        $headers .= $text . ' ' . $addr;
    }

    if (empty($headers)) {
        return null;
    }

    //if headername was given add it and close correctly
    if ($header) {
        $headers = $header . ': ' . $headers . MAILHEADER_EOL;
    }

    return $headers;
}
/**
 * Convert a UTF-8 string into the given target charset.
 *
 * Conversion strategy, in order:
 *  1. A UTF-8 target or plain ASCII input needs no conversion.
 *  2. ISO-8859-1 is handled by utf8_decode(), ISO-8859-2 .. ISO-8859-11 by
 *     the global strtr() tables.
 *  3. Everything else goes through mb_convert_encoding() when UTF8_MBSTRING
 *     is set (except GB2312), otherwise through iconv() when available.
 *
 * If no strategy is available, or the library conversion itself fails, an
 * E_USER_WARNING is raised and the input is returned unconverted.
 *
 * HTML entities are not touched here.
 *
 * @param string $str         UTF-8 text to convert
 * @param string $charset_out name of the target charset (case-insensitive);
 *                            an empty name is treated as UTF-8, matching
 *                            charset_to_utf8()
 * @return string the converted text, or the original $str when it could not be converted
 */
function utf8_to_charset($str, $charset_out = DEFAULT_CHARSET)
{
    global $utf8_to_iso_8859_2, $utf8_to_iso_8859_3, $utf8_to_iso_8859_4, $utf8_to_iso_8859_5,
           $utf8_to_iso_8859_6, $utf8_to_iso_8859_7, $utf8_to_iso_8859_8, $utf8_to_iso_8859_9,
           $utf8_to_iso_8859_10, $utf8_to_iso_8859_11;

    $charset_out = strtoupper($charset_out);
    if ($charset_out == "") {
        // same default as charset_to_utf8() uses for an empty charset name
        $charset_out = 'UTF-8';
    }

    $converted = false;

    if (!function_exists('iconv') && !UTF8_MBSTRING && ($charset_out == 'BIG5' || $charset_out == 'ISO-2022-JP' || $charset_out == 'ISO-2022-KR') || !function_exists('iconv') && $charset_out == 'GB2312') {
        // Nothing we can do here :-(
        // Charset is one of those obscure ISO-2022... or BIG5, GB2312 or something
        // and we can't use mb_convert_encoding() or iconv();
        // Emit an error-message.
        trigger_error("Can't convert into {$charset_out} without mb_convert_encoding() or iconv(). Use UTF-8 instead.", E_USER_WARNING);
        return $str;
    }

    // check if we need to convert
    if ($charset_out == 'UTF-8' || utf8_isASCII($str)) {
        // Nothing to do. Just return
        return $str;
    }

    // Convert $str to $charset_out
    if (substr($charset_out, 0, 8) == 'ISO-8859') {
        // assume a known ISO-8859 variant; the default case revokes this
        $converted = true;
        switch ($charset_out) {
            case 'ISO-8859-1':
                $str = utf8_decode($str);
                break;
            case 'ISO-8859-2':
                $str = strtr($str, $utf8_to_iso_8859_2);
                break;
            case 'ISO-8859-3':
                $str = strtr($str, $utf8_to_iso_8859_3);
                break;
            case 'ISO-8859-4':
                $str = strtr($str, $utf8_to_iso_8859_4);
                break;
            case 'ISO-8859-5':
                $str = strtr($str, $utf8_to_iso_8859_5);
                break;
            case 'ISO-8859-6':
                $str = strtr($str, $utf8_to_iso_8859_6);
                break;
            case 'ISO-8859-7':
                $str = strtr($str, $utf8_to_iso_8859_7);
                break;
            case 'ISO-8859-8':
                $str = strtr($str, $utf8_to_iso_8859_8);
                break;
            case 'ISO-8859-9':
                $str = strtr($str, $utf8_to_iso_8859_9);
                break;
            case 'ISO-8859-10':
                $str = strtr($str, $utf8_to_iso_8859_10);
                break;
            case 'ISO-8859-11':
                $str = strtr($str, $utf8_to_iso_8859_11);
                break;
            default:
                // unknown ISO-8859 variant, leave it to mbstring/iconv below
                $converted = false;
        }
    }

    // null = no library conversion attempted, false = the attempt failed
    $result = null;
    if (!$converted && UTF8_MBSTRING && $charset_out != 'GB2312') {
        // $charset is neither UTF-8 nor a known ISO-8859...
        // Try mb_convert_encoding() - but there's no GB2312 encoding in php's mb_* functions
        $result = mb_convert_encoding($str, $charset_out, 'UTF-8');
    } elseif (!$converted && function_exists('iconv')) {
        // Try iconv
        $result = iconv('UTF-8', $charset_out, $str);
    }

    if ($result === false) {
        // Both library functions signal failure with false; never hand that
        // back to callers that expect a string.
        trigger_error("Conversion from UTF-8 to {$charset_out} failed.", E_USER_WARNING);
        return $str;
    }
    if ($result !== null) {
        $str = $result;
        $converted = true;
    }

    if ($converted) {
        return $str;
    }

    // Nothing we can do here :-(
    // Charset is one of those obscure ISO-2022... or BIG5, GB2312 or something
    // and we can't use mb_convert_encoding() or iconv();
    // Emit an error-message.
    trigger_error("Can't convert into {$charset_out} without mb_convert_encoding() or iconv(). Use UTF-8 instead.", E_USER_WARNING);
    return $str;
}
/**
 * Romanize a string containing non-latin characters.
 *
 * Plain ASCII strings are returned as given. All other strings are run
 * through strtr() with the pairs returned by CharacterTable::romanize().
 *
 * @author Andreas Gohr <*****@*****.**>
 *
 * @param string $string text to romanize
 * @return string the romanized text
 */
function utf8_romanize($string)
{
    if (!utf8_isASCII($string)) {
        $table = Koch\Localization\UTF8\CharacterTable::romanize();

        return strtr($string, $table);
    }

    // nothing to do
    return $string;
}
/**
 * Check whether a string consists solely of 7-bit ASCII bytes.
 *
 * The previous body called itself unconditionally and therefore never
 * returned; this performs the actual byte check.
 *
 * @param string $str the string to inspect
 * @return bool true when no byte above 0x7F is present (the empty string
 *              counts as ASCII), false otherwise
 */
function utf8_isASCII($str)
{
    // a single byte outside 0x00-0x7F is enough to make the string non-ASCII
    return preg_match('/[^\x00-\x7F]/', $str) !== 1;
}