コード例 #1
0
ファイル: output.php プロジェクト: WurdahMekanik/hlf-ndxz
/**
 * Romanize a non-latin string.
 *
 * Strings that utf8_isASCII() accepts are handed back untouched. Everything
 * else is passed through strtr() with the table returned by
 * romanizeFile(null) (presumably a "from => to" replacement map - confirm
 * against romanizeFile()).
 *
 * @author Andreas Gohr <*****@*****.**>
 *
 * @param string $string Text to romanize
 * @return string The input itself, or the result of the strtr() replacement
 */
function utf8Romanize($string)
{
    if (!utf8_isASCII($string)) {
        // only non-ASCII input needs the replacement table
        return strtr($string, romanizeFile(null));
    }

    // nothing to do
    return $string;
}
コード例 #2
0
/**
 * Romanize a non-latin string.
 *
 * Input accepted by utf8_isASCII() is returned unchanged; any other input is
 * run through strtr() with the global $UTF8_ROMANIZATION table.
 *
 * @param string $string Text to romanize
 * @return string The input itself, or the result of the strtr() replacement
 */
function utf8_romanize($string)
{
    $isPlainAscii = utf8_isASCII($string);
    if ($isPlainAscii) {
        // nothing to do
        return $string;
    }

    // The table is only pulled into scope once we know it is needed.
    global $UTF8_ROMANIZATION;

    // see: http://php.net/manual/de/function.strtr.php
    $romanized = strtr($string, $UTF8_ROMANIZATION);

    return $romanized;
}
コード例 #3
0
ファイル: utf8.php プロジェクト: reeze/dokuwiki
 /**
  * Romanize a non-latin string.
  *
  * Returns the input as-is when utf8_isASCII() accepts it; otherwise applies
  * the global $UTF8_ROMANIZATION table through strtr().
  *
  * @author Andreas Gohr <*****@*****.**>
  *
  * @param string $string Text to romanize
  * @return string The input itself, or the result of the strtr() replacement
  */
 function utf8_romanize($string)
 {
     if (!utf8_isASCII($string)) {
         // replacement table is only needed for non-ASCII input
         global $UTF8_ROMANIZATION;

         return strtr($string, $UTF8_ROMANIZATION);
     }

     // nothing to do
     return $string;
 }
コード例 #4
0
/**
 * Convert a string from the given charset to UTF-8.
 *
 * Processing order:
 *  1. The charset name is upper-cased; an empty name is treated as 'UTF-8'.
 *  2. If the charset needs iconv()/mbstring and neither is usable, a
 *     E_USER_WARNING is triggered and the input is returned unconverted.
 *  3. UTF-8 input (or input accepted by utf8_isASCII()) is returned as-is,
 *     after optional HTML entity decoding.
 *  4. ISO-8859-1 goes through utf8_encode(); ISO-8859-2 .. ISO-8859-11 go
 *     through strtr() with the matching global table.
 *  5. Anything still unconverted is tried with mb_convert_encoding() (when
 *     UTF8_MBSTRING is truthy and the charset is not GB2312), else iconv().
 *
 * @param string $str             Text to convert
 * @param string $charset_in      Source charset name (case-insensitive)
 * @param bool   $decode_entities Run utf8_fast_entities_to_umlauts() on the
 *                                result when it contains something that
 *                                looks like an HTML entity
 * @return string Converted text, or the unconverted input when no conversion
 *                path is available
 */
function charset_to_utf8($str, $charset_in = DEFAULT_CHARSET, $decode_entities = true)
{
    global $iso_8859_2_to_utf8, $iso_8859_3_to_utf8, $iso_8859_4_to_utf8, $iso_8859_5_to_utf8, $iso_8859_6_to_utf8, $iso_8859_7_to_utf8, $iso_8859_8_to_utf8, $iso_8859_9_to_utf8, $iso_8859_10_to_utf8, $iso_8859_11_to_utf8;

    $charset_in = strtoupper($charset_in);
    if ($charset_in == "") {
        $charset_in = 'UTF-8';
    }

    // Emitted whenever neither mb_convert_encoding() nor iconv() can help.
    $failure = "Can't convert from {$charset_in} without mb_convert_encoding() or iconv(). Use UTF-8 instead.";

    // Charsets that have no lookup table and therefore need an extension.
    $extensionOnly = array('BIG5', 'ISO-2022-JP', 'ISO-2022-KR');
    if (!function_exists('iconv')) {
        // GB2312 is only attempted through iconv (see the mbstring note below).
        if ((!UTF8_MBSTRING && in_array($charset_in, $extensionOnly)) || $charset_in == 'GB2312') {
            trigger_error($failure, E_USER_WARNING);
            return $str;
        }
    }

    // UTF-8 or plain ASCII: nothing to convert, only entities to replace.
    if ($charset_in == 'UTF-8' || utf8_isASCII($str)) {
        if ($decode_entities && preg_match('/&[#0-9a-zA-Z]+;/', $str)) {
            return utf8_fast_entities_to_umlauts($str);
        }
        return $str;
    }

    $converted = false;

    // Table-driven conversion for the known ISO-8859 family members.
    if (substr($charset_in, 0, 8) == 'ISO-8859') {
        $tables = array(
            'ISO-8859-2'  => $iso_8859_2_to_utf8,
            'ISO-8859-3'  => $iso_8859_3_to_utf8,
            'ISO-8859-4'  => $iso_8859_4_to_utf8,
            'ISO-8859-5'  => $iso_8859_5_to_utf8,
            'ISO-8859-6'  => $iso_8859_6_to_utf8,
            'ISO-8859-7'  => $iso_8859_7_to_utf8,
            'ISO-8859-8'  => $iso_8859_8_to_utf8,
            'ISO-8859-9'  => $iso_8859_9_to_utf8,
            'ISO-8859-10' => $iso_8859_10_to_utf8,
            'ISO-8859-11' => $iso_8859_11_to_utf8,
        );

        if ($charset_in == 'ISO-8859-1') {
            $str = utf8_encode($str);
            $converted = true;
        } elseif (array_key_exists($charset_in, $tables)) {
            $str = strtr($str, $tables[$charset_in]);
            $converted = true;
        }
        // any other ISO-8859-x falls through to the extension based attempts
    }

    if (!$converted) {
        if (UTF8_MBSTRING && $charset_in != 'GB2312') {
            // there's no GB2312 encoding in php's mb_* functions
            $str = mb_convert_encoding($str, 'UTF-8', $charset_in);
            $converted = true;
        } elseif (function_exists('iconv')) {
            $str = iconv($charset_in, 'UTF-8', $str);
            $converted = true;
        }
    }

    if (!$converted) {
        // Obscure charset and no extension available: warn and give up.
        trigger_error($failure, E_USER_WARNING);
        return $str;
    }

    // we have utf-8, now replace HTML-entities and return
    if ($decode_entities && preg_match('/&[#0-9a-zA-Z]+;/', $str)) {
        $str = utf8_fast_entities_to_umlauts($str);
    }
    return $str;
}
コード例 #5
0
ファイル: mail.php プロジェクト: yjliugit/dokuwiki
/**
 * Encodes an email address header
 *
 * Unicode characters in the display name will be deaccented (when
 * MAILHEADER_ASCIIONLY is defined) and otherwise encoded quoted-printable
 * for headers. Addresses may not contain Non-ASCII data; such addresses and
 * addresses rejected by mail_isvalid() are reported through msg() and
 * skipped.
 *
 * Example:
 *   mail_encode_address("föö <*****@*****.**>, me@somewhere.com","TBcc");
 *
 * @param string  $string Multiple addresses separated by commas
 * @param string  $header Name of the header (To,Bcc,Cc,...)
 * @param boolean $names  Allow named Recipients?
 * @return string|null The encoded address list (prefixed with "$header: " and
 *                     terminated with MAILHEADER_EOL when $header is given),
 *                     or null when no usable address was found
 */
function mail_encode_address($string, $header = '', $names = true)
{
    $headers = '';
    $parts = explode(',', $string);
    foreach ($parts as $part) {
        $part = trim($part);
        // Start every part without a display name. Without this reset a bare
        // address would inherit the name parsed from an earlier
        // "Name <addr>" part of the same list.
        $text = '';
        // parse address
        if (preg_match('#(.*?)<(.*?)>#', $part, $matches)) {
            $text = trim($matches[1]);
            $addr = $matches[2];
        } else {
            $addr = $part;
        }
        // skip empty ones
        if (empty($addr)) {
            continue;
        }
        // FIXME: is there a way to encode the localpart of a emailaddress?
        if (!utf8_isASCII($addr)) {
            msg(htmlspecialchars("E-Mail address <{$addr}> is not ASCII"), -1);
            continue;
        }
        if (!mail_isvalid($addr)) {
            msg(htmlspecialchars("E-Mail address <{$addr}> is not valid"), -1);
            continue;
        }
        // text was given
        if (!empty($text) && $names) {
            // add address quotes
            $addr = "<{$addr}>";
            if (defined('MAILHEADER_ASCIIONLY')) {
                $text = utf8_deaccent($text);
                $text = utf8_strip($text);
            }
            if (!utf8_isASCII($text)) {
                // put the quotes outside as in =?UTF-8?Q?"Elan Ruusam=C3=A4e"?= vs "=?UTF-8?Q?Elan Ruusam=C3=A4e?="
                if (preg_match('/^"(.+)"$/', $text, $matches)) {
                    $text = '"=?UTF-8?Q?' . mail_quotedprintable_encode($matches[1], 0) . '?="';
                } else {
                    $text = '=?UTF-8?Q?' . mail_quotedprintable_encode($text, 0) . '?=';
                }
                // additionally the space character should be encoded as =20 (or each
                // word QP encoded separately).
                // however this is needed only in mail headers, not globally in mail_quotedprintable_encode().
                $text = str_replace(" ", "=20", $text);
            }
        } else {
            $text = '';
        }
        // add to header comma separated
        if ($headers != '') {
            $headers .= ',';
            if ($header) {
                // fold onto a new line to avoid overlong mail headers
                $headers .= MAILHEADER_EOL . ' ';
            }
        }
        $headers .= $text . ' ' . $addr;
    }
    if (empty($headers)) {
        return null;
    }
    // if headername was given add it and close correctly
    if ($header) {
        $headers = $header . ': ' . $headers . MAILHEADER_EOL;
    }
    return $headers;
}
コード例 #6
0
ファイル: Mailer.class.php プロジェクト: boycaught/dokuwiki
 /**
  * Cleanup and encode the headers array
  *
  * - Falls back to $conf['mailfrom'] (via from()) when no From header is set.
  * - Passes every present address header through cleanAddress().
  * - Prepends a "[prefix]" tag to the Subject unless it already starts with
  *   it, then deaccents/strips it when MAILHEADER_ASCIIONLY is defined and
  *   base64-encodes it when it is still not plain ASCII.
  */
 protected function cleanHeaders()
 {
     global $conf;

     // make sure there is a sender
     if (empty($this->headers['From'])) {
         $this->from($conf['mailfrom']);
     }

     // clean up addresses
     foreach (array('To', 'From', 'Cc', 'Bcc', 'Reply-To', 'Sender') as $field) {
         if (isset($this->headers[$field])) {
             $this->headers[$field] = $this->cleanAddress($this->headers[$field]);
         }
     }

     if (!isset($this->headers['Subject'])) {
         return;
     }
     $subject = $this->headers['Subject'];

     // pick the tag: explicit mail prefix, else the (possibly shortened) wiki title
     if (!empty($conf['mailprefix'])) {
         $tag = $conf['mailprefix'];
     } elseif (utf8_strlen($conf['title']) < 20) {
         $tag = $conf['title'];
     } else {
         $tag = utf8_substr($conf['title'], 0, 20) . '...';
     }
     $prefix = '[' . $tag . ']';

     // add prefix to subject unless it is already there
     if (substr($subject, 0, strlen($prefix)) != $prefix) {
         $subject = $prefix . ' ' . $subject;
     }

     // encode subject
     if (defined('MAILHEADER_ASCIIONLY')) {
         $subject = utf8_strip(utf8_deaccent($subject));
     }
     if (!utf8_isASCII($subject)) {
         $subject = '=?UTF-8?B?' . base64_encode($subject) . '?=';
     }

     $this->headers['Subject'] = $subject;
 }
コード例 #7
0
/**
 * Encodes an email address header
 *
 * Unicode characters in the display name will be deaccented (when
 * MAILHEADER_ASCIIONLY is defined) and otherwise encoded quoted-printable
 * for headers. Addresses may not contain Non-ASCII data; such addresses and
 * addresses rejected by mail_isvalid() are reported through msg() and
 * skipped.
 *
 * Example:
 *   mail_encode_address("föö <*****@*****.**>, me@somewhere.com","TBcc");
 *
 * @param string  $string Multiple addresses separated by commas
 * @param string  $header Name of the header (To,Bcc,Cc,...)
 * @param boolean $names  Allow named Recipients?
 * @return string|null The encoded address list (prefixed with "$header: " and
 *                     terminated with MAILHEADER_EOL when $header is given),
 *                     or null when no usable address was found
 */
function mail_encode_address($string, $header = '', $names = true)
{
    $headers = '';
    // explode() instead of split(): split() was removed in PHP 7.0 and the
    // delimiter is a literal comma, so no regular expression is needed.
    $parts = explode(',', $string);
    foreach ($parts as $part) {
        $part = trim($part);
        // Start every part without a display name. Without this reset a bare
        // address would inherit the name parsed from an earlier
        // "Name <addr>" part of the same list.
        $text = '';
        // parse address
        if (preg_match('#(.*?)<(.*?)>#', $part, $matches)) {
            $text = trim($matches[1]);
            $addr = $matches[2];
        } else {
            $addr = $part;
        }
        // skip empty ones
        if (empty($addr)) {
            continue;
        }
        // FIXME: is there a way to encode the localpart of a emailaddress?
        if (!utf8_isASCII($addr)) {
            msg(htmlspecialchars("E-Mail address <{$addr}> is not ASCII"), -1);
            continue;
        }
        if (!mail_isvalid($addr)) {
            msg(htmlspecialchars("E-Mail address <{$addr}> is not valid"), -1);
            continue;
        }
        // text was given
        if (!empty($text) && $names) {
            // add address quotes
            $addr = "<{$addr}>";
            if (defined('MAILHEADER_ASCIIONLY')) {
                $text = utf8_deaccent($text);
                $text = utf8_strip($text);
            }
            if (!utf8_isASCII($text)) {
                $text = '=?UTF-8?Q?' . mail_quotedprintable_encode($text, 0) . '?=';
            }
        } else {
            $text = '';
        }
        // add to header comma separated and in new line to avoid too long headers
        if ($headers != '') {
            $headers .= ',' . MAILHEADER_EOL . ' ';
        }
        $headers .= $text . ' ' . $addr;
    }
    if (empty($headers)) {
        return null;
    }
    // if headername was given add it and close correctly
    if ($header) {
        $headers = $header . ': ' . $headers . MAILHEADER_EOL;
    }
    return $headers;
}
コード例 #8
0
/**
 * Convert a UTF-8 string into the given target charset.
 *
 * Processing order:
 *  1. The charset name is upper-cased.
 *  2. If the charset needs iconv()/mbstring and neither is usable, a
 *     E_USER_WARNING is triggered and the input is returned unconverted.
 *  3. A UTF-8 target, or input accepted by utf8_isASCII(), is returned as-is.
 *  4. ISO-8859-1 goes through utf8_decode(); ISO-8859-2 .. ISO-8859-11 go
 *     through strtr() with the matching global table.
 *  5. Anything still unconverted is tried with mb_convert_encoding() (when
 *     UTF8_MBSTRING is truthy and the charset is not GB2312), else iconv().
 *
 * HTML entities are not touched here; the original author's note says the
 * string comes from charset_to_utf8(), so that step can be skipped.
 *
 * @param string $str         UTF-8 text to convert
 * @param string $charset_out Target charset name (case-insensitive)
 * @return string Converted text, or the unconverted input when no conversion
 *                path is available
 */
function utf8_to_charset($str, $charset_out = DEFAULT_CHARSET)
{
    global $utf8_to_iso_8859_2, $utf8_to_iso_8859_3, $utf8_to_iso_8859_4, $utf8_to_iso_8859_5, $utf8_to_iso_8859_6, $utf8_to_iso_8859_7, $utf8_to_iso_8859_8, $utf8_to_iso_8859_9, $utf8_to_iso_8859_10, $utf8_to_iso_8859_11;

    $charset_out = strtoupper($charset_out);

    // Emitted whenever neither mb_convert_encoding() nor iconv() can help.
    $failure = "Can't convert into {$charset_out} without mb_convert_encoding() or iconv(). Use UTF-8 instead.";

    // Charsets that have no lookup table and therefore need an extension.
    $extensionOnly = array('BIG5', 'ISO-2022-JP', 'ISO-2022-KR');
    if (!function_exists('iconv')) {
        // GB2312 is only attempted through iconv (see the mbstring note below).
        if ((!UTF8_MBSTRING && in_array($charset_out, $extensionOnly)) || $charset_out == 'GB2312') {
            trigger_error($failure, E_USER_WARNING);
            return $str;
        }
    }

    // check if we need to convert at all
    if ($charset_out == 'UTF-8' || utf8_isASCII($str)) {
        return $str;
    }

    $converted = false;

    // Table-driven conversion for the known ISO-8859 family members.
    if (substr($charset_out, 0, 8) == 'ISO-8859') {
        $tables = array(
            'ISO-8859-2'  => $utf8_to_iso_8859_2,
            'ISO-8859-3'  => $utf8_to_iso_8859_3,
            'ISO-8859-4'  => $utf8_to_iso_8859_4,
            'ISO-8859-5'  => $utf8_to_iso_8859_5,
            'ISO-8859-6'  => $utf8_to_iso_8859_6,
            'ISO-8859-7'  => $utf8_to_iso_8859_7,
            'ISO-8859-8'  => $utf8_to_iso_8859_8,
            'ISO-8859-9'  => $utf8_to_iso_8859_9,
            'ISO-8859-10' => $utf8_to_iso_8859_10,
            'ISO-8859-11' => $utf8_to_iso_8859_11,
        );

        if ($charset_out == 'ISO-8859-1') {
            $str = utf8_decode($str);
            $converted = true;
        } elseif (array_key_exists($charset_out, $tables)) {
            $str = strtr($str, $tables[$charset_out]);
            $converted = true;
        }
        // any other ISO-8859-x falls through to the extension based attempts
    }

    if (!$converted) {
        if (UTF8_MBSTRING && $charset_out != 'GB2312') {
            // there's no GB2312 encoding in php's mb_* functions
            $str = mb_convert_encoding($str, $charset_out, 'UTF-8');
            $converted = true;
        } elseif (function_exists('iconv')) {
            $str = iconv('UTF-8', $charset_out, $str);
            $converted = true;
        }
    }

    if (!$converted) {
        // Obscure charset and no extension available: warn and give up.
        trigger_error($failure, E_USER_WARNING);
    }
    return $str;
}
コード例 #9
0
 /**
  * Romanize a non-latin string.
  *
  * Input accepted by utf8_isASCII() is returned unchanged. Other input is
  * run through strtr() with the table supplied by
  * Koch\Localization\UTF8\CharacterTable::romanize().
  *
  * @author Andreas Gohr <*****@*****.**>
  *
  * @param string $string Text to romanize
  * @return string The input itself, or the result of the strtr() replacement
  */
 function utf8_romanize($string)
 {
     // the table is only fetched when there is something to replace
     return utf8_isASCII($string)
         ? $string
         : strtr($string, Koch\Localization\UTF8\CharacterTable::romanize());
 }
コード例 #10
0
 /**
  * Check whether a string contains only 7-bit ASCII bytes.
  *
  * The check is done here with a byte-range pattern; delegating to a
  * function of the same name would recurse without ever returning.
  *
  * @param string $str String to inspect
  * @return bool True when no byte outside 0x00-0x7F occurs (this includes
  *              the empty string), false otherwise
  */
 function utf8_isASCII($str)
 {
     // No "u" modifier on purpose: the pattern is matched byte-wise, so any
     // byte of a multi-byte UTF-8 sequence is detected.
     return preg_match('/[^\x00-\x7F]/', $str) !== 1;
 }