Example #1
0
/**
 * Bring a page title taken from a URL into the wiki's charset.
 *
 * Internet Explorer sends UTF-8 when non-ASCII characters are typed in
 * the URL bar or when unescaped links are followed, so the incoming
 * title may not match the configured charset (sf.net bug #953949).
 * The pagename must already be urldecoded.
 *
 * Outcomes, after trimming surrounding whitespace:
 *  - no bytes >= 0x80 (or an empty title): returned as is;
 *  - wiki charset is UTF-8 and the title is not valid UTF-8:
 *    passed through utf8_encode();
 *  - wiki charset is not UTF-8 and the title is valid UTF-8:
 *    passed through charset_convert('UTF-8', <wiki charset>, ...);
 *  - anything else: returned as is.
 *
 * Derived from mediawiki languages/Language.php:checkTitleEncoding().
 *
 * @param string $s Urldecoded page title.
 * @return string The trimmed title, re-encoded where needed.
 */
function fixTitleEncoding($s)
{
    global $charset;

    $s = trim($s);
    // print a warning?
    if (empty($s)) {
        return $s;
    }

    // Any byte >= 0x80 means the title is not plain ASCII.
    $hasHighBytes = preg_match('/[\\x80-\\xff]/', $s);

    // UTF-8 validity only needs checking when high bytes are present.
    $validUtf8 = true;
    if ($hasHighBytes) {
        $validUtf8 = isUtf8String($s);
    }

    $wikiCharset = strtolower($charset);

    // Plain ASCII titles never need re-encoding.
    if (!$hasHighBytes) {
        return $s;
    }

    if ($wikiCharset == "utf-8") {
        // A UTF-8 wiki received a legacy 8-bit title: upgrade it.
        return $validUtf8 ? $s : utf8_encode($s);
    }

    // A non-UTF-8 wiki received a UTF-8 title: convert it down.
    // Other languages can safely leave this function, or replace
    // it with one to detect and convert another legacy encoding.
    return $validUtf8 ? charset_convert('UTF-8', $wikiCharset, $s) : $s;
}
Example #2
0
/**
 * Check for UTF-8 URLs; Internet Explorer produces these if you
 * type non-ASCII chars in the URL bar or follow unescaped links.
 * Requires urldecoded pagename.
 * Fixes sf.net bug #953949
 *
 * The title is trimmed, then:
 *  - if the wiki charset (global $charset) is not UTF-8 and the title
 *    contains high bytes that form valid UTF-8, it is converted from
 *    UTF-8 to the wiki charset (utf8_decode() for iso-8859-1, iconv()
 *    for everything else);
 *  - if the wiki charset is UTF-8 and the title contains high bytes
 *    that are not valid UTF-8, it is run through utf8_encode(), i.e.
 *    interpreted as ISO-8859-1;
 *  - otherwise it is returned unchanged.
 *
 * When iconv() is unavailable or the conversion fails, the trimmed
 * title is returned unconverted, so callers always get a string.
 *
 * src: languages/Language.php:checkTitleEncoding() from mediawiki
 *
 * @param string $s Urldecoded page title.
 * @return string The trimmed title, re-encoded where needed.
 */
function fixTitleEncoding($s)
{
    global $charset;
    $s = trim($s);
    // print a warning?
    if (empty($s)) {
        return $s;
    }
    // Any byte >= 0x80 means the title is not plain ASCII.
    $ishigh = preg_match('/[\\x80-\\xff]/', $s);
    $isutf = $ishigh ? isUtf8String($s) : true;
    $locharset = strtolower($charset);
    if ($locharset != "utf-8" and $ishigh and $isutf) {
        // if charset == 'iso-8859-1' then simply use utf8_decode()
        // (note: utf8_decode()/utf8_encode() are deprecated as of PHP 8.2)
        if ($locharset == 'iso-8859-1') {
            return utf8_decode($s);
        }
        // Without the iconv extension there is nothing to convert with;
        // hand back the title as received instead of dying on an
        // undefined function.
        if (!function_exists('iconv')) {
            return $s;
        }
        // iconv() returns false when the target charset is unknown or a
        // character cannot be represented; never leak that false to the
        // caller as a page title.
        $converted = iconv("UTF-8", $charset, $s);
        return ($converted === false) ? $s : $converted;
    }
    if ($locharset == "utf-8" and $ishigh and !$isutf) {
        return utf8_encode($s);
    }
    // Other languages can safely leave this function, or replace
    // it with one to detect and convert another legacy encoding.
    return $s;
}