/**
 * Bring a page name into the wiki's configured charset.
 *
 * Internet Explorer sends UTF-8 URLs when non-ASCII characters are typed
 * into the URL bar or when unescaped links are followed. The page name
 * passed in must already be urldecoded.
 * Fixes sf.net bug #953949.
 *
 * Derived from languages/Language.php:checkTitleEncoding() in mediawiki.
 *
 * @param string $s Urldecoded page name.
 * @return string The trimmed page name, re-encoded where the bytes do not
 *                match the global $charset.
 */
function fixTitleEncoding($s)
{
    global $charset;

    $title = trim($s);
    if (empty($title)) {
        // print a warning?
        return $title;
    }

    // Any byte >= 0x80 means the title is not plain 7-bit ASCII.
    $hasHighBytes = preg_match('/[\\x80-\\xff]/', $title);

    // The UTF-8 validity check is only consulted for non-ASCII titles.
    $validUtf8 = true;
    if ($hasHighBytes) {
        $validUtf8 = isUtf8String($title);
    }

    $siteCharset = strtolower($charset);

    if (!$hasHighBytes) {
        // Nothing to convert.
        return $title;
    }

    if ($siteCharset == "utf-8") {
        // UTF-8 wiki: bytes that are not valid UTF-8 are run through
        // utf8_encode(), which treats its input as ISO-8859-1.
        return $validUtf8 ? $title : utf8_encode($title);
    }

    // Non-UTF-8 wiki: a valid UTF-8 title is converted down to the site
    // charset; anything else is passed through untouched.
    // Other languages can safely leave this function, or replace
    // it with one to detect and convert another legacy encoding.
    return $validUtf8 ? charset_convert('UTF-8', $siteCharset, $title) : $title;
}
/**
 * Check for UTF-8 URLs; Internet Explorer produces these if you
 * type non-ASCII chars in the URL bar or follow unescaped links.
 * Requires urldecoded pagename.
 * Fixes sf.net bug #953949
 *
 * src: languages/Language.php:checkTitleEncoding() from mediawiki
 *
 * The trimmed title is returned unchanged unless it contains bytes
 * >= 0x80 whose encoding disagrees with the global $charset:
 *  - non-UTF-8 wiki, valid UTF-8 title: converted to $charset
 *    (utf8_decode() for iso-8859-1, iconv() otherwise);
 *  - UTF-8 wiki, title that is not valid UTF-8: passed through
 *    utf8_encode(), which treats its input as ISO-8859-1.
 *
 * @param string $s Urldecoded page name.
 * @return string Always a string; if the conversion cannot be performed
 *                the trimmed, unconverted title is returned.
 */
function fixTitleEncoding($s)
{
    global $charset;

    $s = trim($s);
    // print a warning?
    if (empty($s)) {
        return $s;
    }

    // Any byte >= 0x80 means the title is not plain 7-bit ASCII.
    $ishigh = preg_match('/[\\x80-\\xff]/', $s);
    $isutf = $ishigh ? isUtf8String($s) : true;
    $locharset = strtolower($charset);

    if ($locharset !== "utf-8" && $ishigh && $isutf) {
        // if charset == 'iso-8859-1' then simply use utf8_decode()
        if ($locharset === 'iso-8859-1') {
            return utf8_decode($s);
        }

        // The iconv extension is optional; without it the title cannot
        // be converted, so hand it back as-is instead of dying with an
        // undefined-function error.
        if (!function_exists('iconv')) {
            return $s;
        }

        // iconv() returns false (and raises a notice) when the title
        // holds a character the target charset cannot represent. Never
        // leak that false to callers that expect a page name.
        $converted = iconv("UTF-8", $charset, $s);
        return $converted === false ? $s : $converted;
    }

    if ($locharset === "utf-8" && $ishigh && !$isutf) {
        return utf8_encode($s);
    }

    // Other languages can safely leave this function, or replace
    // it with one to detect and convert another legacy encoding.
    return $s;
}