This expensive function tests whether or not a given character
encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
fail this test, and require special processing. Variable width
encodings shouldn't ever fail.
public static testEncodingSupportsASCII ( string $encoding, boolean $bypass = false ) : Array
$encoding | string | Encoding name to test, as per iconv format |
$bypass | boolean | Whether or not to bypass the precompiled arrays. |
return | Array | Map of UTF-8 characters to their corresponding ASCII characters, which can be used to "undo" any overzealous iconv action. |
/**
 * Converts a string from UTF-8 to the encoding named by the
 * 'Core.Encoding' configuration directive.
 *
 * @note Currently, this is a lossy conversion, with unexpressable
 *       characters being omitted.
 *
 * @param string $str     UTF-8 string to convert.
 * @param object $config  Configuration object; queried through get() for
 *                        'Core.Encoding', 'Core.EscapeNonASCIICharacters'
 *                        and 'Test.ForceNoIconv'.
 * @param mixed  $context Not used by this method.
 * @return mixed The input unchanged when the encoding is 'utf-8';
 *               otherwise the result of iconv() or utf8_decode(). Nothing
 *               is returned when the encoding is unsupported (an
 *               E_USER_ERROR is raised instead).
 */
public static function convertFromUTF8($str, $config, $context)
{
    $encoding = $config->get('Core.Encoding');
    if ($encoding === 'utf-8') {
        return $str;
    }

    // Cache the iconv availability check across calls.
    static $iconv = null;
    if ($iconv === null) {
        $iconv = function_exists('iconv');
    }

    if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
        $str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
    }

    // Silence warnings emitted by the conversion functions below. Every
    // exit path from here on must call restore_error_handler().
    set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));

    if ($iconv && !$config->get('Test.ForceNoIconv')) {
        // Undo our previous fix in convertToUTF8, otherwise iconv will barf
        $ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding);
        // Only apply the map when one was actually returned: array_flip()
        // and the two-argument form of strtr() both require an array.
        if (is_array($ascii_fix)) {
            if (!$escape && !empty($ascii_fix)) {
                // Strip every UTF-8 key of the map from the string.
                $clear_fix = array();
                foreach ($ascii_fix as $utf8 => $native) {
                    $clear_fix[$utf8] = '';
                }
                $str = strtr($str, $clear_fix);
            }
            // Replace each value of the map with its key.
            $str = strtr($str, array_flip($ascii_fix));
        }
        // Normal stuff
        $str = iconv('utf-8', $encoding . '//IGNORE', $str);
        restore_error_handler();
        return $str;
    } elseif ($encoding === 'iso-8859-1') {
        $str = utf8_decode($str);
        restore_error_handler();
        return $str;
    }

    // Remove the muting handler BEFORE raising the error; otherwise the
    // error is delivered to the mute handler and the handler stays
    // installed after this method returns.
    restore_error_handler();
    trigger_error('Encoding not supported', E_USER_ERROR);
}
/**
 * Checks that HTMLPurifier_Encoder::testEncodingSupportsASCII() yields
 * the expected result for an encoding, both when called with its default
 * second argument and when called with the second argument set to true.
 *
 * The check is skipped entirely when the initial probe (second argument
 * true) returns false.
 *
 * @param string $enc Encoding name passed to testEncodingSupportsASCII().
 * @param mixed  $ret Expected return value, compared with assertIdentical().
 */
public function assertASCIISupportCheck($enc, $ret)
{
    $probe = HTMLPurifier_Encoder::testEncodingSupportsASCII($enc, true);
    if ($probe !== false) {
        // Default invocation first, then the one with the flag set.
        $defaultResult = HTMLPurifier_Encoder::testEncodingSupportsASCII($enc);
        $this->assertIdentical($defaultResult, $ret);

        $flaggedResult = HTMLPurifier_Encoder::testEncodingSupportsASCII($enc, true);
        $this->assertIdentical($flaggedResult, $ret);
    }
}