Example #1
2
 /**
  * Recursively cleans arrays, objects and strings: strips ASCII control
  * characters and drops byte sequences that are not valid in $charset.
  *
  * Arrays are returned as a cleaned copy; objects are modified in place.
  * Values that are neither array, object nor non-empty string are returned
  * untouched.
  *
  * @param   mixed   $var      value to clean
  * @param   string  $charset  character set; defaults to JsonApiApplication::$charset
  * @return  mixed   the cleaned value
  */
 public static function clean($var, $charset = NULL)
 {
     if (!$charset) {
         // Use the application character set
         $charset = JsonApiApplication::$charset;
     }
     if (is_array($var) or is_object($var)) {
         // Only arrays and ArrayAccess implementations support [] access;
         // any other object has to be written through its properties.
         $indexable = is_array($var) || $var instanceof \ArrayAccess;
         foreach ($var as $key => $val) {
             // Recursion! Pass the charset along so nested values are cleaned
             // with the caller's charset rather than the application default.
             $clean_key = UTF8::clean($key, $charset);
             $clean_val = UTF8::clean($val, $charset);
             if ($indexable) {
                 $var[$clean_key] = $clean_val;
             } else {
                 $var->{$clean_key} = $clean_val;
             }
         }
     } elseif (is_string($var) and $var !== "") {
         // Remove control characters
         $var = UTF8::strip_ascii_ctrl($var);
         if (!UTF8::is_ascii($var)) {
             // Temporarily save the mb_substitute_character() value into a variable
             $mb_substitute_character = mb_substitute_character();
             // Disable substituting illegal characters with the default '?' character
             mb_substitute_character("none");
             // Converting a charset onto itself drops invalid sequences; this is
             // expensive, so it only runs when $var is not plain ASCII
             $var = mb_convert_encoding($var, $charset, $charset);
             // Reset mb_substitute_character() value back to the original setting
             mb_substitute_character($mb_substitute_character);
         }
     }
     return $var;
 }
Example #2
0
 /**
  * Checks and normalises the mbstring runtime configuration.
  *
  * With the extension loaded it warns about two php.ini settings and then
  * forces the regex/internal encoding to UTF-8, the substitute character to
  * "none", the HTTP output to "pass" and the language to "uni". Without the
  * extension it includes Bootup/mbstring.php (presumably a userland
  * replacement - confirm), unless MB_OVERLOAD_MAIL is already defined.
  */
 static function initMbstring()
 {
     if (!extension_loaded('mbstring')) {
         if (!defined('MB_OVERLOAD_MAIL')) {
             extension_loaded('iconv') or static::initIconv();
             require __DIR__ . '/Bootup/mbstring.php';
         }
         return;
     }

     // encoding_translation counts as enabled for a non-zero number or on/yes/true
     $translation = ini_get('mbstring.encoding_translation');
     $translationEnabled = (int) $translation || in_array(strtolower($translation), array('on', 'yes', 'true'));
     if ($translationEnabled && !in_array(strtolower(ini_get('mbstring.http_input')), array('pass', '8bit', 'utf-8'))) {
         user_error('php.ini settings: Please disable mbstring.encoding_translation or set mbstring.http_input to "pass"', E_USER_WARNING);
     }

     $overload = (int) ini_get('mbstring.func_overload');
     if (MB_OVERLOAD_STRING & $overload) {
         user_error('php.ini settings: Please disable mbstring.func_overload', E_USER_WARNING);
     }

     mb_regex_encoding('UTF-8');
     ini_set('mbstring.script_encoding', 'pass');

     // Each remaining setting is only rewritten when it differs from the wanted value
     if (strtolower(mb_internal_encoding()) !== 'utf-8') {
         mb_internal_encoding('UTF-8');
         ini_set('mbstring.internal_encoding', 'UTF-8');
     }

     if (strtolower(mb_substitute_character()) !== 'none') {
         mb_substitute_character('none');
         ini_set('mbstring.substitute_character', 'none');
     }

     $httpOutput = strtolower(mb_http_output());
     if (!in_array($httpOutput, array('pass', '8bit'))) {
         mb_http_output('pass');
         ini_set('mbstring.http_output', 'pass');
     }

     $language = strtolower(mb_language());
     if (!in_array($language, array('uni', 'neutral'))) {
         mb_language('uni');
         ini_set('mbstring.language', 'uni');
     }
 }
Example #3
0
 /**
  * Builds the table of known character sets (name => translated label) and
  * derives the subsets usable through mbstring ($this->mb_sets) and through
  * iconv ($this->iconv_sets).
  */
 function __construct()
 {
     // The original literal listed "ISO-2022-JP" twice; PHP keeps only the last
     // value for a repeated key, so the plain "Japanese (JIS)" label was
     // silently replaced by the SO/SI variant label. The duplicate is removed.
     $this->charsets = array(
         "ASMO-708" => gettext("Arabic"),
         "BIG5" => gettext("Chinese Traditional"),
         "CP1026" => gettext("IBM EBCDIC (Turkish Latin-5)"),
         "cp866" => gettext("Cyrillic (DOS)"),
         "CP870" => gettext("IBM EBCDIC (Multilingual Latin-2)"),
         "CISO2022JP" => gettext("Japanese (JIS-Allow 1 byte Kana)"),
         "DOS-720" => gettext("Arabic (DOS)"),
         "DOS-862" => gettext("Hebrew (DOS)"),
         "EBCDIC-CP-US" => gettext("IBM EBCDIC (US-Canada)"),
         "EUC-CN" => gettext("Chinese Simplified (EUC)"),
         "EUC-JP" => gettext("Japanese (EUC)"),
         "EUC-KR" => gettext("Korean (EUC)"),
         "GB2312" => gettext("Chinese Simplified (GB2312)"),
         "HZ-GB-2312" => gettext("Chinese Simplified (HZ)"),
         "IBM437" => gettext("OEM United States"),
         "IBM737" => gettext("Greek (DOS)"),
         "IBM775" => gettext("Baltic (DOS)"),
         "IBM850" => gettext("Western European (DOS)"),
         "IBM852" => gettext("Central European (DOS)"),
         "IBM857" => gettext("Turkish (DOS)"),
         "IBM861" => gettext("Icelandic (DOS)"),
         "IBM869" => gettext("Greek, Modern (DOS)"),
         "ISO-2022-JP" => gettext("Japanese (JIS)"),
         "ISO-2022-KR" => gettext("Korean (ISO)"),
         "ISO-8859-1" => gettext("Western European (ISO)"),
         "ISO-8859-15" => gettext("Latin 9 (ISO)"),
         "ISO-8859-2" => gettext("Central European (ISO)"),
         "ISO-8859-3" => gettext("Latin 3 (ISO)"),
         "ISO-8859-4" => gettext("Baltic (ISO)"),
         "ISO-8859-5" => gettext("Cyrillic (ISO)"),
         "ISO-8859-6" => gettext("Arabic (ISO)"),
         "ISO-8859-7" => gettext("Greek (ISO)"),
         "ISO-8859-8" => gettext("Hebrew (ISO-Visual)"),
         "ISO-8859-8-i" => gettext("Hebrew (ISO-Logical)"),
         "ISO-8859-9" => gettext("Turkish (ISO)"),
         "JOHAB" => gettext("Korean (Johab)"),
         "KOi8-R" => gettext("Cyrillic (KOI8-R)"),
         "KOi8-U" => gettext("Cyrillic (KOI8-U)"),
         "KS_C_5601-1987" => gettext("Korean"),
         "MACINTOSH" => gettext("Western European (MAC)"),
         "SHIFT_JIS" => gettext("Japanese (Shift-JIS)"),
         "UNICODE" => gettext("Unicode"),
         "UNICODEFFFE" => gettext("Unicode (Big-Endian)"),
         "US-ASCII" => gettext("US-ASCII"),
         "UTF-7" => gettext("Unicode (UTF-7)"),
         "UTF-8" => gettext("Unicode (UTF-8)"),
         "WINDOWS-1250" => gettext("Central European (Windows)"),
         "WINDOWS-1251" => gettext("Cyrillic (Windows)"),
         "WINDOWS-1252" => gettext("Western European (Windows)"),
         "WINDOWS-1253" => gettext("Greek (Windows)"),
         "WINDOWS-1254" => gettext("Turkish (Windows)"),
         "WINDOWS-1255" => gettext("Hebrew (Windows)"),
         "WINDOWS-1256" => gettext("Arabic (Windows)"),
         "WINDOWS-1257" => gettext("Baltic (Windows)"),
         "WINDOWS-1258" => gettext("Vietnamese (Windows)"),
         "WINDOWS-874" => gettext("Thai (Windows)"),
     );
     // prune the list to supported character sets
     $this->iconv_sets = array();
     $this->mb_sets = array();
     if (function_exists('mb_convert_encoding')) {
         @mb_substitute_character('none');
         if (function_exists('mb_list_encodings')) {
             $list = mb_list_encodings();
         } else {
             // Hard-coded list used when mb_list_encodings() is unavailable
             $list = array("pass", "auto", "byte2be", "byte2le", "byte4be", "byte4le", "BASE64", "UUENCODE", "HTML-ENTITIES", "Quoted-Printable", "7bit", "8bit", "UCS-4", "UCS-4BE", "UCS-4LE", "UCS-2", "UCS-2BE", "UCS-2LE", "UTF-32", "UTF-32BE", "UTF-32LE", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-8", "UTF-7", "UTF7-IMAP", "ASCII", "EUC-JP", "SJIS", "eucJP-win", "SJIS-win", "CP51932", "JIS", "ISO-2022-JP", "ISO-2022-JP-MS", "Windows-1252", "ISO-8859-1", "ISO-8859-2", "ISO-8859-3", "ISO-8859-4", "ISO-8859-5", "ISO-8859-6", "ISO-8859-7", "ISO-8859-8", "ISO-8859-9", "ISO-8859-10", "ISO-8859-13", "ISO-8859-14", "ISO-8859-15", "ISO-8859-16", "EUC-CN", "CP936", "HZ", "EUC-TW", "BIG-5", "EUC-KR", "UHC", "ISO-2022-KR", "Windows-1251", "CP866", "KOI8-R", "ArmSCII-8");
         }
         // NOTE(review): this match is case-sensitive, so e.g. "WINDOWS-1252"
         // does not match mbstring's "Windows-1252" - confirm whether intended.
         foreach ($this->charsets as $key => $encoding) {
             if (in_array($key, $list)) {
                 $this->mb_sets[$key] = $encoding;
             }
         }
     }
     if (function_exists('iconv')) {
         // A charset counts as iconv-supported when a trial conversion succeeds
         foreach ($this->charsets as $key => $encoding) {
             if (@iconv("UTF-8", $key, "UTF-8") !== false) {
                 $this->iconv_sets[$key] = $encoding;
             }
         }
     }
 }
/**
 * Smarty modifier: runs $html through HTML Purifier.
 *
 * When the working encoding is not UTF-8 and mbstring is available, the input
 * is converted to UTF-8 for purification and converted back afterwards, with
 * the mbstring substitute character temporarily set to "none".
 *
 * @param string      $html    markup to purify
 * @param string|null $ecoding source/target encoding (the parameter name is a
 *                             historical misspelling, kept so existing callers
 *                             keep working); defaults to _CHARSET
 * @param string|null $doctype one of the supported doctype names; any other
 *                             value leaves HTML Purifier's default in place
 * @return string purified markup in the requested encoding
 */
function smarty_modifier_xoops_html_purifier($html, $ecoding = null, $doctype = null)
{
    require_once XOOPS_LIBRARY_PATH . '/htmlpurifier/library/HTMLPurifier.auto.php';
    // The body used to read the undefined variable $encoding, so the caller's
    // value was always ignored and _CHARSET was used instead.
    $encoding = $ecoding ? $ecoding : _CHARSET;
    $doctypeArr = array("HTML 4.01 Strict", "HTML 4.01 Transitional", "XHTML 1.0 Strict", "XHTML 1.0 Transitional", "XHTML 1.1");
    $config = HTMLPurifier_Config::createDefault();
    if (in_array($doctype, $doctypeArr)) {
        $config->set('HTML.Doctype', $doctype);
    }
    // Convert through UTF-8 only when needed and when mbstring can do it
    $_conv = ($encoding !== 'UTF-8' && function_exists('mb_convert_encoding'));
    if ($_conv) {
        $_substitute = mb_substitute_character();
        mb_substitute_character('none');
        $html = mb_convert_encoding($html, 'UTF-8', $encoding);
        $config->set('Core.Encoding', 'UTF-8');
    } else {
        $config->set('Core.Encoding', $encoding);
    }
    $purifier = new HTMLPurifier($config);
    $html = $purifier->purify($html);
    if ($_conv) {
        $html = mb_convert_encoding($html, $encoding, 'UTF-8');
        // Put the caller's substitute character back
        mb_substitute_character($_substitute);
    }
    return $html;
}
 /**
  * Converts submitted Shift_JIS text to trimmed UTF-8, using a different
  * conversion path depending on what is_ezweb() / is_softbank() report.
  * Arrays are filtered element by element through this same method.
  *
  * @param  string|array $str submitted value(s)
  * @return string|array converted value(s)
  */
 function inputFilter($str)
 {
     if (is_array($str)) {
         return array_map(array($this, "inputFilter"), $str);
     }

     // Submitted emoji are stored as Unicode, hence SJIS-win
     $text = mb_convert_kana($str, 'KVrns', 'SJIS-win');

     if ($this->is_ezweb()) {
         // Conversion maps for the numeric-entity round trip around the charset conversion
         $sjisRanges = array(0xe234, 0xe272, 0xa0c, 0xffff, 0xe273, 0xe2ef, 0xa0d, 0xffff, 0xe2f0, 0xe32e, 0xa50, 0xffff, 0xe32f, 0xe342, 0xa51, 0xffff, 0xe468, 0xe4a6, 0xad8, 0xffff, 0xe4a7, 0xe523, 0xad9, 0xffff, 0xe524, 0xe562, 0xb1c, 0xffff, 0xe563, 0xe5df, 0xb1d, 0xffff);
         $utf8Ranges = array(0xec40, 0xecfc, 0x0, 0xffff, 0xed40, 0xed93, 0x0, 0xffff, 0xef40, 0xeffc, 0x0, 0xffff, 0xf040, 0xf0fc, 0x0, 0xffff);
         $text = mb_encode_numericentity($text, $sjisRanges, 'SJIS-win');
         $text = mb_convert_encoding($text, "UTF-8", "SJIS-win");
         return trim(mb_decode_numericentity($text, $utf8Ranges, 'UTF-8'));
     }

     if ($this->is_softbank()) {
         // 'long' mode leaves a textual marker for each unconvertible
         // character; the callback then rewrites the BAD+XXXX markers
         $previous = mb_substitute_character();
         mb_substitute_character('long');
         $text = mb_convert_encoding($text, 'UTF-8', 'SJIS');
         mb_substitute_character($previous);
         return trim(preg_replace_callback('/BAD\\+([0-9A-F]{4})/', array($this, '_softbank_fallbackSjisToUtf8'), $text));
     }

     return trim(mb_convert_encoding($text, "UTF-8", "SJIS-win"));
 }
Example #6
0
 /**
  * Converts UTF-8 text to $charset, replacing every character that $charset
  * cannot represent with a decimal HTML numeric entity.
  *
  * @param  string $str     UTF-8 input
  * @param  string $charset target encoding
  * @return string text in $charset with &#NNNN; entities for missing characters
  */
 function u2b($str, $charset = 'BIG5')
 {
     // Declare $charset as the encoding for multibyte regex functions
     mb_regex_encoding($charset);
     // Mark unconvertible characters as "U+<hex code>" during the conversion,
     // then restore whatever substitute character the caller had configured
     $previous = mb_substitute_character();
     mb_substitute_character('long');
     $str = mb_convert_encoding($str, $charset, 'UTF-8');
     mb_substitute_character($previous);
     // Turn each U+XXXX marker into a Unicode HTML entity. This used the /e
     // (eval) modifier, which was removed in PHP 7; a callback is equivalent.
     $str = preg_replace_callback('/U\\+([0-9A-F]{4})/', function ($m) {
         return '&#' . intval($m[1], 16) . ';';
     }, $str);
     return $str;
 }
Example #7
0
 /**
  *  Constructor.
  *
  *  Switches mbstring, the regex engine and the default charset to
  *  Todo::ENCODING before storing the encoded category.
  *
  *  @param  string  $cat    category
  */
 public function __construct($cat)
 {
     $encoding = Todo::ENCODING;

     mb_internal_encoding($encoding);
     mb_regex_encoding($encoding);

     // Character set announced in the HTTP header
     ini_set('default_charset', $encoding);
     ini_set('mbstring.strict_detection', true);

     // Characters that cannot be converted become "_" (0x5f)
     mb_substitute_character(0x5f);

     $this->cat = $this->_encode($cat);
 }
Example #8
0
 /**
  * Parses $data as XML into $this->doc. When libxml reports specific error
  * codes the input is cleaned up and parsed again; fatal errors that remain
  * are recorded in $this->error (first one only) and $this->libxml_errors.
  *
  * @param string $data XML document text
  */
 function __construct($data)
 {
     // Collect libxml errors internally so they can be inspected below
     libxml_use_internal_errors(true);
     libxml_clear_errors();
     $this->doc = new DOMDocument();
     $this->doc->loadXML($data);
     // Later mb_convert_encoding() calls drop invalid bytes instead of substituting them
     mb_substitute_character("none");
     $error = libxml_get_last_error();
     // libxml compiled without iconv?
     if ($error && $error->code == 32) {
         // NOTE(review): normalize_encoding() is defined elsewhere; presumably it
         // returns the re-encoded text or a falsy value on failure - confirm
         $data = $this->normalize_encoding($data);
         if ($data) {
             libxml_clear_errors();
             $this->doc = new DOMDocument();
             $this->doc->loadXML($data);
             $error = libxml_get_last_error();
         }
     }
     // some terrible invalid unicode entity?
     if ($error) {
         foreach (libxml_get_errors() as $err) {
             if ($err->code == 9) {
                 // if the source feed is not in utf8, next conversion will fail
                 $data = $this->normalize_encoding($data);
                 // remove dangling bytes
                 $data = mb_convert_encoding($data, 'UTF-8', 'UTF-8');
                 // apparently not all UTF-8 characters are valid for XML
                 $data = preg_replace('/[^\\x{0009}\\x{000a}\\x{000d}\\x{0020}-\\x{D7FF}\\x{E000}-\\x{FFFD}]+/u', ' ', $data);
                 if ($data) {
                     libxml_clear_errors();
                     $this->doc = new DOMDocument();
                     $this->doc->loadXML($data);
                     $error = libxml_get_last_error();
                 }
                 // Only one clean-up-and-reparse attempt, triggered by the first code 9 error
                 break;
             }
         }
     }
     if ($error) {
         // The loop variable reuses (and overwrites) $error
         foreach (libxml_get_errors() as $error) {
             if ($error->level == LIBXML_ERR_FATAL) {
                 if (!isset($this->error)) {
                     //currently only the first error is reported
                     $this->error = $this->format_error($error);
                 }
                 $this->libxml_errors[] = $this->format_error($error);
             }
         }
     }
     libxml_clear_errors();
     $this->items = array();
 }
Example #9
0
 /**
  * Class constructor
  *
  * Applies the configured charset to PHP, mbstring and iconv, and defines
  * the MB_ENABLED, ICONV_ENABLED and UTF8_ENABLED constants. Constants that
  * already exist are left alone, so a second instantiation does not raise
  * "constant already defined" errors.
  *
  * @return  void
  */
 public function __construct()
 {
     $charset = strtoupper(Config::get('main')->charset);
     ini_set('default_charset', $charset);
     /*
      * Configure mbstring and/or iconv if they are enabled
      * and set MB_ENABLED and ICONV_ENABLED constants, so
      * that we don't repeatedly do extension_loaded() or
      * function_exists() calls.
      */
     if (extension_loaded('mbstring')) {
         defined('MB_ENABLED') or define('MB_ENABLED', TRUE);
         // mbstring.internal_encoding is deprecated starting with PHP 5.6
         // and its usage triggers E_DEPRECATED messages.
         if (!Core::isPHP('5.6')) {
             @ini_set('mbstring.internal_encoding', $charset);
         } else {
             mb_internal_encoding($charset);
         }
         // This is required for mb_convert_encoding() to strip invalid characters.
         // That's utilized by Utf8, but it's also done for consistency with iconv.
         mb_substitute_character('none');
     } else {
         defined('MB_ENABLED') or define('MB_ENABLED', FALSE);
     }
     // There's an ICONV_IMPL constant, but the PHP manual says that using
     // iconv's predefined constants is "strongly discouraged".
     if (extension_loaded('iconv')) {
         defined('ICONV_ENABLED') or define('ICONV_ENABLED', TRUE);
         // iconv.internal_encoding is deprecated starting with PHP 5.6
         // and its usage triggers E_DEPRECATED messages. The version is passed
         // as a string, like every other Core::isPHP() call in this method.
         if (!Core::isPHP('5.6')) {
             @ini_set('iconv.internal_encoding', $charset);
         }
         // From PHP 5.6 on the charset comes from default_charset, already set
         // above. The former ini_set('default_encoding', ...) named a directive
         // that does not exist, so it never had any effect.
     } else {
         defined('ICONV_ENABLED') or define('ICONV_ENABLED', FALSE);
     }
     if (Core::isPHP('5.6')) {
         // The directive is called "internal_encoding"; the previous name
         // "php.internal_encoding" is not a valid ini directive.
         ini_set('internal_encoding', $charset);
     }
     $utf8Enabled = defined('PREG_BAD_UTF8_ERROR') && (ICONV_ENABLED === TRUE or MB_ENABLED === TRUE) && $charset === 'UTF-8';
     defined('UTF8_ENABLED') or define('UTF8_ENABLED', $utf8Enabled);
     if ($utf8Enabled) {
         Logger::log('UTF-8 Support Enabled');
     } else {
         Logger::log('UTF-8 Support Disabled');
     }
 }
Example #10
0
 /**
  * AbstractDiff constructor.
  *
  * Sets the mbstring substitute character to a space (0x20), installs a
  * fresh HtmlDiffConfig carrying $encoding, applies the optional overrides
  * and stores both texts.
  *
  * @param string     $oldText
  * @param string     $newText
  * @param string     $encoding
  * @param null|array $specialCaseTags applied to the config only when not null
  * @param null|bool  $groupDiffs      applied to the config only when not null
  */
 public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
 {
     mb_substitute_character(0x20);

     $config = HtmlDiffConfig::create()->setEncoding($encoding);
     $this->setConfig($config);

     if (null !== $specialCaseTags) {
         $this->config->setSpecialCaseTags($specialCaseTags);
     }

     if (null !== $groupDiffs) {
         $this->config->setGroupDiffs($groupDiffs);
     }

     $this->oldText = $oldText;
     $this->newText = $newText;
     $this->content = '';
 }
 /**
  * Writes every row of $response->data['rows'] to a temporary file, one
  * formatRow() result per CRLF-terminated line, and attaches the rewound
  * file to the response as its stream.
  *
  * @param object $response response whose data array supplies 'rows' and,
  *                         optionally, 'inputCharset' / 'outputCharset'
  */
 public function format($response)
 {
     // Per-response charsets; fall back to the application charset when missing or empty
     $this->inputCharset = @$response->data['inputCharset'] ?: Yii::$app->charset;
     $this->outputCharset = @$response->data['outputCharset'] ?: Yii::$app->charset;
     // Substitute character
     // NOTE(review): Resource is defined elsewhere; presumably it calls the
     // closure with the saved value when $substitute goes out of scope,
     // restoring the previous substitute character - confirm
     $substitute = new Resource(mb_substitute_character(), function ($old) {
         mb_substitute_character($old);
     });
     // Use U+3013 for characters that cannot be converted while rows are formatted
     mb_substitute_character(0x3013);
     $tmpfile = tmpfile();
     foreach ($response->data['rows'] as $row) {
         fwrite($tmpfile, $this->formatRow($row) . "\r\n");
     }
     // Rewind so the stream is read from the beginning
     fseek($tmpfile, 0, SEEK_SET);
     $response->content = null;
     $response->stream = $tmpfile;
 }
Example #12
0
 /**
  * Returns the file's source as validated UTF-8.
  *
  * Content whose detected MIME encoding is not UTF-8 is transliterated to
  * UTF-8 with iconv first. The result is then checked for invalid UTF-8
  * byte sequences.
  *
  * @return string
  *
  * @throws Backend\SourceFileException when conversion fails or invalid bytes remain
  */
 public function getSource()
 {
     $path = $this->fileInfo->getPathname();
     $code = file_get_contents($path);
     if ($code === false) {
         // NOTE(review): an unreadable file previously flowed through as an
         // empty source; that outcome is kept, but a dedicated exception may
         // be more appropriate - confirm against SourceFileException's codes.
         $code = '';
     }
     $info = new \finfo();
     $encoding = $info->file($path, FILEINFO_MIME_ENCODING);
     if (strtolower($encoding) != 'utf-8') {
         try {
             $converted = iconv($encoding, 'UTF-8//TRANSLIT', $code);
         } catch (\ErrorException $e) {
             throw new SourceFileException('Encoding error - conversion to UTF-8 failed', SourceFileException::BadEncoding, $e);
         }
         // Without an error handler that turns warnings into ErrorException,
         // iconv() reports failure by returning false instead of throwing.
         if ($converted === false) {
             throw new SourceFileException('Encoding error - conversion to UTF-8 failed', SourceFileException::BadEncoding, new \ErrorException('iconv() returned false'));
         }
         $code = $converted;
     }
     // This is a workaround to filter out leftover invalid UTF-8 byte sets
     // even if the source looks like it's UTF-8 already
     $previous = mb_substitute_character();
     mb_substitute_character('none');
     $cleanCode = mb_convert_encoding($code, 'UTF-8', 'UTF-8');
     // Do not leak the "none" setting into unrelated code
     mb_substitute_character($previous);
     // Strict comparison: a loose != compares numeric-looking strings by value
     // (e.g. "1e3" == "1000") and could miss a real difference.
     if ($cleanCode !== $code) {
         throw new SourceFileException('Encoding error - invalid UTF-8 bytes found', SourceFileException::InvalidDataBytes);
     }
     return $cleanCode;
 }
Example #13
0
 /**
  * Perform initialization required for the string wrapper library.
  *
  * Defines ENABLE_MBSTRING (only when mbstring is available) and PCRE_UTF8,
  * and points mbstring at the configured client charset.
  *
  * @return null
  */
 static function init()
 {
     $clientCharset = strtolower_codesafe(Config::getVar('i18n', 'client_charset'));
     // Check if mbstring is installed (requires PHP >= 4.3.0)
     if (String::hasMBString()) {
         // mbstring routines are available
         define('ENABLE_MBSTRING', true);
         // Set up required ini settings for mbstring
         // FIXME Do any other mbstring settings need to be set?
         mb_internal_encoding($clientCharset);
         // Question mark (code point 63). Passed as an integer: the only string
         // values the function documents are "none", "long" and "entity", and
         // PHP 8 rejects the numeric string '63' with a ValueError.
         mb_substitute_character(63);
     }
     // Define modifier to be used in regexp_* routines
     // FIXME Should non-UTF-8 encodings be supported with mbstring?
     if ($clientCharset == 'utf-8' && String::hasPCREUTF8()) {
         define('PCRE_UTF8', 'u');
     } else {
         define('PCRE_UTF8', '');
     }
 }
Example #14
0
/**
 * Normalises $text (treated as UTF-8) in place and strips code points that
 * are unsafe for XML.
 *
 * Invalid sequences are replaced with U+FFFD by round-tripping the text
 * through UTF-32 <php.net/mb_check_encoding#89286>, <stackoverflow.com/a/13695364>.
 * Two filters then remove (1) everything outside the XML character ranges
 * <www.w3.org/TR/REC-xml/#charsets> and (2) the "compatibility characters"
 * and "permanently undefined Unicode characters" named in the note that
 * follows that section.
 *
 * @param  string $text text to clean; updated by reference
 * @return string the cleaned text, also returned because anonymous values
 *                passed at the call site cannot be read back by reference
 */
function safeUTF8(&$text)
{
    // Use the Unicode replacement character rather than "?" for bad input
    mb_substitute_character(0xfffd);

    $utf32 = mb_convert_encoding($text, 'UTF-32', 'UTF-8');
    $text = mb_convert_encoding($utf32, 'UTF-8', 'UTF-32');

    $filters = array(
        // anything outside the XML character ranges
        '/[^\\x{0009}\\x{000a}\\x{000d}\\x{0020}-\\x{D7FF}\\x{E000}-\\x{FFFD}\\x{10000}-\\x{10FFFF}]+/u',
        // compatibility characters and permanently undefined code points
        '/[\\x{007f}-\\x{0084}\\x{0086}-\\x{009f}\\x{FDD0}-\\x{FDEF}' . '\\x{200E}\\x{200F}\\x{202A}-\\x{202E}' . '\\x{1FFFE}\\x{1FFFF}\\x{2FFFE}\\x{2FFFF}\\x{3FFFE}\\x{3FFFF}\\x{4FFFE}\\x{4FFFF}' . '\\x{5FFFE}\\x{5FFFF}\\x{6FFFE}\\x{6FFFF}\\x{7FFFE}\\x{7FFFF}\\x{8FFFE}\\x{8FFFF}' . '\\x{9FFFE}\\x{9FFFF}\\x{AFFFE}\\x{AFFFF}\\x{BFFFE}\\x{BFFFF}\\x{CFFFE}\\x{CFFFF}' . '\\x{DFFFE}\\x{DFFFF}\\x{EFFFE}\\x{EFFFF}\\x{FFFFE}\\x{FFFFF}\\x{10FFFE}\\x{10FFFF}]+/u',
    );
    // Applied one after the other, each on the result of the previous one
    foreach ($filters as $filter) {
        $text = preg_replace($filter, '', $text);
    }

    //TODO: strip invalid byte-sequences
    //see: http://stackoverflow.com/a/13695364
    //Some interesting references:
    //http://www.php.net/manual/en/reference.pcre.pattern.modifiers.php#54805
    return $text;
}
Example #15
0
 /**
  * Configures mbstring for UTF-8 and loads the byte-to-UTF-8 translation
  * tables into self::$trans_table, building and caching them in MCached
  * (for 86400 seconds) when no cached copy exists.
  */
 public static function init()
 {
     self::$utf8validator = (bool) extension_loaded('utf8validator');
     mb_internal_encoding('UTF-8');
     mb_language('uni');
     mb_regex_encoding('UTF-8');
     mb_detect_order(array('UTF-8', 'ISO-8859-1'));
     // Invalid input is replaced with U+FFFD
     mb_substitute_character(0xfffd);
     MCached::connect();
     $trans = MCached::get(self::TRANSKEY);
     if ($trans === MCached::NO_RESULT) {
         // NOTE(review): the literals below are indexed byte by byte further
         // down, so they look like they were single-byte Windows-1252 /
         // ISO-8859-1 characters in the original file and may be mis-encoded
         // as shown here - confirm the file's encoding before touching them.
         $win = "€" . implode('', range("‚", "Œ")) . "Ž" . implode('', range("‘", "œ")) . implode('', range("ž", "ÿ"));
         $win_iso = "�����";
         $iso = implode('', range("€", "ÿ"));
         $winlen = strlen($win);
         $winisolen = strlen($win_iso);
         $isolen = strlen($iso);
         $trans = array('iso_to_utf8' => array(), 'win_to_utf8' => array(), 'from_utf8' => array());
         // ISO-8859-1 bytes: forward map plus reverse (UTF-8 => byte) map
         for ($i = 0; $i < $isolen; $i++) {
             $utf8 = mb_convert_encoding($iso[$i], 'UTF-8', 'ISO-8859-1');
             $trans['iso_to_utf8'][$iso[$i]] = $utf8;
             $trans['from_utf8'][$utf8] = $iso[$i];
         }
         // Windows-1252 bytes: forward map; the reverse map is extended/overwritten too
         for ($i = 0; $i < $winlen; $i++) {
             $utf8 = mb_convert_encoding($win[$i], 'UTF-8', 'Windows-1252');
             $trans['win_to_utf8'][$win[$i]] = $utf8;
             $trans['from_utf8'][$utf8] = $win[$i];
         }
         // Bytes in $win_iso are decoded as ISO-8859-1 but stored in the
         // Windows table; no reverse entry is written for them
         for ($i = 0; $i < $winisolen; $i++) {
             $utf8 = mb_convert_encoding($win_iso[$i], 'UTF-8', 'ISO-8859-1');
             $trans['win_to_utf8'][$win_iso[$i]] = $utf8;
         }
         MCached::add(self::TRANSKEY, $trans, 86400);
     }
     self::$trans_table = $trans;
 }
Example #16
0
 /**
  * Controller bootstrap: loads the main config, configures mbstring and the
  * timezone, loads helpers and the MobileMyClass library, rebuilds $_GET
  * from the query string, extends the include path with APPPATH . 'pear'
  * and finally calls init_APP().
  */
 function __construct()
 {
     parent::__construct();
     $this->config->load('config_main', TRUE);
     mb_language(LANG);
     ini_set('mbstring.detect_order', 'auto');
     ini_set('mbstring.http_input', 'auto');
     ini_set('mbstring.http_output', 'pass');
     ini_set('mbstring.internal_encoding', 'UTF-8');
     ini_set('mbstring.script_encoding', 'UTF-8');
     // The directive was misspelled "substitude_character", which ini_set()
     // silently rejects. The mb_substitute_character() call below is what
     // subsequent conversions actually use.
     ini_set('mbstring.substitute_character', 'none');
     mb_regex_encoding("UTF-8");
     // Unconvertible characters become U+3013. A preceding
     // mb_substitute_character("long") call was removed: it was overwritten
     // immediately by this one and had no effect.
     mb_substitute_character(0x3013);
     // For PHP 5.3
     ini_set('date.timezone', 'Asia/Ho_Chi_Minh');
     $this->load->helper(array('url', 'path', 'form', 'main'));
     // MobileMyClass library
     // QUERY_STRING is not guaranteed to be set (e.g. on the CLI); an empty
     // string yields the same empty $_GET without raising a notice.
     $queryString = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '';
     parse_str($queryString, $_GET);
     $this->load->library('MobileMyClass');
     $path = APPPATH . 'pear';
     set_include_path(get_include_path() . PATH_SEPARATOR . $path);
     $this->init_APP();
 }
Example #17
0
 /**
  * Splits an HTML (or plain text) fragment into a flat list of text chunks.
  *
  * Each chunk is array("text" => ..., "style" => ..., "newlines" => n), where
  * "style" is $fontstyle, extended with the letters B/I/U for enclosing
  * <b>/<i>/<u> elements, and "newlines" counts the line breaks that follow
  * the chunk (from <br> and <p> handling).
  *
  * @param  string $html      fragment to split
  * @param  array  $fontstyle base style applied to every chunk
  * @return array  list of chunks; an empty array when the fragment cannot be
  *                parsed as XML
  */
 private function SplitHTMLChunks($html, $fontstyle)
 {
     // Byte 160 is replaced with a regular space
     $html = str_replace(chr(160), " ", $html);
     if (strip_tags($html) == $html) {
         // No tags present: escape the text and turn line breaks into <br />
         $html = nl2br(htmlspecialchars($html));
     } else {
         // Escape ampersands that do not start an entity, and self-close <br>
         // so the fragment can be parsed as XML below
         $html = preg_replace("/&(?!([a-z\\d]+|#\\d+|#x[a-f\\d]+);)/i", "&amp;", $html);
         $html = preg_replace("/<br\\s*>/i", "<br/>", $html);
     }
     // Drop invalid UTF-8 byte sequences
     mb_substitute_character("none");
     $html = mb_convert_encoding($html, "UTF-8", "UTF-8");
     if ($html == "") {
         return array(array("text" => "", "style" => $fontstyle, "newlines" => 0));
     }
     // Parse the fragment under a synthetic <root/> element
     $doc = new DOMDocument();
     $doc->loadXML("<root/>");
     $f = $doc->createDocumentFragment();
     if (!$f->appendXML($html)) {
         return array();
     }
     $doc->documentElement->appendChild($f);
     $cur = $doc->documentElement;
     // $hs: stack of active B/I/U style letters
     $hs = array();
     // $inpara: null = nothing seen yet, 0 = a <p> was just left,
     // 1 = last node handled was text, 2 = a <p> was just entered
     $inpara = null;
     $chunks = array();
     // Depth-first walk over the tree without recursion
     while ($cur != null) {
         if ($cur->nodeType == XML_TEXT_NODE) {
             if ($inpara === 0) {
                 // Text directly after a closed paragraph: add a paragraph break
                 $chunks[count($chunks) - 1]["newlines"] += 2;
             }
             $inpara = 1;
             if (count($hs) > 0) {
                 $style = array_merge($fontstyle, array("style" => implode("", $hs)));
                 if (isset($fontstyle["style"])) {
                     $style["style"] .= $fontstyle["style"];
                 }
             } else {
                 $style = $fontstyle;
             }
             $chunks[] = array("text" => $cur->nodeValue, "style" => $style, "newlines" => 0);
         } elseif ($cur->nodeType == XML_ELEMENT_NODE) {
             // NOTE(review): the "br" and "p" cases index count($chunks) - 1;
             // when no chunk exists yet that is index -1, which creates a stray
             // entry holding only "newlines" - confirm whether callers rely on it
             switch (strtolower($cur->nodeName)) {
                 case "b":
                     array_push($hs, "B");
                     break;
                 case "i":
                     array_push($hs, "I");
                     break;
                 case "u":
                     array_push($hs, "U");
                     break;
                 case "br":
                     $chunks[count($chunks) - 1]["newlines"]++;
                     break;
                 case "p":
                     if ($inpara !== null && $inpara < 2) {
                         $chunks[count($chunks) - 1]["newlines"] += 2;
                     }
                     $inpara = 2;
                     break;
             }
         }
         // Advance: first child, else next sibling, else climb until an
         // ancestor has a next sibling
         if ($cur->firstChild) {
             $cur = $cur->firstChild;
         } elseif ($cur->nextSibling) {
             $cur = $cur->nextSibling;
         } else {
             while ($cur != null && $cur->nextSibling == null) {
                 $cur = $cur->parentNode;
                 if ($cur != null) {
                     // Leaving an element: undo its style or mark the paragraph closed
                     switch (strtolower($cur->nodeName)) {
                         case "b":
                         case "i":
                         case "u":
                             array_pop($hs);
                             break;
                         case "p":
                             $inpara = 0;
                     }
                 }
             }
             if ($cur != null) {
                 $cur = $cur->nextSibling;
             }
         }
     }
     return $chunks;
 }
Example #18
0
}
# mbstring runtime defaults; each call is skipped when the function is missing
# and any warning it raises is suppressed.
function_exists('mb_regex_encoding') and @mb_regex_encoding('UTF-8');
# default: "pr"
function_exists('mb_regex_set_options') and @mb_regex_set_options('pr');
function_exists('mb_http_output') and @mb_http_output('pass');
function_exists('mb_language') and @mb_language('uni');
# Unicode Replacement Character:
# U+FFFD = 0xFFFD (utf16 hex) = 65533 (dec) = "\xEF\xBF\xBD" (utf8 hex)
function_exists('mb_substitute_character') and @mb_substitute_character(0xfffd);
function_exists('mb_detect_order') and @mb_detect_order('auto');

# refuse to run with function overloading switched on
$tmp = strtolower(trim(@ini_get('mbstring.func_overload')));
if ($tmp === 'on' || $tmp >= '1') {
    echo "mbstring.func_overload must not be enabled in php.ini\n";
    exit(1);
}

# other php.ini settings
#
# display_errors: to be changed when our error handler is installed
ini_set('display_errors', true);
Example #19
0
File: Au.php Project: k1LoW/yak
 /**
  * Convert character encoding from ISO-2022-JP to UTF-8.
  *
  * The conversion runs with the substitute character in 'long' mode, so
  * unconvertible characters are left behind as textual markers; the
  * JIS+XXXX markers are then handed to _fallbackJisToUtf8(). The previous
  * substitute character is restored right after the conversion.
  *
  * @param  string  $text
  * @return string
  */
 function _convertJisToUtf8($text)
 {
     $previous = mb_substitute_character();
     mb_substitute_character('long');
     $utf8 = mb_convert_encoding($text, 'UTF-8', 'JIS');
     mb_substitute_character($previous);

     return preg_replace_callback('/JIS\\+([0-9A-F]{4})/', array($this, '_fallbackJisToUtf8'), $utf8);
 }
Example #20
0
 /**
  * Rewrites carrier emoji in an EUC-JP string as textual "((x:hhhh))" markers.
  *
  * For SoftBank input, escape sequences of the form ESC $ <pairs> [SI] become
  * one "((s:hhhh))" marker per two-byte pair. For any other source the string
  * is round-tripped through EUC-JP in 'long' substitute mode and each
  * resulting BAD+XXXX marker becomes "((i:xxxx))" (FOMA) or "((e:xxxx))".
  *
  * Both replacements used the /e (eval) regex modifier, which was removed in
  * PHP 7; they are now callbacks producing the same output.
  *
  * @param  string $str EUC-JP input
  * @return string input with emoji replaced by markers
  */
 function euc2ktaimod($str)
 {
     if ($this->from === MPC_FROM_SOFTBANK) {
         return preg_replace_callback('/[\\x1B][\\x24]((?:[G|E|F|O|P|Q][\\x21-\\x7E])+)[\\x0F]?/', function ($m) {
             // Hex-dump the matched bytes and split them into 4-digit (two-byte) codes.
             // The eval version needed str_replace() to undo the quoting /e applied to
             // the match; a callback receives the raw bytes, so that step is gone.
             $codes = explode(' ', rtrim(chunk_split(strtolower(bin2hex($m[1])), 4, ' ')));
             return '((s:' . join('))((s:', $codes) . '))';
         }, $str);
     }

     $prefix = $this->from === MPC_FROM_FOMA ? 'i' : 'e';
     // 'long' mode marks unconvertible bytes instead of dropping them; the
     // previous substitute character is restored right after the conversion
     $old = mb_substitute_character();
     mb_substitute_character('long');
     $str = mb_convert_encoding($str, 'EUC-JP', 'EUC-JP');
     mb_substitute_character($old);

     return preg_replace_callback('/BAD\\+([0-9A-F]{4})/i', function ($m) use ($prefix) {
         return '((' . $prefix . ':' . strtolower($m[1]) . '))';
     }, $str);
 }
Example #21
0
/**
 * Makes sure the data is using valid utf8, invalid characters are discarded.
 *
 * Strings are cleaned; arrays are cleaned element by element; objects are
 * cloned and their iterable properties cleaned; every other type is returned
 * unchanged.
 *
 * Note: this function is not intended for full objects with methods and private properties.
 *
 * @param mixed $value
 * @return mixed with proper utf-8 encoding
 */
function fix_utf8($value)
{
    if (is_null($value) or $value === '') {
        return $value;
    }

    if (is_array($value)) {
        foreach ($value as $k => $v) {
            $value[$k] = fix_utf8($v);
        }
        return $value;
    }

    if (is_object($value)) {
        // do not modify original
        $value = clone $value;
        foreach ($value as $k => $v) {
            $value->{$k} = fix_utf8($v);
        }
        return $value;
    }

    if (!is_string($value)) {
        // this is some other type, no utf-8 here
        return $value;
    }

    if ((string) (int) $value === $value) {
        // shortcut: a canonical integer string is plain ASCII
        return $value;
    }

    // Lower error reporting because glibc throws bogus notices.
    $olderror = error_reporting();
    if ($olderror & E_NOTICE) {
        error_reporting($olderror ^ E_NOTICE);
    }

    // Note: this duplicates min_fix_utf8() intentionally.
    // Probe once per request whether iconv's //IGNORE really drops bad bytes.
    static $buggyiconv = null;
    if ($buggyiconv === null) {
        $buggyiconv = (!function_exists('iconv') or iconv('UTF-8', 'UTF-8//IGNORE', '100' . chr(130) . '\\80') !== '100\\80');
    }

    if (!$buggyiconv) {
        $result = iconv('UTF-8', 'UTF-8//IGNORE', $value);
    } elseif (function_exists('mb_convert_encoding')) {
        $subst = mb_substitute_character();
        // "none" is the documented way to drop invalid characters. The empty
        // string used before is not a valid setting, so the substitute
        // character was never changed and bad bytes were not discarded.
        mb_substitute_character('none');
        $result = mb_convert_encoding($value, 'utf-8', 'utf-8');
        mb_substitute_character($subst);
    } else {
        // Neither converter is usable, so the value is returned as is.
        // Warn admins on admin/index.php page.
        $result = $value;
    }

    if ($olderror & E_NOTICE) {
        error_reporting($olderror);
    }

    return $result;
}
Example #22
0
 /**
  * Convert a foreign charset encoding from or to UTF-8
  *
  * Uses mbstring when it supports both charsets, otherwise iconv when it
  * supports both, otherwise a two-step conversion through UTF-8 that mixes
  * the two libraries.
  *
  * @param  string      $string       text to convert
  * @param  string|null $encoding     source charset; detected via utf8::detect() when empty
  * @param  string      $destination  target charset
  * @return string|false converted text (false if a direct iconv conversion fails)
  */
 function convert($string, $encoding = NULL, $destination = 'UTF-8')
 {
     if (!$encoding) {
         $encoding = utf8::detect($string);
     }
     if ($encoding == $destination) {
         return $string;
     }
     if (!empty($this->mb_sets)) {
         $encode_mb = array_key_exists($encoding, $this->mb_sets);
         $dest_mb = array_key_exists($destination, $this->mb_sets);
         if ($encode_mb && $dest_mb) {
             @mb_substitute_character('none');
             return mb_convert_encoding($string, $destination, $encoding);
         }
     } else {
         $encode_mb = $dest_mb = false;
     }
     $encode_iconv = array_key_exists($encoding, $this->iconv_sets);
     $dest_iconv = array_key_exists($destination, $this->iconv_sets);
     if ($encode_iconv && $dest_iconv) {
         return @iconv($encoding, $destination . '//IGNORE', $string);
     }
     // must use mixed conversion
     // mbstring may be absent on this path; calling an undefined function is
     // fatal and "@" does not suppress that, so check first.
     if (function_exists('mb_substitute_character')) {
         @mb_substitute_character('none');
     }
     // Step 1: source charset -> UTF-8
     if ($encode_mb) {
         $instring = mb_convert_encoding($string, 'UTF-8', $encoding);
     } elseif ($encode_iconv) {
         $instring = @iconv($encoding, 'UTF-8' . '//IGNORE', $string);
         if ($instring === false) {
             // iconv failed: fall back to the untouched input
             $instring = $string;
         }
     } else {
         $instring = $string;
     }
     // Step 2: UTF-8 -> destination charset. This step used to convert the
     // original $string again, discarding the result of step 1.
     if ($dest_mb) {
         $outstring = mb_convert_encoding($instring, $destination, 'UTF-8');
     } elseif ($dest_iconv) {
         $outstring = @iconv('UTF-8', $destination . '//IGNORE', $instring);
     } else {
         // Destination is not supported by either library: return the input unchanged
         $outstring = $string;
     }
     return $outstring;
 }
Example #23
0
/**
 * Strip a string down to content that is safe to pass to json_encode().
 *
 * Invalid UTF-8 sequences are replaced with U+FFFD, HTML/PHP tags are removed
 * with strip_tags(), and every character that is not a letter, number,
 * punctuation, symbol, space separator or paragraph separator is dropped
 * (this removes control characters such as newlines). The arrow U+2794 is
 * removed explicitly.
 *
 * @param string $ps_text Text to sanitize.
 * @return string Sanitized UTF-8 text.
 */
function caSanitizeStringForJsonEncode($ps_text)
{
    // Remove invalid UTF-8. The substitute character is process-wide mbstring
    // state, so put the previous value back once the conversion is done.
    $vm_previous_substitute = mb_substitute_character();
    mb_substitute_character(0xfffd);
    $ps_text = mb_convert_encoding($ps_text, 'UTF-8', 'UTF-8');
    mb_substitute_character($vm_previous_substitute);
    // @see http://php.net/manual/en/regexp.reference.unicode.php
    // The "u" modifier makes PCRE match whole code points; without it the
    // \p{..} classes are applied to single bytes and can tear multi-byte
    // characters apart, producing invalid UTF-8.
    $vs_clean = preg_replace("/[^\\p{Ll}\\p{Lm}\\p{Lo}\\p{Lt}\\p{Lu}\\p{N}\\p{P}\\p{Zp}\\p{Zs}\\p{S}]|➔/u", '', strip_tags($ps_text));
    // preg_replace() returns null on a PCRE error; keep the return type a string.
    return $vs_clean === null ? '' : $vs_clean;
}
Example #24
0
// Smoke-test script: dumps the results of several mbstring functions so the
// output can be compared against an expected transcript. Statement order
// matters because the output is sequential and mb_substitute_character()
// changes global state.

// --- mb_strtolower ---
$str = "Mary Had A Little Lamb and She LOVED It So";
$str = mb_strtolower($str);
var_dump($str);
var_dump(mb_strtolower("ABC"));
// --- mb_strtoupper ---
$str = "Mary Had A Little Lamb and She LOVED It So";
$str = mb_strtoupper($str);
var_dump($str);
var_dump(mb_strtoupper("abc"));
// --- mb_strwidth on a string containing a non-ASCII character ---
var_dump(mb_strwidth("PrÜ" . "fung"));
// --- mb_substitute_character: set by code point, then by keyword ---
/* Set with Unicode U+3013 (GETA MARK) */
mb_substitute_character(0x3013);
var_dump(mb_substitute_character() === 0x3013);
/* Set hex format */
mb_substitute_character("long");
/* Display current setting */
var_dump(mb_substitute_character());
// --- mb_substr_count: literal haystack, then haystack from a variable ---
var_dump(mb_substr_count("This is a test", "is"));
$text = "This is a test";
var_dump(mb_substr_count($text, "is"));
// different from substr_count
// mb_strrchr behaves differently in different versions of
// libmbfl (https://github.com/facebook/hiphop-php/issues/68)
// NOTE(review): the comment above names mb_strrchr, but the call below is
// mb_substr_count -- presumably that is the function meant; confirm against
// the linked issue. Either count (2 or 1) is accepted as a pass.
var_dump(mb_substr_count("gcdgcdgcd", "gcdgcd") === 2 || mb_substr_count("gcdgcdgcd", "gcdgcd") === 1);
// --- mb_substr: open-ended, bounded, over-long and negative offsets ---
var_dump(mb_substr("abcdef", 1));
var_dump(mb_substr("abcdef", 1, 3));
var_dump(mb_substr("abcdef", 0, 4));
var_dump(mb_substr("abcdef", 0, 8));
var_dump(mb_substr("abcdef", -1, 1));
// Same calls on a string whose first character is non-ASCII.
var_dump(mb_substr("Ü" . "bcdef", 1));
var_dump(mb_substr("Ü" . "bcdef", 1, 3));
var_dump(mb_substr("Ü" . "bcdef", 0, 4) === "Ü" . "bcd");
Example #25
0
 * It is recommended to not enable this unless absolutely necessary.
 */
// Register Kohana::auto_load_lowercase as an additional SPL autoloader.
spl_autoload_register(array('Kohana', 'auto_load_lowercase'));
/**
 * Enable the Kohana auto-loader for unserialization.
 *
 * Pointing unserialize_callback_func at spl_autoload_call routes classes
 * that are undefined during unserialize() through the registered autoloaders.
 *
 * @link  http://php.net/spl_autoload_call
 * @link  http://php.net/manual/var.configuration.php#unserialize-callback-func
 */
ini_set('unserialize_callback_func', 'spl_autoload_call');
/**
 * Set the mb_substitute_character to "none", so mbstring drops characters it
 * cannot convert instead of replacing them with a substitute character.
 * This is a process-wide setting and stays in effect after this file runs.
 *
 * @link http://www.php.net/manual/function.mb-substitute-character.php
 */
mb_substitute_character('none');
// -- Configuration and initialization -----------------------------------------
/**
 * Set Kohana::$environment if a 'GLEEZ_ENV' environment variable has been supplied.
 *
 * @todo In the future Kohana::$environment should be moved to Gleez Core as Gleez::$environment
 *
 * @link https://github.com/gleez/cms/wiki/Apache
 * @link https://github.com/gleez/cms/wiki/Nginx
 */
if (isset($_SERVER['GLEEZ_ENV'])) {
    // Get environment variable from $_SERVER, .htaccess, apache.conf, nginx.conf, etc.
    $env = 'Kohana::' . strtoupper($_SERVER['GLEEZ_ENV']);
} elseif (get_cfg_var('GLEEZ_ENV')) {
    // Get environment variable from php.ini or from ini_get('user_ini.filename')
    $env = 'Kohana::' . strtoupper(get_cfg_var('GLEEZ_ENV'));
Example #26
0
/**
 * Makes sure the data is using valid utf8, invalid characters are discarded.
 *
 * Strings have null bytes and invalid byte sequences removed. Arrays are
 * cleaned recursively, value by value (keys are left as they are). Objects are
 * cloned and their iterable properties cleaned on the clone, so the caller's
 * object is not modified. Every other type is returned unchanged.
 *
 * Note: this function is not intended for full objects with methods and private properties.
 *
 * @param mixed $value
 * @return mixed with proper utf-8 encoding
 */
function fix_utf8($value)
{
    if (is_null($value) or $value === '') {
        return $value;
    }

    if (is_string($value)) {
        if ((string) (int) $value === $value) {
            // Shortcut: a canonical integer string needs no cleaning.
            return $value;
        }
        // No null bytes expected in our data, so let's remove them.
        $value = str_replace("\0", '', $value);

        // Note: this duplicates min_fix_utf8() intentionally.
        // Probe iconv once per request: some builds mishandle //IGNORE.
        static $buggyiconv = null;
        if ($buggyiconv === null) {
            $buggyiconv = (!function_exists('iconv') or @iconv('UTF-8', 'UTF-8//IGNORE', '100' . chr(130) . '€') !== '100€');
        }

        if (!$buggyiconv) {
            return @iconv('UTF-8', 'UTF-8//IGNORE', $value);
        }

        if (function_exists('mb_convert_encoding')) {
            // Drop unconvertible bytes instead of substituting them, then
            // restore the caller's setting. 'none' is the supported spelling;
            // an empty string is rejected by PHP 8.
            $subst = mb_substitute_character();
            mb_substitute_character('none');
            $result = mb_convert_encoding($value, 'utf-8', 'utf-8');
            mb_substitute_character($subst);
            return $result;
        }

        // No usable converter is available; return the string as is.
        // Warn admins on admin/index.php page.
        return $value;
    }

    if (is_array($value)) {
        foreach ($value as $k => $v) {
            $value[$k] = fix_utf8($v);
        }
        return $value;
    }

    if (is_object($value)) {
        // Do not modify original.
        $value = clone $value;
        foreach ($value as $k => $v) {
            $value->{$k} = fix_utf8($v);
        }
        return $value;
    }

    // This is some other type, no utf-8 here.
    return $value;
}
Example #27
0
 /**
  * Fetch a page over HTTP and derive a bookmark title from its <title> tag.
  *
  * When the response code is not 200 an error message containing the code is
  * returned instead. When no <title> element is found, the URL-decoded $url
  * is used as the title. Angle brackets in the title are turned into
  * entities, and the title is then converted to
  * $this->cont['SOURCE_ENCODING'].
  *
  * @param string $url Address of the page to inspect.
  * @return string Trimmed title, or an error message.
  */
 function plugin_urlbookmark_get_title($url)
 {
     $request = new Hyp_HTTP_Request();
     $request->init();
     $request->ua = 'Mozilla/5.0';
     $request->url = $url;
     $request->get();
     if ($request->rc !== 200) {
         return 'The page not found. (' . $request->rc . ')';
     }
     $body = $request->data;
     $request = NULL;

     // Collapse the document onto one line so the title pattern can span it.
     $buf = preg_replace('/[\\x00\\r\\n]+/', '', $body);
     $title = preg_match('/<title[^>]*>(.+?)<\\/title>/i', $buf, $found)
         ? trim($found[1])
         : rawurldecode($url);
     // After this no literal "<" remains in the title; the substitution
     // check further down relies on that.
     $title = str_replace(array('<', '>'), array('&lt;', '&gt;'), $title);

     $enc = $this->get_encoding($buf);
     if ($enc === 'auto') {
         if (!extension_loaded('mbstring')) {
             // No converter available: hand the title back unconverted.
             return trim($title);
         }
         $enc = $this->get_encoding($buf);
         if (strtoupper($this->cont['SOURCE_ENCODING']) === 'UTF-8') {
             return trim(mb_convert_encoding($title, $this->cont['SOURCE_ENCODING'], $enc));
         }
         // Use "<" (0x3c) as the substitute character so that any character
         // the target encoding cannot represent shows up as a "<".
         $savedSubstitute = mb_substitute_character();
         mb_substitute_character(0x3c);
         $converted = @mb_convert_encoding($title, $this->cont['SOURCE_ENCODING'], $enc);
         if (strpos($converted, '<') === FALSE) {
             $title = $converted;
         } else {
             // Something was substituted: fall back to HTML entities via UTF-8.
             $title = @mb_convert_encoding($title, 'UTF-8', $enc);
             $title = mb_convert_encoding($title, 'HTML-ENTITIES', 'UTF-8');
         }
         mb_substitute_character($savedSubstitute);
         return trim($title);
     }

     // Known encoding. The helper's return value is not used -- presumably it
     // rewrites $title by reference; confirm against its definition.
     $this->func->encode_numericentity($title, $this->cont['SOURCE_ENCODING'], $enc);
     return trim(mb_convert_encoding($title, $this->cont['SOURCE_ENCODING'], $enc));
 }
Example #28
0
 /**
  * Converts UTF-8 text to Windows-1252.
  *
  * Characters that cannot be converted are replaced with U+00A0 instead of
  * "?". The previous mbstring substitute character is restored afterwards,
  * so the process-wide setting is not changed for other code.
  *
  * @param string $text UTF-8 input.
  * @return string Windows-1252 encoded text.
  */
 public static function csv($text)
 {
   $previousSubstitute = mb_substitute_character();
   // Use U+00A0 instead of "?" as the replacement for unconvertible characters.
   mb_substitute_character(0x00A0);
   $converted = mb_convert_encoding($text,'Windows-1252','UTF-8');
   mb_substitute_character($previousSubstitute);
   return $converted;
 }
Example #29
0
 /**
  * Converts a string between encodings with mbstring, silently dropping
  * characters that cannot be represented in the target encoding.
  *
  * The mbstring substitute character is switched to 'none' for the duration
  * of the call, and the value that was active on entry is restored before
  * returning. Both encoding names are upper-cased before being passed on.
  *
  * @param string $sInputString
  * @param string $sInputFromEncoding
  * @param string $sInputToEncoding
  *
  * @return string|bool
  */
 public static function MbConvertEncoding($sInputString, $sInputFromEncoding, $sInputToEncoding)
 {
     // Read the current setting on every call. Caching the first value ever
     // seen would overwrite any change made elsewhere between two calls.
     $mPreviousSubCh = \mb_substitute_character();
     \mb_substitute_character('none');
     $sResult = @\mb_convert_encoding($sInputString, \strtoupper($sInputToEncoding), \strtoupper($sInputFromEncoding));
     \mb_substitute_character($mPreviousSubCh);
     return $sResult;
 }
 /**
  * Serializes a dictionary into text (or a ZIP archive containing that text
  * plus the dictionary's files) encoded as Windows-31J.
  *
  * @param Dictionary $dictionary
  * @throws \BadMethodCallException When the dictionary has no files, $this->textFileOnly is false,
  *      and the dictionary still reports filenames. (Original note: the dictionary was parsed
  *      from a CSV file alone although its format includes image, audio or video files.)
  * @throws EmptyOutputException When no word could be converted to the target dictionary format.
  * @return string[] Array with the keys 'bytes', 'type' and 'name'.
  */
 public function serialize(Dictionary $dictionary) : array
 {
     $filenameValidator = new \esperecyan\dictionary_php\validator\FilenameValidator();
     $directoryName = $filenameValidator->convertToValidFilenameWithoutExtensionInArchives($dictionary->getTitle());

     // Collect the serialized words, skipping those that serialize to nothing.
     $words = [];
     foreach ($dictionary->getWords() as $word) {
         if ($this->type === 'Inteligenceω しりとり') {
             $line = $this->serializeWordAsShiritori($word);
         } else {
             $line = $this->serializeWordAsQuiz($word, $directoryName);
         }
         if ($line === '') {
             continue;
         }
         $words[] = $line;
     }
     if (count($words) === 0) {
         throw new EmptyOutputException(sprintf(_('%sの辞書形式に変換可能なお題が見つかりませんでした。'), $this->type));
     }

     // Convert with a custom substitute character, then restore the previous one.
     $savedSubstitute = mb_substitute_character();
     mb_substitute_character(\IntlChar::ord(self::SUBSTITUTE_CHARACTER));
     $text = $this->serializeMetadata($dictionary, '%') . implode('', $words);
     $bytes = mb_convert_encoding($text, 'Windows-31J', 'UTF-8');
     mb_substitute_character($savedSubstitute);

     $files = $dictionary->getFiles();
     if (!$files && !$this->textFileOnly && $dictionary->getFilenames()) {
         throw new \BadMethodCallException();
     }

     // Plain-text output unless this is a quiz with files and archives are allowed.
     if ($this->type !== 'Inteligenceω クイズ' || !$files || $this->textFileOnly) {
         return [
             'bytes' => $bytes,
             'type' => 'text/plain; charset=Shift_JIS',
             'name' => $this->getFilename($dictionary, 'txt'),
         ];
     }

     $archive = $this->generateArchive();
     foreach ($files as $file) {
         $archive->addFile($file, "{$directoryName}/" . $file->getFilename());
     }
     $archive->addFromString("{$directoryName}.txt", $bytes);
     // Read the path before closing, then load the finished archive from disk.
     $archivePath = $archive->filename;
     $archive->close();
     return [
         'bytes' => file_get_contents($archivePath),
         'type' => 'application/zip',
         'name' => $this->getFilename($dictionary, 'zip'),
     ];
 }