/**
  * Turn a decimal code point into its UTF-8 character.
  *
  * The number is wrapped into a decimal numeric character reference
  * ("&#N;") and handed to mbstring for decoding, using the class-level
  * conversion map in static::$numeric_mask.
  *
  * @param int|string $int Decimal code point.
  * @return string Result of mb_decode_numericentity() for "&#N;".
  */
 public static function lookupDecimal($int)
 {
     // UNTESTED (original author's note): this may fail on some planes; the
     // full documentation for the mask array values could not be found.
     $reference = sprintf('&#%s;', $int);

     return mb_decode_numericentity($reference, static::$numeric_mask, 'utf-8');
 }
Example #2
0
 /**
  * Setup: decode the numeric entities in our test strings ahead of time.
  *
  * Every entry of $this->test_strings is replaced, under the same key, by its
  * decoded form (entities for code points 0x80-0xffff become UTF-8 characters).
  */
 protected function setup()
 {
     $convmap = array(0x80, 0xffff, 0, 0xffff);
     $this->test_strings = array_map(function ($encoded) use ($convmap) {
         return mb_decode_numericentity($encoded, $convmap, 'utf-8');
     }, $this->test_strings);
 }
Example #3
0
/**
 * JSON-encode $array while keeping characters in 0x80-0xffff unescaped.
 *
 * The walk callback 'motopressCEMbEncodeNumericentity' is applied to every
 * leaf with the conversion options, the result is JSON-encoded, and numeric
 * entities in the JSON text are then decoded back to UTF-8.
 *
 * @param array $array Data to encode (walked on a local copy).
 * @return string JSON text.
 */
function motopressCEJsonEncode($array)
{
    // The convmap starts at 0x80, so it covers every multibyte code (above
    // ASCII 127) up to 0xffff; such characters are thereby "hidden" from the
    // normal json_encode escaping.
    $options = array('convmap' => array(0x80, 0xffff, 0, 0xffff), 'encoding' => 'UTF-8');
    array_walk_recursive($array, 'motopressCEMbEncodeNumericentity', $options);
    $json = json_encode($array);

    return mb_decode_numericentity($json, $options['convmap'], $options['encoding']);
}
Example #4
0
/**
 * Prepare text for nicer placement in our textareas.
 *
 * With an empty $process the text is only entity-decoded. Otherwise the
 * <p>, <br />, <br> and </p> tags are removed first; a falsy $text yields ''.
 *
 * @param string $text    Text to prepare.
 * @param string $process When empty, tag stripping is skipped.
 * @return string
 */
function stripForForm($text = '', $process = '')
{
    // Shared decoding step: mbstring when available, otherwise escape the
    // text and restore the angle brackets.
    $decode = function ($input) {
        if (function_exists('mb_decode_numericentity')) {
            return mb_decode_numericentity($input, UTF8EntConvert('1'), 'utf-8');
        }
        $escaped = htmlspecialchars($input);
        return str_replace(array("&gt;", "&lt;"), array(">", "<"), $escaped);
    };

    if (empty($process)) {
        return $decode($text);
    }
    if (!$text) {
        return '';
    }

    // Searches are applied in order, exactly like the successive calls were.
    $stripped = str_replace(array("<p>", "<br />", "<br>", "</p>"), "", $text);

    return $decode($stripped);
}
/**
 * Polyfill for json_encode JSON_UNESCAPED_UNICODE (new in PHP 5.4.0) for PHP 5.3.
 *
 * When the native flag exists it is used directly. The numeric-entity
 * emulation is kept only as a fallback because it has known flaws: it decodes
 * literal "&#NNN;" text already present in the data, leaves non-ASCII array
 * keys escaped, and cannot keep characters above U+FFFF unescaped.
 *
 * @param mixed $arr Data to encode.
 * @return string|false JSON text with non-ASCII characters left unescaped.
 */
function kfJsonEncode($arr)
{
    if (defined('JSON_UNESCAPED_UNICODE')) {
        $flags = JSON_UNESCAPED_UNICODE;
        // The emulation never failed on malformed UTF-8 (mbstring substitutes
        // bad bytes); keep the native path tolerant where the flag exists.
        if (defined('JSON_INVALID_UTF8_SUBSTITUTE')) {
            $flags |= JSON_INVALID_UTF8_SUBSTITUTE;
        }
        return json_encode($arr, $flags);
    }

    // Legacy path: hide 0x80-0xffff characters from json_encode as numeric
    // entities, then turn the entities back into UTF-8 in the JSON text.
    $convmap = array(0x80, 0xffff, 0, 0xffff);
    array_walk_recursive($arr, function (&$item, $key) use ($convmap) {
        if (is_string($item)) {
            $item = mb_encode_numericentity($item, $convmap, 'UTF-8');
        }
    });
    return mb_decode_numericentity(json_encode($arr), $convmap, 'UTF-8');
}
Example #6
0
 /**
  * Convert Shift_JIS text to UTF-8.
  *
  * Escape-sequence runs matched by the pattern are first rewritten by
  * _convertWebcodeToEntity(), the text is converted from SJIS-win to UTF-8,
  * and numeric entities are finally decoded using $this->_utf8map.
  *
  * @param  string  $text
  * @return string
  */
 function _convertSjisToUtf8($text)
 {
     $withEntities = preg_replace_callback(
         '/\\x1B\\x24([\\x45-\\x47\\x4F-\\x51][\\x21-\\x7A]+)\\x0F?/',
         array($this, '_convertWebcodeToEntity'),
         $text
     );
     $utf8 = mb_convert_encoding($withEntities, 'UTF-8', 'SJIS-win');

     return mb_decode_numericentity($utf8, $this->_utf8map, 'UTF-8');
 }
Example #7
0
 /**
  * Setup: decode our test strings ahead of time and disable the MultiByte library.
  *
  * The value previously returned by MultiByte::library() is kept in
  * $this->old_library.
  */
 protected function setup()
 {
     $convmap = array(0x80, 0xffff, 0, 0xffff);
     $this->test_strings = array_map(function ($encoded) use ($convmap) {
         return mb_decode_numericentity($encoded, $convmap, 'utf-8');
     }, $this->test_strings);

     // Disable using a multibyte library, remembering the returned value.
     $this->old_library = MultiByte::library(false);
 }
Example #8
0
 /**
  * Decode numeric character references (code points 0x0-0x2ffff) to UTF-8.
  *
  * @param string $text Text possibly containing "&#N;" references.
  * @return string Decoded text, or $text unchanged when mbstring is missing.
  */
 public function decodeNumericEntity($text)
 {
     if (!function_exists('mb_decode_numericentity')) {
         return $text;
     }

     return mb_decode_numericentity($text, array(0x0, 0x2ffff, 0, 0xffff), 'UTF-8');
 }
Example #9
0
 /**
  * Normalise a search keyword and open a connection to the Sphinx server.
  *
  * The keyword is truncated to $this->max_keyword_length, lower-cased,
  * stripped of "&quot;" and a list of punctuation characters, re-encoded as
  * UTF-8 and entity-decoded. Words longer than one character are stored in
  * $this->array_keyword; the cleaned keyword is stored in $this->keyword and
  * $this->original_keyword. Finally a SphinxClient is created, configured and
  * opened.
  *
  * @param string $keyword    Raw keyword.
  * @param string $index_data Optional replacement for $this->index_data.
  */
 function sphinx_keyword($keyword, $index_data = '')
 {
     $this->index_data = $index_data ? $index_data : $this->index_data;
     // Truncate over-long keywords
     if (mb_strlen($keyword, "UTF-8") > $this->max_keyword_length) {
         $keyword = mb_substr($keyword, 0, $this->max_keyword_length, "UTF-8");
     }
     $this->keyword = mb_strtolower($keyword, "UTF-8");
     //echo "2";
     // Remove the &quot; entity
     $this->keyword = str_replace("&quot;", "", $this->keyword);
     // Strip the bad characters
     $array_bad_word = array("?", "^", ",", ";", "*", "/", "~", "@", "-", "!", "[", "]", "(", ")", "=", "|");
     $this->keyword = str_replace($array_bad_word, "", $this->keyword);
     // Guard against "square box" (garbled) characters by re-converting to proper UTF-8
     $this->keyword = mb_convert_encoding($this->keyword, "UTF-8", "UTF-8");
     // Remove numeric character references (NCRs) by decoding them; warnings are suppressed with @
     $convmap = array(0x0, 0x2ffff, 0, 0xffff);
     $this->keyword = @mb_decode_numericentity($this->keyword, $convmap, "UTF-8");
     //echo "3";
     $j = -1;
     // Take the remaining keyword and split it on spaces
     $array_temp = explode(" ", $this->keyword);
     for ($i = 0; $i < count($array_temp); $i++) {
         if (trim($array_temp[$i]) != "") {
             // Only keywords longer than 1 character are put into the array
             if (mb_strlen(trim($array_temp[$i]), "UTF-8") > 1) {
                 $j++;
                 // Single quotes are doubled before the word is stored
                 $this->array_keyword[$j][0] = str_replace("'", "''", trim($array_temp[$i]));
             }
         }
     }
     // Quorum: 3/5 of the number of space-separated parts, at least 2.
     // NOTE(review): $quorum is not used again inside this function.
     $quorum = count($array_temp) * 3 / 5;
     $quorum = intval($quorum);
     if ($quorum < 2) {
         $quorum = 2;
     }
     $this->keyword = trim($this->keyword);
     $this->original_keyword = $this->keyword;
     //echo $this->keyword;
     // Sphinx configuration when running on localhost
     if (@$_SERVER['SERVER_NAME'] == "localhost") {
         $this->sphinx_host = "127.0.0.1";
         $this->sphinx_port = 9312;
     }
     //echo "3";
     // Instantiate the client class and open the connection to the server
     $this->sphinx = new SphinxClient();
     $this->sphinx->SetServer($this->sphinx_host, $this->sphinx_port);
     $this->sphinx->SetConnectTimeout(1.5);
     $this->sphinx->SetMatchMode(SPH_MATCH_ANY);
     // Maximum number of results to return (the original comment said 5030, but the value set is 330)
     $this->sphinx->_maxmatches = 330;
     $this->sphinx->Open();
     //echo "4";
 }
Example #10
0
/**
 * JSON-encode $arr leaving non-ASCII characters unescaped ("readable").
 *
 * Uses the native JSON_UNESCAPED_UNICODE flag when it exists. The
 * numeric-entity emulation is kept only as a fallback because it decodes
 * literal "&#NNN;" text already present in the data, leaves non-ASCII array
 * keys escaped, and cannot keep characters above U+FFFF unescaped.
 *
 * @param mixed $arr Data to encode.
 * @return string|false JSON text.
 */
function json_encode_readable($arr)
{
    if (defined('JSON_UNESCAPED_UNICODE')) {
        $flags = JSON_UNESCAPED_UNICODE;
        // The emulation never failed on malformed UTF-8 (mbstring substitutes
        // bad bytes); keep the native path tolerant where the flag exists.
        if (defined('JSON_INVALID_UTF8_SUBSTITUTE')) {
            $flags |= JSON_INVALID_UTF8_SUBSTITUTE;
        }
        return json_encode($arr, $flags);
    }

    // Legacy path: the convmap starts at 0x80 so it takes all multibyte codes
    // (above ASCII 127) up to 0xffff; such characters are "hidden" from the
    // normal json_encode escaping and restored afterwards.
    $convmap = array(0x80, 0xffff, 0, 0xffff);
    array_walk_recursive($arr, function (&$item, $key) use ($convmap) {
        if (is_string($item)) {
            $item = mb_encode_numericentity($item, $convmap, 'UTF-8');
        }
    });
    return mb_decode_numericentity(json_encode($arr), $convmap, 'UTF-8');
}
/**
 * JSON-encode $data; for non-AJAX requests keep Unicode characters readable.
 *
 * AJAX requests get plain json_encode() output. Otherwise '<br />' is removed
 * from $data and the JSON is produced with non-ASCII characters unescaped.
 *
 * The previous implementation post-processed every "\uXXXX" in the JSON text,
 * which broke surrogate pairs, turned escaped control characters into raw
 * ones (invalid JSON) and rewrote escaped backslash sequences. The native
 * flag is used when available; the regex is kept only as a fallback.
 *
 * @param mixed $data Data to encode.
 * @return string|false JSON text.
 */
function json($data)
{
    $CI =& get_instance();
    if ($CI->input->is_ajax_request()) {
        return json_encode($data);
    }
    $data = str_replace('<br />', '', $data);
    if (defined('JSON_UNESCAPED_UNICODE')) {
        return json_encode($data, JSON_UNESCAPED_UNICODE);
    }
    // Legacy fallback (PHP < 5.4): convert each \uXXXX escape individually.
    return preg_replace_callback('/\\\\u([0-9a-f]{4})/i', function ($val) {
        return mb_decode_numericentity('&#' . intval($val[1], 16) . ';', array(0, 0xffff, 0, 0xffff), 'utf-8');
    }, json_encode($data));
}
Example #12
0
 /**
  * Callback function called by the filter() method.
  *
  * Interprets the first capture group as a hexadecimal Unicode number and
  * decodes it to UTF-8 through $this->_convmap. When the map does not cover
  * the code point (decoding leaves the entity untouched) the original match
  * is returned instead.
  *
  * @param  array   $matches Regex match; [0] full match, [1] hex number.
  * @return string
  */
 function _convertEntityToUtf8($matches)
 {
     $entity = '&#' . hexdec($matches[1]) . ';';
     $decoded = mb_decode_numericentity($entity, $this->_convmap, 'UTF-8');

     return $decoded === $entity ? $matches[0] : $decoded;
 }
Example #13
0
 /**
  * Return the HTML-decoded form of a string.
  *
  * Steps: convert to UTF-8 from the detected encoding, rewrite hexadecimal
  * numeric references as decimal ones, decode numeric references, decode
  * named entities, then unescape backslash sequences.
  *
  * Empty or non-string values are returned unchanged.
  *
  * @param string $value Target string.
  * @return string
  */
 public static function htmldecode($value)
 {
     if (empty($value) || !is_string($value)) {
         return $value;
     }

     // mb_detect_encoding() returns false when nothing matches; passing that
     // on to mb_convert_encoding() is an error, so only convert on success.
     $detected = mb_detect_encoding($value);
     if ($detected !== false) {
         $value = mb_convert_encoding($value, 'UTF-8', $detected);
     }

     // With the /u modifier preg returns null on malformed UTF-8; keep the
     // original text in that case instead of propagating null.
     $decimalised = preg_replace_callback("/&#[xX]([0-9a-fA-F]+);/u", function ($m) {
         return '&#' . hexdec($m[1]) . ';';
     }, $value);
     if ($decimalised !== null) {
         $value = $decimalised;
     }

     $value = mb_decode_numericentity($value, array(0x0, 0x10000, 0, 0xfffff), "UTF-8");
     $value = html_entity_decode($value, ENT_QUOTES, "UTF-8");
     // NOTE(review): the second pair replaces \' with \' (a no-op); it may
     // have been meant to produce a bare quote. Left unchanged; confirm.
     $value = str_replace(array("\\\"", "\\'", "\\\\"), array("\"", "\\'", "\\"), $value);

     return $value;
 }
 /**
  * Convert a decimal code point to its character.
  *
  * Falls back to self::fromHex('fffd') when the number is empty, lies above
  * 0x2ffff, or is left undecoded by mbstring.
  *
  * @param mixed $number
  *
  * @return string
  */
 public static function fromDecimal($number)
 {
     // Only convert code points within planes 0-2, excluding NULL
     if (!empty($number) && !($number > 0x2ffff)) {
         $entity = '&#' . $number . ';';
         $converted = mb_decode_numericentity($entity, [0x0, 0x2ffff, 0, 0xffff], 'UTF-8');
         if ($converted !== $entity) {
             return $converted;
         }
     }

     return self::fromHex('fffd');
 }
Example #15
0
 /**
  * Encode $arr as JSON with Unicode left unescaped.
  *
  * Uses JSON_UNESCAPED_UNICODE when the constant exists; otherwise emulates
  * it for PHP 5.3 by hiding 0x80-0xffff characters as numeric entities
  * before encoding and restoring them afterwards.
  *
  * @author devilan (REMOVEIT) (at) o2 (dot) pl
  * @see https://php.net/manual/en/function.json-encode.php#105789
  *
  * @param array $arr            Data to encode.
  * @param int   $optionsBitMask Extra json_encode() option flags.
  * @return string|false
  */
 public static function associativeArrayToJsonStr($arr, $optionsBitMask = 0)
 {
     if (!defined('JSON_UNESCAPED_UNICODE')) {
         // The convmap starts at 0x80 so it takes all multibyte codes (above
         // ASCII 127); such characters are "hidden" from normal json_encoding.
         $convmap = array(0x80, 0xffff, 0, 0xffff);
         $hideMultibyte = function (&$item, $key) use (&$convmap) {
             if (is_string($item)) {
                 $item = mb_encode_numericentity($item, $convmap, 'UTF-8');
             }
         };
         array_walk_recursive($arr, $hideMultibyte);
         $json = json_encode($arr, $optionsBitMask);

         return mb_decode_numericentity($json, $convmap, 'UTF-8');
     }

     return json_encode($arr, JSON_UNESCAPED_UNICODE | $optionsBitMask);
 }
Example #16
0
 /**
  * Recursively decode numeric entities in a value or an array of values.
  *
  * A few typographic references are first mapped to ASCII look-alikes, then
  * references for code points 0x80-0xff are decoded to UTF-8.
  *
  * Fix: the keys for &#8216;, &#8217; and &#8218; were missing their
  * trailing ';', so the semicolon stayed in the output ("&#8217;" became
  * "';") and longer references sharing the prefix were mangled.
  *
  * @param mixed $pValue String or (nested) array of strings.
  * @return mixed Same shape as the input, with entities decoded.
  */
 private function fromNumericEntities($pValue)
 {
     if (is_array($pValue)) {
         foreach ($pValue as $key => $value) {
             $pValue[$key] = $this->fromNumericEntities($value);
         }
         return $pValue;
     }

     $specialChars = array(
         '&#8221;' => '"',
         '&#8220;' => '"',
         '&#8222;' => '"',
         '&#8211;' => '-',
         '&#8212;' => '_',
         '&#8216;' => "'",
         '&#8217;' => "'",
         '&#8218;' => "'",
     );
     // Plain string replacement is enough: none of the keys is a regex.
     $pValue = str_replace(array_keys($specialChars), array_values($specialChars), $pValue);

     return mb_decode_numericentity($pValue, array(0x80, 0xff, 0, 0xff), "UTF-8");
 }
Example #17
0
 /**
  * Static method that recursively decodes numeric entities.
  *
  * A few typographic references are first mapped to ASCII look-alikes, then
  * references for code points 0x80-0xff are decoded using
  * Configuration::$global_encoding.
  *
  * Fix: the keys for &#8216;, &#8217; and &#8218; were missing their
  * trailing ';', so the semicolon stayed in the output ("&#8217;" became
  * "';") and longer references sharing the prefix were mangled.
  *
  * @static
  * @param  mixed $pValue String or (nested) array of strings.
  * @return mixed|string Same shape as the input, with entities decoded.
  */
 public static function fromNumericEntities($pValue)
 {
     if (is_array($pValue)) {
         foreach ($pValue as $key => $value) {
             $pValue[$key] = self::fromNumericEntities($value);
         }
         return $pValue;
     }

     $specialChars = array(
         '&#8221;' => '"',
         '&#8220;' => '"',
         '&#8222;' => '"',
         '&#8211;' => '-',
         '&#8212;' => '_',
         '&#8216;' => "'",
         '&#8217;' => "'",
         '&#8218;' => "'",
     );
     // Plain string replacement is enough: none of the keys is a regex.
     $pValue = str_replace(array_keys($specialChars), array_values($specialChars), $pValue);

     return mb_decode_numericentity($pValue, array(0x80, 0xff, 0, 0xff), Configuration::$global_encoding);
 }
Example #18
0
 /**
  * Build the JSON request body from the given parameters.
  *
  * A wrapper object is created with one nested object stored under the
  * property named by getParent(). For a '/reorder' action every parameter
  * becomes an item with an `id`; otherwise each configured field is read via
  * getValue(), optionally cast, and stored on the nested object (or on the
  * wrapper itself when the field is flagged as 'sibling').
  *
  * @param mixed $parameters Parameters; anything empty or non-array yields '{}'.
  * @return string JSON text with 0x80-0xffff characters left unescaped.
  */
 protected function parseParameters($parameters)
 {
     if (!empty($parameters) && is_array($parameters)) {
         $object = new stdClass();
         $parent = $this->getParent();
         $object->{$parent} = new stdClass();
         // From here on $parent is the nested object, not its name.
         $parent = $object->{$parent};
         if ($this->actionInclude('/reorder')) {
             // Reorder: each parameter is an id, appended to the list stored
             // under the property named by $this->parent.
             foreach ($parameters as $id) {
                 $item = new stdClass();
                 $item->id = $id;
                 $parent->{$this->parent}[] = $item;
             }
         } else {
             foreach ($this->fields as $field => $options) {
                 $value = $this->getValue($field, $options, $parameters);
                 if (isset($options['attributes'])) {
                     foreach ($options['attributes'] as $name => $type) {
                         if (null !== $value) {
                             // Only the 'type' attribute affects the value.
                             if ($name === 'type') {
                                 if ($type === 'array') {
                                     // Scalars are wrapped into a one-element
                                     // array; anything else is dropped.
                                     if (is_string($value) || is_numeric($value)) {
                                         $value = (array) $value;
                                     } else {
                                         $value = null;
                                     }
                                 } else {
                                     settype($value, $type);
                                 }
                             }
                         }
                     }
                 }
                 if (null !== $value) {
                     if (is_string($value)) {
                         // Hide 0x80-0xffff characters from json_encode; they
                         // are restored after encoding below.
                         $value = mb_encode_numericentity($value, [0x80, 0xffff, 0, 0xffff], 'utf-8');
                     }
                     // 'sibling' fields live next to the nested object.
                     !empty($options['sibling']) ? $object->{$field} = $value : ($parent->{$field} = $value);
                 }
             }
         }
         $parameters = json_encode($object);
         $parameters = mb_decode_numericentity($parameters, [0x80, 0xffff, 0, 0xffff], 'utf-8');
     } else {
         $parameters = '{}';
     }
     return $parameters;
 }
Example #19
0
 /**
  * JSON-encode $arr keeping 0x80-0xffff characters unescaped.
  *
  * String leaves are converted to numeric entities before encoding and the
  * entities are decoded again in the resulting JSON text. Object leaves are
  * replaced by an array of their properties, read through getter methods.
  *
  * Note: $arr is taken by reference and walked in place, so the caller's
  * array is modified (strings entity-encoded, objects replaced by arrays).
  *
  * @param array $arr Data to encode; modified in place.
  * @return string JSON text.
  */
 public function jsonEncode(&$arr)
 {
     //convmap since 0x80 char codes so it takes all multibyte codes (above ASCII 127). So such characters are being "hidden" from normal json_encoding
     array_walk_recursive($arr, function (&$item, $key) {
         if (is_string($item)) {
             $item = mb_encode_numericentity($item, array(0x80, 0xffff, 0, 0xffff), 'UTF-8');
         } elseif (is_object($item)) {
             $reflection = new \ReflectionObject($item);
             $props = $reflection->getProperties();
             $tmp = array();
             foreach ($props as $prop) {
                 // The first character of the property name is dropped
                 // (presumably a prefix such as "_" - confirm against the VO classes).
                 $name = substr($prop->getName(), 1);
                 $value = '';
                 try {
                     // Read the value through the matching getXxx() method.
                     $method = $reflection->getMethod('get' . ucfirst($name));
                     $value = $method->invoke($item);
                 } catch (\Exception $ex) {
                     // Fallback applies to this one class only; for any other
                     // class $value stays ''.
                     if ($reflection->name == 'org\\autoset\\santorini\\vo\\VirtualFormVO') {
                         $value = $item->__call('get' . ucfirst($name), null);
                     }
                 }
                 if ($value instanceof JSONString) {
                     $value = $value->toString();
                 } elseif (is_string($value)) {
                     $value = mb_encode_numericentity($value, array(0x80, 0xffff, 0, 0xffff), 'UTF-8');
                 } elseif (is_array($value)) {
                     // Nested arrays are encoded recursively, then decoded
                     // back into a PHP structure.
                     $value = json_decode($this->jsonEncode($value));
                 } elseif (is_object($value)) {
                     // NOTE(review): unlike the array branch the result stays
                     // a JSON string here, and an object (not an array) is
                     // passed to array_walk_recursive - confirm this is intended.
                     $value = $this->jsonEncode($value);
                 }
                 $tmp[$name] = $value;
             }
             $item = $tmp;
         }
     });
     return mb_decode_numericentity(json_encode($arr), array(0x80, 0xffff, 0, 0xffff), 'UTF-8');
 }
Example #20
0
 /**
  * Decode numeric character references using the map from $this->cmap().
  *
  * @param string $text    Text containing numeric references.
  * @param string $charset Character set of $text.
  * @return string
  */
 function decode_high($text, $charset = "UTF-8")
 {
     $conversionMap = $this->cmap();

     return mb_decode_numericentity($text, $conversionMap, $charset);
 }
 /**
  * Replace byte sequences that are not well-formed UTF-8 with $broken.
  *
  * Tab, LF and CR are kept; other control bytes below 0x20 are replaced.
  * Lead bytes 0x80-0xC1 and 0xF5-0xFF are always invalid. A multi-byte
  * sequence is kept only if it is complete and every trailing byte has the
  * form 10xxxxxx; otherwise the whole expected sequence length is consumed
  * and a single $broken is emitted. Decimal numeric references in the result
  * are decoded afterwards.
  *
  * Fix: the continuation-byte mask and expected value had been turned into
  * the multi-byte characters "À" and "€" by a charset conversion of the
  * source, so the comparison could never succeed and every multi-byte
  * character was replaced. They are now written as the byte escapes "\xC0"
  * and "\x80". The former `case "":` could never match a one-byte string and
  * has been dropped.
  *
  * @param string $str    Input bytes.
  * @param string $broken Replacement for each invalid byte or sequence.
  * @return string
  */
 static function correct($str, $broken = '')
 {
     $corrected = '';
     $strlen = strlen($str);
     for ($i = 0; $i < $strlen; $i++) {
         $byte = $str[$i];
         $high = ord($byte);
         if ($byte === "\t" || $byte === "\n" || $byte === "\r") {
             $corrected .= $byte;
         } elseif ($high < 0x20) {
             // Special (control) characters.
             $corrected .= $broken;
         } elseif ($high < 0x80) {
             // 1 byte (ASCII).
             $corrected .= $byte;
         } elseif ($high <= 0xc1 || $high >= 0xf5) {
             // Stray continuation byte, overlong lead (C0/C1), or F5~FF,
             // which is invalid by RFC 3629.
             $corrected .= $broken;
         } else {
             // Lead byte of a 2-, 3- or 4-byte sequence.
             $trailing = $high < 0xe0 ? 1 : ($high < 0xf0 ? 2 : 3);
             $valid = $i + $trailing < $strlen;
             for ($k = 1; $valid && $k <= $trailing; $k++) {
                 // Continuation bytes must look like 10xxxxxx.
                 $valid = ($str[$i + $k] & "\xC0") === "\x80";
             }
             $corrected .= $valid ? substr($str, $i, $trailing + 1) : $broken;
             // The expected sequence length is skipped even when invalid.
             $i += $trailing;
         }
     }
     if (preg_match('/&#([0-9]{1,});/', $corrected)) {
         $corrected = mb_decode_numericentity($corrected, array(0x0, 0x10000, 0, 0xfffff), 'UTF-8');
     }
     return $corrected;
 }
Example #22
0
 /**
  * Converts an HTML character reference body to its character.
  *
  * An all-digit $text is wrapped as a numeric reference ("&#N;"), anything
  * else as a named one ("&name;"). The reference is decoded with mbstring
  * and $this->cmap when $this->mb is truthy, otherwise with
  * html_entity_decode().
  *
  * @param  string $text    The input (reference body without "&" and ";")
  * @param  string $charset The character set
  * @return string Processed input
  */
 protected function decodeHigh($text, $charset = 'UTF-8')
 {
     if (ctype_digit($text)) {
         $reference = "&#{$text};";
     } else {
         $reference = "&{$text};";
     }

     if ($this->mb) {
         return mb_decode_numericentity($reference, $this->cmap, $charset);
     }

     return html_entity_decode($reference, ENT_NOQUOTES, $charset);
 }
Example #23
0
 /**
  * Wrapper for json_encode function.
  * Emulates JSON_UNESCAPED_UNICODE.
  *
  * Uses the native flag when it exists. The numeric-entity emulation is kept
  * only as a fallback, and now uses a closure: create_function() was
  * deprecated in PHP 7.2 and removed in PHP 8.0. The emulation also decodes
  * literal "&#NNN;" text in the data, leaves non-ASCII keys escaped and
  * cannot keep characters above U+FFFF unescaped.
  *
  * @param mixed $arr Data to encode.
  * @return string|false JSON text.
  * @author peshkov@UD
  */
 public static function json_encode($arr)
 {
     if (defined('JSON_UNESCAPED_UNICODE')) {
         $flags = JSON_UNESCAPED_UNICODE;
         // The emulation never failed on malformed UTF-8 (mbstring
         // substitutes bad bytes); keep the native path tolerant.
         if (defined('JSON_INVALID_UTF8_SUBSTITUTE')) {
             $flags |= JSON_INVALID_UTF8_SUBSTITUTE;
         }
         return json_encode($arr, $flags);
     }

     // Legacy path: the convmap starts at 0x80 so it takes all multibyte
     // codes (above ASCII 127); such characters are "hidden" from the normal
     // json_encode escaping and restored afterwards.
     $convmap = array(0x80, 0xffff, 0, 0xffff);
     array_walk_recursive($arr, function (&$item, $key) use ($convmap) {
         if (is_string($item)) {
             $item = mb_encode_numericentity($item, $convmap, 'UTF-8');
         }
     });
     return mb_decode_numericentity(json_encode($arr), $convmap, 'UTF-8');
 }
 /**
  * AJAX handler: import a single TYPO3 news record as a post.
  *
  * Fetches the record identified by $this->news_uid, optionally decodes
  * numeric entities in its fields, imports it as a post, localises images,
  * stores post meta, optionally sets the featured image and imports
  * comments. Always terminates the request with a JSON message via die().
  */
 function ajax_process_news()
 {
     $this->_create_db_client();
     if (!get_t3i_options('debug_mode')) {
         error_reporting(0);
         // Don't break the JSON result
         header('Content-type: application/json');
         // NOTE(review): this assignment only runs when debug mode is off;
         // in debug mode $this->news_uid keeps whatever value it had before.
         // Confirm whether it was meant to sit outside this block.
         $this->news_uid = (int) $_REQUEST['id'];
     }
     // grab the record from TYPO3
     $news = $this->get_news($this->news_uid);
     // Abort with a JSON error when the lookup did not return the requested record
     if (!is_array($news) || $news['itemid'] != $this->news_uid) {
         die(json_encode(array('error' => sprintf(__("Failed import: %s isn't a TYPO3 news record.", 'typo3-importer'), esc_html($_REQUEST['id'])))));
     }
     if (get_t3i_options('decode_entities')) {
         // Decode numeric entities in every field, one nesting level deep
         $conv_map = array(0x0, 0x10000, 0, 0xfffff);
         foreach ($news as $key => $value) {
             if (!is_array($value)) {
                 $news[$key] = mb_decode_numericentity($value, $conv_map, 'UTF-8');
             } else {
                 foreach ($value as $vKey => $vValue) {
                     $value[$vKey] = mb_decode_numericentity($vValue, $conv_map, 'UTF-8');
                 }
                 $news[$key] = $value;
             }
         }
     }
     // TODO progress by post
     // process and import news post
     $post_id = $this->import_news_as_post($news);
     // Reset before image processing; checked again below
     $this->featured_image_id = false;
     // replace original external images with internal
     $this->_typo3_replace_images($post_id);
     // Handle all the metadata for this post
     $this->insert_postmeta($post_id, $news);
     if (get_t3i_options('set_featured_image') && $this->featured_image_id) {
         update_post_meta($post_id, "_thumbnail_id", $this->featured_image_id);
     }
     if (!get_t3i_options('no_comments_import')) {
         $this->process_comments();
     }
     // Report success (with permalink, title, id and elapsed time) and stop
     die(json_encode(array('success' => sprintf(__('&quot;<a href="%1$s" target="_blank">%2$s</a>&quot; Post ID %3$s was successfully processed in %4$s seconds.', 'typo3-importer'), get_permalink($post_id), esc_html(get_the_title($post_id)), $post_id, timer_stop()))));
 }
Example #25
0
 /**
  * JSON-encode $arr while keeping 0x80-0xffff characters unescaped.
  *
  * Every leaf is first passed to this class's
  * json_unescaped_unicode_walk_callback(); numeric entities in the encoded
  * JSON text are then decoded back to UTF-8.
  *
  * @param $arr
  * @return string
  * courtesy from: http://www.php.net/manual/ru/function.json-encode.php#105789
  */
 public static function json_encode_unescaped_unicode($arr)
 {
     $walker = array(__CLASS__, 'json_unescaped_unicode_walk_callback');
     array_walk_recursive($arr, $walker);
     $json = json_encode($arr);

     return mb_decode_numericentity($json, array(0x80, 0xffff, 0, 0xffff), 'UTF-8');
 }
 /**
  * Post-process the rendered output for the requesting device.
  *
  * Reads $this->c->output, applies carrier-specific markup replacements
  * (EZweb/KDDI, i-mode/DoCoMo, SoftBank), and for mobile clients converts
  * the page to SJIS-win and sends a Shift_JIS XHTML content-type header.
  * Non-mobile clients get the result of convertPC(). The result is written
  * back to $this->c->output.
  */
 function afterFilter()
 {
     $_data = $this->c->output;
     if ($this->is_ezweb()) {
         // KDDI
         // Swap the bare <html> tag for an XML declaration, XHTML doctype and namespaced root
         $_data = str_replace("<html>", "<?xml version=\"1.0\" encoding=\"Shift_JIS\"?><!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\">", $_data);
         $_data = str_replace("font-size:small", "font-size:12px", $_data);
     } elseif ($this->is_imode()) {
         // DoCoMo
         $_data = str_replace("<html>", "<?xml version=\"1.0\" encoding=\"Shift_JIS\"?><!DOCTYPE html PUBLIC \"-//i-mode group (ja)//DTD XHTML i-XHTML(Locale/Ver.=ja/2.0) 1.0//EN\" \"i-xhtml_4ja_10.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\">", $_data);
         // Rewrite istyle attributes as -wap-input-format style declarations
         $_data = str_replace("istyle=\"1\"", "style=\"-wap-input-format:&quot;*&lt;ja:h&gt;&quot;\"", $_data);
         $_data = str_replace("istyle=\"3\"", "style=\"-wap-input-format:&quot;*&lt;ja:en&gt;&quot;\"", $_data);
         $_data = str_replace("istyle=\"4\"", "style=\"-wap-input-format:&quot;*&lt;ja:n&gt;&quot;\"", $_data);
         // Lately it seems everything is GIF
         //$_data = preg_replace("/<img src=\"(.+?)\.(gif)\"/", '<img src="\\1.png"', $_data);
     } elseif ($this->is_softbank()) {
         // SoftBank
         // Rewrite istyle attributes as mode attributes
         $_data = str_replace("istyle=\"1\"", "mode=\"hiragana\"", $_data);
         $_data = str_replace("istyle=\"3\"", "mode=\"alphabet\"", $_data);
         $_data = str_replace("istyle=\"4\"", "mode=\"numeric\"", $_data);
     }
     if ($this->is_mobile()) {
         $_data = $this->convertMobile($_data);
         $_data = mb_convert_kana($_data, "kVrns", 'UTF-8');
         header("Content-type: application/xhtml+xml; charset=Shift_JIS");
         if ($this->is_ezweb()) {
             // Code points listed in $utf8map are protected as numeric entities
             // during the charset conversion, then decoded in SJIS-win through
             // $sjismap, whose entries carry non-zero offsets.
             $sjismap = array(0xe234, 0xe272, 0xa0c, 0xffff, 0xe273, 0xe2ef, 0xa0d, 0xffff, 0xe2f0, 0xe32e, 0xa50, 0xffff, 0xe32f, 0xe342, 0xa51, 0xffff, 0xe468, 0xe4a6, 0xad8, 0xffff, 0xe4a7, 0xe523, 0xad9, 0xffff, 0xe524, 0xe562, 0xb1c, 0xffff, 0xe563, 0xe5df, 0xb1d, 0xffff);
             $utf8map = array(0xec40, 0xecfc, 0x0, 0xffff, 0xed40, 0xed93, 0x0, 0xffff, 0xef40, 0xeffc, 0x0, 0xffff, 0xf040, 0xf0fc, 0x0, 0xffff);
             $_data = mb_encode_numericentity($_data, $utf8map, 'UTF-8');
             $_data = mb_convert_encoding($_data, 'SJIS-win', 'UTF-8');
             $this->c->output = mb_decode_numericentity($_data, $sjismap, 'SJIS-win');
         } elseif ($this->is_softbank()) {
             // Same protection step; the remaining five-digit entities starting
             // with 5 are then rewritten by _softbank_convertUnicodeToSjis().
             $utf8map = array(0xe001, 0xe05a, 0x0, 0xffff, 0xe101, 0xe15a, 0x0, 0xffff, 0xe201, 0xe25a, 0x0, 0xffff, 0xe301, 0xe34d, 0x0, 0xffff, 0xe401, 0xe44c, 0x0, 0xffff, 0xe501, 0xe53e, 0x0, 0xffff);
             $_data = mb_encode_numericentity($_data, $utf8map, 'UTF-8');
             $_data = mb_convert_encoding($_data, 'SJIS-win', 'UTF-8');
             $pattern = '/&#(5\\d{4});/';
             $callback = array($this, '_softbank_convertUnicodeToSjis');
             $this->c->output = preg_replace_callback($pattern, $callback, $_data);
         } else {
             // Other mobile clients: plain charset conversion
             $this->c->output = mb_convert_encoding($_data, 'SJIS-win', 'UTF-8');
         }
     } else {
         $this->c->output = $this->convertPC($_data);
     }
 }
/**
 * Decode numeric character references in UTF-8 text using the map from cmap().
 *
 * @param string $text Text containing numeric references.
 * @return string
 */
function decode_high($text)
{
    return mb_decode_numericentity($text, cmap(), "UTF-8");
}
/**
 * Decode numeric character references (code points 0x0-0x2ffff) to UTF-8.
 *
 * @param string $str Text containing "&#N;" references.
 * @return string
 */
function htmlCharsDecode($str)
{
    return mb_decode_numericentity($str, array(0x0, 0x2ffff, 0, 0xffff), 'UTF-8');
}
Example #29
0
/**
 * Convert HTML entities in $text to UTF-8 characters and strip HTML tags.
 *
 * Order of operations: decode numeric entities, replace named entities
 * (first the case-insensitive table, then the case-sensitive one), strip
 * tags, decode &lt; &gt; &#38; &amp;, and finally collapse runs of spaces.
 *
 * @param string $text Input text.
 * @return string Cleaned text.
 */
function hesk_convert_to_utf8_and_clean_html_entities($text)
{
    // Can we use the multibyte functionality of PHP?
    if (function_exists('mb_decode_numericentity')) {
        $text = mb_decode_numericentity($text, array(0x0, 0x2ffff, 0, 0xffff), 'UTF-8');
    } else {
        $text = preg_replace_callback('/&#([0-9a-fx]+);/mi', 'hesk_replace_num_entity', $text);
    }
    // Entities that are not case sensitive
    // NOTE(review): '&prime;' appears twice in this table; PHP keeps the later value ('\'').
    $html_entities = array('&quot;' => '"', '&#148;' => '"', '&ldquo;' => '"', '&rdquo;' => '"', '&bdquo;' => '"', '&prime;' => '"', '&apos;' => '\'', '&lsquo;' => '\'', '&rsquo;' => '\'', '&prime;' => '\'', '&acute;' => '\'', '&nbsp;' => ' ', '&ensp;' => ' ', '&emsp;' => ' ', '&thinsp;' => ' ', '&iexcl;' => '¡', '&cent;' => '¢', '&pound;' => '£', '&curren;' => '¤', '&yen;' => '¥', '&brvbar;' => '¦', '&sect;' => '§', '&uml;' => '¨', '&copy;' => '©', '&ordf;' => 'ª', '&laquo;' => '«', '&not;' => '¬', '&shy;' => '­­', '&reg;' => '®', '&macr;' => '¯', '&deg;' => '°', '&plusmn;' => '±', '&sup2;' => '²', '&sup3;' => '³', '&micro;' => 'µ', '&para;' => '¶', '&middot;' => '·', '&cedil;' => '¸', '&sup1;' => '¹', '&ordm;' => 'º', '&raquo;' => '»', '&frac14;' => '¼', '&frac12;' => '½', '&frac34;' => '¾', '&iquest;' => '¿', '&times;' => '×', '&divide;' => '÷', '&forall;' => '∀', '&part;' => '∂', '&exist;' => '∃', '&empty;' => '∅', '&nabla;' => '∇', '&isin;' => '∈', '&notin;' => '∉', '&ni;' => '∋', '&prod;' => '∏', '&sum;' => '∑', '&minus;' => '−', '&lowast;' => '∗', '&radic;' => '√', '&prop;' => '∝', '&infin;' => '∞', '&ang;' => '∠', '&and;' => '∧', '&or;' => '∨', '&cap;' => '∩', '&cup;' => '∪', '&int;' => '∫', '&there4;' => '∴', '&sim;' => '∼', '&cong;' => '≅', '&asymp;' => '≈', '&ne;' => '≠', '&equiv;' => '≡', '&le;' => '≤', '&ge;' => '≥', '&sub;' => '⊂', '&sup;' => '⊃', '&nsub;' => '⊄', '&sube;' => '⊆', '&supe;' => '⊇', '&oplus;' => '⊕', '&otimes;' => '⊗', '&perp;' => '⊥', '&sdot;' => '⋅', '&fnof;' => 'ƒ', '&circ;' => 'ˆ', '&tilde;' => '˜', '&ndash;' => '–', '&mdash;' => '—', '&sbquo;' => ',', '&bull;' => '•', '&hellip;' => '…', '&permil;' => '‰', '&lsaquo;' => '‹', '&rsaquo;' => '›', '&oline;' => '‾', '&euro;' => '€', '&trade;' => '™', '&larr;' => '←', '&uarr;' => '↑', '&rarr;' => '→', '&darr;' => '↓', '&harr;' => '↔', '&loz;' => '◊', '&spades;' => '♠', '&clubs;' => '♣', '&hearts;' => '♥', '&diams;' => '♦');
    $text = str_ireplace(array_keys($html_entities), array_values($html_entities), $text);
    // Case sensitive entities
    $html_entities = array('&Agrave;' => 'À', '&Aacute;' => 'Á', '&Acirc;' => 'Â', '&Atilde;' => 'Ã', '&Auml;' => 'Ä', '&Aring;' => 'Å', '&AElig;' => 'Æ', '&Ccedil;' => 'Ç', '&Egrave;' => 'È', '&Eacute;' => 'É', '&Ecirc;' => 'Ê', '&Euml;' => 'Ë', '&Igrave;' => 'Ì', '&Iacute;' => 'Í', '&Icirc;' => 'Î', '&Iuml;' => 'Ï', '&ETH;' => 'Ð', '&Ntilde;' => 'Ñ', '&Ograve;' => 'Ò', '&Oacute;' => 'Ó', '&Ocirc;' => 'Ô', '&Otilde;' => 'Õ', '&Ouml;' => 'Ö', '&Oslash;' => 'Ø', '&Ugrave;' => 'Ù', '&Uacute;' => 'Ú', '&Ucirc;' => 'Û', '&Uuml;' => 'Ü', '&Yacute;' => 'Ý', '&THORN;' => 'Þ', '&szlig;' => 'ß', '&agrave;' => 'à', '&aacute;' => 'á', '&acirc;' => 'â', '&atilde;' => 'ã', '&auml;' => 'ä', '&aring;' => 'å', '&aelig;' => 'æ', '&ccedil;' => 'ç', '&egrave;' => 'è', '&eacute;' => 'é', '&ecirc;' => 'ê', '&euml;' => 'ë', '&igrave;' => 'ì', '&iacute;' => 'í', '&icirc;' => 'î', '&iuml;' => 'ï', '&eth;' => 'ð', '&ntilde;' => 'ñ', '&ograve;' => 'ò', '&oacute;' => 'ó', '&ocirc;' => 'ô', '&otilde;' => 'õ', '&ouml;' => 'ö', '&oslash;' => 'ø', '&ugrave;' => 'ù', '&uacute;' => 'ú', '&ucirc;' => 'û', '&uuml;' => 'ü', '&yacute;' => 'ý', '&thorn;' => 'þ', '&yuml;' => 'ÿ', '&Alpha;' => 'Α', '&Beta;' => 'Β', '&Gamma;' => 'Γ', '&Delta;' => 'Δ', '&Epsilon;' => 'Ε', '&Zeta;' => 'Ζ', '&Eta;' => 'Η', '&Theta;' => 'Θ', '&Iota;' => 'Ι', '&Kappa;' => 'Κ', '&Lambda;' => 'Λ', '&Mu;' => 'Μ', '&Nu;' => 'Ν', '&Xi;' => 'Ξ', '&Omicron;' => 'Ο', '&Pi;' => 'Π', '&Rho;' => 'Ρ', '&Sigma;' => 'Σ', '&Tau;' => 'Τ', '&Upsilon;' => 'Υ', '&Phi;' => 'Φ', '&Chi;' => 'Χ', '&Psi;' => 'Ψ', '&Omega;' => 'Ω', '&alpha;' => 'α', '&beta;' => 'β', '&gamma;' => 'γ', '&delta;' => 'δ', '&epsilon;' => 'ε', '&zeta;' => 'ζ', '&eta;' => 'η', '&theta;' => 'θ', '&iota;' => 'ι', '&kappa;' => 'κ', '&lambda;' => 'λ', '&mu;' => 'μ', '&nu;' => 'ν', '&xi;' => 'ξ', '&omicron;' => 'ο', '&pi;' => 'π', '&rho;' => 'ρ', '&sigmaf;' => 'ς', '&sigma;' => 'σ', '&tau;' => 'τ', '&upsilon;' => 'υ', '&phi;' => 'φ', '&chi;' => 'χ', '&psi;' => 'ψ', '&omega;' => 
'ω', '&thetasym;' => 'ϑ', '&upsih;' => 'ϒ', '&piv;' => 'ϖ', '&OElig;' => 'Œ', '&oelig;' => 'œ', '&Scaron;' => 'Š', '&scaron;' => 'š', '&Yuml;' => 'Ÿ', '&dagger;' => '†', '&Dagger;' => '‡');
    $text = str_replace(array_keys($html_entities), array_values($html_entities), $text);
    // Strip HTML tags
    $text = strip_tags($text);
    // Process <, > and & after all others
    // (decoding them before strip_tags would create tags that then get stripped)
    $text = str_ireplace(array('&lt;', '&gt;', '&#38;', '&amp;'), array('<', '>', '&', '&'), $text);
    // Collapse runs of two or more spaces into one and return
    // (no entities are deleted by this statement, despite the original comment)
    return preg_replace('/[ ]{2,}/', ' ', $text);
}
 /**
  * Decodes numeric codes (&#xXXXX;, \uXXXX or \UXXXXXXXX) from a content string.
  *
  * Hexadecimal references and backslash escapes are first rewritten as
  * decimal references ("&#N;"), which are then decoded to UTF-8 together
  * with any decimal references already present.
  *
  * Fixes:
  *  - the /e regex modifier (removed in PHP 7.0) is replaced by
  *    preg_replace_callback();
  *  - only the hex digits are passed to hexdec() (the "u"/"U" prefix used to
  *    be included, and non-hex letters were accepted in &#x...; references);
  *  - single-digit hex references such as "&#x9;" are now handled;
  *  - the conversion map covers the whole Unicode range, so the accepted
  *    8-digit \U escapes can actually be decoded.
  *
  * Surrogate pairs written as two \uXXXX escapes are not combined.
  *
  * @param string $content The content string to decode.
  * @return string A UTF-8 string where numeric codes have been converted into
  *     their UTF character representations.
  */
 public function decodeUtf8($content)
 {
     $hexToDecimalReference = function ($match) {
         return '&#' . hexdec($match[1]) . ';';
     };

     // &#xHHHH; -> &#N;
     $content = preg_replace_callback('/&#[xX]([0-9a-fA-F]{1,8});/', $hexToDecimalReference, $content);
     // \uHHHH or \UHHHHHHHH -> &#N; (branch reset keeps the digits in group 1)
     $content = preg_replace_callback('/\\\\(?|u([0-9a-fA-F]{4})|U([0-9a-fA-F]{8}))/', $hexToDecimalReference, $content);

     // Decode references for U+0000..U+10FFFF (the mask entry is unused here).
     return mb_decode_numericentity($content, array(0x0, 0x10ffff, 0, 0x1fffff), 'UTF-8');
 }