/**
 * Convert a decimal code point into the corresponding UTF-8 character.
 *
 * The number is wrapped in a decimal numeric character reference and handed
 * to mb_decode_numericentity() together with the class-level conversion map.
 *
 * @param int|string $int Decimal code point.
 * @return string Decoded character, or the reference text when the map does not cover it.
 */
public static function lookupDecimal($int)
{
    // UNTESTED upstream: coverage of higher planes depends on static::$numeric_mask.
    $reference = "&#{$int};";

    return mb_decode_numericentity($reference, static::$numeric_mask, 'utf-8');
}
/**
 * Setup: decode the numeric entities in every test string ahead of time.
 */
protected function setup()
{
    // Covers every code point from 0x80 up to the end of the BMP, no offset.
    $entityMap = array(0x80, 0xffff, 0, 0xffff);

    foreach ($this->test_strings as $name => $encoded) {
        $decoded = mb_decode_numericentity($encoded, $entityMap, 'utf-8');
        $this->test_strings[$name] = $decoded;
    }
}
/**
 * JSON-encode a value while keeping non-ASCII characters unescaped.
 *
 * When the runtime offers JSON_UNESCAPED_UNICODE the native flag is used.
 * The legacy path (numeric-entity round trip) is kept for runtimes without
 * the flag; note that it also rewrites any literal "&#NNNN;" text found in
 * the input, which is why the native flag is preferred.
 *
 * @param array $array Data to encode.
 * @return string|false JSON text, or false when json_encode() fails.
 */
function motopressCEJsonEncode($array)
{
    if (defined('JSON_UNESCAPED_UNICODE')) {
        return json_encode($array, JSON_UNESCAPED_UNICODE);
    }

    // Legacy fallback: hide characters >= 0x80 behind numeric entities so that
    // json_encode() leaves them alone, then turn the entities back afterwards.
    $options = array('convmap' => array(0x80, 0xffff, 0, 0xffff), 'encoding' => 'UTF-8');
    array_walk_recursive($array, 'motopressCEMbEncodeNumericentity', $options);

    return mb_decode_numericentity(json_encode($array), $options['convmap'], $options['encoding']);
}
/**
 * Prepare stored text for display inside a textarea.
 *
 * With an empty $process only the entity decoding step runs. With a
 * non-empty $process the paragraph and line-break tags are removed first,
 * and an empty $text yields an empty string.
 *
 * @param string $text    Text to prepare.
 * @param string $process Any non-empty value also strips <p>, </p>, <br /> and <br>.
 * @return string
 */
function stripForForm($text = '', $process = '')
{
    if (!empty($process)) {
        if (!$text) {
            return '';
        }

        // Same removal order as before: <p>, <br />, <br>, </p>.
        $text = str_replace(array('<p>', '<br />', '<br>', '</p>'), '', $text);
    }

    if (function_exists('mb_decode_numericentity')) {
        return mb_decode_numericentity($text, UTF8EntConvert('1'), 'utf-8');
    }

    // Without mbstring: escape everything, then restore the angle brackets.
    // The previous replacement mapped ">" to ">" and "<" to "<", which did nothing.
    $text = htmlspecialchars($text);

    return str_replace(array('&gt;', '&lt;'), array('>', '<'), $text);
}
/**
 * json_encode() with unescaped Unicode output.
 *
 * Uses the native JSON_UNESCAPED_UNICODE flag whenever the runtime defines
 * it (PHP 5.4+). The numeric-entity round trip is only a PHP 5.3 polyfill;
 * it has the side effect of decoding literal "&#NNNN;" text contained in
 * the input, so it is no longer used when the native flag exists.
 *
 * @param array $arr Data to encode.
 * @return string|false JSON text, or false when json_encode() fails.
 */
function kfJsonEncode($arr)
{
    if (defined('JSON_UNESCAPED_UNICODE')) {
        return json_encode($arr, JSON_UNESCAPED_UNICODE);
    }

    $convmap = array(0x80, 0xffff, 0, 0xffff);
    array_walk_recursive($arr, function (&$item, $key) use ($convmap) {
        if (is_string($item)) {
            $item = mb_encode_numericentity($item, $convmap, 'UTF-8');
        }
    });

    return mb_decode_numericentity(json_encode($arr), $convmap, 'UTF-8');
}
/**
 * Convert Shift_JIS text to UTF-8.
 *
 * Escape-sequence web codes are first rewritten through
 * _convertWebcodeToEntity(), the text is converted from SJIS-win to UTF-8,
 * and finally numeric entities covered by $this->_utf8map are decoded.
 *
 * @param string $text
 * @return string
 */
function _convertSjisToUtf8($text)
{
    $withEntities = preg_replace_callback(
        '/\\x1B\\x24([\\x45-\\x47\\x4F-\\x51][\\x21-\\x7A]+)\\x0F?/',
        array($this, '_convertWebcodeToEntity'),
        $text
    );
    $utf8 = mb_convert_encoding($withEntities, 'UTF-8', 'SJIS-win');

    return mb_decode_numericentity($utf8, $this->_utf8map, 'UTF-8');
}
/**
 * Setup: decode the test strings ahead of time and disable the MultiByte library.
 */
protected function setup()
{
    // Covers every code point from 0x80 up to the end of the BMP, no offset.
    $entityMap = array(0x80, 0xffff, 0, 0xffff);

    foreach ($this->test_strings as $name => $encoded) {
        $decoded = mb_decode_numericentity($encoded, $entityMap, 'utf-8');
        $this->test_strings[$name] = $decoded;
    }

    // Switch the multibyte library off and remember the value the call returned.
    $this->old_library = MultiByte::library(false);
}
/**
 * Decode numeric character references (code points 0x0 - 0x2ffff) to UTF-8.
 *
 * @param string $text
 * @return string The decoded text, or $text untouched when mbstring is unavailable.
 */
public function decodeNumericEntity($text)
{
    if (!function_exists('mb_decode_numericentity')) {
        return $text;
    }

    return mb_decode_numericentity($text, array(0x0, 0x2ffff, 0, 0xffff), 'UTF-8');
}
/**
 * Normalise a search keyword and open the Sphinx connection.
 *
 * The keyword is truncated to $this->max_keyword_length, lower-cased,
 * stripped of double quotes and other operator characters, re-encoded as
 * UTF-8 and has its numeric character references decoded. Every
 * space-separated term longer than one character is stored (with single
 * quotes doubled) in $this->array_keyword.
 *
 * @param string $keyword    Raw keyword.
 * @param string $index_data Optional index name; the current one is kept when empty.
 * @return void
 */
function sphinx_keyword($keyword, $index_data = '')
{
    $this->index_data = $index_data ? $index_data : $this->index_data;

    // Truncate over-long keywords.
    if (mb_strlen($keyword, "UTF-8") > $this->max_keyword_length) {
        $keyword = mb_substr($keyword, 0, $this->max_keyword_length, "UTF-8");
    }
    $this->keyword = mb_strtolower($keyword, "UTF-8");

    // Remove double quotes. The previous literal was an unparseable """;
    // the HTML-escaped form is removed as well in case the keyword arrives escaped.
    $this->keyword = str_replace(array('&quot;', '"'), "", $this->keyword);

    // Remove characters that are not wanted in the query.
    $array_bad_word = array("?", "^", ",", ";", "*", "/", "~", "@", "-", "!", "[", "]", "(", ")", "=", "|");
    $this->keyword = str_replace($array_bad_word, "", $this->keyword);

    // Re-encode as UTF-8 to avoid broken (square) characters.
    $this->keyword = mb_convert_encoding($this->keyword, "UTF-8", "UTF-8");

    // Decode numeric character references.
    $convmap = array(0x0, 0x2ffff, 0, 0xffff);
    $this->keyword = @mb_decode_numericentity($this->keyword, $convmap, "UTF-8");

    // Split on spaces; only terms longer than one character are kept.
    $j = -1;
    foreach (explode(" ", $this->keyword) as $piece) {
        $term = trim($piece);
        if ($term != "" && mb_strlen($term, "UTF-8") > 1) {
            $j++;
            $this->array_keyword[$j][0] = str_replace("'", "''", $term);
        }
    }

    $this->keyword = trim($this->keyword);
    $this->original_keyword = $this->keyword;

    // Sphinx configuration when running on localhost.
    if (@$_SERVER['SERVER_NAME'] == "localhost") {
        $this->sphinx_host = "127.0.0.1";
        $this->sphinx_port = 9312;
    }

    // Create the client and open the connection to the server.
    $this->sphinx = new SphinxClient();
    $this->sphinx->SetServer($this->sphinx_host, $this->sphinx_port);
    $this->sphinx->SetConnectTimeout(1.5);
    $this->sphinx->SetMatchMode(SPH_MATCH_ANY);
    // Upper bound on the number of matches returned.
    $this->sphinx->_maxmatches = 330;
    $this->sphinx->Open();
}
/**
 * json_encode() that leaves non-ASCII characters readable (unescaped).
 *
 * The native JSON_UNESCAPED_UNICODE flag is used when the runtime defines
 * it. The numeric-entity round trip remains only as a fallback because it
 * also decodes literal "&#NNNN;" text that was part of the input data.
 *
 * @param array $arr Data to encode.
 * @return string|false JSON text, or false when json_encode() fails.
 */
function json_encode_readable($arr)
{
    if (defined('JSON_UNESCAPED_UNICODE')) {
        return json_encode($arr, JSON_UNESCAPED_UNICODE);
    }

    // The map starts at 0x80, so every character above ASCII 127 is hidden
    // from json_encode() as a numeric entity and restored afterwards.
    $convmap = array(0x80, 0xffff, 0, 0xffff);
    array_walk_recursive($arr, function (&$item, $key) use ($convmap) {
        if (is_string($item)) {
            $item = mb_encode_numericentity($item, $convmap, 'UTF-8');
        }
    });

    return mb_decode_numericentity(json_encode($arr), $convmap, 'UTF-8');
}
/**
 * Encode data as JSON.
 *
 * AJAX requests receive plain json_encode() output. For every other request
 * "<br />" is removed from the data and non-ASCII characters are emitted
 * unescaped.
 *
 * @param mixed $data
 * @return string|false JSON text, or false when json_encode() fails.
 */
function json($data)
{
    $CI =& get_instance();
    if ($CI->input->is_ajax_request()) {
        return json_encode($data);
    }

    $data = str_replace('<br />', '', $data);

    if (defined('JSON_UNESCAPED_UNICODE')) {
        return json_encode($data, JSON_UNESCAPED_UNICODE);
    }

    // Fallback for runtimes without the flag: un-escape \uXXXX by hand.
    // An escaped backslash ("\\") is matched first so that a following
    // "uXXXX" is not mistaken for an escape sequence.
    return preg_replace_callback('/\\\\\\\\|\\\\u([0-9a-f]{4})/i', function ($val) {
        if (!isset($val[1]) || $val[1] === '') {
            return $val[0];
        }

        $codePoint = intval($val[1], 16);
        // Control characters must stay escaped for the JSON to remain valid,
        // and surrogate halves cannot be expressed as single UTF-8 characters.
        if ($codePoint < 0x20 || ($codePoint >= 0xd800 && $codePoint <= 0xdfff)) {
            return $val[0];
        }

        return mb_decode_numericentity('&#' . $codePoint . ';', array(0, 0xffff, 0, 0xffff), 'utf-8');
    }, json_encode($data));
}
/**
 * Callback used by the filter() method.
 *
 * Turns a matched hexadecimal Unicode number into the UTF-8 character it
 * denotes, provided $this->_convmap covers that code point.
 *
 * @param array $matches Regex match; index 1 holds the hexadecimal number.
 * @return string The decoded character, or the original match when nothing was decoded.
 */
function _convertEntityToUtf8($matches)
{
    $codePoint = hexdec($matches[1]);
    $reference = "&#{$codePoint};";
    $decoded = mb_decode_numericentity($reference, $this->_convmap, 'UTF-8');

    // An unchanged string means the conversion map did not cover the code point.
    return $reference === $decoded ? $matches[0] : $decoded;
}
/**
 * Return the HTML-decoded form of a string.
 *
 * The value is normalised to UTF-8, hexadecimal numeric references are
 * rewritten as decimal ones, numeric and named entities are decoded, and
 * finally backslash-escaped double quotes and backslashes are unescaped.
 * Empty or non-string values are returned untouched.
 *
 * @param string $value Target string.
 * @return string
 */
public static function htmldecode($value)
{
    if (empty($value) || !is_string($value)) {
        return $value;
    }

    // mb_detect_encoding() may return false; passing that on to
    // mb_convert_encoding() fails, so fall back to treating the input as UTF-8.
    $detected = mb_detect_encoding($value);
    $value = mb_convert_encoding($value, 'UTF-8', $detected !== false ? $detected : 'UTF-8');

    $decimal = preg_replace_callback("/&#[xX]([0-9a-fA-F]+);/u", function ($m) {
        return '&#' . hexdec($m[1]) . ';';
    }, $value);
    if ($decimal !== null) {
        // null signals a PCRE failure; keep the unconverted text in that case.
        $value = $decimal;
    }

    $value = mb_decode_numericentity($value, array(0x0, 0x10000, 0, 0xfffff), "UTF-8");
    $value = html_entity_decode($value, ENT_QUOTES, "UTF-8");

    // NOTE(review): the middle pair maps \' to \' (no change) - confirm whether ' was intended.
    return str_replace(array("\\\"", "\\'", "\\\\"), array("\"", "\\'", "\\"), $value);
}
/**
 * Build the UTF-8 character for a decimal code point.
 *
 * Only code points in planes 0-2 are converted and NULL is excluded; every
 * other input, and every value the decoder leaves untouched, yields the
 * result of self::fromHex('fffd').
 *
 * @param mixed $number
 *
 * @return string
 */
public static function fromDecimal($number)
{
    if (!empty($number) && !($number > 0x2ffff)) {
        $reference = "&#{$number};";
        $decoded = mb_decode_numericentity($reference, [0x0, 0x2ffff, 0, 0xffff], 'UTF-8');

        if ($decoded !== $reference) {
            return $decoded;
        }
    }

    return self::fromHex('fffd');
}
/**
 * Encode data as JSON with unescaped Unicode, emulating
 * JSON_UNESCAPED_UNICODE on PHP 5.3 where the flag does not exist.
 *
 * @author devilan (REMOVEIT) (at) o2 (dot) pl
 * @see https://php.net/manual/en/function.json-encode.php#105789
 *
 * @param array $arr            Data to encode.
 * @param int   $optionsBitMask Additional json_encode() flags.
 * @return string|false
 */
public static function associativeArrayToJsonStr($arr, $optionsBitMask = 0)
{
    if (!defined('JSON_UNESCAPED_UNICODE')) {
        // The map starts at 0x80: all characters above ASCII 127 are hidden
        // from json_encode() as numeric entities and restored afterwards.
        $convmap = array(0x80, 0xffff, 0, 0xffff);

        array_walk_recursive($arr, function (&$leaf, $index) use (&$convmap) {
            if (is_string($leaf)) {
                $leaf = mb_encode_numericentity($leaf, $convmap, 'UTF-8');
            }
        });
        $encoded = json_encode($arr, $optionsBitMask);

        return mb_decode_numericentity($encoded, $convmap, 'UTF-8');
    }

    return json_encode($arr, JSON_UNESCAPED_UNICODE | $optionsBitMask);
}
/**
 * Recursively replace typographic quotes and dashes with ASCII characters
 * and decode numeric entities in the range 0x80 - 0xff.
 *
 * @param mixed $pValue A scalar value or an array of values.
 * @return mixed|string
 */
private function fromNumericEntities($pValue)
{
    if (is_array($pValue)) {
        foreach ($pValue as $index => $element) {
            $pValue[$index] = $this->fromNumericEntities($element);
        }

        return $pValue;
    }

    $specialChars = array("”" => '"', "“" => '"', "„" => '"', "–" => '-', "—" => '_', "‘" => "'", "’" => "'", "‚" => "'");
    $pValue = str_replace(array_keys($specialChars), array_values($specialChars), $pValue);

    return mb_decode_numericentity($pValue, array(0x80, 0xff, 0, 0xff), "UTF-8");
}
/**
 * Static helper that recursively decodes numeric entities.
 *
 * Typographic quotes and dashes are replaced with ASCII characters first;
 * numeric entities in the range 0x80 - 0xff are then decoded using
 * Configuration::$global_encoding.
 *
 * @static
 * @param mixed $pValue A scalar value or an array of values.
 * @return mixed|string
 */
public static function fromNumericEntities($pValue)
{
    if (is_array($pValue)) {
        foreach ($pValue as $index => $element) {
            $pValue[$index] = self::fromNumericEntities($element);
        }

        return $pValue;
    }

    $specialChars = array("”" => '"', "“" => '"', "„" => '"', "–" => '-', "—" => '_', "‘" => "'", "’" => "'", "‚" => "'");
    $pValue = str_replace(array_keys($specialChars), array_values($specialChars), $pValue);

    return mb_decode_numericentity($pValue, array(0x80, 0xff, 0, 0xff), Configuration::$global_encoding);
}
/**
 * Serialise request parameters into the JSON payload.
 *
 * Produces '{}' when $parameters is empty or not an array. Otherwise an
 * object is built with one child named after getParent(): for "/reorder"
 * actions it receives a list of {id} items, for every other action one
 * property per configured field. String values are kept as unescaped UTF-8
 * in the resulting JSON.
 *
 * @param mixed $parameters
 * @return string JSON text.
 */
protected function parseParameters($parameters)
{
    if (empty($parameters) || !is_array($parameters)) {
        return '{}';
    }

    $root = new stdClass();
    $parentKey = $this->getParent();
    $root->{$parentKey} = new stdClass();
    $container = $root->{$parentKey};

    if ($this->actionInclude('/reorder')) {
        foreach ($parameters as $id) {
            $entry = new stdClass();
            $entry->id = $id;
            $container->{$this->parent}[] = $entry;
        }
    } else {
        foreach ($this->fields as $field => $options) {
            $value = $this->getValue($field, $options, $parameters);

            if (isset($options['attributes'])) {
                foreach ($options['attributes'] as $name => $type) {
                    // Only the "type" attribute of a non-null value triggers a cast.
                    if (null === $value || $name !== 'type') {
                        continue;
                    }

                    if ($type !== 'array') {
                        settype($value, $type);
                    } elseif (is_string($value) || is_numeric($value)) {
                        $value = (array) $value;
                    } else {
                        $value = null;
                    }
                }
            }

            if (null === $value) {
                continue;
            }

            if (is_string($value)) {
                // Hide non-ASCII characters from json_encode(); restored below.
                $value = mb_encode_numericentity($value, [0x80, 0xffff, 0, 0xffff], 'utf-8');
            }

            if (!empty($options['sibling'])) {
                $root->{$field} = $value;
            } else {
                $container->{$field} = $value;
            }
        }
    }

    $json = json_encode($root);

    return mb_decode_numericentity($json, [0x80, 0xffff, 0, 0xffff], 'utf-8');
}
/**
 * JSON-encode a structure while keeping characters above ASCII 127 unescaped.
 *
 * Strings are temporarily converted to numeric entities so json_encode()
 * leaves them alone; the entities are decoded again in the final JSON text.
 * Objects found while walking the structure are replaced by arrays built
 * from their getter methods.
 *
 * @param array $arr Structure to encode; it is taken by reference and its
 *                   leaves are rewritten in place by the walk.
 * @return string JSON text.
 */
public function jsonEncode(&$arr)
{
    // The conversion map starts at 0x80, so it covers every multibyte code
    // (above ASCII 127) up to 0xffff. Those characters are thereby "hidden"
    // from the normal json_encode() escaping.
    array_walk_recursive($arr, function (&$item, $key) {
        if (is_string($item)) {
            $item = mb_encode_numericentity($item, array(0x80, 0xffff, 0, 0xffff), 'UTF-8');
        } elseif (is_object($item)) {
            $reflection = new \ReflectionObject($item);
            $props = $reflection->getProperties();
            $tmp = array();
            foreach ($props as $prop) {
                // Drops the first character of the property name
                // (presumably a prefix such as "_" - TODO confirm).
                $name = substr($prop->getName(), 1);
                $value = '';
                try {
                    // Read the value through the matching getXxx() method.
                    $method = $reflection->getMethod('get' . ucfirst($name));
                    $value = $method->invoke($item);
                } catch (\Exception $ex) {
                    // Only VirtualFormVO gets a second attempt through __call();
                    // for any other class the value stays ''.
                    if ($reflection->name == 'org\\autoset\\santorini\\vo\\VirtualFormVO') {
                        $value = $item->__call('get' . ucfirst($name), null);
                    }
                }
                if ($value instanceof JSONString) {
                    $value = $value->toString();
                } elseif (is_string($value)) {
                    $value = mb_encode_numericentity($value, array(0x80, 0xffff, 0, 0xffff), 'UTF-8');
                } elseif (is_array($value)) {
                    // Nested arrays are encoded recursively and decoded back into a value.
                    $value = json_decode($this->jsonEncode($value));
                } elseif (is_object($value)) {
                    // NOTE(review): nested objects end up as a JSON string here,
                    // unlike nested arrays - confirm this is intended.
                    $value = $this->jsonEncode($value);
                }
                $tmp[$name] = $value;
            }
            $item = $tmp;
        }
    });
    return mb_decode_numericentity(json_encode($arr), array(0x80, 0xffff, 0, 0xffff), 'UTF-8');
}
/**
 * Decode numeric character references using the map supplied by cmap().
 *
 * @param string $text    Input text.
 * @param string $charset Character set of the text.
 * @return string
 */
function decode_high($text, $charset = "UTF-8")
{
    $conversionMap = $this->cmap();

    return mb_decode_numericentity($text, $conversionMap, $charset);
}
/**
 * Repair a byte string so that only well-formed UTF-8 sequences remain.
 *
 * Tab, LF and CR are kept; other control bytes below 0x20, stray
 * continuation bytes, the invalid lead bytes 0xC0/0xC1 and 0xF5-0xFF, and
 * multi-byte sequences with missing or malformed continuation bytes are
 * replaced by $broken. Decimal numeric character references in the result
 * are decoded afterwards.
 *
 * The continuation check used to compare against multi-byte string
 * literals, which rejected every multi-byte character; it now tests the
 * byte pattern 10xxxxxx numerically.
 *
 * @param string $str    Input bytes.
 * @param string $broken Replacement for every rejected byte or sequence.
 * @return string
 */
static function correct($str, $broken = '')
{
    $corrected = '';
    $strlen = strlen($str);

    for ($i = 0; $i < $strlen; $i++) {
        $byte = $str[$i];
        $high = ord($byte);

        if ($byte === "\t" || $byte === "\n" || $byte === "\r") {
            $corrected .= $byte;
            continue;
        }
        if ($high < 0x20) {
            // Remaining control characters.
            $corrected .= $broken;
            continue;
        }
        if ($high < 0x80) {
            // 1 byte (ASCII).
            $corrected .= $byte;
            continue;
        }
        if ($high <= 0xc1 || $high >= 0xf5) {
            // Stray continuation byte, overlong lead (C0/C1), or F5-FF (invalid per RFC 3629).
            $corrected .= $broken;
            continue;
        }

        // Lead byte of a 2-, 3- or 4-byte sequence.
        $trailing = $high < 0xe0 ? 1 : ($high < 0xf0 ? 2 : 3);
        $valid = ($i + $trailing < $strlen);
        for ($k = 1; $valid && $k <= $trailing; $k++) {
            $valid = (ord($str[$i + $k]) & 0xC0) === 0x80;
        }

        $corrected .= $valid ? substr($str, $i, $trailing + 1) : $broken;
        // As before, the bytes of a rejected sequence are skipped as well.
        $i += $trailing;
    }

    if (preg_match('/&#([0-9]{1,});/', $corrected)) {
        // NOTE(review): the map ends at 0x10000, so references above that stay encoded.
        $corrected = mb_decode_numericentity($corrected, array(0x0, 0x10000, 0, 0xfffff), 'UTF-8');
    }

    return $corrected;
}
/**
 * Convert an HTML character reference name or number to its character.
 *
 * An all-digit $text is treated as a numeric reference ("&#N;"), anything
 * else as a named one ("&name;"). mbstring performs the decoding when
 * $this->mb is set, html_entity_decode() otherwise.
 *
 * @param string $text    The reference body, without "&" and ";"
 * @param string $charset The character set
 * @return string Processed input
 */
protected function decodeHigh($text, $charset = 'UTF-8')
{
    if (ctype_digit($text)) {
        $reference = '&#' . $text . ';';
    } else {
        $reference = '&' . $text . ';';
    }

    if ($this->mb) {
        return mb_decode_numericentity($reference, $this->cmap, $charset);
    }

    return html_entity_decode($reference, ENT_NOQUOTES, $charset);
}
/**
 * Wrapper for the json_encode() function that keeps Unicode unescaped.
 *
 * Uses the native JSON_UNESCAPED_UNICODE flag when the runtime defines it.
 * The emulation (numeric-entity round trip) is kept for older runtimes; it
 * no longer relies on create_function(), which was removed in PHP 8.
 *
 * @param array $arr Data to encode.
 * @return string|false JSON text, or false when encoding fails.
 * @author peshkov@UD
 */
public static function json_encode($arr)
{
    if (defined('JSON_UNESCAPED_UNICODE')) {
        return json_encode($arr, JSON_UNESCAPED_UNICODE);
    }

    // The map starts at 0x80, so it takes all multibyte codes (above ASCII
    // 127); such characters are hidden from the normal JSON escaping.
    $convmap = array(0x80, 0xffff, 0, 0xffff);
    array_walk_recursive($arr, function (&$item, $key) use ($convmap) {
        if (is_string($item)) {
            $item = mb_encode_numericentity($item, $convmap, 'UTF-8');
        }
    });

    return mb_decode_numericentity(json_encode($arr), $convmap, 'UTF-8');
}
/**
 * AJAX handler: import a single TYPO3 news record as a post.
 *
 * Terminates the request with a JSON message: an "error" entry when the
 * record cannot be loaded, a "success" entry once the import has finished.
 */
function ajax_process_news()
{
    $this->_create_db_client();

    if (!get_t3i_options('debug_mode')) {
        error_reporting(0); // Don't break the JSON result
        header('Content-type: application/json');
        $this->news_uid = (int) $_REQUEST['id'];
    }

    // Fetch the record from TYPO3.
    $news = $this->get_news($this->news_uid);
    if (!is_array($news) || $news['itemid'] != $this->news_uid) {
        $message = sprintf(
            __("Failed import: %s isn't a TYPO3 news record.", 'typo3-importer'),
            esc_html($_REQUEST['id'])
        );
        die(json_encode(array('error' => $message)));
    }

    if (get_t3i_options('decode_entities')) {
        $conv_map = array(0x0, 0x10000, 0, 0xfffff);

        // Decode numeric entities in every field, one level deep.
        foreach ($news as $key => $value) {
            if (is_array($value)) {
                foreach ($value as $vKey => $vValue) {
                    $news[$key][$vKey] = mb_decode_numericentity($vValue, $conv_map, 'UTF-8');
                }
            } else {
                $news[$key] = mb_decode_numericentity($value, $conv_map, 'UTF-8');
            }
        }
    }

    // TODO progress by post
    // Import the record as a post.
    $post_id = $this->import_news_as_post($news);
    $this->featured_image_id = false;

    // Replace original external images with internal ones.
    $this->_typo3_replace_images($post_id);

    // Handle all the metadata for this post.
    $this->insert_postmeta($post_id, $news);

    if (get_t3i_options('set_featured_image') && $this->featured_image_id) {
        update_post_meta($post_id, "_thumbnail_id", $this->featured_image_id);
    }

    if (!get_t3i_options('no_comments_import')) {
        $this->process_comments();
    }

    $message = sprintf(
        __('"<a href="%1$s" target="_blank">%2$s</a>" Post ID %3$s was successfully processed in %4$s seconds.', 'typo3-importer'),
        get_permalink($post_id),
        esc_html(get_the_title($post_id)),
        $post_id,
        timer_stop()
    );
    die(json_encode(array('success' => $message)));
}
/**
 * json_encode() with unescaped Unicode output.
 *
 * The native JSON_UNESCAPED_UNICODE flag is used when the runtime defines
 * it. The entity round trip through json_unescaped_unicode_walk_callback
 * remains as a fallback only, because decoding the entities afterwards also
 * rewrites literal "&#NNNN;" text that was part of the input.
 *
 * Courtesy of: http://www.php.net/manual/ru/function.json-encode.php#105789
 *
 * @param array $arr Data to encode.
 * @return string|false JSON text, or false when json_encode() fails.
 */
public static function json_encode_unescaped_unicode($arr)
{
    if (defined('JSON_UNESCAPED_UNICODE')) {
        return json_encode($arr, JSON_UNESCAPED_UNICODE);
    }

    array_walk_recursive($arr, array(__CLASS__, 'json_unescaped_unicode_walk_callback'));

    return mb_decode_numericentity(json_encode($arr), array(0x80, 0xffff, 0, 0xffff), 'UTF-8');
}
/**
 * Post-process the rendered output for the requesting device.
 *
 * Carrier-specific markup is applied first (XHTML prolog and doctype for
 * EZweb and i-mode, input-mode attributes for i-mode and SoftBank). Mobile
 * output is then passed through convertMobile(), kana-normalised and
 * converted to SJIS-win, with carrier emoji carried across the conversion
 * as numeric entities. Non-mobile output goes through convertPC().
 *
 * The i-mode input-format replacements previously contained unescaped
 * double quotes and did not parse; the quotes and angle brackets inside the
 * attribute value are now written as HTML entities.
 *
 * @return void
 */
function afterFilter()
{
    $_data = $this->c->output;

    if ($this->is_ezweb()) {
        // KDDI
        $_data = str_replace("<html>", "<?xml version=\"1.0\" encoding=\"Shift_JIS\"?><!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\">", $_data);
        $_data = str_replace("font-size:small", "font-size:12px", $_data);
    } elseif ($this->is_imode()) {
        // DoCoMo
        $_data = str_replace("<html>", "<?xml version=\"1.0\" encoding=\"Shift_JIS\"?><!DOCTYPE html PUBLIC \"-//i-mode group (ja)//DTD XHTML i-XHTML(Locale/Ver.=ja/2.0) 1.0//EN\" \"i-xhtml_4ja_10.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\">", $_data);
        $_data = str_replace("istyle=\"1\"", "style=\"-wap-input-format:&quot;*&lt;ja:h&gt;&quot;\"", $_data);
        $_data = str_replace("istyle=\"3\"", "style=\"-wap-input-format:&quot;*&lt;ja:en&gt;&quot;\"", $_data);
        $_data = str_replace("istyle=\"4\"", "style=\"-wap-input-format:&quot;*&lt;ja:n&gt;&quot;\"", $_data);
        // Image extensions are left alone: these days everything appears to be GIF.
    } elseif ($this->is_softbank()) {
        // SoftBank
        $_data = str_replace("istyle=\"1\"", "mode=\"hiragana\"", $_data);
        $_data = str_replace("istyle=\"3\"", "mode=\"alphabet\"", $_data);
        $_data = str_replace("istyle=\"4\"", "mode=\"numeric\"", $_data);
    }

    if (!$this->is_mobile()) {
        $this->c->output = $this->convertPC($_data);
        return;
    }

    $_data = $this->convertMobile($_data);
    $_data = mb_convert_kana($_data, "kVrns", 'UTF-8');
    header("Content-type: application/xhtml+xml; charset=Shift_JIS");

    if ($this->is_ezweb()) {
        $sjismap = array(
            0xe234, 0xe272, 0xa0c, 0xffff,
            0xe273, 0xe2ef, 0xa0d, 0xffff,
            0xe2f0, 0xe32e, 0xa50, 0xffff,
            0xe32f, 0xe342, 0xa51, 0xffff,
            0xe468, 0xe4a6, 0xad8, 0xffff,
            0xe4a7, 0xe523, 0xad9, 0xffff,
            0xe524, 0xe562, 0xb1c, 0xffff,
            0xe563, 0xe5df, 0xb1d, 0xffff,
        );
        $utf8map = array(
            0xec40, 0xecfc, 0x0, 0xffff,
            0xed40, 0xed93, 0x0, 0xffff,
            0xef40, 0xeffc, 0x0, 0xffff,
            0xf040, 0xf0fc, 0x0, 0xffff,
        );
        // Turn the mapped code points into entities so they survive the
        // charset conversion, then decode them with the SJIS-side map.
        $_data = mb_encode_numericentity($_data, $utf8map, 'UTF-8');
        $_data = mb_convert_encoding($_data, 'SJIS-win', 'UTF-8');
        $this->c->output = mb_decode_numericentity($_data, $sjismap, 'SJIS-win');
    } elseif ($this->is_softbank()) {
        $utf8map = array(
            0xe001, 0xe05a, 0x0, 0xffff,
            0xe101, 0xe15a, 0x0, 0xffff,
            0xe201, 0xe25a, 0x0, 0xffff,
            0xe301, 0xe34d, 0x0, 0xffff,
            0xe401, 0xe44c, 0x0, 0xffff,
            0xe501, 0xe53e, 0x0, 0xffff,
        );
        $_data = mb_encode_numericentity($_data, $utf8map, 'UTF-8');
        $_data = mb_convert_encoding($_data, 'SJIS-win', 'UTF-8');
        // The remaining five-digit entities starting with 5 are rewritten by the callback.
        $pattern = '/&#(5\\d{4});/';
        $callback = array($this, '_softbank_convertUnicodeToSjis');
        $this->c->output = preg_replace_callback($pattern, $callback, $_data);
    } else {
        $this->c->output = mb_convert_encoding($_data, 'SJIS-win', 'UTF-8');
    }
}
/**
 * Decode numeric character references to UTF-8 using the map from cmap().
 *
 * @param string $text
 * @return string
 */
function decode_high($text)
{
    return mb_decode_numericentity($text, cmap(), "UTF-8");
}
/**
 * Decode numeric character references (code points 0x0 - 0x2ffff) to UTF-8.
 *
 * @param string $str
 * @return string
 */
function htmlCharsDecode($str)
{
    return mb_decode_numericentity($str, array(0x0, 0x2ffff, 0, 0xffff), 'UTF-8');
}
/**
 * Convert HTML entities to UTF-8 characters and strip HTML tags.
 *
 * Steps, in order:
 *  1. numeric references are decoded (mbstring, or hesk_replace_num_entity);
 *  2. named entities are decoded, except &lt; &gt; &amp;;
 *  3. HTML tags are stripped;
 *  4. &lt; &gt; &amp; are decoded, so escaped markup survives step 3;
 *  5. runs of spaces are collapsed to one space.
 *
 * The hand-written entity tables had literal characters as keys (identity
 * mappings, duplicate keys and an unparseable quote literal). They are
 * replaced by PHP's own HTML entity table; the ASCII substitutions the old
 * tables spelled out (typographic quotes, special spaces, soft hyphen,
 * low-9 quote) are kept as explicit overrides.
 *
 * @param string $text
 * @return string
 */
function hesk_convert_to_utf8_and_clean_html_entities($text)
{
    // Can we use the multibyte functionality of PHP?
    if (function_exists('mb_decode_numericentity')) {
        $text = mb_decode_numericentity($text, array(0x0, 0x2ffff, 0, 0xffff), 'UTF-8');
    } else {
        $text = preg_replace_callback('/&#([0-9a-fx]+);/mi', 'hesk_replace_num_entity', $text);
    }

    // entity => character, without the three that must wait until tags are stripped.
    $entities = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_QUOTES, 'UTF-8'));
    unset($entities['&lt;'], $entities['&gt;'], $entities['&amp;']);

    // Entities that are deliberately flattened to plain ASCII.
    $entities = array_merge($entities, array(
        '&apos;'   => "'",
        '&rdquo;'  => '"',
        '&ldquo;'  => '"',
        '&bdquo;'  => '"',
        '&Prime;'  => '"',
        '&lsquo;'  => "'",
        '&rsquo;'  => "'",
        '&prime;'  => "'",
        '&acute;'  => "'",
        '&nbsp;'   => ' ',
        '&ensp;'   => ' ',
        '&emsp;'   => ' ',
        '&thinsp;' => ' ',
        '&shy;'    => '',
        '&sbquo;'  => ',',
    ));

    // Exact-case match first (so &Agrave; and &agrave; stay distinct), then a
    // lower-case fallback so that spellings such as &QUOT; are still decoded.
    if (preg_match_all('/&[a-z][a-z0-9]*;/i', $text, $found)) {
        $replacements = array();
        foreach (array_unique($found[0]) as $token) {
            $lower = strtolower($token);
            if (isset($entities[$token])) {
                $replacements[$token] = $entities[$token];
            } elseif (isset($entities[$lower])) {
                $replacements[$token] = $entities[$lower];
            }
        }
        if ($replacements) {
            $text = strtr($text, $replacements);
        }
    }

    // Strip HTML tags
    $text = strip_tags($text);

    // Process <, > and & after all others
    $text = str_ireplace(array('&lt;', '&gt;', '&amp;'), array('<', '>', '&'), $text);

    // Collapse excess spaces and return
    return preg_replace('/[ ]{2,}/', ' ', $text);
}
/**
 * Decodes numeric character codes in a content string into UTF-8.
 *
 * Recognised notations: hexadecimal references (&#xHHHH;), decimal
 * references (&#DDDD;), and the escapes \uXXXX and \UXXXXXXXX. A \uXXXX
 * surrogate pair is combined into the single code point it encodes.
 * Sequences that do not denote a valid code point are left as written.
 *
 * The removed "/e" regex modifier is replaced by callbacks, only real hex
 * digits are accepted, and the conversion map now reaches U+10FFFF so the
 * eight-digit form can actually be decoded.
 *
 * @param string $content The content string to decode.
 * @return string A UTF-8 string where numeric codes have been converted into
 *                their UTF character representations.
 */
public function decodeUtf8($content)
{
    // Decimal entity for a code point, or the original text when out of range.
    $toEntity = function ($codePoint, $original) {
        $isSurrogate = $codePoint >= 0xD800 && $codePoint <= 0xDFFF;
        if ($codePoint > 0x10FFFF || $isSurrogate) {
            return $original;
        }
        return '&#' . $codePoint . ';';
    };

    // &#xHHHH;
    $content = preg_replace_callback('/&#[xX]([0-9a-fA-F]{2,8});/', function ($m) use ($toEntity) {
        return $toEntity(hexdec($m[1]), $m[0]);
    }, $content);

    // \uD8xx\uDCxx surrogate pairs must be combined before single escapes are handled.
    $content = preg_replace_callback(
        '/\\\\u(d[89ab][0-9a-f]{2})\\\\u(d[c-f][0-9a-f]{2})/i',
        function ($m) {
            $high = hexdec($m[1]) - 0xD800;
            $low = hexdec($m[2]) - 0xDC00;
            return '&#' . (0x10000 + ($high << 10) + $low) . ';';
        },
        $content
    );

    // \uXXXX and \UXXXXXXXX; the capture includes the leading u/U, which is dropped.
    $content = preg_replace_callback('/\\\\(u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})/', function ($m) use ($toEntity) {
        return $toEntity(hexdec(substr($m[1], 1)), $m[0]);
    }, $content);

    return mb_decode_numericentity($content, array(0x0, 0x10ffff, 0, 0x1fffff), 'UTF-8');
}