Пример #1
0
 /**
  * Case-fold a string, run it through the NFKC normalizer and finally apply
  * the FC_NFKC_Closure replacements.
  *
  * The steps are performed in this order:
  *  1. utf8_case_fold($text, $option)
  *  2. utf_normalizer::nfkc() on the folded text (the class file is included
  *     on demand when the class is not loaded yet; the call's return value is
  *     not used, so it presumably rewrites its argument by reference)
  *  3. strtr() with the closure table below
  *
  * Closure data: FC_NFKC_Closure,
  * http://www.unicode.org/Public/5.0.0/ucd/DerivedNormalizationProps.txt
  *
  * @param	string	$text	Text to fold
  * @param	string	$option	Passed through unchanged to utf8_case_fold()
  * @return	string			The folded, normalized and closure-mapped text
  */
 function utf8_case_fold_nfkc($text, $option = 'full')
 {
     global $phpbb_root_path, $phpEx;
     // Replacement table, built once per request. Entries are kept in their
     // original order; each row below holds one logical group.
     static $closure_map = array(
         // Greek symbols and modifier letters
         "ͺ" => " ι", "ϒ" => "υ", "ϓ" => "ύ", "ϔ" => "ϋ", "ϲ" => "σ", "Ϲ" => "σ",
         "ᴬ" => "a", "ᴭ" => "æ", "ᴮ" => "b", "ᴰ" => "d", "ᴱ" => "e", "ᴲ" => "ǝ", "ᴳ" => "g", "ᴴ" => "h", "ᴵ" => "i", "ᴶ" => "j", "ᴷ" => "k", "ᴸ" => "l", "ᴹ" => "m", "ᴺ" => "n", "ᴼ" => "o", "ᴽ" => "ȣ", "ᴾ" => "p", "ᴿ" => "r", "ᵀ" => "t", "ᵁ" => "u", "ᵂ" => "w",
         // Letterlike symbols
         "₨" => "rs", "ℂ" => "c", "℃" => "°c", "ℇ" => "ɛ", "℉" => "°f", "ℋ" => "h", "ℌ" => "h", "ℍ" => "h", "ℐ" => "i", "ℑ" => "i", "ℒ" => "l", "ℕ" => "n", "№" => "no", "ℙ" => "p", "ℚ" => "q", "ℛ" => "r", "ℜ" => "r", "ℝ" => "r", "℠" => "sm", "℡" => "tel", "™" => "tm", "ℤ" => "z", "ℨ" => "z", "ℬ" => "b", "ℭ" => "c", "ℰ" => "e", "ℱ" => "f", "ℳ" => "m", "℻" => "fax", "ℾ" => "γ", "ℿ" => "π", "ⅅ" => "d",
         // Enclosed / squared abbreviations
         "㉐" => "pte", "㋌" => "hg", "㋎" => "ev", "㋏" => "ltd", "㍱" => "hpa", "㍳" => "au", "㍵" => "ov", "㍺" => "iu",
         "㎀" => "pa", "㎁" => "na", "㎂" => "μa", "㎃" => "ma", "㎄" => "ka", "㎅" => "kb", "㎆" => "mb", "㎇" => "gb", "㎊" => "pf", "㎋" => "nf", "㎌" => "μf",
         "㎐" => "hz", "㎑" => "khz", "㎒" => "mhz", "㎓" => "ghz", "㎔" => "thz", "㎩" => "pa", "㎪" => "kpa", "㎫" => "mpa", "㎬" => "gpa",
         "㎴" => "pv", "㎵" => "nv", "㎶" => "μv", "㎷" => "mv", "㎸" => "kv", "㎹" => "mv", "㎺" => "pw", "㎻" => "nw", "㎼" => "μw", "㎽" => "mw", "㎾" => "kw", "㎿" => "mw",
         "㏀" => "kω", "㏁" => "mω", "㏃" => "bq", "㏆" => "c∕kg", "㏇" => "co.", "㏈" => "db", "㏉" => "gy", "㏋" => "hp", "㏍" => "kk", "㏎" => "km", "㏗" => "ph", "㏙" => "ppm", "㏚" => "pr", "㏜" => "sv", "㏝" => "wb", "㏞" => "v∕m", "㏟" => "a∕m",
         // Mathematical alphanumeric symbols: Latin capitals
         "𝐀" => "a", "𝐁" => "b", "𝐂" => "c", "𝐃" => "d", "𝐄" => "e", "𝐅" => "f", "𝐆" => "g", "𝐇" => "h", "𝐈" => "i", "𝐉" => "j", "𝐊" => "k", "𝐋" => "l", "𝐌" => "m", "𝐍" => "n", "𝐎" => "o", "𝐏" => "p", "𝐐" => "q", "𝐑" => "r", "𝐒" => "s", "𝐓" => "t", "𝐔" => "u", "𝐕" => "v", "𝐖" => "w", "𝐗" => "x", "𝐘" => "y", "𝐙" => "z",
         "𝐴" => "a", "𝐵" => "b", "𝐶" => "c", "𝐷" => "d", "𝐸" => "e", "𝐹" => "f", "𝐺" => "g", "𝐻" => "h", "𝐼" => "i", "𝐽" => "j", "𝐾" => "k", "𝐿" => "l", "𝑀" => "m", "𝑁" => "n", "𝑂" => "o", "𝑃" => "p", "𝑄" => "q", "𝑅" => "r", "𝑆" => "s", "𝑇" => "t", "𝑈" => "u", "𝑉" => "v", "𝑊" => "w", "𝑋" => "x", "𝑌" => "y", "𝑍" => "z",
         "𝑨" => "a", "𝑩" => "b", "𝑪" => "c", "𝑫" => "d", "𝑬" => "e", "𝑭" => "f", "𝑮" => "g", "𝑯" => "h", "𝑰" => "i", "𝑱" => "j", "𝑲" => "k", "𝑳" => "l", "𝑴" => "m", "𝑵" => "n", "𝑶" => "o", "𝑷" => "p", "𝑸" => "q", "𝑹" => "r", "𝑺" => "s", "𝑻" => "t", "𝑼" => "u", "𝑽" => "v", "𝑾" => "w", "𝑿" => "x", "𝒀" => "y", "𝒁" => "z",
         "𝒜" => "a", "𝒞" => "c", "𝒟" => "d", "𝒢" => "g", "𝒥" => "j", "𝒦" => "k", "𝒩" => "n", "𝒪" => "o", "𝒫" => "p", "𝒬" => "q", "𝒮" => "s", "𝒯" => "t", "𝒰" => "u", "𝒱" => "v", "𝒲" => "w", "𝒳" => "x", "𝒴" => "y", "𝒵" => "z",
         "𝓐" => "a", "𝓑" => "b", "𝓒" => "c", "𝓓" => "d", "𝓔" => "e", "𝓕" => "f", "𝓖" => "g", "𝓗" => "h", "𝓘" => "i", "𝓙" => "j", "𝓚" => "k", "𝓛" => "l", "𝓜" => "m", "𝓝" => "n", "𝓞" => "o", "𝓟" => "p", "𝓠" => "q", "𝓡" => "r", "𝓢" => "s", "𝓣" => "t", "𝓤" => "u", "𝓥" => "v", "𝓦" => "w", "𝓧" => "x", "𝓨" => "y", "𝓩" => "z",
         "𝔄" => "a", "𝔅" => "b", "𝔇" => "d", "𝔈" => "e", "𝔉" => "f", "𝔊" => "g", "𝔍" => "j", "𝔎" => "k", "𝔏" => "l", "𝔐" => "m", "𝔑" => "n", "𝔒" => "o", "𝔓" => "p", "𝔔" => "q", "𝔖" => "s", "𝔗" => "t", "𝔘" => "u", "𝔙" => "v", "𝔚" => "w", "𝔛" => "x", "𝔜" => "y",
         "𝔸" => "a", "𝔹" => "b", "𝔻" => "d", "𝔼" => "e", "𝔽" => "f", "𝔾" => "g", "𝕀" => "i", "𝕁" => "j", "𝕂" => "k", "𝕃" => "l", "𝕄" => "m", "𝕆" => "o", "𝕊" => "s", "𝕋" => "t", "𝕌" => "u", "𝕍" => "v", "𝕎" => "w", "𝕏" => "x", "𝕐" => "y",
         "𝕬" => "a", "𝕭" => "b", "𝕮" => "c", "𝕯" => "d", "𝕰" => "e", "𝕱" => "f", "𝕲" => "g", "𝕳" => "h", "𝕴" => "i", "𝕵" => "j", "𝕶" => "k", "𝕷" => "l", "𝕸" => "m", "𝕹" => "n", "𝕺" => "o", "𝕻" => "p", "𝕼" => "q", "𝕽" => "r", "𝕾" => "s", "𝕿" => "t", "𝖀" => "u", "𝖁" => "v", "𝖂" => "w", "𝖃" => "x", "𝖄" => "y", "𝖅" => "z",
         "𝖠" => "a", "𝖡" => "b", "𝖢" => "c", "𝖣" => "d", "𝖤" => "e", "𝖥" => "f", "𝖦" => "g", "𝖧" => "h", "𝖨" => "i", "𝖩" => "j", "𝖪" => "k", "𝖫" => "l", "𝖬" => "m", "𝖭" => "n", "𝖮" => "o", "𝖯" => "p", "𝖰" => "q", "𝖱" => "r", "𝖲" => "s", "𝖳" => "t", "𝖴" => "u", "𝖵" => "v", "𝖶" => "w", "𝖷" => "x", "𝖸" => "y", "𝖹" => "z",
         "𝗔" => "a", "𝗕" => "b", "𝗖" => "c", "𝗗" => "d", "𝗘" => "e", "𝗙" => "f", "𝗚" => "g", "𝗛" => "h", "𝗜" => "i", "𝗝" => "j", "𝗞" => "k", "𝗟" => "l", "𝗠" => "m", "𝗡" => "n", "𝗢" => "o", "𝗣" => "p", "𝗤" => "q", "𝗥" => "r", "𝗦" => "s", "𝗧" => "t", "𝗨" => "u", "𝗩" => "v", "𝗪" => "w", "𝗫" => "x", "𝗬" => "y", "𝗭" => "z",
         "𝘈" => "a", "𝘉" => "b", "𝘊" => "c", "𝘋" => "d", "𝘌" => "e", "𝘍" => "f", "𝘎" => "g", "𝘏" => "h", "𝘐" => "i", "𝘑" => "j", "𝘒" => "k", "𝘓" => "l", "𝘔" => "m", "𝘕" => "n", "𝘖" => "o", "𝘗" => "p", "𝘘" => "q", "𝘙" => "r", "𝘚" => "s", "𝘛" => "t", "𝘜" => "u", "𝘝" => "v", "𝘞" => "w", "𝘟" => "x", "𝘠" => "y", "𝘡" => "z",
         "𝘼" => "a", "𝘽" => "b", "𝘾" => "c", "𝘿" => "d", "𝙀" => "e", "𝙁" => "f", "𝙂" => "g", "𝙃" => "h", "𝙄" => "i", "𝙅" => "j", "𝙆" => "k", "𝙇" => "l", "𝙈" => "m", "𝙉" => "n", "𝙊" => "o", "𝙋" => "p", "𝙌" => "q", "𝙍" => "r", "𝙎" => "s", "𝙏" => "t", "𝙐" => "u", "𝙑" => "v", "𝙒" => "w", "𝙓" => "x", "𝙔" => "y", "𝙕" => "z",
         "𝙰" => "a", "𝙱" => "b", "𝙲" => "c", "𝙳" => "d", "𝙴" => "e", "𝙵" => "f", "𝙶" => "g", "𝙷" => "h", "𝙸" => "i", "𝙹" => "j", "𝙺" => "k", "𝙻" => "l", "𝙼" => "m", "𝙽" => "n", "𝙾" => "o", "𝙿" => "p", "𝚀" => "q", "𝚁" => "r", "𝚂" => "s", "𝚃" => "t", "𝚄" => "u", "𝚅" => "v", "𝚆" => "w", "𝚇" => "x", "𝚈" => "y", "𝚉" => "z",
         // Mathematical alphanumeric symbols: Greek capitals (plus final sigma)
         "𝚨" => "α", "𝚩" => "β", "𝚪" => "γ", "𝚫" => "δ", "𝚬" => "ε", "𝚭" => "ζ", "𝚮" => "η", "𝚯" => "θ", "𝚰" => "ι", "𝚱" => "κ", "𝚲" => "λ", "𝚳" => "μ", "𝚴" => "ν", "𝚵" => "ξ", "𝚶" => "ο", "𝚷" => "π", "𝚸" => "ρ", "𝚹" => "θ", "𝚺" => "σ", "𝚻" => "τ", "𝚼" => "υ", "𝚽" => "φ", "𝚾" => "χ", "𝚿" => "ψ", "𝛀" => "ω", "𝛓" => "σ",
         "𝛢" => "α", "𝛣" => "β", "𝛤" => "γ", "𝛥" => "δ", "𝛦" => "ε", "𝛧" => "ζ", "𝛨" => "η", "𝛩" => "θ", "𝛪" => "ι", "𝛫" => "κ", "𝛬" => "λ", "𝛭" => "μ", "𝛮" => "ν", "𝛯" => "ξ", "𝛰" => "ο", "𝛱" => "π", "𝛲" => "ρ", "𝛳" => "θ", "𝛴" => "σ", "𝛵" => "τ", "𝛶" => "υ", "𝛷" => "φ", "𝛸" => "χ", "𝛹" => "ψ", "𝛺" => "ω", "𝜍" => "σ",
         "𝜜" => "α", "𝜝" => "β", "𝜞" => "γ", "𝜟" => "δ", "𝜠" => "ε", "𝜡" => "ζ", "𝜢" => "η", "𝜣" => "θ", "𝜤" => "ι", "𝜥" => "κ", "𝜦" => "λ", "𝜧" => "μ", "𝜨" => "ν", "𝜩" => "ξ", "𝜪" => "ο", "𝜫" => "π", "𝜬" => "ρ", "𝜭" => "θ", "𝜮" => "σ", "𝜯" => "τ", "𝜰" => "υ", "𝜱" => "φ", "𝜲" => "χ", "𝜳" => "ψ", "𝜴" => "ω", "𝝇" => "σ",
         "𝝖" => "α", "𝝗" => "β", "𝝘" => "γ", "𝝙" => "δ", "𝝚" => "ε", "𝝛" => "ζ", "𝝜" => "η", "𝝝" => "θ", "𝝞" => "ι", "𝝟" => "κ", "𝝠" => "λ", "𝝡" => "μ", "𝝢" => "ν", "𝝣" => "ξ", "𝝤" => "ο", "𝝥" => "π", "𝝦" => "ρ", "𝝧" => "θ", "𝝨" => "σ", "𝝩" => "τ", "𝝪" => "υ", "𝝫" => "φ", "𝝬" => "χ", "𝝭" => "ψ", "𝝮" => "ω", "𝞁" => "σ",
         "𝞐" => "α", "𝞑" => "β", "𝞒" => "γ", "𝞓" => "δ", "𝞔" => "ε", "𝞕" => "ζ", "𝞖" => "η", "𝞗" => "θ", "𝞘" => "ι", "𝞙" => "κ", "𝞚" => "λ", "𝞛" => "μ", "𝞜" => "ν", "𝞝" => "ξ", "𝞞" => "ο", "𝞟" => "π", "𝞠" => "ρ", "𝞡" => "θ", "𝞢" => "σ", "𝞣" => "τ", "𝞤" => "υ", "𝞥" => "φ", "𝞦" => "χ", "𝞧" => "ψ", "𝞨" => "ω", "𝞻" => "σ",
         "𝟊" => "ϝ"
     );
     // Step 1: case folding
     $folded = utf8_case_fold($text, $option);
     // Step 2: NFKC, loading the normalizer class first when necessary
     if (!class_exists('utf_normalizer')) {
         include $phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx;
     }
     utf_normalizer::nfkc($folded);
     // Step 3: FC_NFKC_Closure replacements
     return strtr($folded, $closure_map);
 }
Пример #2
0
 /**
  * Clean up a text to remove non-alphanumeric characters
  *
  * Receives a string, recodes it to UTF-8 when $encoding is not 'utf-8',
  * decodes NCRs and HTML entities, runs it through utf_normalizer::nfc() and
  * then walks it character by character:
  *
  * - ASCII letters are lowercased, ASCII punctuation / whitespace and the
  *   bytes that may never appear in UTF-8 are turned into spaces
  * - characters listed in $allowed_chars are kept as they are
  * - characters inside the Hangul / CJK constant ranges are kept and
  *   surrounded by spaces
  * - any other multi-byte character is replaced through the
  *   search_indexer_* conversion tables, or by a single space when no
  *   mapping exists
  *
  * Any number of "allowed chars" can be passed as a UTF-8 string in NFC.
  *
  * All byte constants are written as \x escapes so that the function does
  * not depend on the encoding this source file happens to be saved in.
  *
  * @param	string	$text			Text to split, in UTF-8 (not normalized or sanitized)
  * @param	string	$allowed_chars	String of special chars to allow
  * @param	string	$encoding		Text encoding
  * @return	string					Cleaned up text, only alphanumeric chars are left
  *
  * @todo normalizer::cleanup being able to be used?
  */
 function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
 {
     global $phpbb_root_path, $phpEx;
     // Conversion tables are cached across calls: $conv maps a UTF-8 char to
     // its replacement, $conv_loaded remembers which table blocks were tried.
     static $conv = array(), $conv_loaded = array();
     $allow = array();
     // Convert the text to UTF-8
     $encoding = strtolower($encoding);
     if ($encoding != 'utf-8') {
         $text = utf8_recode($text, $encoding);
     }
     // Sequence length by the high nibble of the lead byte
     // (0xC0/0xD0 => 2 bytes, 0xE0 => 3 bytes, 0xF0 => 4 bytes)
     $utf_len_mask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);
     /**
      * Replace HTML entities and NCRs
      */
     $text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);
     /**
      * Run the text through the UTF-8 normalizer (NFC); the class is expected
      * to be loaded already at this point
      */
     utf_normalizer::nfc($text);
     /**
      * The first thing we do is:
      *
      * - convert ASCII-7 letters to lowercase
      * - remove the ASCII-7 non-alpha characters
      * - remove the bytes that should not appear in a valid UTF-8 string: 0xC0,
      *   0xC1 and 0xF5-0xFF
      *
      * @todo in theory, the third one is already taken care of during normalization and those chars should have been replaced by Unicode replacement chars
      */
     $sb_match = "ISTCPAMELRDOJBNHFGVWUQKYXZ\r\n\t!\"#\$%&'()*+,-./:;<=>?@[\\]^_`{|}~\v\f\xC0\xC1\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";
     // 26 lowercase letters followed by one space for every remaining byte of
     // $sb_match, so both strings always stay position-aligned
     $sb_replace = 'istcpamelrdojbnhfgvwuqkyxz' . str_repeat(' ', strlen($sb_match) - 26);
     /**
      * This is the list of legal ASCII chars, it is automatically extended
      * with ASCII chars from $allowed_chars
      */
     $legal_ascii = ' eaisntroludcpmghbfvq10xy2j9kw354867z';
     /**
      * Prepare an array containing the extra chars to allow
      */
     if (isset($allowed_chars[0])) {
         $pos = 0;
         $len = strlen($allowed_chars);
         do {
             $c = $allowed_chars[$pos];
             if ($c < "\x80") {
                 /**
                  * ASCII char
                  */
                 $sb_pos = strpos($sb_match, $c);
                 if (is_int($sb_pos)) {
                     /**
                      * Remove the char from $sb_match and its corresponding
                      * replacement in $sb_replace
                      */
                     $sb_match = substr($sb_match, 0, $sb_pos) . substr($sb_match, $sb_pos + 1);
                     $sb_replace = substr($sb_replace, 0, $sb_pos) . substr($sb_replace, $sb_pos + 1);
                     $legal_ascii .= $c;
                 }
                 ++$pos;
             } else {
                 /**
                  * UTF-8 char; an unexpected lead byte is consumed as a single
                  * byte so that the scan always advances
                  */
                 $lead = $c & "\xF0";
                 $utf_len = isset($utf_len_mask[$lead]) ? $utf_len_mask[$lead] : 1;
                 $allow[substr($allowed_chars, $pos, $utf_len)] = 1;
                 $pos += $utf_len;
             }
         } while ($pos < $len);
     }
     $text = strtr($text, $sb_match, $sb_replace);
     $ret = '';
     $pos = 0;
     $len = strlen($text);
     // Checking the position up front keeps strspn() from ever being called
     // with an offset beyond the end of the string
     while ($pos < $len) {
         /**
          * Do all consecutive ASCII chars at once
          */
         if ($spn = strspn($text, $legal_ascii, $pos)) {
             $ret .= substr($text, $pos, $spn);
             $pos += $spn;
             if ($pos >= $len) {
                 break;
             }
         }
         /**
          * Capture the UTF char. A byte that is not a known lead byte (stray
          * continuation byte, leftover control char) is taken on its own; it
          * ends up being replaced by a space further down.
          */
         $lead = $text[$pos] & "\xF0";
         $utf_len = isset($utf_len_mask[$lead]) ? $utf_len_mask[$lead] : 1;
         $utf_char = substr($text, $pos, $utf_len);
         $pos += $utf_len;
         if (($utf_char >= UTF8_HANGUL_FIRST && $utf_char <= UTF8_HANGUL_LAST)
             || ($utf_char >= UTF8_CJK_FIRST && $utf_char <= UTF8_CJK_LAST)
             || ($utf_char >= UTF8_CJK_B_FIRST && $utf_char <= UTF8_CJK_B_LAST)) {
             /**
              * All characters within these ranges are valid
              *
              * We separate them with a space in order to index each character
              * individually
              */
             $ret .= ' ' . $utf_char . ' ';
             continue;
         }
         if (isset($allow[$utf_char])) {
             /**
              * The char is explicitly allowed
              */
             $ret .= $utf_char;
             continue;
         }
         if (isset($conv[$utf_char])) {
             /**
              * The char is mapped to something, maybe to itself actually
              */
             $ret .= $conv[$utf_char];
             continue;
         }
         /**
          * The char isn't mapped, but did we load its conversion table?
          *
          * The search indexer table is split into blocks. The block number of
          * each char is equal to its codepoint right-shifted for 11 bits. It
          * means that out of the 11, 16 or 21 meaningful bits of a 2-, 3- or
          * 4- byte sequence we only keep the leftmost 0, 5 or 10 bits. Thus,
          * all UTF chars encoded in 2 bytes are in the same first block.
          */
         if (isset($utf_char[2])) {
             if (isset($utf_char[3])) {
                 /**
                  * 1111 0nnn 10nn nnnn 10nx xxxx 10xx xxxx
                  * 0000 0111 0011 1111 0010 0000
                  */
                 $idx = ((ord($utf_char[0]) & 0x7) << 7) | ((ord($utf_char[1]) & 0x3f) << 1) | ((ord($utf_char[2]) & 0x20) >> 5);
             } else {
                 /**
                  * 1110 nnnn 10nx xxxx 10xx xxxx
                  * 0000 0111 0010 0000
                  */
                 $idx = ((ord($utf_char[0]) & 0x7) << 1) | ((ord($utf_char[1]) & 0x20) >> 5);
             }
         } else {
             /**
              * 110x xxxx 10xx xxxx
              * 0000 0000 0000 0000
              */
             $idx = 0;
         }
         /**
          * Check if the required conv table has been loaded already
          */
         if (!isset($conv_loaded[$idx])) {
             $conv_loaded[$idx] = 1;
             $file = $phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $phpEx;
             if (file_exists($file)) {
                 // The data file returns an array that is merged into the cache
                 $conv += (include $file);
             }
         }
         if (isset($conv[$utf_char])) {
             $ret .= $conv[$utf_char];
         } else {
             /**
              * We add an entry to the conversion table so that we
              * don't have to convert to codepoint and perform the checks
              * that are above this block
              */
             $conv[$utf_char] = ' ';
             $ret .= ' ';
         }
     }
     return $ret;
 }
Пример #3
0
	/**
	* Validate and normalize a UTF string to NFKD, in place
	*
	* Nothing is returned: the result is written back into $str. A string made
	* only of characters from UTF8_ASCII_RANGE is left untouched. Otherwise the
	* compatibility decomposition table is included on first use and the work
	* is delegated to utf_normalizer::decompose().
	*
	* @param	string	&$str	Unchecked UTF string, overwritten with the result
	* @return	void
	*/
	function nfkd(&$str)
	{
		$ascii_prefix = strspn($str, UTF8_ASCII_RANGE);
		$byte_count = strlen($str);

		// Only strings with something beyond the leading ASCII run need work
		if ($ascii_prefix != $byte_count)
		{
			// Load the decomposition table once per request
			if (!isset($GLOBALS['utf_compatibility_decomp']))
			{
				include(IP_ROOT_PATH . 'includes/utf/data/utf_compatibility_decomp.' . PHP_EXT);
			}

			$str = utf_normalizer::decompose($str, $ascii_prefix, $byte_count, $GLOBALS['utf_compatibility_decomp']);
		}
	}
Пример #4
0
/**
* A wrapper function for the normalizer which takes care of including the class if required and returns the passed
* strings in NFKC.
*
* Empty input (as judged by empty()) is returned untouched. Arrays are processed element by element with their keys
* preserved; nested arrays are handled recursively, matching what utf8_normalize_nfc() accepts, instead of being
* handed to the normalizer as if they were strings.
*
* @param	mixed	$strings	a string or a (possibly nested) array of strings to normalize
* @return	mixed				the normalized content, preserving array keys if array given.
*/
function utf8_normalize_nfkc($strings)
{
    if (empty($strings)) {
        return $strings;
    }
    if (!class_exists('utf_normalizer')) {
        global $phpbb_root_path, $phpEx;
        include $phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx;
    }
    if (!is_array($strings)) {
        // utf_normalizer::nfkc() is called for its effect on the argument
        utf_normalizer::nfkc($strings);
        return $strings;
    }
    foreach ($strings as $key => $string) {
        if (is_array($string)) {
            // Descend into nested arrays rather than passing an array on
            $strings[$key] = utf8_normalize_nfkc($string);
        } else {
            utf_normalizer::nfkc($strings[$key]);
        }
    }
    return $strings;
}
Пример #5
0
 /**
  * Recompose a UTF string
  *
  * Makes sure the global canonical composition table
  * ($utf_canonical_comp) is available, including its data file when it is
  * not set yet, and then hands all arguments over to
  * utf_normalizer::recompose() unchanged.
  *
  * @param	string	$str			Unchecked UTF string
  * @param	integer	$pos			Position of the first UTF char (in bytes)
  * @param	integer	$len			Length of the string (in bytes)
  * @param	array	&$qc			Quick-check array, forwarded by reference
  * @param	array	&$decomp_map	Decomposition mapping, forwarded by reference
  * @return	string					Whatever utf_normalizer::recompose() returns
  *
  * @access	private
  */
 function recompose($str, $pos, $len, &$qc, &$decomp_map)
 {
     global $utf_canonical_comp, $phpbb_root_path, $phpEx;
     // Lazily load the canonical composition table on first use
     if (!isset($utf_canonical_comp)) {
         include $phpbb_root_path . 'includes/utf/data/utf_canonical_comp.' . $phpEx;
     }
     $recomposed = utf_normalizer::recompose($str, $pos, $len, $qc, $decomp_map);
     return $recomposed;
 }
Пример #6
0
	/**
	* Validate and normalize a UTF string to NFKD, in place
	*
	* The method has no return value; $str itself is replaced by the result of
	* utf_normalizer::decompose(). Strings consisting solely of characters from
	* UTF8_ASCII_RANGE are left as they are and no table is loaded for them.
	*
	* @param	string	&$str	Unchecked UTF string, overwritten with the result
	* @return	void
	*/
	static function nfkd(&$str)
	{
		$total_bytes = strlen($str);
		$first_non_ascii = strspn($str, UTF8_ASCII_RANGE);

		if ($first_non_ascii == $total_bytes)
		{
			// Pure ASCII: already in normal form
			return;
		}

		// The compatibility decomposition table lives in $GLOBALS and is only
		// included the first time it is needed
		$table_loaded = isset($GLOBALS['utf_compatibility_decomp']);
		if (!$table_loaded)
		{
			global $mangareader_root_path;
			include($mangareader_root_path . 'includes/utf/data/utf_compatibility_decomp.php');
		}

		$str = utf_normalizer::decompose($str, $first_non_ascii, $total_bytes, $GLOBALS['utf_compatibility_decomp']);
	}
Пример #7
0
/**
* Wrapper around utf_normalizer::nfc() (Normalization Form Composition) that includes the class file on demand.
*
* Input that empty() considers empty is handed back unchanged. A non-array value is passed to the normalizer
* directly. For an array, every element is passed to the normalizer; an element that is itself an array has each
* of its own elements passed instead (one level of nesting only).
*
* @param	mixed	$strings	a string or an array of strings to normalize
* @return	mixed				the normalized content, preserving array keys if array given.
*/
function utf8_normalize_nfc($strings)
{
	if (empty($strings))
	{
		return $strings;
	}

	if (!class_exists('utf_normalizer'))
	{
		include(IP_ROOT_PATH . 'includes/utf/utf_normalizer.' . PHP_EXT);
	}

	// Scalar case: normalize and leave early
	if (!is_array($strings))
	{
		utf_normalizer::nfc($strings);
		return $strings;
	}

	foreach (array_keys($strings) as $outer)
	{
		if (!is_array($strings[$outer]))
		{
			utf_normalizer::nfc($strings[$outer]);
			continue;
		}

		// Second level: normalize each member of the nested array
		foreach (array_keys($strings[$outer]) as $inner)
		{
			utf_normalizer::nfc($strings[$outer][$inner]);
		}
	}

	return $strings;
}
Пример #8
0
/**
* Normalize a string, or the strings held in an array, with utf_normalizer::nfc() (Normalization Form Composition).
*
* The normalizer class file is included first when the class does not exist yet. Values that empty() treats as
* empty are returned as they came in. Arrays keep their keys; elements that are arrays themselves get each of
* their members normalized, which covers exactly one level of nesting.
*
* @param	mixed	$strings	a string or an array of strings to normalize
* @return	mixed				the normalized content, preserving array keys if array given.
*/
function utf8_normalize_nfc($strings)
{
	if (empty($strings))
	{
		return $strings;
	}

	if (!class_exists('utf_normalizer'))
	{
		global $mangareader_root_path;
		include($mangareader_root_path . 'includes/utf/utf_normalizer.php');
	}

	if (is_array($strings))
	{
		foreach ($strings as &$entry)
		{
			if (is_array($entry))
			{
				foreach ($entry as &$nested)
				{
					utf_normalizer::nfc($nested);
				}
				// Drop the reference so later writes cannot hit the last member
				unset($nested);
			}
			else
			{
				utf_normalizer::nfc($entry);
			}
		}
		unset($entry);
	}
	else
	{
		utf_normalizer::nfc($strings);
	}

	return $strings;
}