/**
 * Asserts that grapheme_strlen() reports 3 for the sample text, both for the
 * literal as written and for its NFD-normalized form.
 *
 * The check is made through the global function and through the class
 * aliased as "p".
 *
 * @covers Patchwork\PHP\Override\Intl::grapheme_strlen
 */
function testGrapheme_strlen()
{
    $literal = '한국어';
    $decomposed = n::normalize('한국어', n::NFD);

    // Global function.
    $this->assertSame(3, grapheme_strlen($literal));
    $this->assertSame(3, grapheme_strlen($decomposed));

    // Static method on the "p" alias.
    $this->assertSame(3, p::grapheme_strlen($literal));
    $this->assertSame(3, p::grapheme_strlen($decomposed));
}
/**
 * Class constructor.
 *
 * Turns the given value into a string, validates it against $encoding,
 * converts it to the class encoding, normalizes it and stores the result
 * together with its character length.
 *
 * @param mixed  $string   Value to wrap: null, bool, int, float, string, or
 *                         an object that implements __toString().
 * @param string $encoding Encoding the input is expected to be in.
 *
 * @throws \InvalidArgumentException If the value cannot be converted to a
 *                                   string, the encoding is unsupported, or
 *                                   the string is not valid in that encoding.
 */
public function __construct($string, $encoding = self::ENCODING)
{
    // Step 1: coerce the supported input types to a plain string.
    if (is_object($string)) {
        if (!method_exists($string, '__toString')) {
            $template = 'Object of class %s cannot be converted to String';
            throw new \InvalidArgumentException(sprintf($template, get_class($string)));
        }
        $string = (string) $string;
    } elseif (is_null($string)) {
        $string = 'null';
    } elseif (is_bool($string)) {
        $string = $string ? 'true' : 'false';
    } elseif (is_int($string) || is_float($string)) {
        $string = (string) $string;
    } elseif (!is_string($string)) {
        throw new \InvalidArgumentException('Cannot convert a variable of type ' . gettype($string) . ' to String');
    }

    // Step 2: validate the declared encoding and the bytes against it.
    if (!self::checkEncoding($encoding)) {
        throw new \InvalidArgumentException('Unsupported encoding: ' . $encoding);
    }
    if (!mb_check_encoding($string, $encoding)) {
        throw new \InvalidArgumentException('String is not encoded in ' . $encoding);
    }

    // Step 3: bring the text into the class encoding and normalize it.
    if ($encoding != self::ENCODING) {
        $string = mb_convert_encoding($string, self::ENCODING, $encoding);
    }
    $normalized = \Normalizer::normalize($string);

    $this->string = $normalized;
    $this->length = mb_strlen($normalized, self::ENCODING);
}
/**
 * Asserts that mb_strlen() reports 3 for the sample text as written and 8
 * for its NFD-normalized form.
 *
 * The check is made through the global function and through the class
 * aliased as "p".
 *
 * @covers Patchwork\PHP\Override\Mbstring::mb_strlen
 */
function testmb_strlen()
{
    $literal = '한국어';
    $decomposed = n::normalize('한국어', n::NFD);

    // Global function.
    $this->assertSame(3, mb_strlen($literal));
    $this->assertSame(8, mb_strlen($decomposed));

    // Static method on the "p" alias.
    $this->assertSame(3, p::mb_strlen($literal));
    $this->assertSame(8, p::mb_strlen($decomposed));
}
/**
 * Builds the text to publish for a content object, optionally followed by the
 * object's (possibly shortened) URL.
 *
 * When a length limit is given, room for the URL and the separating space is
 * reserved first and the message is cut to whatever budget remains. Lengths
 * are counted in UTF-8 characters throughout.
 *
 * @param object          $publishHandler Handler providing shorten($url, $handler).
 * @param eZContentObject $object         Object whose main node URL alias is used.
 * @param string          $message        Message text.
 * @param int|null        $messageLength  Maximum total length; null (or 0, as
 *                                        before) means "no limit".
 * @param array           $options        Recognised keys: include_url,
 *                                        shorten_url, shorten_handler. Now
 *                                        defaults to an empty array because a
 *                                        required parameter after an optional
 *                                        one is deprecated in PHP 8.
 *
 * @return string
 */
public static function message($publishHandler, eZContentObject $object, $message, $messageLength = null, $options = array())
{
    $url = false;

    // Loose check kept on purpose: callers passing 0 have always meant "unlimited".
    $limit = ($messageLength != null) ? (int) $messageLength : null;

    if (!empty($options['include_url'])) {
        $url = $object->attribute('main_node')->attribute('url_alias');
        eZURI::transformURI($url, true, 'full');

        if (!empty($options['shorten_url'])) {
            $urlReturned = $publishHandler->shorten($url, $options['shorten_handler']);
            // Keep the long URL when the shortener did not return a string.
            if (is_string($urlReturned)) {
                $url = $urlReturned;
            }
        }

        if ($limit !== null) {
            // Count the URL in characters (the message is cut with mb_substr)
            // and never let the budget go negative: a negative length would
            // make mb_substr() cut from the end instead of truncating.
            $limit = max(0, $limit - mb_strlen($url, 'UTF-8') - 1);
        }
    }

    if (class_exists('Normalizer')) {
        $normalized = Normalizer::normalize($message, Normalizer::FORM_C);
        // normalize() returns false on failure; keep the raw message then.
        if (is_string($normalized)) {
            $message = $normalized;
        }
    }

    // Strict check: a remaining budget of exactly 0 must still truncate.
    if ($limit !== null) {
        $message = mb_substr($message, 0, $limit, 'UTF-8');
    }

    if ($url) {
        $message .= ' ' . $url;
    }

    return $message;
}
/**
 * Validates a value as a boolean, optionally accepting "loose" spellings.
 *
 * null, true and false are returned untouched. Otherwise, when loose mode is
 * enabled, the value (lower-cased and normalized if it is a string) is looked
 * up in the configured true/false lists, falling back to the class defaults.
 * A value that is still not a boolean afterwards is reported on the context
 * with the reason id "bool.invalid". A change of value is flagged on the
 * context as Change::Internal.
 *
 * @param mixed   $input Value to check.
 * @param Context $ctx   Receives failure reasons and the change flag.
 *
 * @return mixed The resulting boolean, or the input unchanged.
 */
protected function applyValue($input, Context $ctx)
{
    // Nothing to validate or convert for null and real booleans.
    if ($input === null || $input === true || $input === false) {
        return $input;
    }

    $result = $input;

    if ($this->allowLoose) {
        $candidate = is_string($input)
            ? mb_strtolower(\Normalizer::normalize($input))
            : $input;

        if (in_array($candidate, $this->trueValues ?: self::$defaultLooseTrue, true)) {
            $result = true;
        } elseif (in_array($candidate, $this->falseValues ?: self::$defaultLooseFalse, true)) {
            $result = false;
        }
    }

    if (!($result === true || $result === false)) {
        $ctx->addReason($this, ['id' => 'bool.invalid']);
    }

    if ($result !== $input) {
        $ctx->setChange(Change::Internal);
    }

    return $result;
}
/**
 * Normalizing an already-closed stream handle must yield the
 * "[unknown(<type>)]" placeholder, where <type> is what gettype() reports.
 */
public function testUnknown()
{
    $handle = fopen('php://memory', 'rb');
    fclose($handle);

    $actual = $this->normalizer->normalize($handle);
    $expected = '[unknown(' . gettype($handle) . ')]';

    $this->assertEquals($expected, $actual);
}
/**
 * Simplifies a UTF-8 string for loose comparison: removes combining accents
 * and lower-cases the result.
 *
 * @param string $string UTF-8 text.
 *
 * @return string The text without non-spacing marks, in lower case.
 */
function simplify_strings($string)
{
    // Decompose into base characters followed by their combining marks (NFD).
    $decomposed = Normalizer::normalize($string, Normalizer::FORM_D);
    if ($decomposed === false) {
        // normalize() returns false on failure; continue with the raw input.
        $decomposed = $string;
    }

    // Drop the accents (non-spacing marks).
    $stripped = preg_replace('~\\p{Mn}~u', '', $decomposed);
    if ($stripped === null) {
        // preg_replace() returns null on error (e.g. malformed UTF-8 with /u).
        $stripped = $decomposed;
    }

    // strtolower() works byte-wise and leaves non-ASCII letters such as
    // "Œ" or "Æ" untouched, so prefer the multibyte version when available.
    if (function_exists('mb_strtolower')) {
        return mb_strtolower($stripped, 'UTF-8');
    }

    return strtolower($stripped);
}
/**
 * Converts the input into a valid value.
 *
 * ASCII tildes are replaced by the wave dash, the text is NFKC-normalized,
 * and all control, separator and mark characters are removed. If the result
 * is recognised by isRegExp(), surrounding slashes are trimmed.
 *
 * @param string $input
 * @return string An empty string is returned when the conversion fails.
 */
protected function convertToValidCharacters(string $input) : string
{
    $normalized = \Normalizer::normalize(str_replace('~', '〜', $input), \Normalizer::FORM_KC);
    if (!is_string($normalized)) {
        // normalize() returns false on failure; honour the documented
        // contract instead of letting a non-string reach the return type.
        return '';
    }

    $converted = preg_replace('/[\\p{C}\\p{Z}\\p{M}]+/u', '', $normalized);
    if (!is_string($converted)) {
        // preg_replace() returns null on error.
        return '';
    }

    if ($this->isRegExp($converted)) {
        $converted = trim($converted, '/');
    }

    return $converted;
}
/**
 * Reduces a text to printable ASCII.
 *
 * The text is composed (NFC) so that the global special-case table can match
 * precomposed characters, the table is applied, the result is decomposed
 * (NFD), and every byte outside 0x20-0x7E is then removed.
 *
 * Reads the globals $special_cases_keys (search strings) and $special_cases
 * (their replacements).
 *
 * @param string $text
 *
 * @return string
 */
function asciify($text)
{
    global $special_cases, $special_cases_keys;

    $composed = Normalizer::normalize($text, Normalizer::FORM_C);
    $substituted = str_replace($special_cases_keys, $special_cases, $composed);
    $decomposed = Normalizer::normalize($substituted, Normalizer::FORM_D);

    // No /u modifier: this works on bytes, so every non-ASCII byte is dropped.
    return preg_replace('/[^\\x20-\\x7E]/', '', $decomposed);
}
/**
 * Compares the default normalizer with the "Greek" one on a Greek sentence,
 * and checks the default normalizer on an English sentence.
 *
 * Expected per the assertions: the Greek normalizer lower-cases, drops the
 * accents and turns the final sigma into a regular sigma; the default
 * normalizer only lower-cases.
 */
public function testNormalizer()
{
    $english = Normalizer::factory();
    $greek = Normalizer::factory("Greek");

    $greekTokens = explode(" ", "Ο μορφωμένος διαφέρει από τον αμόρφωτο όσο ο ζωντανός από τον νεκρό");

    $this->assertEquals(
        explode(" ", "ο μορφωμενοσ διαφερει απο τον αμορφωτο οσο ο ζωντανοσ απο τον νεκρο"),
        $greek->normalizeAll($greekTokens)
    );

    $this->assertEquals(
        explode(" ", "ο μορφωμένος διαφέρει από τον αμόρφωτο όσο ο ζωντανός από τον νεκρό"),
        $english->normalizeAll($greekTokens)
    );

    $englishTokens = explode(" ", "When a father gives to his son both laugh when a son gives to his father both cry");

    $this->assertEquals(
        explode(" ", "when a father gives to his son both laugh when a son gives to his father both cry"),
        $english->normalizeAll($englishTokens)
    );
}
/**
 * Two spellings of the same character in different normalization forms
 * (NFKC vs. NFD) differ as byte strings, yet compare() must report them as
 * valid.
 */
function testStringUnicodeDifferentNormalisationEqual()
{
    $composed = \Normalizer::normalize("ő", \Normalizer::FORM_KC);
    $decomposed = \Normalizer::normalize("ő", \Normalizer::FORM_D);

    // Sanity check: the two forms really are different strings.
    $this->assertNotEquals($composed, $decomposed);

    $result = $this->compare($composed, $decomposed);
    $this->assertTrue($result->valid);
}
/**
 * Class constructor.
 *
 * Validates that the input is UTF-8, then stores its normalized form
 * (Normalizer default form) and its character length.
 *
 * @param string $string UTF-8 text.
 *
 * @throws \InvalidArgumentException If the input is not valid UTF-8.
 */
public function __construct(string $string)
{
    $isUtf8 = mb_check_encoding($string, 'UTF-8');
    if (!$isUtf8) {
        throw new \InvalidArgumentException('String is not valid UTF-8');
    }

    $normalized = \Normalizer::normalize($string);

    $this->string = $normalized;
    $this->length = mb_strlen($normalized, 'UTF-8');
}
/**
 * Public factory method.
 *
 * Validates that the input is UTF-8, normalizes it (Normalizer default form)
 * and hands the normalized text plus its character length to the
 * UnicodeString constructor.
 *
 * @param string $string UTF-8 text.
 *
 * @return UnicodeString
 *
 * @throws \InvalidArgumentException If the input is not valid UTF-8.
 */
public static function of(string $string)
{
    $isUtf8 = mb_check_encoding($string, 'UTF-8');
    if (!$isUtf8) {
        throw new \InvalidArgumentException('String is not valid UTF-8');
    }

    $normalized = \Normalizer::normalize($string);

    return new UnicodeString($normalized, mb_strlen($normalized, 'UTF-8'));
}
/**
 * Removes accents from a string.
 *
 * With the intl extension loaded the text is first decomposed (NFKD). It is
 * then HTML-entity encoded; if that produced any entity, accent-style
 * entities such as "&eacute;" or "&aelig;" are reduced to their leading
 * letter(s) and the remaining entities are decoded again.
 *
 * @see https://github.com/alixaxel/phunction/blob/master/phunction/Text.php#L297
 *
 * @param string $string
 *
 * @return string
 */
public static function unaccent($string)
{
    if (extension_loaded('intl') === true) {
        $string = \Normalizer::normalize($string, \Normalizer::FORM_KD);
    }

    $string = htmlentities($string, ENT_QUOTES, 'UTF-8');

    // No ampersand means no entity was produced: nothing left to do.
    if (strpos($string, '&') === false) {
        return $string;
    }

    $withoutAccents = preg_replace(
        '~&([a-z]{1,2})(?:acute|caron|cedil|circ|grave|lig|orn|ring|slash|tilde|uml);~i',
        '$1',
        $string
    );

    return html_entity_decode($withoutAccents, ENT_QUOTES, 'UTF-8');
}
/**
 * Validates the encoding of a message text and returns it right-trimmed and
 * NFC-normalized.
 *
 * @param string $text
 *
 * @return string
 *
 * @throws MessagePostFailureException If the text is not valid in the class
 *                                     encoding or cannot be normalized.
 */
private function checkAndNormaliseEncoding(string $text) : string
{
    $validEncoding = mb_check_encoding($text, self::ENCODING);
    if (!$validEncoding) {
        throw new MessagePostFailureException('Message text encoding invalid');
    }

    $normalised = \Normalizer::normalize(rtrim($text), \Normalizer::FORM_C);
    if ($normalised === false) {
        throw new MessagePostFailureException('Failed to normalize message text');
    }

    return $normalised;
}
/**
 * Case-folds a text, converts it to NFKC and applies the FC_NFKC_Closure
 * replacement table.
 *
 * @param string $text   Text to process.
 * @param string $option Passed through to utf8_new_case_fold().
 *
 * @return string
 */
function utf8_new_case_fold_nfkc($text, $option = 'full')
{
    // FC_NFKC_Closure, http://www.unicode.org/Public/5.0.0/ucd/DerivedNormalizationProps.txt
    static $closure_map = array(
        "ͺ" => " ι", "ϒ" => "υ", "ϓ" => "ύ", "ϔ" => "ϋ", "ϲ" => "σ", "Ϲ" => "σ",
        "ᴬ" => "a", "ᴭ" => "æ", "ᴮ" => "b", "ᴰ" => "d", "ᴱ" => "e", "ᴲ" => "ǝ", "ᴳ" => "g", "ᴴ" => "h", "ᴵ" => "i", "ᴶ" => "j", "ᴷ" => "k", "ᴸ" => "l", "ᴹ" => "m", "ᴺ" => "n", "ᴼ" => "o", "ᴽ" => "ȣ", "ᴾ" => "p", "ᴿ" => "r", "ᵀ" => "t", "ᵁ" => "u", "ᵂ" => "w",
        "₨" => "rs", "ℂ" => "c", "℃" => "°c", "ℇ" => "ɛ", "℉" => "°f", "ℋ" => "h", "ℌ" => "h", "ℍ" => "h", "ℐ" => "i", "ℑ" => "i", "ℒ" => "l", "ℕ" => "n", "№" => "no", "ℙ" => "p", "ℚ" => "q", "ℛ" => "r", "ℜ" => "r", "ℝ" => "r", "℠" => "sm", "℡" => "tel", "™" => "tm", "ℤ" => "z", "ℨ" => "z", "ℬ" => "b", "ℭ" => "c", "ℰ" => "e", "ℱ" => "f", "ℳ" => "m", "℻" => "fax", "ℾ" => "γ", "ℿ" => "π", "ⅅ" => "d",
        "㉐" => "pte", "㋌" => "hg", "㋎" => "ev", "㋏" => "ltd", "㍱" => "hpa", "㍳" => "au", "㍵" => "ov", "㍺" => "iu", "㎀" => "pa", "㎁" => "na", "㎂" => "μa", "㎃" => "ma", "㎄" => "ka", "㎅" => "kb", "㎆" => "mb", "㎇" => "gb", "㎊" => "pf", "㎋" => "nf", "㎌" => "μf", "㎐" => "hz", "㎑" => "khz", "㎒" => "mhz", "㎓" => "ghz", "㎔" => "thz", "㎩" => "pa", "㎪" => "kpa", "㎫" => "mpa", "㎬" => "gpa", "㎴" => "pv", "㎵" => "nv", "㎶" => "μv", "㎷" => "mv", "㎸" => "kv", "㎹" => "mv", "㎺" => "pw", "㎻" => "nw", "㎼" => "μw", "㎽" => "mw", "㎾" => "kw", "㎿" => "mw", "㏀" => "kω", "㏁" => "mω", "㏃" => "bq", "㏆" => "c∕kg", "㏇" => "co.", "㏈" => "db", "㏉" => "gy", "㏋" => "hp", "㏍" => "kk", "㏎" => "km", "㏗" => "ph", "㏙" => "ppm", "㏚" => "pr", "㏜" => "sv", "㏝" => "wb", "㏞" => "v∕m", "㏟" => "a∕m",
        "𝐀" => "a", "𝐁" => "b", "𝐂" => "c", "𝐃" => "d", "𝐄" => "e", "𝐅" => "f", "𝐆" => "g", "𝐇" => "h", "𝐈" => "i", "𝐉" => "j", "𝐊" => "k", "𝐋" => "l", "𝐌" => "m", "𝐍" => "n", "𝐎" => "o", "𝐏" => "p", "𝐐" => "q", "𝐑" => "r", "𝐒" => "s", "𝐓" => "t", "𝐔" => "u", "𝐕" => "v", "𝐖" => "w", "𝐗" => "x", "𝐘" => "y", "𝐙" => "z",
        "𝐴" => "a", "𝐵" => "b", "𝐶" => "c", "𝐷" => "d", "𝐸" => "e", "𝐹" => "f", "𝐺" => "g", "𝐻" => "h", "𝐼" => "i", "𝐽" => "j", "𝐾" => "k", "𝐿" => "l", "𝑀" => "m", "𝑁" => "n", "𝑂" => "o", "𝑃" => "p", "𝑄" => "q", "𝑅" => "r", "𝑆" => "s", "𝑇" => "t", "𝑈" => "u", "𝑉" => "v", "𝑊" => "w", "𝑋" => "x", "𝑌" => "y", "𝑍" => "z",
        "𝑨" => "a", "𝑩" => "b", "𝑪" => "c", "𝑫" => "d", "𝑬" => "e", "𝑭" => "f", "𝑮" => "g", "𝑯" => "h", "𝑰" => "i", "𝑱" => "j", "𝑲" => "k", "𝑳" => "l", "𝑴" => "m", "𝑵" => "n", "𝑶" => "o", "𝑷" => "p", "𝑸" => "q", "𝑹" => "r", "𝑺" => "s", "𝑻" => "t", "𝑼" => "u", "𝑽" => "v", "𝑾" => "w", "𝑿" => "x", "𝒀" => "y", "𝒁" => "z",
        "𝒜" => "a", "𝒞" => "c", "𝒟" => "d", "𝒢" => "g", "𝒥" => "j", "𝒦" => "k", "𝒩" => "n", "𝒪" => "o", "𝒫" => "p", "𝒬" => "q", "𝒮" => "s", "𝒯" => "t", "𝒰" => "u", "𝒱" => "v", "𝒲" => "w", "𝒳" => "x", "𝒴" => "y", "𝒵" => "z",
        "𝓐" => "a", "𝓑" => "b", "𝓒" => "c", "𝓓" => "d", "𝓔" => "e", "𝓕" => "f", "𝓖" => "g", "𝓗" => "h", "𝓘" => "i", "𝓙" => "j", "𝓚" => "k", "𝓛" => "l", "𝓜" => "m", "𝓝" => "n", "𝓞" => "o", "𝓟" => "p", "𝓠" => "q", "𝓡" => "r", "𝓢" => "s", "𝓣" => "t", "𝓤" => "u", "𝓥" => "v", "𝓦" => "w", "𝓧" => "x", "𝓨" => "y", "𝓩" => "z",
        "𝔄" => "a", "𝔅" => "b", "𝔇" => "d", "𝔈" => "e", "𝔉" => "f", "𝔊" => "g", "𝔍" => "j", "𝔎" => "k", "𝔏" => "l", "𝔐" => "m", "𝔑" => "n", "𝔒" => "o", "𝔓" => "p", "𝔔" => "q", "𝔖" => "s", "𝔗" => "t", "𝔘" => "u", "𝔙" => "v", "𝔚" => "w", "𝔛" => "x", "𝔜" => "y",
        "𝔸" => "a", "𝔹" => "b", "𝔻" => "d", "𝔼" => "e", "𝔽" => "f", "𝔾" => "g", "𝕀" => "i", "𝕁" => "j", "𝕂" => "k", "𝕃" => "l", "𝕄" => "m", "𝕆" => "o", "𝕊" => "s", "𝕋" => "t", "𝕌" => "u", "𝕍" => "v", "𝕎" => "w", "𝕏" => "x", "𝕐" => "y",
        "𝕬" => "a", "𝕭" => "b", "𝕮" => "c", "𝕯" => "d", "𝕰" => "e", "𝕱" => "f", "𝕲" => "g", "𝕳" => "h", "𝕴" => "i", "𝕵" => "j", "𝕶" => "k", "𝕷" => "l", "𝕸" => "m", "𝕹" => "n", "𝕺" => "o", "𝕻" => "p", "𝕼" => "q", "𝕽" => "r", "𝕾" => "s", "𝕿" => "t", "𝖀" => "u", "𝖁" => "v", "𝖂" => "w", "𝖃" => "x", "𝖄" => "y", "𝖅" => "z",
        "𝖠" => "a", "𝖡" => "b", "𝖢" => "c", "𝖣" => "d", "𝖤" => "e", "𝖥" => "f", "𝖦" => "g", "𝖧" => "h", "𝖨" => "i", "𝖩" => "j", "𝖪" => "k", "𝖫" => "l", "𝖬" => "m", "𝖭" => "n", "𝖮" => "o", "𝖯" => "p", "𝖰" => "q", "𝖱" => "r", "𝖲" => "s", "𝖳" => "t", "𝖴" => "u", "𝖵" => "v", "𝖶" => "w", "𝖷" => "x", "𝖸" => "y", "𝖹" => "z",
        "𝗔" => "a", "𝗕" => "b", "𝗖" => "c", "𝗗" => "d", "𝗘" => "e", "𝗙" => "f", "𝗚" => "g", "𝗛" => "h", "𝗜" => "i", "𝗝" => "j", "𝗞" => "k", "𝗟" => "l", "𝗠" => "m", "𝗡" => "n", "𝗢" => "o", "𝗣" => "p", "𝗤" => "q", "𝗥" => "r", "𝗦" => "s", "𝗧" => "t", "𝗨" => "u", "𝗩" => "v", "𝗪" => "w", "𝗫" => "x", "𝗬" => "y", "𝗭" => "z",
        "𝘈" => "a", "𝘉" => "b", "𝘊" => "c", "𝘋" => "d", "𝘌" => "e", "𝘍" => "f", "𝘎" => "g", "𝘏" => "h", "𝘐" => "i", "𝘑" => "j", "𝘒" => "k", "𝘓" => "l", "𝘔" => "m", "𝘕" => "n", "𝘖" => "o", "𝘗" => "p", "𝘘" => "q", "𝘙" => "r", "𝘚" => "s", "𝘛" => "t", "𝘜" => "u", "𝘝" => "v", "𝘞" => "w", "𝘟" => "x", "𝘠" => "y", "𝘡" => "z",
        "𝘼" => "a", "𝘽" => "b", "𝘾" => "c", "𝘿" => "d", "𝙀" => "e", "𝙁" => "f", "𝙂" => "g", "𝙃" => "h", "𝙄" => "i", "𝙅" => "j", "𝙆" => "k", "𝙇" => "l", "𝙈" => "m", "𝙉" => "n", "𝙊" => "o", "𝙋" => "p", "𝙌" => "q", "𝙍" => "r", "𝙎" => "s", "𝙏" => "t", "𝙐" => "u", "𝙑" => "v", "𝙒" => "w", "𝙓" => "x", "𝙔" => "y", "𝙕" => "z",
        "𝙰" => "a", "𝙱" => "b", "𝙲" => "c", "𝙳" => "d", "𝙴" => "e", "𝙵" => "f", "𝙶" => "g", "𝙷" => "h", "𝙸" => "i", "𝙹" => "j", "𝙺" => "k", "𝙻" => "l", "𝙼" => "m", "𝙽" => "n", "𝙾" => "o", "𝙿" => "p", "𝚀" => "q", "𝚁" => "r", "𝚂" => "s", "𝚃" => "t", "𝚄" => "u", "𝚅" => "v", "𝚆" => "w", "𝚇" => "x", "𝚈" => "y", "𝚉" => "z",
        "𝚨" => "α", "𝚩" => "β", "𝚪" => "γ", "𝚫" => "δ", "𝚬" => "ε", "𝚭" => "ζ", "𝚮" => "η", "𝚯" => "θ", "𝚰" => "ι", "𝚱" => "κ", "𝚲" => "λ", "𝚳" => "μ", "𝚴" => "ν", "𝚵" => "ξ", "𝚶" => "ο", "𝚷" => "π", "𝚸" => "ρ", "𝚹" => "θ", "𝚺" => "σ", "𝚻" => "τ", "𝚼" => "υ", "𝚽" => "φ", "𝚾" => "χ", "𝚿" => "ψ", "𝛀" => "ω", "𝛓" => "σ",
        "𝛢" => "α", "𝛣" => "β", "𝛤" => "γ", "𝛥" => "δ", "𝛦" => "ε", "𝛧" => "ζ", "𝛨" => "η", "𝛩" => "θ", "𝛪" => "ι", "𝛫" => "κ", "𝛬" => "λ", "𝛭" => "μ", "𝛮" => "ν", "𝛯" => "ξ", "𝛰" => "ο", "𝛱" => "π", "𝛲" => "ρ", "𝛳" => "θ", "𝛴" => "σ", "𝛵" => "τ", "𝛶" => "υ", "𝛷" => "φ", "𝛸" => "χ", "𝛹" => "ψ", "𝛺" => "ω", "𝜍" => "σ",
        "𝜜" => "α", "𝜝" => "β", "𝜞" => "γ", "𝜟" => "δ", "𝜠" => "ε", "𝜡" => "ζ", "𝜢" => "η", "𝜣" => "θ", "𝜤" => "ι", "𝜥" => "κ", "𝜦" => "λ", "𝜧" => "μ", "𝜨" => "ν", "𝜩" => "ξ", "𝜪" => "ο", "𝜫" => "π", "𝜬" => "ρ", "𝜭" => "θ", "𝜮" => "σ", "𝜯" => "τ", "𝜰" => "υ", "𝜱" => "φ", "𝜲" => "χ", "𝜳" => "ψ", "𝜴" => "ω", "𝝇" => "σ",
        "𝝖" => "α", "𝝗" => "β", "𝝘" => "γ", "𝝙" => "δ", "𝝚" => "ε", "𝝛" => "ζ", "𝝜" => "η", "𝝝" => "θ", "𝝞" => "ι", "𝝟" => "κ", "𝝠" => "λ", "𝝡" => "μ", "𝝢" => "ν", "𝝣" => "ξ", "𝝤" => "ο", "𝝥" => "π", "𝝦" => "ρ", "𝝧" => "θ", "𝝨" => "σ", "𝝩" => "τ", "𝝪" => "υ", "𝝫" => "φ", "𝝬" => "χ", "𝝭" => "ψ", "𝝮" => "ω", "𝞁" => "σ",
        "𝞐" => "α", "𝞑" => "β", "𝞒" => "γ", "𝞓" => "δ", "𝞔" => "ε", "𝞕" => "ζ", "𝞖" => "η", "𝞗" => "θ", "𝞘" => "ι", "𝞙" => "κ", "𝞚" => "λ", "𝞛" => "μ", "𝞜" => "ν", "𝞝" => "ξ", "𝞞" => "ο", "𝞟" => "π", "𝞠" => "ρ", "𝞡" => "θ", "𝞢" => "σ", "𝞣" => "τ", "𝞤" => "υ", "𝞥" => "φ", "𝞦" => "χ", "𝞧" => "ψ", "𝞨" => "ω", "𝞻" => "σ",
        "𝟊" => "ϝ"
    );

    // Step 1: case fold.
    $folded = utf8_new_case_fold($text, $option);

    // Step 2: convert to NFKC.
    $normalized = Normalizer::normalize($folded, Normalizer::NFKC);

    // Step 3: apply the closure table.
    return strtr($normalized, $closure_map);
}
/**
 * Wraps a string in a Buffer, NFKD-normalizing it when the Normalizer class
 * is available.
 *
 * Without the Normalizer class, input detected as UTF-8 is rejected and any
 * other input is wrapped as it is.
 *
 * @param string $string
 * @return BufferInterface
 * @throws \Exception If the input is detected as UTF-8 and the Normalizer
 *                    class is not available.
 */
private function normalize($string)
{
    if (class_exists('Normalizer')) {
        $normalized = \Normalizer::normalize($string, \Normalizer::FORM_KD);

        return new Buffer($normalized);
    }

    if (mb_detect_encoding($string) === 'UTF-8') {
        throw new \Exception('UTF-8 passphrase is not supported without the PECL intl extension installed.');
    }

    return new Buffer($string);
}
/**
 * Normalizes a UTF-8 string: unifies kana/alphanumeric widths, line endings
 * and the Unicode normalization form.
 *
 * @param string $str UTF-8 text.
 *
 * @return string
 */
function normalize_string($str)
{
    // Half-width katakana -> full-width katakana (a half-width voiced mark is
    // merged into a single voiced full-width character); full-width
    // alphanumerics and spaces -> half-width.
    $str = mb_convert_kana($str, "KVas", "UTF-8");

    // Unify every line break to PHP_EOL (this includes in-cell line breaks).
    // preg_replace() replaces ereg_replace(), which no longer exists in PHP 7+.
    $str = preg_replace("/\r\n|\r|\n/", PHP_EOL, $str);

    // Normalize to NFC whenever the text is not NFC already. Testing for
    // "is NFD" instead would skip strings that mix composed and decomposed
    // characters and leave their decomposed parts untouched.
    if (!Normalizer::isNormalized($str, Normalizer::FORM_C)) {
        $composed = Normalizer::normalize($str, Normalizer::FORM_C);
        // normalize() returns false on failure; keep the text as it is then.
        if ($composed !== false) {
            $str = $composed;
        }
    }

    return $str;
}
/**
 * Reduces a value to a compact, accent-free, lower-case token.
 *
 * Steps, in order: trim and collapse runs of spaces; remove CR, TAB and LF;
 * trim quotes, ampersands and commas from both ends; strip combining marks
 * after NFD decomposition; lower-case; remove a fixed list of punctuation
 * characters (including the space); trim dots from both ends.
 *
 * @param string $value
 *
 * @return string
 */
public function filter($value)
{
    $collapsed = mb_ereg_replace(' +', ' ', trim($value));
    $singleLine = mb_ereg_replace("[\r\t\n]", "", $collapsed);

    // See http://www.asciitable.com/ for the characters involved.
    $unquoted = trim($singleLine, "\"'&,");

    $decomposed = Normalizer::normalize($unquoted, Normalizer::FORM_D);
    $unaccented = preg_replace('/\\p{M}/u', '', $decomposed);
    $lowered = mb_strtolower($unaccented, "UTF-8");

    $unwanted = array(
        ' ', '\\"', '\'', '!', '@', '$', '%', '&', '*', '(', ')', ':', '=',
        '\'', '/', ';', '`', '<', '>', '[', ']', '?', '\\', ',', '#',
    );
    $cleaned = str_replace($unwanted, '', $lowered);

    return trim($cleaned, ".");
}
/**
 * Decodes HTML entities in every term and, when the Normalizer class is
 * already loaded, converts each term to Unicode normalization form C.
 *
 * Keys are preserved, so the method also works for arrays whose keys are not
 * a gap-free 0..n-1 sequence.
 *
 * @param array $terms Terms to clean up.
 *
 * @return array The processed terms under their original keys.
 */
public function filter(array $terms)
{
    // Autoloading is deliberately not triggered here.
    $has_normalizer = class_exists("Normalizer", false);

    foreach ($terms as $key => $term) {
        // Remove HTML entities.
        $term = html_entity_decode($term, ENT_QUOTES, 'UTF-8');

        // Bring the term into normalization form C (intl extension, PHP 5.2+).
        if ($has_normalizer) {
            $normalized = Normalizer::normalize($term, Normalizer::FORM_C);
            // normalize() returns false on failure; keep the decoded term then.
            if ($normalized !== false) {
                $term = $normalized;
            }
        }

        $terms[$key] = $term;
    }

    return $terms;
}
/**
 * Logs a user in with the submitted request input.
 *
 * On success the normalized user (with its token) is stored through
 * \Utils::setLoginSession() and the client is redirected to the "home" route
 * with a "token" cookie. On error the message is dumped and nothing is
 * returned.
 *
 * @param Request $request   Incoming request; its input() is passed to the model.
 * @param Auth    $authModel Model providing loginWithId().
 *
 * @return mixed The redirect response, or null when the login failed.
 */
public function login(Request $request, Auth $authModel)
{
    $response = $authModel->loginWithId($request->input());

    // Handle error
    if (array_key_exists('error', $response)) {
        // NOTE(review): var_dump() looks like leftover debugging output - confirm
        // whether a proper error response is intended here.
        var_dump($response['message']);

        return;
    }

    $payload = $response['data'];

    $user = \Normalizer::user($payload);
    $user['token'] = $payload['token'];

    // Update session
    \Utils::setLoginSession($user);

    return redirect()
        ->route('home')
        ->withCookie('token', $user['token']);
}
/**
 * Strips accents from a string when the Normalizer class is available.
 *
 * The text is decomposed (NFKD) and all non-spacing marks are removed.
 * Without the Normalizer class the input is returned unchanged.
 *
 * Alternatives that were considered:
 * - URLify::filter() / URLify::downcode(), https://github.com/jbroadway/urlify
 * - iconv('UTF-8', 'UTF-8//TRANSLIT', ...), http://stackoverflow.com/a/3542752
 * - http://stackoverflow.com/questions/3371697/replacing-accented-characters-php
 * - a pure PHP normalizer, https://github.com/tchwork/utf8
 *
 * @param string $accent
 *
 * @return string
 */
public function normalize($accent)
{
    if (!class_exists('Normalizer')) {
        return $accent;
    }

    // http://stackoverflow.com/a/3542752
    $decomposed = Normalizer::normalize($accent, Normalizer::FORM_KD);

    return preg_replace('/\\p{Mn}/u', '', $decomposed);
}
/**
 * Process a string to contain purely basic latin characters
 *
 * Removes diacritic marks and translates ligatures and a set of special
 * letters into plain characters. A handful of characters (such as umlauts)
 * are first expanded to two letters. If the Normalizer class is not loaded,
 * or the result is empty, the original string is returned.
 *
 * Requires the PHP intl extension and ICU.
 *
 * @param string $original The string to process
 * @return string
 *
 * @link http://ahinea.com/en/tech/accented-translate.html
 */
public function normalise($original)
{
    // Without the extension there is nothing we can do (no autoloading).
    if (!class_exists('Normalizer', false)) {
        return $original;
    }

    // European characters that become two letters; must run before the
    // diacritics are stripped.
    $twoLetterMap = array(
        '@\\x{00c4}@u' => 'AE',
        '@\\x{00d6}@u' => 'OE',
        '@\\x{00dc}@u' => 'UE',
        '@\\x{00e4}@u' => 'ae',
        '@\\x{00f6}@u' => 'oe',
        '@\\x{00fc}@u' => 'ue',
        '@\\x{00f1}@u' => 'ny',
        '@\\x{00ff}@u' => 'yu',
    );
    $expanded = preg_replace(array_keys($twoLetterMap), array_values($twoLetterMap), $original);

    // Split accented characters into base character + diacritical mark.
    $decomposed = \Normalizer::normalize($expanded, \Normalizer::FORM_D);

    // Remove the marks, map the remaining special letters, and finally drop
    // whatever is still outside the range of the last pattern.
    $singleLetterMap = array(
        '@\\pM@u' => '',
        '@\\x{00c6}@u' => 'AE',
        '@\\x{00e6}@u' => 'ae',
        '@\\x{00df}@u' => 'ss',
        '@\\x{0132}@u' => 'IJ',
        '@\\x{0133}@u' => 'ij',
        '@\\x{0152}@u' => 'OE',
        '@\\x{0153}@u' => 'oe',
        '@\\x{00d0}@u' => 'D',
        '@\\x{0110}@u' => 'D',
        '@\\x{0111}@u' => 'd',
        '@\\x{00f0}@u' => 'd',
        '@\\x{0126}@u' => 'H',
        '@\\x{0127}@u' => 'h',
        '@\\x{0131}@u' => 'i',
        '@\\x{0138}@u' => 'k',
        '@\\x{013f}@u' => 'L',
        '@\\x{0140}@u' => 'l',
        '@\\x{0141}@u' => 'L',
        '@\\x{0142}@u' => 'l',
        '@\\x{0149}@u' => 'n',
        '@\\x{014a}@u' => 'N',
        '@\\x{014b}@u' => 'n',
        '@\\x{00d8}@u' => 'O',
        '@\\x{00f8}@u' => 'o',
        '@\\x{017f}@u' => 's',
        '@\\x{00de}@u' => 'T',
        '@\\x{0166}@u' => 'T',
        '@\\x{00fe}@u' => 't',
        '@\\x{0167}@u' => 't',
        '@[^\\0-\\x80]@u' => '',
    );
    $result = preg_replace(array_keys($singleLetterMap), array_values($singleLetterMap), $decomposed);

    // Allow for possible errors in UTF8-regular-expressions
    if (empty($result)) {
        return $original;
    }

    return $result;
}
/**
 * Creates the object from another instance or from any string-castable value.
 *
 * Another instance is copied. Any other value is cast to string; a non-empty
 * result that is not valid UTF-8 is converted from $inputEncoding, and it is
 * normalized if the Normalizer class is loaded and reports it as not
 * normalized. An empty value leaves the stored string untouched.
 *
 * @param mixed  $string        Instance of this class or a string-castable value.
 * @param string $inputEncoding Encoding assumed for input that is not valid UTF-8.
 */
public function __construct($string = '', string $inputEncoding = 'ISO-8859-1')
{
    if ($string instanceof self) {
        $this->string = (string) $string->string;

        return;
    }

    $text = (string) $string;
    if ($text == '') {
        return;
    }

    // An empty pattern with the /u modifier only matches valid UTF-8
    // (used in place of a self::PATTERN_UTF8 based check).
    $isUtf8 = preg_match('//u', $text);
    if (!$isUtf8) {
        $text = mb_convert_encoding($text, 'UTF-8', $inputEncoding);
    }

    if (class_exists('Normalizer', false) && !\Normalizer::isNormalized($text)) {
        $text = \Normalizer::normalize($text);
    }

    $this->string = (string) $text;
}
/**
 * Compares the normalizer aliased as "pn" with the one aliased as "in" for
 * every normalization form, and checks the failure cases.
 *
 * @covers Patchwork\PHP\Override\Normalizer::normalize
 */
function testNormalize()
{
    // A string mixing an NFC part and an NFD part must pass through the
    // NONE form unchanged.
    $mixed = in::normalize("déjà ", pn::NFC) . in::normalize("훈쇼™", pn::NFD);
    $this->assertSame($mixed, pn::normalize($mixed, pn::NONE));
    $this->assertSame($mixed, in::normalize($mixed, pn::NONE));

    // Reference values produced by the "in" normalizer.
    $composed = "déjà 훈쇼™";
    $decomposed = in::normalize($composed, pn::NFD);
    $compatComposed = in::normalize($composed, pn::NFKC);
    $compatDecomposed = in::normalize($composed, pn::NFKD);

    $this->assertSame('', pn::normalize(''));
    $this->assertSame($composed, pn::normalize($decomposed));
    $this->assertSame($composed, pn::normalize($decomposed, pn::NFC));
    $this->assertSame($decomposed, pn::normalize($composed, pn::NFD));
    $this->assertSame($compatComposed, pn::normalize($decomposed, pn::NFKC));
    $this->assertSame($compatDecomposed, pn::normalize($composed, pn::NFKD));

    // Failure cases: both calls are expected to return false.
    $this->assertFalse(pn::normalize($composed, -1));
    $this->assertFalse(pn::normalize("ÿ"));
}
/**
 * Lower-cases a string and normalizes it for the search index.
 *
 * With the Normalizer class available the text is converted to NFC.
 * Otherwise each distinct character is looked up by its unsigned CRC32 in the
 * `unicode_normalization` table and the returned from/to pairs (stored
 * URL-encoded) are applied as plain string replacements.
 *
 * @param object $o Context object; $o->oCase->lc(), $o->V->db_name and
 *                  $o->V->table_prefix are used.
 * @param string $s Text to normalize.
 *
 * @return string
 */
function search_index__str_normalize(&$o, $s)
{
    $jo_db =& JFactory::getDBO();

    /* Do lowercase */
    $s = $o->oCase->lc($s);

    /* Use PECL extension */
    if (class_exists('Normalizer')) {
        return Normalizer::normalize($s, Normalizer::FORM_C);
    }

    /* Split into characters */
    preg_match_all("/./u", $s, $matches);
    $chars = $matches[0];

    /* Character => unsigned CRC32; the character is the key, so duplicates collapse */
    /* PHP-bug: sometimes a string key becomes an integer */
    $crc_by_char = array();
    foreach ($chars as $char) {
        $crc_by_char[$char] = sprintf("%u", crc32($char));
    }
    if (empty($crc_by_char)) {
        return $s;
    }

    /* Fetch the replacement pairs for these characters */
    $query = 'SELECT `str_from`, `str_to`'
        . ' FROM ' . $o->V->db_name . '.' . $o->V->table_prefix . 'unicode_normalization '
        . ' WHERE `crc32u` IN (' . implode(',', array_values($crc_by_char)) . ')';
    $jo_db->setQuery($query);
    $rows = $jo_db->loadAssocList();
    if (is_null($rows)) {
        $rows = array();
    }

    /* Normalize text */
    foreach ($rows as $row) {
        $s = str_replace(urldecode($row['str_from']), urldecode($row['str_to']), $s);
    }

    return $s;
}
/**
 * Debug helper: normalizes the input with the given form and prints the
 * result together with its var_dump(), its utf8_decode()d length and the
 * value of count_chars_unicode().
 *
 * @param string $input Text to normalize.
 * @param mixed  $nf    One of the Normalizer::FORM_* constants; any other
 *                      value is printed as it is.
 *
 * @return void
 */
function printme($input, $nf)
{
    $normalized = Normalizer::normalize($input, $nf);

    // Human-readable name of the form (loose comparison, as a switch would do).
    $label = $nf;
    if ($nf == Normalizer::FORM_C) {
        $label = "NFC";
    } elseif ($nf == Normalizer::FORM_D) {
        $label = "NFD";
    } elseif ($nf == Normalizer::FORM_KC) {
        $label = "NFKC";
    } elseif ($nf == Normalizer::FORM_KD) {
        $label = "NFKD";
    }

    echo "/***" . $label . "***" . $normalized . "***\n";
    var_dump($normalized);
    echo "strlen_dec:" . strlen(utf8_decode($normalized)) . "\n";
    echo "count_chars:" . count_chars_unicode($normalized) . "\n";
    echo "\n";
}
/**
 * Lower-cases a string and normalizes it.
 *
 * With the Normalizer class available the text is converted to NFC.
 * Otherwise each distinct character is looked up by its unsigned CRC32 in the
 * `unicode_normalization` table and the returned from/to pairs (stored
 * URL-encoded) are applied as plain string replacements. The database
 * object's is_debug_q flag is switched off for the lookup and restored
 * afterwards.
 *
 * @param string $s Text to normalize.
 *
 * @return string
 */
public function str_normalize($s)
{
    $s = $this->oCase->lc($s);

    /* Use PECL extension */
    if (class_exists('Normalizer')) {
        return Normalizer::normalize($s, Normalizer::FORM_C);
    }

    /* Split into characters */
    preg_match_all("/./u", $s, $matches);
    $chars = $matches[0];

    /* Character => unsigned CRC32; the character is the key, so duplicates collapse */
    /* PHP-bug: sometimes a string key becomes an integer */
    $crc_by_char = array();
    foreach ($chars as $char) {
        $crc_by_char[$char] = sprintf("%u", crc32($char));
    }
    if (empty($crc_by_char)) {
        return $s;
    }

    /* Run the lookup with the is_debug_q flag switched off */
    $previous_debug = $this->oDb->is_debug_q;
    $this->oDb->is_debug_q = false;
    $this->oDb->select('str_from, str_to');
    $this->oDb->from('unicode_normalization');
    $this->oDb->where_in('crc32u', array_values($crc_by_char));
    $rows = $this->oDb->get()->result_array();
    $this->oDb->is_debug_q = $previous_debug;

    /* Normalize text */
    foreach ($rows as $row) {
        $s = str_replace(urldecode($row['str_from']), urldecode($row['str_to']), $s);
    }

    return $s;
}
/**
 * Sanitizes a string, replacing whitespace and a few other characters with dashes.
 *
 * Limits the output to alphanumeric characters, underscore (_) and dash (-);
 * percent-encoded octets are preserved. Whitespace becomes a dash. Accents
 * are removed after NFKD decomposition and the text is lower-cased.
 *
 * @param string $string The string to be sanitized.
 * @return string The sanitized string.
 *
 * @throws \InvalidArgumentException If no input string is given.
 */
public static function string($string = null)
{
    // empty() would also reject the perfectly valid input '0', so test for
    // "nothing given" explicitly.
    if ($string === null || $string === false || $string === '') {
        throw new \InvalidArgumentException('No input string is given');
    }

    $string = strip_tags($string);

    // Preserve escaped octets.
    $string = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $string);
    // Remove percent signs that are not part of an octet.
    $string = str_replace('%', '', $string);
    // Restore octets.
    $string = preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $string);

    if (function_exists('mb_strtolower')) {
        $string = mb_strtolower($string, 'UTF-8');
    } else {
        $string = strtolower($string);
    }

    // Decompose, then drop the non-spacing marks (accents).
    $string = preg_replace('/\\p{Mn}/u', '', \Normalizer::normalize($string, \Normalizer::FORM_KD));

    // Keep only the allowed characters, then turn whitespace into single dashes.
    $string = preg_replace('/[^%a-z0-9 _-]/', '', $string);
    $string = preg_replace('/\\s+/', '-', $string);
    $string = preg_replace('|-+|', '-', $string);
    $string = trim($string, '-');

    return $string;
}
/**
 * Applies the normalization steps selected in $opts to a string.
 *
 * Order of the steps: NFC and/or NFKC normalization (through the Normalizer
 * class when it is loaded, otherwise through I18N_UnicodeNormalizer if that
 * class is loaded or can be included), lower-casing, and finally a strtr()
 * conversion map.
 *
 * @param string $str  Text to normalize.
 * @param array  $opts Options read: 'nfc', 'nfkc', 'lowercase' (flags) and
 *                     'convmap' (array passed to strtr()).
 *
 * @return string
 */
private function normalize($str, $opts)
{
    if ($opts['nfc'] || $opts['nfkc']) {
        if (class_exists('Normalizer', false)) {
            // intl: only normalize when the text is not in the form already.
            $forms = array('nfc' => Normalizer::FORM_C, 'nfkc' => Normalizer::FORM_KC);
            foreach ($forms as $flag => $form) {
                if ($opts[$flag] && !Normalizer::isNormalized($str, $form)) {
                    $str = Normalizer::normalize($str, $form);
                }
            }
        } else {
            // Fallback: try to load I18N_UnicodeNormalizer, silently.
            if (!class_exists('I18N_UnicodeNormalizer', false)) {
                @(include_once 'I18N/UnicodeNormalizer.php');
            }
            if (class_exists('I18N_UnicodeNormalizer', false)) {
                $normalizer = new I18N_UnicodeNormalizer();
                foreach (array('nfc' => 'NFC', 'nfkc' => 'NFKC') as $flag => $formName) {
                    if ($opts[$flag]) {
                        $str = $normalizer->normalize($str, $formName);
                    }
                }
            }
        }
    }

    if ($opts['lowercase']) {
        $str = strtolower($str);
    }

    $map = $opts['convmap'];
    if ($map && is_array($map)) {
        $str = strtr($str, $map);
    }

    return $str;
}