/**
  * Asserts that grapheme_strlen() reports three grapheme clusters for the
  * same Hangul word, whether it is passed as written or after n::normalize()
  * with the NFD form, for both the global function and the `p` implementation.
  *
  * @covers Patchwork\PHP\Override\Intl::grapheme_strlen
  */
 function testGrapheme_strlen()
 {
     $word = '한국어';
     $decomposedWord = n::normalize('한국어', n::NFD);

     // Global function (native or shim, depending on the environment).
     $this->assertSame(3, grapheme_strlen($word));
     $this->assertSame(3, grapheme_strlen($decomposedWord));

     // Implementation imported as `p`, called directly.
     $this->assertSame(3, p::grapheme_strlen($word));
     $this->assertSame(3, p::grapheme_strlen($decomposedWord));
 }
Esempio n. 2
0
 /**
  * Builds the instance from an arbitrary scalar, null or stringable object.
  *
  * null becomes 'null', booleans become 'true'/'false', numbers and objects
  * with __toString() are cast to string. The text is validated against
  * $encoding, converted to self::ENCODING when the two differ, passed through
  * \Normalizer::normalize() and stored together with its mb_strlen() length.
  *
  * @param mixed  $string   Value to wrap.
  * @param string $encoding Encoding the input is expressed in.
  * @throws \InvalidArgumentException When the value cannot be converted, the
  *                                   encoding is unsupported, or the bytes are
  *                                   not valid in the given encoding.
  */
 public function __construct($string, $encoding = self::ENCODING)
 {
     switch (true) {
         case is_null($string):
             $text = 'null';
             break;
         case is_bool($string):
             $text = $string ? 'true' : 'false';
             break;
         case is_int($string):
         case is_float($string):
             $text = (string) $string;
             break;
         case is_object($string):
             if (!method_exists($string, '__toString')) {
                 throw new \InvalidArgumentException(sprintf('Object of class %s cannot be converted to String', get_class($string)));
             }
             $text = (string) $string;
             break;
         case is_string($string):
             $text = $string;
             break;
         default:
             throw new \InvalidArgumentException('Cannot convert a variable of type ' . gettype($string) . ' to String');
     }

     if (!self::checkEncoding($encoding)) {
         throw new \InvalidArgumentException('Unsupported encoding: ' . $encoding);
     }
     if (!mb_check_encoding($text, $encoding)) {
         throw new \InvalidArgumentException('String is not encoded in ' . $encoding);
     }

     // Transcode only when the caller's encoding differs from the internal one.
     if ($encoding != self::ENCODING) {
         $text = mb_convert_encoding($text, self::ENCODING, $encoding);
     }

     $text = \Normalizer::normalize($text);
     $this->string = $text;
     $this->length = mb_strlen($text, self::ENCODING);
 }
 /**
  * Asserts the mb_strlen() count for a Hangul word: 3 for the literal as
  * written and 8 after n::normalize() with the NFD form, for both the global
  * function and the `p` implementation.
  *
  * @covers Patchwork\PHP\Override\Mbstring::mb_strlen
  */
 function testmb_strlen()
 {
     $word = '한국어';
     $decomposedWord = n::normalize('한국어', n::NFD);

     // Global function.
     $this->assertSame(3, mb_strlen($word));
     $this->assertSame(8, mb_strlen($decomposedWord));

     // Implementation imported as `p`, called directly.
     $this->assertSame(3, p::mb_strlen($word));
     $this->assertSame(8, p::mb_strlen($decomposedWord));
 }
 /**
  * Builds the text to publish for a content object, optionally followed by
  * the object's URL, and truncated to a maximum number of characters.
  *
  * When a limit is given and a URL is appended, the URL plus the separating
  * space are deducted from the budget first, so the message part is what gets
  * shortened. The budget is clamped at zero: a URL that uses up the whole
  * limit yields an empty message part instead of an untruncated one.
  *
  * @param object          $publishHandler Object exposing shorten($url, $handler);
  *                                        only used when 'shorten_url' is enabled.
  * @param eZContentObject $object         Content object whose main node URL alias is used.
  * @param string          $message        Message text.
  * @param int|null        $messageLength  Maximum length in characters; null (or any
  *                                        value loosely equal to null) means no limit.
  * @param array           $options        Recognised keys: 'include_url', 'shorten_url',
  *                                        'shorten_handler'.
  * @return string The final message.
  */
 public static function message($publishHandler, eZContentObject $object, $message, $messageLength = null, $options = array())
 {
     // Decide once whether a limit applies, so a remaining budget of 0 is
     // still enforced further down (0 is loosely equal to null).
     $hasLimit = $messageLength != null;
     $url = false;

     if (isset($options['include_url']) && (bool) $options['include_url'] === true) {
         $url = $object->attribute('main_node')->attribute('url_alias');
         eZURI::transformURI($url, true, 'full');

         if (isset($options['shorten_url']) && (bool) $options['shorten_url'] === true) {
             $shortenHandler = isset($options['shorten_handler']) ? $options['shorten_handler'] : null;
             $shortened = $publishHandler->shorten($url, $shortenHandler);
             // Keep the full URL when the shortener did not return a string.
             if (is_string($shortened)) {
                 $url = $shortened;
             }
         }

         if ($hasLimit) {
             // Count characters, not bytes, to match mb_substr() below; the
             // extra 1 is the space placed between message and URL.
             $messageLength = max(0, $messageLength - mb_strlen($url) - 1);
         }
     }

     if (class_exists('Normalizer')) {
         $normalized = Normalizer::normalize($message, Normalizer::FORM_C);
         // normalize() returns false on failure; keep the original text then.
         if ($normalized !== false) {
             $message = $normalized;
         }
     }

     if ($hasLimit) {
         $message = mb_substr($message, 0, $messageLength);
     }
     if ($url) {
         $message .= ' ' . $url;
     }

     return $message;
 }
Esempio n. 5
0
 /**
  * Coerces the input to a boolean.
  *
  * null, true and false are returned untouched. In loose mode the value
  * (strings are normalized and lower-cased first) is looked up in the
  * configured true/false lists, falling back to the class defaults. Anything
  * that is still not a boolean afterwards is reported as 'bool.invalid', and
  * any difference between input and output is flagged as an internal change.
  *
  * @param mixed   $input Value to coerce.
  * @param Context $ctx   Receives the failure reason and the change flag.
  * @return mixed The boolean result, or the unchanged input.
  */
 protected function applyValue($input, Context $ctx)
 {
     // Nothing to validate or convert for null and real booleans.
     if ($input === null || is_bool($input)) {
         return $input;
     }

     $result = $input;

     if ($this->allowLoose) {
         $candidate = is_string($input)
             ? mb_strtolower(\Normalizer::normalize($input))
             : $input;

         if (in_array($candidate, $this->trueValues ?: self::$defaultLooseTrue, true)) {
             $result = true;
         } elseif (in_array($candidate, $this->falseValues ?: self::$defaultLooseFalse, true)) {
             $result = false;
         }
     }

     if (!is_bool($result)) {
         $ctx->addReason($this, ['id' => 'bool.invalid']);
     }
     if ($result !== $input) {
         $ctx->setChange(Change::Internal);
     }

     return $result;
 }
 /**
  * A stream handle that has already been closed must be rendered by the
  * normalizer as "[unknown(<gettype>)]".
  */
 public function testUnknown()
 {
     $handle = fopen('php://memory', 'rb');
     fclose($handle);

     $expected = '[unknown(' . gettype($handle) . ')]';

     $this->assertEquals($expected, $this->normalizer->normalize($handle));
 }
Esempio n. 7
0
/**
 * Reduces a UTF-8 string to a lower-case form without accents.
 *
 * The string is decomposed (NFD) so that every accent becomes a separate
 * combining mark, the marks are removed, and the remainder is lower-cased
 * with the multibyte-aware function so that non-ASCII letters that have no
 * decomposition (for example "Ø") are folded as well.
 *
 * @param string $string UTF-8 input.
 * @return string Simplified string; '' when the input is not valid UTF-8.
 */
function simplify_strings($string)
{
    // Normalize the UTF-8 string to "base character + accents" form.
    $decomposed = Normalizer::normalize($string, Normalizer::FORM_D);
    if ($decomposed === false) {
        // Invalid UTF-8: nothing sensible can be produced.
        return '';
    }

    // Remove the accents (non-spacing marks).
    $withoutMarks = preg_replace('~\\p{Mn}~u', '', $decomposed);
    if ($withoutMarks === null) {
        return '';
    }

    // strtolower() works on bytes / ASCII only; use the UTF-8 aware variant.
    return mb_strtolower($withoutMarks, 'UTF-8');
}
 /**
  * Converts the input into a valid value.
  *
  * ASCII tildes are replaced by wave dashes, the text is NFKC-normalized, and
  * every control, separator and mark character is removed. When the result is
  * recognised by isRegExp(), the surrounding slashes are trimmed.
  *
  * @param string $input
  * @return string The converted value; an empty string when the input could
  *                not be converted (for example when it is not valid UTF-8).
  */
 protected function convertToValidCharacters(string $input) : string
 {
     $normalized = \Normalizer::normalize(str_replace('~', '〜', $input), \Normalizer::FORM_KC);
     if (!is_string($normalized)) {
         // normalize() yields false on failure; honour the documented contract.
         return '';
     }

     $converted = preg_replace('/[\\p{C}\\p{Z}\\p{M}]+/u', '', $normalized);
     if (!is_string($converted)) {
         // preg_replace() yields null on a PCRE error, which would violate
         // the declared string return type.
         return '';
     }

     if ($this->isRegExp($converted)) {
         $converted = trim($converted, '/');
     }

     return $converted;
 }
Esempio n. 9
0
/**
 * Reduces text to printable ASCII.
 *
 * The text is composed (NFC) so the replacement table can match precomposed
 * characters, the global replacement table is applied, the result is
 * decomposed (NFD), and finally every byte outside 0x20-0x7E is dropped.
 *
 * Relies on the globals $special_cases_keys (search) and $special_cases
 * (replacement), which are passed to str_replace() as-is.
 *
 * @param string $text Input text.
 * @return string|null Result of the final preg_replace().
 */
function asciify($text)
{
    global $special_cases, $special_cases_keys;

    $composed = Normalizer::normalize($text, Normalizer::FORM_C);
    $substituted = str_replace($special_cases_keys, $special_cases, $composed);
    $decomposed = Normalizer::normalize($substituted, Normalizer::FORM_D);

    return preg_replace('/[^\\x20-\\x7E]/', '', $decomposed);
}
Esempio n. 10
0
 /**
  * Compares the output of the default and the "Greek" normalizer on a Greek
  * sentence, and of the default normalizer on an English sentence.
  */
 public function testNormalizer()
 {
     $english = Normalizer::factory();
     $greek = Normalizer::factory("Greek");

     $greekWords = explode(" ", "Ο μορφωμένος διαφέρει από τον αμόρφωτο όσο ο ζωντανός από τον νεκρό");
     $englishWords = explode(" ", "When a father gives to his son both laugh when a son gives to his father both cry");

     // Expected from the Greek normalizer for the Greek sentence.
     $this->assertEquals(
         explode(" ", "ο μορφωμενοσ διαφερει απο τον αμορφωτο οσο ο ζωντανοσ απο τον νεκρο"),
         $greek->normalizeAll($greekWords)
     );

     // Expected from the default normalizer for the same Greek sentence.
     $this->assertEquals(
         explode(" ", "ο μορφωμένος διαφέρει από τον αμόρφωτο όσο ο ζωντανός από τον νεκρό"),
         $english->normalizeAll($greekWords)
     );

     // Expected from the default normalizer for the English sentence.
     $this->assertEquals(
         explode(" ", "when a father gives to his son both laugh when a son gives to his father both cry"),
         $english->normalizeAll($englishWords)
     );
 }
Esempio n. 11
0
 /**
  * The same character in two normalization forms differs as raw strings, yet
  * compare() must report the pair as valid.
  */
 function testStringUnicodeDifferentNormalisationEqual()
 {
     $composed = \Normalizer::normalize("ő", \Normalizer::FORM_KC);
     $decomposed = \Normalizer::normalize("ő", \Normalizer::FORM_D);

     // Sanity check: the two encodings really are different byte sequences.
     $this->assertNotEquals($composed, $decomposed);

     $result = $this->compare($composed, $decomposed);
     $this->assertTrue($result->valid);
 }
Esempio n. 12
0
 /**
  * Class constructor.
  *
  * Rejects input that is not valid UTF-8, then stores the result of
  * \Normalizer::normalize() together with its length in characters.
  *
  * @param string $string UTF-8 text.
  *
  * @throws \InvalidArgumentException When the input is not valid UTF-8.
  */
 public function __construct(string $string)
 {
     $isUtf8 = mb_check_encoding($string, 'UTF-8');
     if (!$isUtf8) {
         throw new \InvalidArgumentException('String is not valid UTF-8');
     }

     $normalized = \Normalizer::normalize($string);

     $this->string = $normalized;
     $this->length = mb_strlen($normalized, 'UTF-8');
 }
Esempio n. 13
0
 /**
  * Public factory method.
  *
  * Validates that the input is UTF-8, normalizes it with
  * \Normalizer::normalize() and hands the text plus its character count to
  * the UnicodeString constructor.
  *
  * @param string $string UTF-8 text.
  *
  * @return UnicodeString
  *
  * @throws \InvalidArgumentException When the input is not valid UTF-8.
  */
 public static function of(string $string)
 {
     $isUtf8 = mb_check_encoding($string, 'UTF-8');
     if (!$isUtf8) {
         throw new \InvalidArgumentException('String is not valid UTF-8');
     }

     $normalized = \Normalizer::normalize($string);

     return new UnicodeString($normalized, mb_strlen($normalized, 'UTF-8'));
 }
Esempio n. 14
0
 /**
  * Removes accents from a UTF-8 string.
  *
  * With the intl extension loaded the string is decomposed (NFKD) and the
  * resulting combining marks are removed; those marks have no named HTML
  * entity, so the entity pass below would otherwise leave them in place.
  * Letters without a decomposition (ø, æ, ß, ...) are then handled by
  * encoding to HTML entities and keeping only the base letter(s) of accent
  * and ligature entities.
  *
  * @see https://github.com/alixaxel/phunction/blob/master/phunction/Text.php#L297
  *
  * @param string $string UTF-8 input.
  * @return string Unaccented string; '' when htmlentities() rejects the input.
  */
 public static function unaccent($string)
 {
     if (extension_loaded('intl') === true) {
         $decomposed = \Normalizer::normalize($string, \Normalizer::FORM_KD);
         if (is_string($decomposed)) {
             // Drop the combining marks produced by the decomposition.
             $withoutMarks = preg_replace('~\\p{Mn}+~u', '', $decomposed);
             if (is_string($withoutMarks)) {
                 $string = $withoutMarks;
             }
         }
     }

     $encoded = htmlentities($string, ENT_QUOTES, 'UTF-8');
     if (strpos($encoded, '&') === false) {
         // No entity was produced, so there is nothing left to strip.
         return $encoded;
     }

     // "&eacute;" -> "e", "&aelig;" -> "ae", ...; all other entities are decoded back.
     $baseLetters = preg_replace('~&([a-z]{1,2})(?:acute|caron|cedil|circ|grave|lig|orn|ring|slash|tilde|uml);~i', '$1', $encoded);

     return html_entity_decode($baseLetters, ENT_QUOTES, 'UTF-8');
 }
Esempio n. 15
0
 /**
  * Validates the message encoding and returns the text right-trimmed and
  * normalized to form C.
  *
  * @param string $text Raw message text.
  * @return string Trimmed, normalized text.
  * @throws MessagePostFailureException When the text is not valid in
  *                                     self::ENCODING or normalization fails.
  */
 private function checkAndNormaliseEncoding(string $text) : string
 {
     $hasValidEncoding = mb_check_encoding($text, self::ENCODING);
     if (!$hasValidEncoding) {
         throw new MessagePostFailureException('Message text encoding invalid');
     }

     $normalised = \Normalizer::normalize(rtrim($text), \Normalizer::FORM_C);
     if (false === $normalised) {
         throw new MessagePostFailureException('Failed to normalize message text');
     }

     return $normalised;
 }
Esempio n. 16
0
/**
 * Case-folds a string, normalizes it with Normalizer::NFKC and applies the
 * FC_NFKC_Closure replacement table.
 *
 * The steps run in that order: utf8_new_case_fold(), Normalizer::normalize(),
 * then strtr() with the static table declared below.
 *
 * @param string $text   Text to process (presumably UTF-8, as the function
 *                       name suggests; confirm against callers).
 * @param string $option Passed through unchanged to utf8_new_case_fold();
 *                       its accepted values are defined by that function.
 * @return string The folded, normalized text after the closure replacements.
 */
function utf8_new_case_fold_nfkc($text, $option = 'full')
{
    // Replacement table handed to strtr() below; declared static so it is
    // built only once.
    static $fc_nfkc_closure = array("ͺ" => " ι", "ϒ" => "υ", "ϓ" => "ύ", "ϔ" => "ϋ", "ϲ" => "σ", "Ϲ" => "σ", "ᴬ" => "a", "ᴭ" => "æ", "ᴮ" => "b", "ᴰ" => "d", "ᴱ" => "e", "ᴲ" => "ǝ", "ᴳ" => "g", "ᴴ" => "h", "ᴵ" => "i", "ᴶ" => "j", "ᴷ" => "k", "ᴸ" => "l", "ᴹ" => "m", "ᴺ" => "n", "ᴼ" => "o", "ᴽ" => "ȣ", "ᴾ" => "p", "ᴿ" => "r", "ᵀ" => "t", "ᵁ" => "u", "ᵂ" => "w", "₨" => "rs", "ℂ" => "c", "℃" => "°c", "ℇ" => "ɛ", "℉" => "°f", "ℋ" => "h", "ℌ" => "h", "ℍ" => "h", "ℐ" => "i", "ℑ" => "i", "ℒ" => "l", "ℕ" => "n", "№" => "no", "ℙ" => "p", "ℚ" => "q", "ℛ" => "r", "ℜ" => "r", "ℝ" => "r", "℠" => "sm", "℡" => "tel", "™" => "tm", "ℤ" => "z", "ℨ" => "z", "ℬ" => "b", "ℭ" => "c", "ℰ" => "e", "ℱ" => "f", "ℳ" => "m", "℻" => "fax", "ℾ" => "γ", "ℿ" => "π", "ⅅ" => "d", "㉐" => "pte", "㋌" => "hg", "㋎" => "ev", "㋏" => "ltd", "㍱" => "hpa", "㍳" => "au", "㍵" => "ov", "㍺" => "iu", "㎀" => "pa", "㎁" => "na", "㎂" => "μa", "㎃" => "ma", "㎄" => "ka", "㎅" => "kb", "㎆" => "mb", "㎇" => "gb", "㎊" => "pf", "㎋" => "nf", "㎌" => "μf", "㎐" => "hz", "㎑" => "khz", "㎒" => "mhz", "㎓" => "ghz", "㎔" => "thz", "㎩" => "pa", "㎪" => "kpa", "㎫" => "mpa", "㎬" => "gpa", "㎴" => "pv", "㎵" => "nv", "㎶" => "μv", "㎷" => "mv", "㎸" => "kv", "㎹" => "mv", "㎺" => "pw", "㎻" => "nw", "㎼" => "μw", "㎽" => "mw", "㎾" => "kw", "㎿" => "mw", "㏀" => "kω", "㏁" => "mω", "㏃" => "bq", "㏆" => "c∕kg", "㏇" => "co.", "㏈" => "db", "㏉" => "gy", "㏋" => "hp", "㏍" => "kk", "㏎" => "km", "㏗" => "ph", "㏙" => "ppm", "㏚" => "pr", "㏜" => "sv", "㏝" => "wb", "㏞" => "v∕m", "㏟" => "a∕m", "𝐀" => "a", "𝐁" => "b", "𝐂" => "c", "𝐃" => "d", "𝐄" => "e", "𝐅" => "f", "𝐆" => "g", "𝐇" => "h", "𝐈" => "i", "𝐉" => "j", "𝐊" => "k", "𝐋" => "l", "𝐌" => "m", "𝐍" => "n", "𝐎" => "o", "𝐏" => "p", "𝐐" => "q", "𝐑" => "r", "𝐒" => "s", "𝐓" => "t", "𝐔" => "u", "𝐕" => "v", "𝐖" => "w", "𝐗" => "x", "𝐘" => "y", "𝐙" => "z", "𝐴" => "a", "𝐵" => "b", "𝐶" => "c", "𝐷" => "d", "𝐸" => "e", "𝐹" => "f", "𝐺" => "g", "𝐻" => "h", "𝐼" => "i", "𝐽" => "j", "𝐾" => "k", "𝐿" => "l", "𝑀" => "m", "𝑁" => "n", "𝑂" => 
"o", "𝑃" => "p", "𝑄" => "q", "𝑅" => "r", "𝑆" => "s", "𝑇" => "t", "𝑈" => "u", "𝑉" => "v", "𝑊" => "w", "𝑋" => "x", "𝑌" => "y", "𝑍" => "z", "𝑨" => "a", "𝑩" => "b", "𝑪" => "c", "𝑫" => "d", "𝑬" => "e", "𝑭" => "f", "𝑮" => "g", "𝑯" => "h", "𝑰" => "i", "𝑱" => "j", "𝑲" => "k", "𝑳" => "l", "𝑴" => "m", "𝑵" => "n", "𝑶" => "o", "𝑷" => "p", "𝑸" => "q", "𝑹" => "r", "𝑺" => "s", "𝑻" => "t", "𝑼" => "u", "𝑽" => "v", "𝑾" => "w", "𝑿" => "x", "𝒀" => "y", "𝒁" => "z", "𝒜" => "a", "𝒞" => "c", "𝒟" => "d", "𝒢" => "g", "𝒥" => "j", "𝒦" => "k", "𝒩" => "n", "𝒪" => "o", "𝒫" => "p", "𝒬" => "q", "𝒮" => "s", "𝒯" => "t", "𝒰" => "u", "𝒱" => "v", "𝒲" => "w", "𝒳" => "x", "𝒴" => "y", "𝒵" => "z", "𝓐" => "a", "𝓑" => "b", "𝓒" => "c", "𝓓" => "d", "𝓔" => "e", "𝓕" => "f", "𝓖" => "g", "𝓗" => "h", "𝓘" => "i", "𝓙" => "j", "𝓚" => "k", "𝓛" => "l", "𝓜" => "m", "𝓝" => "n", "𝓞" => "o", "𝓟" => "p", "𝓠" => "q", "𝓡" => "r", "𝓢" => "s", "𝓣" => "t", "𝓤" => "u", "𝓥" => "v", "𝓦" => "w", "𝓧" => "x", "𝓨" => "y", "𝓩" => "z", "𝔄" => "a", "𝔅" => "b", "𝔇" => "d", "𝔈" => "e", "𝔉" => "f", "𝔊" => "g", "𝔍" => "j", "𝔎" => "k", "𝔏" => "l", "𝔐" => "m", "𝔑" => "n", "𝔒" => "o", "𝔓" => "p", "𝔔" => "q", "𝔖" => "s", "𝔗" => "t", "𝔘" => "u", "𝔙" => "v", "𝔚" => "w", "𝔛" => "x", "𝔜" => "y", "𝔸" => "a", "𝔹" => "b", "𝔻" => "d", "𝔼" => "e", "𝔽" => "f", "𝔾" => "g", "𝕀" => "i", "𝕁" => "j", "𝕂" => "k", "𝕃" => "l", "𝕄" => "m", "𝕆" => "o", "𝕊" => "s", "𝕋" => "t", "𝕌" => "u", "𝕍" => "v", "𝕎" => "w", "𝕏" => "x", "𝕐" => "y", "𝕬" => "a", "𝕭" => "b", "𝕮" => "c", "𝕯" => "d", "𝕰" => "e", "𝕱" => "f", "𝕲" => "g", "𝕳" => "h", "𝕴" => "i", "𝕵" => "j", "𝕶" => "k", "𝕷" => "l", "𝕸" => "m", "𝕹" => "n", "𝕺" => "o", "𝕻" => "p", "𝕼" => "q", "𝕽" => "r", "𝕾" => "s", "𝕿" => "t", "𝖀" => "u", "𝖁" => "v", "𝖂" => "w", "𝖃" => "x", "𝖄" => "y", "𝖅" => "z", "𝖠" => "a", "𝖡" => "b", "𝖢" => "c", "𝖣" => "d", "𝖤" => "e", "𝖥" => "f", "𝖦" => "g", "𝖧" => "h", "𝖨" => "i", "𝖩" => "j", "𝖪" => "k", "𝖫" => "l", "𝖬" => "m", "𝖭" => "n", "𝖮" => "o", "𝖯" => "p", "𝖰" => "q", "𝖱" => "r", "𝖲" => "s", 
"𝖳" => "t", "𝖴" => "u", "𝖵" => "v", "𝖶" => "w", "𝖷" => "x", "𝖸" => "y", "𝖹" => "z", "𝗔" => "a", "𝗕" => "b", "𝗖" => "c", "𝗗" => "d", "𝗘" => "e", "𝗙" => "f", "𝗚" => "g", "𝗛" => "h", "𝗜" => "i", "𝗝" => "j", "𝗞" => "k", "𝗟" => "l", "𝗠" => "m", "𝗡" => "n", "𝗢" => "o", "𝗣" => "p", "𝗤" => "q", "𝗥" => "r", "𝗦" => "s", "𝗧" => "t", "𝗨" => "u", "𝗩" => "v", "𝗪" => "w", "𝗫" => "x", "𝗬" => "y", "𝗭" => "z", "𝘈" => "a", "𝘉" => "b", "𝘊" => "c", "𝘋" => "d", "𝘌" => "e", "𝘍" => "f", "𝘎" => "g", "𝘏" => "h", "𝘐" => "i", "𝘑" => "j", "𝘒" => "k", "𝘓" => "l", "𝘔" => "m", "𝘕" => "n", "𝘖" => "o", "𝘗" => "p", "𝘘" => "q", "𝘙" => "r", "𝘚" => "s", "𝘛" => "t", "𝘜" => "u", "𝘝" => "v", "𝘞" => "w", "𝘟" => "x", "𝘠" => "y", "𝘡" => "z", "𝘼" => "a", "𝘽" => "b", "𝘾" => "c", "𝘿" => "d", "𝙀" => "e", "𝙁" => "f", "𝙂" => "g", "𝙃" => "h", "𝙄" => "i", "𝙅" => "j", "𝙆" => "k", "𝙇" => "l", "𝙈" => "m", "𝙉" => "n", "𝙊" => "o", "𝙋" => "p", "𝙌" => "q", "𝙍" => "r", "𝙎" => "s", "𝙏" => "t", "𝙐" => "u", "𝙑" => "v", "𝙒" => "w", "𝙓" => "x", "𝙔" => "y", "𝙕" => "z", "𝙰" => "a", "𝙱" => "b", "𝙲" => "c", "𝙳" => "d", "𝙴" => "e", "𝙵" => "f", "𝙶" => "g", "𝙷" => "h", "𝙸" => "i", "𝙹" => "j", "𝙺" => "k", "𝙻" => "l", "𝙼" => "m", "𝙽" => "n", "𝙾" => "o", "𝙿" => "p", "𝚀" => "q", "𝚁" => "r", "𝚂" => "s", "𝚃" => "t", "𝚄" => "u", "𝚅" => "v", "𝚆" => "w", "𝚇" => "x", "𝚈" => "y", "𝚉" => "z", "𝚨" => "α", "𝚩" => "β", "𝚪" => "γ", "𝚫" => "δ", "𝚬" => "ε", "𝚭" => "ζ", "𝚮" => "η", "𝚯" => "θ", "𝚰" => "ι", "𝚱" => "κ", "𝚲" => "λ", "𝚳" => "μ", "𝚴" => "ν", "𝚵" => "ξ", "𝚶" => "ο", "𝚷" => "π", "𝚸" => "ρ", "𝚹" => "θ", "𝚺" => "σ", "𝚻" => "τ", "𝚼" => "υ", "𝚽" => "φ", "𝚾" => "χ", "𝚿" => "ψ", "𝛀" => "ω", "𝛓" => "σ", "𝛢" => "α", "𝛣" => "β", "𝛤" => "γ", "𝛥" => "δ", "𝛦" => "ε", "𝛧" => "ζ", "𝛨" => "η", "𝛩" => "θ", "𝛪" => "ι", "𝛫" => "κ", "𝛬" => "λ", "𝛭" => "μ", "𝛮" => "ν", "𝛯" => "ξ", "𝛰" => "ο", "𝛱" => "π", "𝛲" => "ρ", "𝛳" => "θ", "𝛴" => "σ", "𝛵" => "τ", "𝛶" => "υ", "𝛷" => "φ", "𝛸" => "χ", "𝛹" => "ψ", "𝛺" => "ω", "𝜍" => "σ", "𝜜" => "α", "𝜝" => "β", "𝜞" => "γ", "𝜟" => 
"δ", "𝜠" => "ε", "𝜡" => "ζ", "𝜢" => "η", "𝜣" => "θ", "𝜤" => "ι", "𝜥" => "κ", "𝜦" => "λ", "𝜧" => "μ", "𝜨" => "ν", "𝜩" => "ξ", "𝜪" => "ο", "𝜫" => "π", "𝜬" => "ρ", "𝜭" => "θ", "𝜮" => "σ", "𝜯" => "τ", "𝜰" => "υ", "𝜱" => "φ", "𝜲" => "χ", "𝜳" => "ψ", "𝜴" => "ω", "𝝇" => "σ", "𝝖" => "α", "𝝗" => "β", "𝝘" => "γ", "𝝙" => "δ", "𝝚" => "ε", "𝝛" => "ζ", "𝝜" => "η", "𝝝" => "θ", "𝝞" => "ι", "𝝟" => "κ", "𝝠" => "λ", "𝝡" => "μ", "𝝢" => "ν", "𝝣" => "ξ", "𝝤" => "ο", "𝝥" => "π", "𝝦" => "ρ", "𝝧" => "θ", "𝝨" => "σ", "𝝩" => "τ", "𝝪" => "υ", "𝝫" => "φ", "𝝬" => "χ", "𝝭" => "ψ", "𝝮" => "ω", "𝞁" => "σ", "𝞐" => "α", "𝞑" => "β", "𝞒" => "γ", "𝞓" => "δ", "𝞔" => "ε", "𝞕" => "ζ", "𝞖" => "η", "𝞗" => "θ", "𝞘" => "ι", "𝞙" => "κ", "𝞚" => "λ", "𝞛" => "μ", "𝞜" => "ν", "𝞝" => "ξ", "𝞞" => "ο", "𝞟" => "π", "𝞠" => "ρ", "𝞡" => "θ", "𝞢" => "σ", "𝞣" => "τ", "𝞤" => "υ", "𝞥" => "φ", "𝞦" => "χ", "𝞧" => "ψ", "𝞨" => "ω", "𝞻" => "σ", "𝟊" => "ϝ");
    // Step 1: case-fold with the sibling helper, forwarding the caller's option.
    $text = utf8_new_case_fold($text, $option);
    // Step 2: convert to NFKC.
    $text = Normalizer::normalize($text, Normalizer::NFKC);
    // Step 3: apply the FC_NFKC_Closure replacements,
    // see http://www.unicode.org/Public/5.0.0/ucd/DerivedNormalizationProps.txt
    $text = strtr($text, $fc_nfkc_closure);
    return $text;
}
Esempio n. 17
0
 /**
  * Wraps a passphrase in a Buffer, NFKD-normalizing it when the Normalizer
  * class is available.
  *
  * Without the Normalizer class, strings detected as UTF-8 are refused and
  * all other strings are wrapped unchanged.
  *
  * @param $string
  * @return BufferInterface
  * @throws \Exception When the string is detected as UTF-8 but cannot be normalized.
  */
 private function normalize($string)
 {
     if (class_exists('Normalizer')) {
         return new Buffer(\Normalizer::normalize($string, \Normalizer::FORM_KD));
     }

     if (mb_detect_encoding($string) === 'UTF-8') {
         throw new \Exception('UTF-8 passphrase is not supported without the PECL intl extension installed.');
     }

     return new Buffer($string);
 }
Esempio n. 18
0
/**
 * Normalizes a UTF-8 string for comparison or storage.
 *
 * - Half-width katakana become full-width (a half-width voiced mark is merged
 *   into a single voiced full-width character), and full-width alphanumerics
 *   and spaces become half-width.
 * - Every line break (CRLF, CR or LF) becomes PHP_EOL.
 * - Text that is not already in NFC (fully or partially decomposed) is
 *   composed to NFC.
 *
 * @param string $str UTF-8 input.
 * @return string Normalized string.
 */
function normalize_string($str)
{
    // Half-width katakana -> full-width (voiced marks merged); full-width
    // alphanumerics and spaces -> half-width.
    $str = mb_convert_kana($str, "KVas", "UTF-8");

    // Unify line breaks to PHP_EOL (in-cell line breaks included).
    // ereg_replace() was removed in PHP 7, so PCRE is used instead.
    $str = preg_replace('/\r\n|\r|\n/', PHP_EOL, $str);

    // Compose to NFC whenever the text is not NFC yet. Testing "is NFD" would
    // miss strings that mix composed and decomposed characters.
    if (!Normalizer::isNormalized($str, Normalizer::FORM_C)) {
        $composed = Normalizer::normalize($str, Normalizer::FORM_C);
        // normalize() returns false on failure; keep the text as it is then.
        if ($composed !== false) {
            $str = $composed;
        }
    }

    return $str;
}
Esempio n. 19
0
 /**
  * Reduces a value to a compact, lower-case, accent-free key.
  *
  * Steps, in order: trim and collapse runs of spaces, remove CR/TAB/LF, trim
  * quote/ampersand/comma characters from both ends, remove combining marks
  * after NFD decomposition, lower-case, delete the listed punctuation and
  * spaces, and finally trim dots from both ends.
  *
  * @param string $value Raw value.
  * @return string Filtered value.
  */
 public function filter($value)
 {
     $collapsed = mb_ereg_replace(' +', ' ', trim($value));
     $singleLine = mb_ereg_replace("[\r\t\n]", "", $collapsed);

     // http://www.asciitable.com/
     $unquoted = trim($singleLine, "\"'&,");

     $decomposed = Normalizer::normalize($unquoted, Normalizer::FORM_D);
     $unaccented = preg_replace('/\\p{M}/u', '', $decomposed);
     $lowered = mb_strtolower($unaccented, "UTF-8");

     // Characters (and the two-character sequence \") deleted everywhere.
     $bye = array(' ', '\\"', '\'', '!', '@', '$', '%', '&', '*', '(', ')', ':', '=', '\'', '/', ';', '`', '<', '>', '[', ']', '?', '\\', ',', '#');
     $stripped = str_replace($bye, '', $lowered);

     return trim($stripped, ".");
 }
Esempio n. 20
0
 /**
  * Decodes HTML entities in every term and, when the Normalizer class is
  * already loaded, normalizes each term to form C.
  *
  * Keys are preserved, so sparse and associative arrays are handled as well
  * as plain lists.
  *
  * @param array $terms Terms to clean.
  * @return array The cleaned terms under their original keys.
  */
 public function filter(array $terms)
 {
     $hasNormalizer = class_exists("Normalizer", false);

     foreach ($terms as $key => $term) {
         // Remove HTML entities.
         $term = html_entity_decode($term, ENT_QUOTES, 'UTF-8');

         // Bring the term to the canonical composed (C) form of UTF-8.
         if ($hasNormalizer) {
             // intl extension (PHP 5.2+)
             $normalized = Normalizer::normalize($term, Normalizer::FORM_C);
             // normalize() returns false on failure; keep the decoded term then.
             if ($normalized !== false) {
                 $term = $normalized;
             }
         }

         $terms[$key] = $term;
     }

     return $terms;
 }
Esempio n. 21
0
 /**
  * Logs a user in with the submitted request input.
  *
  * When the auth response contains an 'error' key, its 'message' is dumped
  * and nothing is returned. Otherwise the user record is built from the
  * response data, stored in the login session, and the client is redirected
  * to the 'home' route with the token set as a cookie.
  *
  * @param Request $request   Incoming request.
  * @param Auth    $authModel Model performing the login call.
  * @return mixed Redirect response on success, null on error.
  */
 public function login(Request $request, Auth $authModel)
 {
     $result = $authModel->loginWithId($request->input());

     // Handle error
     if (array_key_exists('error', $result)) {
         var_dump($result['message']);
         return;
     }

     $payload = $result['data'];
     $user = \Normalizer::user($payload);
     $user['token'] = $payload['token'];

     // Update session
     \Utils::setLoginSession($user);

     return redirect()->route('home')->withCookie('token', $user['token']);
 }
Esempio n. 22
0
 /**
  * Removes non-spacing marks from the NFKD-decomposed form of a string.
  *
  * Without the Normalizer class the input is returned unchanged.
  *
  * Alternatives that were considered:
  * - URLify (https://github.com/jbroadway/urlify)
  * - iconv TRANSLIT (http://stackoverflow.com/a/3542752)
  * - http://stackoverflow.com/questions/3371697/replacing-accented-characters-php
  * - pure PHP normalizer: https://github.com/tchwork/utf8
  *
  * @param string $accent Text to process.
  * @return string|null The input, or the result of preg_replace().
  */
 public function normalize($accent)
 {
     if (!class_exists('Normalizer')) {
         return $accent;
     }

     $decomposed = Normalizer::normalize($accent, Normalizer::FORM_KD);

     return preg_replace('/\\p{Mn}/u', '', $decomposed);
 }
Esempio n. 23
0
 /**
  * Process a string to contain purely basic latin characters
  *
  * This method processes text to remove diacritic marks and translate
  * ligatures into individual characters. Anything left outside the Basic
  * Latin range (U+0000 to U+007F) is removed at the end.
  *
  * Requires the PHP intl extension and ICU. When the Normalizer class is not
  * loaded, or processing fails or yields an empty string, the original
  * string is returned unchanged.
  *
  * @param string $original The string to process
  * @return string
  *
  * @link http://ahinea.com/en/tech/accented-translate.html
  */
 public function normalise($original)
 {
     // Check to make sure the extension is available
     if (false === class_exists('Normalizer', false)) {
         return $original;
     }

     // map European characters onto two characters before removing diacritics
     $doubles = array(
         '@\\x{00c4}@u' => 'AE',
         '@\\x{00d6}@u' => 'OE',
         '@\\x{00dc}@u' => 'UE',
         '@\\x{00e4}@u' => 'ae',
         '@\\x{00f6}@u' => 'oe',
         '@\\x{00fc}@u' => 'ue',
         '@\\x{00f1}@u' => 'ny',
         '@\\x{00ff}@u' => 'yu',
     );
     $string = preg_replace(array_keys($doubles), array_values($doubles), $original);
     if (!is_string($string)) {
         // PCRE failure, e.g. the input is not valid UTF-8.
         return $original;
     }

     // map characters with diacritics on their base-character followed by the diacritical mark
     $string = \Normalizer::normalize($string, \Normalizer::FORM_D);
     if (!is_string($string)) {
         return $original;
     }

     // Remove the marks, spell out letters that have no decomposition, and
     // finally drop everything that is not Basic Latin. The upper bound is
     // \x7f: \x80 is already outside the Basic Latin block.
     $pairs = array(
         '@\\pM@u' => '',
         '@\\x{00c6}@u' => 'AE',
         '@\\x{00e6}@u' => 'ae',
         '@\\x{00df}@u' => 'ss',
         '@\\x{0132}@u' => 'IJ',
         '@\\x{0133}@u' => 'ij',
         '@\\x{0152}@u' => 'OE',
         '@\\x{0153}@u' => 'oe',
         '@\\x{00d0}@u' => 'D',
         '@\\x{0110}@u' => 'D',
         '@\\x{0111}@u' => 'd',
         '@\\x{00f0}@u' => 'd',
         '@\\x{0126}@u' => 'H',
         '@\\x{0127}@u' => 'h',
         '@\\x{0131}@u' => 'i',
         '@\\x{0138}@u' => 'k',
         '@\\x{013f}@u' => 'L',
         '@\\x{0140}@u' => 'l',
         '@\\x{0141}@u' => 'L',
         '@\\x{0142}@u' => 'l',
         '@\\x{0149}@u' => 'n',
         '@\\x{014a}@u' => 'N',
         '@\\x{014b}@u' => 'n',
         '@\\x{00d8}@u' => 'O',
         '@\\x{00f8}@u' => 'o',
         '@\\x{017f}@u' => 's',
         '@\\x{00de}@u' => 'T',
         '@\\x{0166}@u' => 'T',
         '@\\x{00fe}@u' => 't',
         '@\\x{0167}@u' => 't',
         '@[^\\x00-\\x7f]@u' => '',
     );
     $string = preg_replace(array_keys($pairs), array_values($pairs), $string);

     // Allow for possible errors in UTF8-regular-expressions. A strict check
     // is used because empty() would also discard a legitimate "0" result.
     if ($string === null || $string === '') {
         return $original;
     }

     return $string;
 }
Esempio n. 24
0
 /**
  * Creates the instance from another instance or from any string-castable value.
  *
  * Another instance is copied as-is. Any other value is cast to string; when
  * it is non-empty, text that does not match as UTF-8 is converted from
  * $inputEncoding, and the text is normalized when the Normalizer class is
  * already loaded and reports it as not normalized. An empty value leaves
  * the string property untouched.
  *
  * @param mixed  $string        Instance of this class or a string-castable value.
  * @param string $inputEncoding Encoding assumed for input that is not UTF-8.
  */
 public function __construct($string = '', string $inputEncoding = 'ISO-8859-1')
 {
     if ($string instanceof self) {
         $this->string = (string) $string->string;
         return;
     }

     $text = (string) $string;
     if ($text == '') {
         return;
     }

     // An empty pattern with the /u modifier only matches valid UTF-8 subjects.
     $isUtf8 = preg_match('//u', $text);
     if (!$isUtf8) {
         $text = mb_convert_encoding($text, 'UTF-8', $inputEncoding);
     }

     if (class_exists('Normalizer', false) && !\Normalizer::isNormalized($text)) {
         $text = \Normalizer::normalize($text);
     }

     $this->string = (string) $text;
 }
 /**
  * Compares pn::normalize() with in::normalize() for every form, and checks
  * the cases expected to return false.
  *
  * @covers Patchwork\PHP\Override\Normalizer::normalize
  */
 function testNormalize()
 {
     // With the NONE form a string mixing two forms must come back untouched.
     $mixed = in::normalize("déjà", pn::NFC) . in::normalize("훈쇼™", pn::NFD);
     $this->assertSame($mixed, pn::normalize($mixed, pn::NONE));
     $this->assertSame($mixed, in::normalize($mixed, pn::NONE));

     // Reference values produced by the `in` implementation.
     $source = "déjà 훈쇼™";
     $nfd = in::normalize($source, pn::NFD);
     $nfkc = in::normalize($source, pn::NFKC);
     $nfkd = in::normalize($source, pn::NFKD);

     $this->assertSame('', pn::normalize(''));
     $this->assertSame($source, pn::normalize($nfd));
     $this->assertSame($source, pn::normalize($nfd, pn::NFC));
     $this->assertSame($nfd, pn::normalize($source, pn::NFD));
     $this->assertSame($nfkc, pn::normalize($nfd, pn::NFKC));
     $this->assertSame($nfkd, pn::normalize($source, pn::NFKD));

     // Cases asserted to return false.
     $this->assertFalse(pn::normalize($source, -1));
     $this->assertFalse(pn::normalize("ÿ"));
 }
Esempio n. 26
0
/**
 * Lower-cases a string and normalizes it.
 *
 * With the Normalizer class available the string is normalized to form C.
 * Otherwise each distinct character's CRC32 is looked up in the
 * unicode_normalization table and every returned from/to pair (stored
 * URL-encoded) is applied with str_replace().
 *
 * @param object $o Object providing oCase->lc() and V->db_name / V->table_prefix.
 * @param string $s Text to normalize.
 * @return string Normalized text.
 */
function search_index__str_normalize(&$o, $s)
{
    $db =& JFactory::getDBO();

    /* Do lowercase */
    $s = $o->oCase->lc($s);

    /* Use PECL extension */
    if (class_exists('Normalizer')) {
        return Normalizer::normalize($s, Normalizer::FORM_C);
    }

    /* Split into characters and compute one unsigned CRC32 per distinct character */
    preg_match_all("/./u", $s, $matches);
    $checksums = array();
    foreach ($matches[0] as $char) {
        /* Keyed by character; PHP may turn numeric string keys into integers */
        $checksums[$char] = sprintf("%u", crc32($char));
    }
    if (empty($checksums)) {
        return $s;
    }

    /* Fetch the replacement pairs for these characters */
    $table = $o->V->db_name . '.' . $o->V->table_prefix . 'unicode_normalization ';
    $query = 'SELECT `str_from`, `str_to`' . ' FROM ' . $table . ' WHERE `crc32u` IN (' . implode(',', array_values($checksums)) . ')';
    $db->setQuery($query);
    $rows = $db->loadAssocList();
    if (is_null($rows)) {
        $rows = array();
    }

    /* Normalize text */
    foreach ($rows as $row) {
        $s = str_replace(urldecode($row['str_from']), urldecode($row['str_to']), $s);
    }

    return $s;
}
Esempio n. 27
0
/**
$input = Normalizer::normalize($input,Normalizer::FORM_C);
echo "$input|\n";
echo "strlen:".strlen($input)."|\n";
echo "strlen_dec:".strlen(utf8_decode($input))."|\n";
echo "count:".count($input)."|\n";
echo "NFC?:".Normalizer::isNormalized($input,Normalizer::FORM_C)."|\n";
var_dump ( $input);
$input = Normalizer::normalize($input, Normalizer::FORM_D);
echo "$input|\n";
echo "strlen:".strlen($input)."|\n";
echo "strlen_dec:".strlen(utf8_decode($input))."|\n";
echo "count:".count($input)."|\n";
echo "NFC?:".Normalizer::isNormalized($input,Normalizer::FORM_C)."|\n";
var_dump ( $input);
*/
/**
 * Normalizes the input to the requested form and prints diagnostics: a
 * header with the form name, a var_dump of the result, its length after
 * utf8_decode(), and the value returned by count_chars_unicode().
 *
 * @param string $input Text to normalize.
 * @param mixed  $nf    One of the Normalizer::FORM_* constants; any other
 *                      value is printed as given.
 * @return void
 */
function printme($input, $nf)
{
    $input = Normalizer::normalize($input, $nf);

    // Translate the form constant into its conventional name.
    $label = $nf;
    if ($nf == Normalizer::FORM_C) {
        $label = "NFC";
    } elseif ($nf == Normalizer::FORM_D) {
        $label = "NFD";
    } elseif ($nf == Normalizer::FORM_KC) {
        $label = "NFKC";
    } elseif ($nf == Normalizer::FORM_KD) {
        $label = "NFKD";
    }

    echo "/***" . $label . "***" . $input . "***\n";
    var_dump($input);
    echo "strlen_dec:" . strlen(utf8_decode($input)) . "\n";
    echo "count_chars:" . count_chars_unicode($input) . "\n";
    echo "\n";
}
Esempio n. 28
0
 /**
  * Lower-cases a string and normalizes it.
  *
  * With the Normalizer class available the string is normalized to form C.
  * Otherwise each distinct character's CRC32 is looked up in the
  * unicode_normalization table (query debugging is switched off for the
  * lookup and restored afterwards) and every returned from/to pair (stored
  * URL-encoded) is applied with str_replace().
  *
  * @param string $s Text to normalize.
  * @return string Normalized text.
  */
 public function str_normalize($s)
 {
     $s = $this->oCase->lc($s);

     /* Use PECL extension */
     if (class_exists('Normalizer')) {
         return Normalizer::normalize($s, Normalizer::FORM_C);
     }

     /* Split into characters and compute one unsigned CRC32 per distinct character */
     preg_match_all("/./u", $s, $matches);
     $checksums = array();
     foreach ($matches[0] as $char) {
         /* Keyed by character; PHP may turn numeric string keys into integers */
         $checksums[$char] = sprintf("%u", crc32($char));
     }
     if (empty($checksums)) {
         return $s;
     }

     /* Run the lookup with query debugging disabled, then restore the flag */
     $previousDebugFlag = $this->oDb->is_debug_q;
     $this->oDb->is_debug_q = false;
     $this->oDb->select('str_from, str_to');
     $this->oDb->from('unicode_normalization');
     $this->oDb->where_in('crc32u', array_values($checksums));
     $rows = $this->oDb->get()->result_array();
     $this->oDb->is_debug_q = $previousDebugFlag;

     /* Normalize text */
     foreach ($rows as $row) {
         $s = str_replace(urldecode($row['str_from']), urldecode($row['str_to']), $s);
     }

     return $s;
 }
Esempio n. 29
0
 /**
  * Sanitizes a string, replacing whitespace and a few other characters with dashes.
  *
  * Limits the output to alphanumeric characters, underscore (_) and dash (-).
  * Whitespace becomes a dash. Escaped octets (%XX) are preserved, accents are
  * removed, and the result is lower-cased.
  *
  * @param string $string The string to be sanitized.
  * @return string The sanitized string.
  * @throws \InvalidArgumentException When no input is given. The string "0"
  *                                   and numeric zero count as valid input.
  */
 public static function string($string = null)
 {
     // empty() alone would also reject "0", which is a legitimate value.
     if (empty($string) && !is_numeric($string)) {
         throw new \InvalidArgumentException('No input string is given');
     }
     if (is_int($string) || is_float($string)) {
         $string = (string) $string;
     }

     $string = strip_tags($string);
     // Preserve escaped octets.
     $string = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $string);
     // Remove percent signs that are not part of an octet.
     $string = str_replace('%', '', $string);
     // Restore octets.
     $string = preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $string);

     if (function_exists('mb_strtolower')) {
         $string = mb_strtolower($string, 'UTF-8');
     } else {
         $string = strtolower($string);
     }

     // Decompose, then drop the combining marks so only base letters remain.
     $string = preg_replace('/\\p{Mn}/u', '', \Normalizer::normalize($string, \Normalizer::FORM_KD));
     // Keep only the allowed characters, then turn whitespace runs into single dashes.
     $string = preg_replace('/[^%a-z0-9 _-]/', '', $string);
     $string = preg_replace('/\\s+/', '-', $string);
     $string = preg_replace('|-+|', '-', $string);
     $string = trim($string, '-');

     return $string;
 }
Esempio n. 30
0
 /**
  * Applies the requested normalization steps to a string.
  *
  * 'nfc' / 'nfkc' request Unicode normalization, done with the intl
  * Normalizer class when it is loaded and otherwise with the PEAR
  * I18N_UnicodeNormalizer class if it can be included. 'lowercase' applies
  * strtolower(), and 'convmap' (an array) is applied with strtr().
  *
  * @param string $str  Text to normalize.
  * @param array  $opts Options: 'nfc', 'nfkc', 'lowercase', 'convmap'.
  * @return string Normalized text.
  */
 private function normalize($str, $opts)
 {
     if ($opts['nfc'] || $opts['nfkc']) {
         if (!class_exists('Normalizer', false)) {
             // Fallback: PEAR implementation, loaded on demand.
             if (!class_exists('I18N_UnicodeNormalizer', false)) {
                 @(include_once 'I18N/UnicodeNormalizer.php');
             }
             if (class_exists('I18N_UnicodeNormalizer', false)) {
                 $normalizer = new I18N_UnicodeNormalizer();
                 foreach (array('nfc' => 'NFC', 'nfkc' => 'NFKC') as $flag => $formName) {
                     if ($opts[$flag]) {
                         $str = $normalizer->normalize($str, $formName);
                     }
                 }
             }
         } else {
             // intl: only normalize when the string is not in the form yet.
             $forms = array('nfc' => Normalizer::FORM_C, 'nfkc' => Normalizer::FORM_KC);
             foreach ($forms as $flag => $form) {
                 if ($opts[$flag] && !Normalizer::isNormalized($str, $form)) {
                     $str = Normalizer::normalize($str, $form);
                 }
             }
         }
     }

     if ($opts['lowercase']) {
         $str = strtolower($str);
     }

     if ($opts['convmap'] && is_array($opts['convmap'])) {
         $str = strtr($str, $opts['convmap']);
     }

     return $str;
 }