/**
 * Normalizes a UTF-8 string so that equivalent inputs compare equal.
 *
 * Steps, in order:
 *  - half-width katakana become full-width katakana, and a half-width
 *    voiced/semi-voiced sound mark is merged into the preceding kana so the
 *    pair becomes one full-width character;
 *  - full-width alphanumerics and the full-width space become half-width;
 *  - every line break (CRLF, CR or LF) becomes PHP_EOL, including line breaks
 *    embedded inside a cell value;
 *  - the result is converted to Unicode NFC.
 *
 * @param string $str UTF-8 input.
 * @return string The normalized string.
 */
function normalize_string($str)
{
    $str = mb_convert_kana($str, "KVas", "UTF-8");

    // ereg_replace() no longer exists as of PHP 7. preg_replace() scans the
    // subject once, so a PHP_EOL it has just inserted is never rewritten again.
    $str = preg_replace('/\r\n|\r|\n/', PHP_EOL, $str);

    // Test for "not NFC" rather than "is NFD": text that mixes composed and
    // decomposed characters is neither form and must still be composed.
    if (!Normalizer::isNormalized($str, Normalizer::FORM_C)) {
        $composed = Normalizer::normalize($str, Normalizer::FORM_C);
        // normalize() yields false for malformed UTF-8; keep the text as it is then.
        if (is_string($composed)) {
            $str = $composed;
        }
    }

    return $str;
}
/**
 * Initialises the instance from another instance or from a string-castable value.
 *
 * When another instance of this class is given, its stored string is copied
 * as is. Otherwise the value is cast to string; a non-empty value that is not
 * valid UTF-8 is converted from $inputEncoding, and the result is normalized
 * with the intl Normalizer when that class is already loaded. An empty value
 * leaves the stored string untouched.
 *
 * @param mixed  $string        Another instance of this class, or a value castable to string.
 * @param string $inputEncoding Encoding assumed for input that is not valid UTF-8.
 */
public function __construct($string = '', string $inputEncoding = 'ISO-8859-1')
{
    if ($string instanceof self) {
        $this->string = (string) $string->string;

        return;
    }

    $text = (string) $string;
    if ($text == '') {
        return;
    }

    // An empty pattern with the /u modifier matches only if the subject is valid UTF-8.
    $isUtf8 = preg_match('//u', $text);
    if (!$isUtf8) {
        $text = mb_convert_encoding($text, 'UTF-8', $inputEncoding);
    }

    // The second argument disables autoloading: normalization is applied only
    // when the Normalizer class is already available.
    $normalizerLoaded = class_exists('Normalizer', false);
    if ($normalizerLoaded && !\Normalizer::isNormalized($text)) {
        $text = \Normalizer::normalize($text);
    }

    $this->string = (string) $text;
}
/**
 * Applies the Unicode normalization forms enabled in $opts to a string.
 *
 * The intl Normalizer class is used when it exists. Otherwise the method
 * tries to load I18N_UnicodeNormalizer from the include path and uses that;
 * when neither is available the string is returned unchanged. NFC is applied
 * before NFKC when both flags are set.
 *
 * @param string $str  Text to normalize.
 * @param array  $opts Options; the 'nfc' and 'nfkc' entries are read as booleans.
 * @return string The normalized text, or the input when nothing could be applied.
 */
private function normalize($str, $opts)
{
    if (class_exists('Normalizer')) {
        $forms = array(
            'nfc'  => Normalizer::FORM_C,
            'nfkc' => Normalizer::FORM_KC,
        );
        foreach ($forms as $flag => $form) {
            if ($opts[$flag] && !Normalizer::isNormalized($str, $form)) {
                $normalized = Normalizer::normalize($str, $form);
                // normalize() returns false for malformed UTF-8; keep the
                // original text instead of handing false back to the caller.
                if (is_string($normalized)) {
                    $str = $normalized;
                }
            }
        }

        return $str;
    }

    // Fallback implementation; the include is silenced because the package is optional.
    if (!class_exists('I18N_UnicodeNormalizer')) {
        @(include_once 'I18N/UnicodeNormalizer.php');
    }
    if (!class_exists('I18N_UnicodeNormalizer')) {
        return $str;
    }

    $normalizer = new I18N_UnicodeNormalizer();
    if ($opts['nfc']) {
        $str = $normalizer->normalize($str, 'NFC');
    }
    if ($opts['nfkc']) {
        $str = $normalizer->normalize($str, 'NFKC');
    }

    return $str;
}
/**
 * Recursively normalizes every string found in a value.
 *
 * Arrays and objects are walked and each member is filtered in place.
 * For strings: CRLF and lone CR are folded to LF, and strings containing
 * bytes >= 0x80 are brought into the requested normalization form. When
 * normalization fails the string is passed through static::utf8_encode()
 * instead. A string that starts with a combining mark gets
 * $leading_combining prepended so that concatenating it after other text
 * cannot change that text's final character.
 *
 * NOTE(review): the default form is the literal 4; confirm that this is the
 * identifier for NFC in the `n` implementation this file aliases.
 *
 * @param mixed  $var                Value to filter; other types are returned untouched.
 * @param int    $normalization_form Normalization form identifier passed to `n`.
 * @param string $leading_combining  Prefix for strings that start with a combining mark
 *                                   (default: U+25CC DOTTED CIRCLE); '' disables the prefix.
 * @return mixed The filtered value.
 */
static function filter($var, $normalization_form = 4, $leading_combining = "\xE2\x97\x8C")
{
    switch (gettype($var)) {
        case 'array':
            foreach ($var as $k => $v) {
                $var[$k] = static::filter($v, $normalization_form, $leading_combining);
            }
            break;

        case 'object':
            foreach ($var as $k => $v) {
                $var->{$k} = static::filter($v, $normalization_form, $leading_combining);
            }
            break;

        case 'string':
            if (false !== strpos($var, "\r")) {
                // Workaround https://bugs.php.net/65732
                $var = str_replace("\r\n", "\n", $var);
                $var = strtr($var, "\r", "\n");
            }

            // Pure 7-bit strings need no normalization.
            if (preg_match('/[\\x80-\\xFF]/', $var)) {
                if (n::isNormalized($var, $normalization_form)) {
                    // Any non-false value: marks "normalization did not fail".
                    $n = '';
                } else {
                    $n = n::normalize($var, $normalization_form);
                    if (false === $n) {
                        $var = static::utf8_encode($var);
                    } else {
                        $var = $n;
                    }
                }

                // The first-byte test must be against the raw byte 0x80. The
                // previous mis-encoded literal compared against a multi-byte
                // sequence, so two-byte combining marks (U+0300 and up) were
                // never detected.
                if ($var[0] >= "\x80" && false !== $n && isset($leading_combining[0]) && preg_match('/^\\p{Mn}/u', $var)) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
            break;
    }

    return $var;
}
/**
 * Wrapper around PHP's native normalizer from intl
 * (previously a PECL extension, included in the core since PHP 5.3.0).
 * http://php.net/manual/en/normalizer.normalize.php
 *
 * Strings are converted to the Normalizer's default form. Arrays are
 * processed recursively to any depth, so nested arrays never reach the
 * Normalizer as a non-string argument. Empty values are returned as given.
 * A string the Normalizer cannot process (normalize() returns false) becomes
 * the empty string because of the string cast.
 *
 * @param mixed $strings a string or an (optionally nested) array of strings to normalize
 * @return mixed the normalized content, preserving array keys if an array is given
 */
function utf8_normalize_nfc($strings)
{
    if (empty($strings)) {
        return $strings;
    }

    if (is_array($strings)) {
        foreach ($strings as $key => $string) {
            $strings[$key] = utf8_normalize_nfc($string);
        }

        return $strings;
    }

    if (Normalizer::isNormalized($strings)) {
        return $strings;
    }

    return (string) Normalizer::normalize($strings);
}
/**
 * Tells whether a string is already in a given Unicode normal form.
 *
 * @param  string $string The string to inspect.
 * @param  enum $form **OPTIONAL. Default is** `NF_C`. The normal form to check against; one of `NF_C`,
 * `NF_D`, `NF_KC`, and `NF_KD` (see [Summary](#summary)).
 *
 * @return bool `true` if the string appears to be normalized according to the requested form, `false`
 * otherwise.
 */
public static function isNormalized($string, $form = self::NF_C)
{
    assert('is_cstring($string) && is_enum($form)', vs(isset($this), get_defined_vars()));

    // Translate this class's form constant into the one the intl Normalizer expects.
    $nativeForm = self::normFormToNc($form);
    $alreadyNormalized = Normalizer::isNormalized($string, $nativeForm);

    return $alreadyNormalized;
}
/**
 * Converts the input into a filename (without extension) that is valid inside an archive.
 *
 * @param string $filenameWithoutExtension The filename without its extension.
 * @return string The converted name; when the input yields an empty result (for example, it consisted
 *      only of control and whitespace characters), a randomly generated name is returned instead.
 */
public function convertToValidFilenameWithoutExtensionInArchives(string $filenameWithoutExtension) : string
{
    // 1. Unicode-normalize unless the input already is normalized.
    $normalized = \Normalizer::isNormalized($filenameWithoutExtension)
        ? $filenameWithoutExtension
        : \Normalizer::normalize($filenameWithoutExtension);

    // 2. Drop leading/trailing separator characters and every "other" (control etc.) character.
    $stripped = preg_replace('/^\\p{Z}+|\\p{C}+|\\p{Z}+$/u', '', $normalized);

    // 3. Transliterate to the Latin alphabet, then convert to dash-separated form.
    $latin = Transliterator::translateUsingLatinAlphabet($stripped);
    $dasherized = \Stringy\StaticStringy::dasherize($latin);

    // 4. Collapse each run of characters outside [0-9_a-z] into one hyphen and trim hyphens at both ends.
    $hyphenated = preg_replace('/[^0-9_a-z]+/u', '-', $dasherized);
    $trimmed = trim($hyphenated, '-');

    // 5. Enforce the length limit, then avoid names reserved by Windows.
    $truncated = mb_substr($trimmed, 0, static::MAX_LENGTH);
    $asciiString = $this->preventWindowsReserved($truncated);

    if ($asciiString === '') {
        return $this->generateRandomFilename();
    }

    return $asciiString;
}
/**
 * Normalizes a single string.
 *
 * CRLF and lone CR are folded to LF. A string containing bytes >= 0x80 is
 * brought into the requested normalization form; when normalization fails it
 * is passed through u::utf8_encode() instead. A string that starts with a
 * combining mark gets $leading_combining prepended so that concatenating it
 * after other text cannot change that text's final character.
 *
 * NOTE(review): the default form is the literal 4; confirm that this is the
 * identifier for NFC in the `n` implementation this file aliases.
 *
 * @param string $s                  String to filter.
 * @param int    $normalization_form Normalization form identifier passed to `n`.
 * @param string $leading_combining  Prefix for strings that start with a combining mark;
 *                                   '' disables the prefix.
 * @return string The filtered string.
 */
static function filterString($s, $normalization_form = 4, $leading_combining = '◌')
{
    if (false !== strpos($s, "\r")) {
        // Workaround https://bugs.php.net/65732
        $s = str_replace("\r\n", "\n", $s);
        $s = strtr($s, "\r", "\n");
    }

    // Pure 7-bit strings need no normalization.
    if (preg_match('/[\\x80-\\xFF]/', $s)) {
        if (n::isNormalized($s, $normalization_form)) {
            // Any non-false value: marks "normalization did not fail".
            $n = '';
        } else {
            $n = n::normalize($s, $normalization_form);
            if (false === $n) {
                $s = u::utf8_encode($s);
            } else {
                $s = $n;
            }
        }

        // The first-byte test must be against the raw byte 0x80. The previous
        // literal was a replacement character (a three-byte sequence), so
        // two- and three-byte combining marks were never detected.
        if ($s[0] >= "\x80" && false !== $n && isset($leading_combining[0]) && preg_match('/^\\p{Mn}/u', $s)) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $s = $leading_combining . $s;
        }
    }

    return $s;
}
/**
 * Returns true when the input does not violate the rules of the answer field.
 *
 * The input must be in NFKC form. It is then checked by validateRegexp()
 * when isRegExp() reports it as a regular expression, and by
 * validateCharacters() otherwise.
 *
 * @param string $input
 * @return bool
 */
protected function validate(string $input) : bool
{
    if (!\Normalizer::isNormalized($input, \Normalizer::FORM_KC)) {
        return false;
    }

    if ($this->isRegExp($input)) {
        return (bool) $this->validateRegexp($input);
    }

    return (bool) $this->validateCharacters($input);
}
/**
 * Cleans a scalar value as text and optionally validates it against a pattern.
 *
 * Control characters other than TAB, LF, FF, CR and ESC are removed, CRLF and
 * lone CR are folded to LF, and the text is Unicode-normalized. When $args is
 * non-empty, its items are joined with ':' and used as a regular expression
 * that must match the whole text.
 *
 * @param mixed $value Value to clean; non-scalars are rejected.
 * @param array $args  Optional pieces of the validation pattern.
 * @return string|false The cleaned text, or false when the value is rejected.
 */
protected static function get_text(&$value, &$args)
{
    if (!is_scalar($value)) {
        return false;
    }

    $text = preg_replace('/[\\x00-\\x08\\x0B\\x0E-\\x1A\\x1C-\\x1F]+/', '', $value);

    if (false !== strpos($text, "\r")) {
        $text = str_replace("\r\n", "\n", $text);
        $text = strtr($text, "\r", "\n");
    }

    if (!Normalizer::isNormalized($text)) {
        $text = Normalizer::normalize($text);
    }

    if (!isset($args[0])) {
        return $text;
    }

    $pattern = implode(':', $args);
    // "@" is the delimiter used below: escape every "@" that is not already escaped.
    $pattern = preg_replace('/(?<!\\\\)((?:\\\\\\\\)*)@/', '$1\\@', $pattern);

    return preg_match("@^(?:{$pattern})\$@Dsu", $text) ? $text : false;
}
/**
 * Upper-cases a UTF-8 string and returns it in the requested normalization form.
 *
 * @param string $s    UTF-8 text.
 * @param int    $form Normalization form identifier understood by `n`.
 * @return string|false The upper-cased text; whatever n::normalize() returns when
 *                      the upper-cased text was not already in that form.
 */
static function strtoupper($s, $form = n::NFC)
{
    $s = mb_strtoupper($s, 'UTF-8');

    // The upper-cased text is re-normalized only when it is not already in the requested form.
    return n::isNormalized($s, $form) ? $s : n::normalize($s, $form);
}
/**
 * Converts a file name's character encoding to match the client OS file system.
 *
 * @param string $name File name
 * @return string The name converted to $this->_clientOsEncoding
 */
protected function _convertFilename($name)
{
    // Workaround for NC3 being used on a Mac: the Mac file system stores names
    // in NFD, where a kana with a voiced sound mark becomes two characters.
    // Convert such names to NFC.
    if (class_exists('Normalizer')) {
        // Check for "not NFC" instead of "is NFD" so that names which are only
        // partly decomposed are composed as well.
        if (!Normalizer::isNormalized($name, Normalizer::FORM_C)) {
            $composed = Normalizer::normalize($name, Normalizer::FORM_C);
            // normalize() returns false when the name is not valid UTF-8;
            // leave the name untouched in that case.
            if (is_string($composed)) {
                $name = $composed;
            }
        }
    }

    $name = mb_convert_encoding($name, $this->_clientOsEncoding, 'auto');

    return $name;
}
/**
 * Checks whether the last path section of the given path exists in NFC or NFD form
 * and returns the form that exists. If no existing path is found, returns null.
 *
 * Every outcome is recorded in $this->namesCache under the original full path;
 * when nothing exists, the original full path itself is cached.
 *
 * @param string $basePath base path to check
 * @param string $lastSection last section of the path to check for NFD/NFC variations
 *
 * @return string|null original or converted path, or null if none of the forms was found
 */
private function findPathToUseLastSection($basePath, $lastSection)
{
    $fullPath = $basePath . $lastSection;

    // Nothing to convert, or the path exists as given.
    if ($lastSection === '' || $this->isAscii($lastSection) || $this->storage->file_exists($fullPath)) {
        $this->namesCache[$fullPath] = $fullPath;

        return $fullPath;
    }

    // Try the opposite normalization form of the one the section is in.
    $otherForm = \Normalizer::isNormalized($lastSection, \Normalizer::FORM_C)
        ? \Normalizer::FORM_D
        : \Normalizer::FORM_C;
    $otherFormPath = \Normalizer::normalize($lastSection, $otherForm);

    // normalize() returns false for malformed UTF-8. Concatenating false would
    // produce $basePath itself, and the parent directory could then be reported
    // as the file's path, so a failed conversion is treated as "not found".
    if (is_string($otherFormPath)) {
        $otherFullPath = $basePath . $otherFormPath;
        if ($this->storage->file_exists($otherFullPath)) {
            $this->namesCache[$fullPath] = $otherFullPath;

            return $otherFullPath;
        }
    }

    // return original path, file did not exist at all
    $this->namesCache[$fullPath] = $fullPath;

    return null;
}
<?php
// Diagnostic script: reports the detected encoding of the "input" request
// parameter and whether it is already in each Unicode normalization form,
// then prints it in every form via printme().
// Booleans are concatenated directly, so true prints as "1" and false as "".
$input = $_REQUEST['input'];
echo "Encoding: " . mb_detect_encoding($input) . "\n";
echo " is normalized NFC? " . Normalizer::isNormalized($input, Normalizer::FORM_C) . "\n";
// Test $input itself; the check was previously wrapped around its own boolean
// result, which reported on that boolean instead of on the input.
echo " is normalized NFD? " . Normalizer::isNormalized($input, Normalizer::FORM_D) . "\n";
echo " is normalized NFKC? " . Normalizer::isNormalized($input, Normalizer::FORM_KC) . "\n";
echo " is normalized NFKD? " . Normalizer::isNormalized($input, Normalizer::FORM_KD) . "\n";
printme($input, Normalizer::FORM_C);
printme($input, Normalizer::FORM_D);
printme($input, Normalizer::FORM_KC);
printme($input, Normalizer::FORM_KD);
/**
$input = Normalizer::normalize($input,Normalizer::FORM_C);
echo "$input|\n";
echo "strlen:".strlen($input)."|\n";
echo "strlen_dec:".strlen(utf8_decode($input))."|\n";
echo "count:".count($input)."|\n";
echo "NFC?:".Normalizer::isNormalized($input,Normalizer::FORM_C)."|\n";
var_dump ( $input);
$input = Normalizer::normalize($input, Normalizer::FORM_D);
echo "$input|\n";
echo "strlen:".strlen($input)."|\n";
echo "strlen_dec:".strlen(utf8_decode($input))."|\n";
echo "count:".count($input)."|\n";
echo "NFC?:".Normalizer::isNormalized($input,Normalizer::FORM_C)."|\n";
var_dump ( $input);
*/
function printme($input, $nf) {
    $input = Normalizer::normalize($input, $nf);
/**
 * Returns the given string in normalized Unicode form.
 *
 * A string that is already normalized is returned untouched. When the
 * Normalizer fails (null or false result) a warning is logged and the
 * original value is returned.
 *
 * @param string $value a possibly non-normalized string
 * @return bool|string
 */
public static function normalizeUnicode($value)
{
    if (!Normalizer::isNormalized($value)) {
        $converted = Normalizer::normalize($value);
        if ($converted !== null && $converted !== false) {
            return $converted;
        }

        \OC::$server->getLogger()->warning('normalizing failed for "' . $value . '"', ['app' => 'core']);
    }

    return $value;
}
/**
 * Returns true if the input is a valid filename.
 *
 * The name must be Unicode-normalized, must not contain control characters or
 * any of "*./:<>?\| in its base name, must not start or end its base name
 * with a separator character, must not use a reserved device name (CON, PRN,
 * AUX, NUL, LPT1-9, COM1-9) as base name, and must end with one of the
 * allowed extensions. Matching is case-insensitive.
 *
 * @see https://github.com/esperecyan/dictionary/blob/master/dictionary.md#valid-filename
 * @param string $input
 * @return bool
 */
protected function validateFilename(string $input) : bool
{
    if (!\Normalizer::isNormalized($input)) {
        return false;
    }

    // With a field name, only that field's extensions are allowed; otherwise
    // the extensions of all fields are. array_values() drops the keys before
    // spreading: string keys would be treated as named arguments on PHP 8,
    // which array_merge() rejects.
    $extensions = $this->fieldName
        ? FilenameValidator::EXTENSIONS[$this->fieldName]
        : array_merge(...array_values(FilenameValidator::EXTENSIONS));

    $pattern = '/^(?!(CON|PRN|AUX|NUL|(LPT|COM)[1-9]|\\p{Z}.*|.*\\p{Z})\\.)[^\\p{C}"*.\\/:<>?\\\\|]+\\.('
        . implode('|', $extensions)
        . ')$/ui';

    return preg_match($pattern, $input) === 1;
}
/**
 * Normalizes a string according to the given options.
 *
 * Steps, in order:
 *  - Unicode normalization to NFC and/or NFKC, using the intl Normalizer when
 *    it is already loaded and I18N_UnicodeNormalizer otherwise (skipped when
 *    neither is available);
 *  - lower-casing when 'lowercase' is set;
 *  - character replacement through strtr() when 'convmap' is a non-empty array.
 *
 * @param string $str  Text to normalize (expected to be UTF-8).
 * @param array  $opts Options; reads 'nfc', 'nfkc', 'lowercase' and 'convmap'.
 * @return string The processed text.
 */
private function normalize($str, $opts)
{
    if ($opts['nfc'] || $opts['nfkc']) {
        if (class_exists('Normalizer', false)) {
            if ($opts['nfc'] && !Normalizer::isNormalized($str, Normalizer::FORM_C)) {
                $normalized = Normalizer::normalize($str, Normalizer::FORM_C);
                // normalize() returns false for malformed UTF-8; keep the text then.
                if (is_string($normalized)) {
                    $str = $normalized;
                }
            }
            if ($opts['nfkc'] && !Normalizer::isNormalized($str, Normalizer::FORM_KC)) {
                $normalized = Normalizer::normalize($str, Normalizer::FORM_KC);
                if (is_string($normalized)) {
                    $str = $normalized;
                }
            }
        } else {
            // Fallback implementation; the include is silenced because the package is optional.
            if (!class_exists('I18N_UnicodeNormalizer', false)) {
                @(include_once 'I18N/UnicodeNormalizer.php');
            }
            if (class_exists('I18N_UnicodeNormalizer', false)) {
                $normalizer = new I18N_UnicodeNormalizer();
                if ($opts['nfc']) {
                    $str = $normalizer->normalize($str, 'NFC');
                }
                if ($opts['nfkc']) {
                    $str = $normalizer->normalize($str, 'NFKC');
                }
            }
        }
    }

    if ($opts['lowercase']) {
        // strtolower() works byte by byte and does not lower-case non-ASCII
        // letters; prefer the multibyte variant when mbstring is available.
        $str = function_exists('mb_strtolower') ? mb_strtolower($str, 'UTF-8') : strtolower($str);
    }

    if ($opts['convmap'] && is_array($opts['convmap'])) {
        $str = strtr($str, $opts['convmap']);
    }

    return $str;
}
/**
 * Publishes a post to Twitter through Codebird and records the outcome.
 *
 * Nothing happens when the post has no image, no text and no URL. The URL is
 * only consulted for that emptiness check; it is not appended to the status.
 * The status text is converted to NFC before sending. On HTTP 200 the post is
 * flagged as published in the posts table together with the returned id; any
 * other status is echoed and appended to $this->errors.
 *
 * @param array $post Reads the keys 'image', 'text', 'url', 'image_file', 'date' and 'type'.
 * @return void
 */
public function postTW($post)
{
    $nothingToPost = $post['image'] == 0 && $post['text'] == '' && $post['url'] == '';
    if ($nothingToPost) {
        return;
    }

    // require codebird
    require_once LIB_DIR . 'codebird-php-develop/src/codebird.php';
    \Codebird\Codebird::setConsumerKey(TW_API_KEY, TW_API_SECRET);
    $client = \Codebird\Codebird::getInstance();
    $client->setToken(TW_ACCESS_TOKEN, TW_ACCESS_TOKEN_SECRET);

    $statusText = $post['text'];
    if (!Normalizer::isNormalized($statusText, Normalizer::FORM_C)) {
        $statusText = Normalizer::normalize($statusText, Normalizer::FORM_C);
    }
    $params = ['status' => $statusText];

    if ($post['image'] > 0) {
        $params['media[]'] = $post['image_file'];
        $reply = $client->statuses_updateWithMedia($params);
    } else {
        $reply = $client->statuses_update($params);
    }

    if ($reply->httpstatus != 200) {
        echo 'Twitter failed: ' . print_r($reply, true) . PHP_EOL;
        $this->errors[] = ['type' => 'tw', 'message' => 'failed posting', 'reply' => print_r($reply, true)];

        return;
    }

    $sql = "update posts set published = 1, social_id = ? where `date` = ? and `type` = ?";
    $this->dsp->db->Execute($sql, $reply->id, $post['date'], $post['type']);
    echo 'Twitter success: post id ' . $reply->id . PHP_EOL;
}