Beispiel #1
0
/**
 * Normalizes a UTF-8 string: character width, line endings and Unicode form.
 *
 * @param string $str UTF-8 text to normalize.
 * @return string The normalized text. If Unicode normalization fails (for
 *                example on invalid UTF-8) the text is returned without the
 *                NFC step applied.
 */
function normalize_string($str)
{
    // K+V: half-width katakana -> full-width, merging a following voiced
    // sound mark into a single character. a: full-width alphanumerics ->
    // half-width. s: full-width space -> half-width space.
    $str = mb_convert_kana($str, "KVas", "UTF-8");
    // Unify every line-ending style to PHP_EOL (in-cell line breaks too).
    // preg_replace replaces ereg_replace, which was removed in PHP 7.
    $str = preg_replace('/\r\n|\r|\n/', PHP_EOL, $str);
    // Convert anything that is not already NFC. Testing for "is NFD" would
    // miss strings mixing composed and decomposed characters.
    if (!Normalizer::isNormalized($str, Normalizer::FORM_C)) {
        $normalized = Normalizer::normalize($str, Normalizer::FORM_C);
        if (is_string($normalized)) {
            $str = $normalized;
        }
    }
    return $str;
}
Beispiel #2
0
 /**
  * Initializes the wrapped string from another instance or from any value
  * that can be cast to string.
  *
  * Input that is not valid UTF-8 is converted from $inputEncoding; when the
  * intl Normalizer class is already loaded, the text is also brought into
  * Normalizer's default form. An empty input leaves the property untouched.
  *
  * @param mixed  $string        Another instance of this class or a string-castable value.
  * @param string $inputEncoding Encoding assumed for input that is not valid UTF-8.
  */
 public function __construct($string = '', string $inputEncoding = 'ISO-8859-1')
 {
     if ($string instanceof self) {
         // Copy the payload of the other instance as-is.
         $this->string = (string) $string->string;
         return;
     }

     $text = (string) $string;
     if ('' === $text) {
         return;
     }

     // An empty pattern with the "u" modifier only matches valid UTF-8.
     $isUtf8 = preg_match('//u', $text);
     if (!$isUtf8) {
         $text = mb_convert_encoding($text, 'UTF-8', $inputEncoding);
     }

     // class_exists(..., false): never trigger autoloading for Normalizer.
     $canNormalize = class_exists('Normalizer', false);
     if ($canNormalize && !\Normalizer::isNormalized($text)) {
         $text = \Normalizer::normalize($text);
     }

     $this->string = (string) $text;
 }
Beispiel #3
0
 /**
  * Applies the Unicode normalization forms requested in $opts to $str.
  *
  * Uses the intl Normalizer class when it exists; otherwise tries to load
  * I18N_UnicodeNormalizer. When neither is available the string is returned
  * unchanged.
  *
  * @param string $str  Text to normalize.
  * @param array  $opts Flags; the 'nfc' and 'nfkc' entries are read.
  * @return mixed The text after the requested normalizations.
  */
 private function normalize($str, $opts)
 {
     if (class_exists('Normalizer')) {
         // NFC is applied before NFKC, each only when requested and needed.
         $forms = array('nfc' => Normalizer::FORM_C, 'nfkc' => Normalizer::FORM_KC);
         foreach ($forms as $flag => $form) {
             if ($opts[$flag] && !Normalizer::isNormalized($str, $form)) {
                 $str = Normalizer::normalize($str, $form);
             }
         }
         return $str;
     }

     // Fallback: attempt to load the I18N_UnicodeNormalizer class.
     if (!class_exists('I18N_UnicodeNormalizer')) {
         @(include_once 'I18N/UnicodeNormalizer.php');
     }
     if (!class_exists('I18N_UnicodeNormalizer')) {
         return $str;
     }

     $fallback = new I18N_UnicodeNormalizer();
     foreach (array('nfc' => 'NFC', 'nfkc' => 'NFKC') as $flag => $formName) {
         if ($opts[$flag]) {
             $str = $fallback->normalize($str, $formName);
         }
     }
     return $str;
 }
Beispiel #4
0
 /**
  * Recursively normalizes every string found in a value.
  *
  * Arrays and objects are walked element by element; strings get their line
  * endings unified to "\n" and, when they contain non-ASCII bytes, are
  * normalized to $normalization_form. Other types are returned untouched.
  *
  * @param mixed  $var                Value to filter.
  * @param int    $normalization_form Form passed to n::isNormalized()/n::normalize().
  * @param string $leading_combining  Prefix inserted before a string that starts with
  *                                   a combining mark; an empty string disables this.
  * @return mixed The filtered value.
  */
 static function filter($var, $normalization_form = 4, $leading_combining = '◌')
 {
     switch (gettype($var)) {
         case 'array':
             foreach ($var as $k => $v) {
                 $var[$k] = static::filter($v, $normalization_form, $leading_combining);
             }
             break;
         case 'object':
             foreach ($var as $k => $v) {
                 $var->{$k} = static::filter($v, $normalization_form, $leading_combining);
             }
             break;
         case 'string':
             if (false !== strpos($var, "\r")) {
                 // Workaround https://bugs.php.net/65732
                 $var = str_replace("\r\n", "\n", $var);
                 $var = strtr($var, "\r", "\n");
             }
             // Pure ASCII needs no normalization.
             if (preg_match('/[\\x80-\\xFF]/', $var)) {
                 if (n::isNormalized($var, $normalization_form)) {
                     // '' (not false) marks "already normalized, still valid UTF-8".
                     $n = '';
                 } else {
                     $n = n::normalize($var, $normalization_form);
                     if (false === $n) {
                         // Normalization failed: re-encode via utf8_encode().
                         $var = static::utf8_encode($var);
                     } else {
                         $var = $n;
                     }
                 }
                 // The first-byte test must compare against the raw byte 0x80:
                 // a multi-byte literal here would make the check fail for the
                 // lead bytes of the most common combining marks (0xCC, 0xCD).
                 if ($var[0] >= "\x80" && false !== $n && isset($leading_combining[0]) && preg_match('/^\\p{Mn}/u', $var)) {
                     // Prevent leading combining chars
                     // for NFC-safe concatenations.
                     $var = $leading_combining . $var;
                 }
             }
             break;
     }
     return $var;
 }
Beispiel #5
0
 /**
  * Wrapper around intl's native Normalizer (a PECL extension before it was
  * bundled with PHP 5.3.0).
  * http://php.net/manual/en/normalizer.normalize.php
  *
  * Accepts a single string or an array; arrays are processed up to two
  * levels deep and their keys are preserved. Empty input is returned as-is.
  *
  * @param	mixed	$strings	a string or an array of strings to normalize
  * @return	mixed				the normalized content, preserving array keys if array given.
  */
 function utf8_normalize_nfc($strings)
 {
     if (empty($strings)) {
         return $strings;
     }

     // Normalizes one value; already-normalized values pass through untouched.
     $toNfc = function ($text) {
         return Normalizer::isNormalized($text) ? $text : (string) Normalizer::normalize($text);
     };

     if (!is_array($strings)) {
         return $toNfc($strings);
     }

     foreach ($strings as $key => $entry) {
         if (!is_array($entry)) {
             $strings[$key] = $toNfc($entry);
             continue;
         }
         foreach ($entry as $innerKey => $innerValue) {
             $strings[$key][$innerKey] = $toNfc($innerValue);
         }
     }

     return $strings;
 }
Beispiel #6
0
 /**
  * Determines if a string is normalized according to a specified Unicode normal form.
  *
  * Thin wrapper: the form is translated with `self::normFormToNc()` and the actual check is delegated to
  * `Normalizer::isNormalized()`.
  *
  * @param  string $string The string to be looked into.
  * @param  enum $form **OPTIONAL. Default is** `NF_C`. The Unicode normal form to be verified against. The
  * possible normal forms are `NF_C`, `NF_D`, `NF_KC`, and `NF_KD` (see [Summary](#summary)).
  *
  * @return bool `true` if the string appears to be normalized according to the normal form specified, `false`
  * otherwise.
  */
 public static function isNormalized($string, $form = self::NF_C)
 {
     // Argument check, handed to assert() as a code string together with the result of the vs() helper.
     // NOTE(review): string assertions are deprecated since PHP 7.2 and are not evaluated as code in
     // PHP 8 - confirm the targeted PHP version.
     assert('is_cstring($string) && is_enum($form)', vs(isset($this), get_defined_vars()));
     return Normalizer::isNormalized($string, self::normFormToNc($form));
 }
 /**
  * Converts the input into a file name (without extension) that is valid inside archives.
  *
  * @param string $filenameWithoutExtension File name without its extension.
  * @return string The converted name. When nothing is left after conversion (for example
  *                when the input consisted only of control or whitespace characters),
  *                a randomly generated name is returned instead.
  */
 public function convertToValidFilenameWithoutExtensionInArchives(string $filenameWithoutExtension) : string
 {
     // Bring the name into Normalizer's default form when it is not already.
     $normalized = \Normalizer::isNormalized($filenameWithoutExtension)
         ? $filenameWithoutExtension
         : \Normalizer::normalize($filenameWithoutExtension);

     // Drop leading/trailing separators and every "other" (control, format, ...) character.
     $stripped = preg_replace('/^\\p{Z}+|\\p{C}+|\\p{Z}+$/u', '', $normalized);

     $latin = Transliterator::translateUsingLatinAlphabet($stripped);
     $dasherized = \Stringy\StaticStringy::dasherize($latin);

     // Collapse anything outside [0-9_a-z] into single dashes and trim them at both ends.
     $slug = trim(preg_replace('/[^0-9_a-z]+/u', '-', $dasherized), '-');

     $asciiString = $this->preventWindowsReserved(mb_substr($slug, 0, static::MAX_LENGTH));
     if ($asciiString === '') {
         return $this->generateRandomFilename();
     }
     return $asciiString;
 }
Beispiel #8
0
 /**
  * Unifies line endings to "\n" and normalizes a string that contains non-ASCII bytes.
  *
  * @param string $s                  String to filter.
  * @param int    $normalization_form Form passed to n::isNormalized()/n::normalize().
  * @param string $leading_combining  Prefix inserted before a string that starts with
  *                                   a combining mark; an empty string disables this.
  * @return string The filtered string.
  */
 static function filterString($s, $normalization_form = 4, $leading_combining = '◌')
 {
     if (false !== strpos($s, "\r")) {
         // Workaround https://bugs.php.net/65732
         $s = str_replace("\r\n", "\n", $s);
         $s = strtr($s, "\r", "\n");
     }
     // Pure ASCII needs no normalization.
     if (preg_match('/[\\x80-\\xFF]/', $s)) {
         if (n::isNormalized($s, $normalization_form)) {
             // '' (not false) marks "already normalized, still valid UTF-8".
             $n = '';
         } else {
             $n = n::normalize($s, $normalization_form);
             if (false === $n) {
                 // Normalization failed: re-encode via utf8_encode().
                 $s = u::utf8_encode($s);
             } else {
                 $s = $n;
             }
         }
         // The first-byte test must compare against the raw byte 0x80:
         // a multi-byte literal here would make the check fail for the
         // lead bytes of the most common combining marks (0xCC, 0xCD).
         if ($s[0] >= "\x80" && false !== $n && isset($leading_combining[0]) && preg_match('/^\\p{Mn}/u', $s)) {
             // Prevent leading combining chars
             // for NFC-safe concatenations.
             $s = $leading_combining . $s;
         }
     }
     return $s;
 }
 /**
  * Returns true when the input does not violate the rules of the answer field.
  *
  * The input must be NFKC-normalized; it is then checked either as a regular
  * expression or as plain characters, depending on isRegExp().
  *
  * @param string $input
  * @return bool
  */
 protected function validate(string $input) : bool
 {
     if (!\Normalizer::isNormalized($input, \Normalizer::FORM_KC)) {
         return false;
     }

     if ($this->isRegExp($input)) {
         return (bool) $this->validateRegexp($input);
     }

     return (bool) $this->validateCharacters($input);
 }
Beispiel #10
0
 /**
  * Cleans a scalar value as text and optionally validates it against a pattern.
  *
  * Control characters are stripped (tab, LF, FF, CR and ESC are kept), line
  * endings are unified to "\n" and the text is normalized with Normalizer.
  * When $args is non-empty its items are joined with ':' into a regular
  * expression that the whole text must match.
  *
  * @param mixed $value Value to clean; non-scalars are rejected.
  * @param array $args  Optional pattern fragments.
  * @return mixed The cleaned text, or false when $value is not scalar or the pattern does not match.
  */
 protected static function get_text(&$value, &$args)
 {
     if (!is_scalar($value)) {
         return false;
     }

     $result = preg_replace('/[\\x00-\\x08\\x0B\\x0E-\\x1A\\x1C-\\x1F]+/', '', $value);

     if (false !== strpos($result, "\r")) {
         $result = strtr(str_replace("\r\n", "\n", $result), "\r", "\n");
     }

     if (!Normalizer::isNormalized($result)) {
         $result = Normalizer::normalize($result);
     }

     if (!isset($args[0])) {
         return $result;
     }

     // Escape every unescaped "@", since "@" is the delimiter of the final pattern.
     $rx = preg_replace('/(?<!\\\\)((?:\\\\\\\\)*)@/', '$1\\@', implode(':', $args));

     return preg_match("@^(?:{$rx})\$@Dsu", $result) ? $result : false;
 }
 /**
  * Upper-cases a UTF-8 string and makes sure the result is in the given normalization form.
  *
  * @param string $s    UTF-8 text.
  * @param int    $form Normalization form passed to n::isNormalized()/n::normalize().
  * @return mixed The upper-cased text, normalized only when case conversion left it unnormalized.
  */
 static function strtoupper($s, $form = n::NFC)
 {
     $upper = mb_strtoupper($s, 'UTF-8');

     return n::isNormalized($upper, $form) ? $upper : n::normalize($upper, $form);
 }
Beispiel #12
0
 /**
  * Converts a file name to the character encoding of the client OS file system.
  *
  * @param string $name File name.
  * @return string
  */
 protected function _convertFilename($name)
 {
     // Names coming from a Mac file system are decomposed (NFD: a voiced kana
     // becomes two code points); bring them to NFC before converting.
     // The check is "not NFC" rather than "is NFD" so that names mixing both
     // forms are converted as well.
     if (class_exists('Normalizer')) {
         if (!Normalizer::isNormalized($name, Normalizer::FORM_C)) {
             $normalized = Normalizer::normalize($name, Normalizer::FORM_C);
             // normalize() fails on input that is not valid UTF-8; keep the
             // original name in that case and let mb_convert_encoding handle it.
             if (is_string($normalized)) {
                 $name = $normalized;
             }
         }
     }
     $name = mb_convert_encoding($name, $this->_clientOsEncoding, 'auto');
     return $name;
 }
Beispiel #13
0
 /**
  * Resolves the last section of a path to whichever Unicode form (NFC or NFD)
  * actually exists in the storage, and records the result in the names cache.
  *
  * @param string $basePath base path to check
  * @param string $lastSection last section of the path to check for NFD/NFC variations
  *
  * @return string|null original or converted path, or null if none of the forms was found
  */
 private function findPathToUseLastSection($basePath, $lastSection)
 {
     $fullPath = $basePath . $lastSection;

     // Empty or ASCII sections have no alternate form; an existing path wins as-is.
     $usableAsIs = $lastSection === ''
         || $this->isAscii($lastSection)
         || $this->storage->file_exists($fullPath);
     if ($usableAsIs) {
         $this->namesCache[$fullPath] = $fullPath;
         return $fullPath;
     }

     // Try the opposite form: NFC input is retried as NFD, anything else as NFC.
     $targetForm = \Normalizer::isNormalized($lastSection, \Normalizer::FORM_C)
         ? \Normalizer::FORM_D
         : \Normalizer::FORM_C;
     $otherFullPath = $basePath . \Normalizer::normalize($lastSection, $targetForm);

     $found = $this->storage->file_exists($otherFullPath);

     // When neither form exists the original path is cached, but null is returned.
     $this->namesCache[$fullPath] = $found ? $otherFullPath : $fullPath;

     return $found ? $otherFullPath : null;
 }
Beispiel #14
0
<?php

// Read the text to inspect from the request. A missing or non-string
// parameter is treated as an empty string so the Normalizer calls below
// always receive a string.
$input = (isset($_REQUEST['input']) && is_string($_REQUEST['input'])) ? $_REQUEST['input'] : '';
echo "Encoding: " . mb_detect_encoding($input) . "\n";
// Each check prints "1" when the input is in that form and nothing otherwise.
echo " is normalized NFC? " . Normalizer::isNormalized($input, Normalizer::FORM_C) . "\n";
// Check the input itself against NFD (not the boolean result of another check).
echo " is normalized NFD? " . Normalizer::isNormalized($input, Normalizer::FORM_D) . "\n";
echo " is normalized NFKC? " . Normalizer::isNormalized($input, Normalizer::FORM_KC) . "\n";
echo " is normalized NFKD? " . Normalizer::isNormalized($input, Normalizer::FORM_KD) . "\n";
// Print the input converted to each of the four forms.
printme($input, Normalizer::FORM_C);
printme($input, Normalizer::FORM_D);
printme($input, Normalizer::FORM_KC);
printme($input, Normalizer::FORM_KD);
/**
$input = Normalizer::normalize($input,Normalizer::FORM_C);
echo "$input|\n";
echo "strlen:".strlen($input)."|\n";
echo "strlen_dec:".strlen(utf8_decode($input))."|\n";
echo "count:".count($input)."|\n";
echo "NFC?:".Normalizer::isNormalized($input,Normalizer::FORM_C)."|\n";
var_dump ( $input);
$input = Normalizer::normalize($input, Normalizer::FORM_D);
echo "$input|\n";
echo "strlen:".strlen($input)."|\n";
echo "strlen_dec:".strlen(utf8_decode($input))."|\n";
echo "count:".count($input)."|\n";
echo "NFC?:".Normalizer::isNormalized($input,Normalizer::FORM_C)."|\n";
var_dump ( $input);
*/
function printme($input, $nf)
{
    $input = Normalizer::normalize($input, $nf);
Beispiel #15
0
	/**
	 * Brings a unicode string into Normalizer's default normalization form.
	 *
	 * @param string $value a possibly not normalized string
	 * @return string the normalized string, or $value itself when it is
	 *                already normalized or when normalization fails (a
	 *                warning is logged in the latter case)
	 */
	public static function normalizeUnicode($value) {
		if (!Normalizer::isNormalized($value)) {
			$normalizedValue = Normalizer::normalize($value);
			$failed = $normalizedValue === null || $normalizedValue === false;
			if (!$failed) {
				return $normalizedValue;
			}

			// Fall through and hand back the original value after logging.
			\OC::$server->getLogger()->warning('normalizing failed for "' . $value . '"', ['app' => 'core']);
		}

		return $value;
	}
 /**
  * Returns true when the input is a valid file name.
  *
  * The name must be normalized and match the file-name pattern, whose allowed
  * extensions come from FilenameValidator::EXTENSIONS (those of the current
  * field when $this->fieldName is set, otherwise all of them merged).
  *
  * @see https://github.com/esperecyan/dictionary/blob/master/dictionary.md#valid-filename
  * @param string $input
  * @return bool
  */
 protected function validateFilename(string $input) : bool
 {
     if (!\Normalizer::isNormalized($input)) {
         return false;
     }

     $extensions = $this->fieldName
         ? FilenameValidator::EXTENSIONS[$this->fieldName]
         : call_user_func_array('array_merge', FilenameValidator::EXTENSIONS);

     // Rejects reserved device names, names whose stem starts or ends with a
     // separator character, and a set of forbidden characters.
     $pattern = '/^(?!(CON|PRN|AUX|NUL|(LPT|COM)[1-9]|\\p{Z}.*|.*\\p{Z})\\.)[^\\p{C}"*.\\/:<>?\\\\|]+\\.(' . implode('|', $extensions) . ')$/ui';

     return preg_match($pattern, $input) === 1;
 }
 /**
  * Normalizes a string according to the flags in $opts.
  *
  * Steps, in order: Unicode normalization ('nfc', then 'nfkc') through the
  * intl Normalizer or, failing that, I18N_UnicodeNormalizer; lower-casing
  * ('lowercase'); character mapping through strtr() ('convmap').
  *
  * @param string $str  Text to normalize.
  * @param array  $opts Flags; 'nfc', 'nfkc', 'lowercase' and 'convmap' are read.
  * @return mixed The processed text.
  */
 private function normalize($str, $opts)
 {
     if ($opts['nfc'] || $opts['nfkc']) {
         // class_exists(..., false): do not trigger autoloading.
         if (class_exists('Normalizer', false)) {
             $forms = array('nfc' => Normalizer::FORM_C, 'nfkc' => Normalizer::FORM_KC);
             foreach ($forms as $flag => $form) {
                 if ($opts[$flag] && !Normalizer::isNormalized($str, $form)) {
                     $str = Normalizer::normalize($str, $form);
                 }
             }
         } else {
             if (!class_exists('I18N_UnicodeNormalizer', false)) {
                 @(include_once 'I18N/UnicodeNormalizer.php');
             }
             if (class_exists('I18N_UnicodeNormalizer', false)) {
                 $fallback = new I18N_UnicodeNormalizer();
                 foreach (array('nfc' => 'NFC', 'nfkc' => 'NFKC') as $flag => $formName) {
                     if ($opts[$flag]) {
                         $str = $fallback->normalize($str, $formName);
                     }
                 }
             }
         }
     }

     if ($opts['lowercase']) {
         $str = strtolower($str);
     }

     $convmap = $opts['convmap'];
     if ($convmap && is_array($convmap)) {
         $str = strtr($str, $convmap);
     }

     return $str;
 }
Beispiel #18
0
 /**
  * Publishes a post to Twitter through Codebird and records the outcome.
  *
  * Nothing is sent when the post has no image, no text and no url. On HTTP
  * status 200 the post row is marked as published with the returned id;
  * otherwise the reply is echoed and appended to $this->errors.
  *
  * @param array $post Post data; the 'image', 'text', 'url', 'image_file', 'date' and 'type' entries are read.
  * @return void
  */
 public function postTW($post)
 {
     $isEmptyPost = $post['image'] == 0 && $post['text'] == '' && $post['url'] == '';
     if ($isEmptyPost) {
         return;
     }

     // require codebird
     require_once LIB_DIR . 'codebird-php-develop/src/codebird.php';
     \Codebird\Codebird::setConsumerKey(TW_API_KEY, TW_API_SECRET);
     $cb = \Codebird\Codebird::getInstance();
     $cb->setToken(TW_ACCESS_TOKEN, TW_ACCESS_TOKEN_SECRET);

     // The status text is sent in NFC.
     $statusText = $post['text'];
     if (!Normalizer::isNormalized($statusText, Normalizer::FORM_C)) {
         $statusText = Normalizer::normalize($statusText, Normalizer::FORM_C);
     }
     $params = ['status' => $statusText];
     //        if ($post['url'] != '') $params['status'] .= ($params['status'] == '' ? '' : ' ').$post['url'];

     if ($post['image'] > 0) {
         $params['media[]'] = $post['image_file'];
         $reply = $cb->statuses_updateWithMedia($params);
     } else {
         $reply = $cb->statuses_update($params);
     }

     $httpStatus = $reply->httpstatus;
     if ($httpStatus != 200) {
         echo 'Twitter failed: ' . print_r($reply, true) . PHP_EOL;
         $this->errors[] = ['type' => 'tw', 'message' => 'failed posting', 'reply' => print_r($reply, true)];
         return;
     }

     $sql = "update posts set published = 1, social_id = ? where `date` = ? and `type` = ?";
     $this->dsp->db->Execute($sql, $reply->id, $post['date'], $post['type']);
     echo 'Twitter success: post id ' . $reply->id . PHP_EOL;
 }