/**
  * Returns the list of two-character hiragana strings obtained by pairing
  * every letter of a run of consecutive hiragana code points with every
  * letter of that same run.
  *
  * The run starts at 'ぁ' and is just long enough for the number of pairs to
  * reach 500. The list is stored in $this->hiraganaWords; when that property
  * is already non-empty it is returned without being rebuilt.
  *
  * @return string[] At least 500 elements.
  */
 protected function generateHiraganaWords() : array
 {
     if (!$this->hiraganaWords) {
         $firstCodePoint = \IntlChar::ord('ぁ');
         // Smallest whole number of letters whose square is at least 500.
         $letterCount = (int) ceil(sqrt(500));
         $letters = array_map(
             static function (int $codePoint) : string {
                 return \IntlChar::chr($codePoint);
             },
             range($firstCodePoint, $firstCodePoint + $letterCount - 1)
         );
         // Cartesian product of the run with itself, first letter varying slowest.
         foreach ($letters as $lead) {
             foreach ($letters as $trail) {
                 $this->hiraganaWords[] = $lead . $trail;
             }
         }
     }
     return $this->hiraganaWords;
 }
 /**
  * Replaces every printable ASCII character other than the space
  * (U+0021 through U+007E) with its fullwidth form (U+FF01 through U+FF5E).
  * All other characters, including the space, are left untouched.
  *
  * @param string $str
  * @return string
  */
 protected function convertToFullwidth(string $str) : string
 {
     // Distance from U+0021 EXCLAMATION MARK to U+FF01 FULLWIDTH EXCLAMATION
     // MARK. Both ranges are laid out in the same order, so one offset maps
     // every character. It is spelled with code point numbers on purpose:
     // a fullwidth literal in the source is easily folded back to ASCII by
     // compatibility normalization, which silently turns the offset into 0.
     $offset = 0xFF01 - 0x21;
     return preg_replace_callback('/[!-~]/u', static function (array $matches) use ($offset) : string {
         return \IntlChar::chr(\IntlChar::ord($matches[0]) + $offset);
     }, $str);
 }
 /**
  * Converts the input into a valid file name (without extension).
  *
  * Control characters are removed and surrounding whitespace is trimmed.
  * In what remains, a name that is exactly a reserved device name (CON, PRN,
  * AUX, CLOCK$, NUL, COM1-9, LPT1-9, case-insensitive) and every run of the
  * characters " * . / : < > ? \ | have each character shifted by
  * self::BETWEEN_HALF_AND_FULL code points.
  *
  * @param string $filenameWithoutExtension File name without extension, already NFC-normalized.
  * @return string A randomly generated name when the input consisted only of
  *                control characters and whitespace.
  */
 public function convertToValidFilenameWithoutExtension(string $filenameWithoutExtension) : string
 {
     // Strip control characters first and trim afterwards. Doing both in one
     // alternation leaves behind whitespace that only becomes leading/trailing
     // once an adjacent control character is gone (e.g. "\0 \0" would yield " ").
     /** @var string|null Input without control characters and without leading/trailing whitespace. */
     $trimmed = preg_replace(['/\\p{C}+/u', '/^\\p{Z}+|\\p{Z}+$/u'], '', $filenameWithoutExtension);
     // null means PCRE failed (for instance on input that is not valid UTF-8);
     // nothing usable is left in that case either.
     if ($trimmed === null || $trimmed === '') {
         return (new FilenameValidator())->generateRandomFilename();
     }
     return preg_replace_callback('/^(CON|PRN|AUX|CLOCK\\$|NUL|(COM|LPT)[1-9])$|["*.\\/:<>?\\\\|]+/i', function (array $matches) : string {
         $breakIterator = \IntlCodePointBreakIterator::createCodePointInstance();
         $breakIterator->setText($matches[0]);
         $fullWidthChars = '';
         foreach ($breakIterator as $index) {
             // Boundary 0 precedes the first character; every later boundary
             // has just stepped over exactly one code point.
             if ($index > 0) {
                 // NOTE(review): BETWEEN_HALF_AND_FULL is defined outside this block; presumably
                 // the offset from an ASCII character to its fullwidth form - confirm.
                 $fullWidthChars .= \IntlChar::chr($breakIterator->getLastCodePoint() + self::BETWEEN_HALF_AND_FULL);
             }
         }
         return $fullWidthChars;
     }, $trimmed);
 }
Exemple #4
0
 /**
  * Scans a single UTF-8 encoded Unicode codepoint, which can be up to four
  * bytes long.
  *
  * The initial byte determines how many continuation bytes are read from the
  * bytestream:
  *
  *  0xxx xxxx   Single-byte codepoint, returned immediately.
  *  110x xxxx   First of a two-byte codepoint.
  *  1110 xxxx   First of three.
  *  1111 0xxx   First of four.
  *  10xx xxxx   A continuation of any of the three preceding.
  *
  * Reading stops early at the end of the stream or at the first byte that is
  * not a continuation; that byte is left in the stream. Whatever was
  * collected is then handed to IntlChar::chr(), and a null result is reported
  * as ill-formed input.
  *
  * @see https://en.wikipedia.org/wiki/UTF-8#Description
  * @see http://www.unicode.org/versions/Unicode9.0.0/ch03.pdf#page=54
  *
  * @param string $byte Initial byte. The bytestream cursor starts one
  *                     position ahead of this.
  * @return string The scanned codepoint.
  * @throws ParseException on ill-formed UTF-8.
  */
 private function scanCodepoint(string $byte) : string
 {
     $stream = $this->byteIterator;
     $lead = ord($byte);
     // High bit clear: plain ASCII, nothing more to read.
     if ($lead < 0x80) {
         return $byte;
     }
     // Number of continuation bytes announced by the lead byte.
     if (($lead & 0xE0) === 0xC0) {
         $remaining = 1;
     } elseif (($lead & 0xF0) === 0xE0) {
         $remaining = 2;
     } elseif (($lead & 0xF8) === 0xF0) {
         $remaining = 3;
     } else {
         // Not a valid lead byte; the lone byte is rejected below.
         $remaining = 0;
     }
     $sequence = $byte;
     for (; $stream->valid() && $remaining > 0; $remaining--) {
         $next = $stream->current();
         // Anything other than 10xx xxxx ends the sequence without being consumed.
         if ((ord($next) & 0xC0) !== 0x80) {
             break;
         }
         $sequence .= $next;
         $stream->next();
     }
     $scanned = \IntlChar::chr($sequence);
     if ($scanned !== null) {
         return $scanned;
     }
     // One " 0x%X" placeholder per collected byte, so the message lists them all.
     $format = "Line %d: Ill-formed UTF-8 sequence" . str_repeat(" 0x%X", strlen($sequence)) . ".";
     $lineNumber = $this->getLineNumber();
     $byteValues = array_map("ord", str_split($sequence));
     throw new ParseException(sprintf($format, $lineNumber, ...$byteValues));
 }