/**
 * Returns two-character hiragana strings such as "ぁぁ" and "ぁあ", built from
 * every ordered pair of characters in a run of consecutive code points that
 * starts at "ぁ".
 *
 * The list is generated on the first call and stored in
 * $this->hiraganaWords; while that property is non-empty it is returned as is.
 *
 * @return string[] At least 500 elements.
 */
protected function generateHiraganaWords() : array
{
    if (!$this->hiraganaWords) {
        $start = \IntlChar::ord('ぁ');
        // Smallest alphabet size whose square reaches the required 500 pairs.
        $size = (int) ceil(sqrt(500));
        $letters = array_map([\IntlChar::class, 'chr'], range($start, $start + $size - 1));

        foreach ($letters as $first) {
            foreach ($letters as $second) {
                $this->hiraganaWords[] = $first . $second;
            }
        }
    }

    return $this->hiraganaWords;
}
/**
 * Replaces every printable ASCII character other than the space
 * (U+0021 to U+007E) with its fullwidth form (U+FF01 to U+FF5E).
 *
 * Characters outside that range, including the space, are left untouched.
 *
 * @param string $str
 * @return string
 */
protected function convertToFullwidth(string $str) : string
{
    return preg_replace_callback('/[!-~]/u', static function (array $matches) : string {
        // 0xFEE0 is the distance from "!" (U+0021) to fullwidth "！" (U+FF01);
        // the whole matched range maps onto the fullwidth block at that offset.
        return \IntlChar::chr(\IntlChar::ord($matches[0]) + 0xFEE0);
    }, $str);
}
/**
 * Converts the input into a valid file name (without extension).
 *
 * Characters in the Unicode "Other" categories (\p{C}, which includes control
 * characters) are removed, then leading and trailing separators (\p{Z}) are
 * trimmed. If nothing is left, a random file name is generated. Otherwise a
 * name that is exactly a reserved device name (CON, PRN, AUX, CLOCK$, NUL,
 * COM1-9, LPT1-9, case-insensitive) and every run of the characters
 * " * . / : < > ? \ | are replaced code point by code point, shifting each by
 * self::BETWEEN_HALF_AND_FULL (presumably the halfwidth-to-fullwidth offset;
 * the constant is declared outside this method).
 *
 * @param string $filenameWithoutExtension File name without extension, already NFC-normalized.
 * @return string A randomly generated name when the input consisted only of
 *     control characters and whitespace; the converted name otherwise.
 */
public function convertToValidFilenameWithoutExtension(string $filenameWithoutExtension) : string
{
    // Strip control characters BEFORE trimming. Doing both in a single pass
    // leaves behind separators that only become leading/trailing once the
    // control characters next to them are gone (e.g. "\x00 \x00" => " ").
    $withoutControls = preg_replace('/\\p{C}+/u', '', $filenameWithoutExtension);

    /** @var string The input without control characters and without leading/trailing separators. */
    $trimed = preg_replace('/^\\p{Z}+|\\p{Z}+$/u', '', $withoutControls);

    if ($trimed === '') {
        return (new FilenameValidator())->generateRandomFilename();
    }

    return preg_replace_callback(
        '/^(CON|PRN|AUX|CLOCK\\$|NUL|(COM|LPT)[1-9])$|["*.\\/:<>?\\\\|]+/i',
        function (array $matches) : string {
            $breakIterator = \IntlCodePointBreakIterator::createCodePointInstance();
            $breakIterator->setText($matches[0]);

            $fullWidthChars = '';
            foreach ($breakIterator as $index) {
                // Boundary 0 precedes the first code point, so nothing has been passed yet.
                if ($index > 0) {
                    $fullWidthChars .= \IntlChar::chr(
                        $breakIterator->getLastCodePoint() + self::BETWEEN_HALF_AND_FULL
                    );
                }
            }

            return $fullWidthChars;
        },
        $trimed
    );
}
/**
 * Reads one UTF-8 encoded code point (one to four bytes) from the byte stream.
 *
 * The lead byte determines how many continuation bytes are expected:
 *
 *     0xxx xxxx   single byte, returned immediately
 *     110x xxxx   one continuation byte
 *     1110 xxxx   two continuation bytes
 *     1111 0xxx   three continuation bytes
 *     10xx xxxx   continuation byte (not a valid lead byte)
 *
 * Continuation bytes are consumed from $this->byteIterator until the expected
 * count is reached, the stream ends, or a non-continuation byte is seen. The
 * collected bytes are then passed to \IntlChar::chr(); a null result is
 * treated as an ill-formed sequence.
 *
 * @see https://en.wikipedia.org/wiki/UTF-8#Description
 * @see http://www.unicode.org/versions/Unicode9.0.0/ch03.pdf#page=54
 *
 * @param string $byte Lead byte. The stream cursor is already positioned on
 *     the byte that follows it.
 * @return string The scanned code point.
 * @throws ParseException on ill-formed UTF-8.
 */
private function scanCodepoint(string $byte) : string
{
    $stream = $this->byteIterator;
    $lead = ord($byte);

    // High bit clear: plain single-byte code point.
    if (($lead & 0x80) === 0) {
        return $byte;
    }

    if (($lead & 0xE0) === 0xC0) {
        $remaining = 1;
    } elseif (($lead & 0xF0) === 0xE0) {
        $remaining = 2;
    } elseif (($lead & 0xF8) === 0xF0) {
        $remaining = 3;
    } else {
        // Not a valid lead byte; the validation below rejects it.
        $remaining = 0;
    }

    $sequence = $byte;
    while ($stream->valid() && $remaining > 0) {
        $next = $stream->current();
        // Stop at the first byte that is not of the form 10xx xxxx.
        if ((ord($next) & 0xC0) !== 0x80) {
            break;
        }
        $sequence .= $next;
        $stream->next();
        $remaining--;
    }

    $scanned = \IntlChar::chr($sequence);
    if ($scanned !== null) {
        return $scanned;
    }

    // One " 0x%X" placeholder per collected byte.
    $format = "Line %d: Ill-formed UTF-8 sequence" . str_repeat(" 0x%X", strlen($sequence)) . ".";
    throw new ParseException(
        sprintf($format, $this->getLineNumber(), ...array_map("ord", str_split($sequence)))
    );
}