/**
 * UTF8::strPad
 *
 * Pads a string to a target length, measured with \Phalcana\UTF8::strlen,
 * using another string as filler. When both the input and the filler are
 * ASCII the work is delegated to PHP's native str_pad(); otherwise the
 * filler is repeated and trimmed with \Phalcana\UTF8::substr.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str               input string
 * @param   integer  $final_str_length  desired length of the result
 * @param   string   $pad_str           string used as filler
 * @param   integer  $pad_type          STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
 * @return  string   the padded string, or $str unchanged when it is already long enough
 * @throws  \Phalcana\Exceptions\UTF8   when $pad_type is none of the STR_PAD_* constants
 */
function _strPad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT)
{
    if (\Phalcana\UTF8::isAscii($str) && \Phalcana\UTF8::isAscii($pad_str)) {
        // PHP's native function is str_pad(); a camel-cased strPad() does not exist.
        return str_pad($str, $final_str_length, $pad_str, $pad_type);
    }

    $str_length = \Phalcana\UTF8::strlen($str);

    // Nothing to pad when the target is not longer than the input
    if ($final_str_length <= 0 || $final_str_length <= $str_length) {
        return $str;
    }

    $pad_str_length = \Phalcana\UTF8::strlen($pad_str);
    $pad_length = $final_str_length - $str_length;

    if ($pad_type == STR_PAD_RIGHT) {
        // Over-produce filler, then cut the whole result down to the target length
        $repeat = ceil($pad_length / $pad_str_length);

        return \Phalcana\UTF8::substr($str . str_repeat($pad_str, $repeat), 0, $final_str_length);
    }

    if ($pad_type == STR_PAD_LEFT) {
        $repeat = ceil($pad_length / $pad_str_length);

        return \Phalcana\UTF8::substr(str_repeat($pad_str, $repeat), 0, floor($pad_length)) . $str;
    }

    if ($pad_type == STR_PAD_BOTH) {
        // An odd amount of padding puts the extra unit on the right-hand side
        $pad_length /= 2;
        $pad_length_left = floor($pad_length);
        $pad_length_right = ceil($pad_length);
        $repeat_left = ceil($pad_length_left / $pad_str_length);
        $repeat_right = ceil($pad_length_right / $pad_str_length);

        $pad_left = \Phalcana\UTF8::substr(str_repeat($pad_str, $repeat_left), 0, $pad_length_left);
        $pad_right = \Phalcana\UTF8::substr(str_repeat($pad_str, $repeat_right), 0, $pad_length_right);

        return $pad_left . $str . $pad_right;
    }

    throw new \Phalcana\Exceptions\UTF8("Phalcana\\UTF8::strPad: Unknown padding type ({$pad_type})");
}
/**
 * UTF8::substrReplace
 *
 * Replaces part of a string with another string. ASCII input is handed to
 * PHP's native substr_replace(); any other input is split into an array with
 * the pattern /./us, spliced with array_splice() and joined back together.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string        $str          input string
 * @param   string        $replacement  text to insert
 * @param   integer       $offset       position at which replacing starts
 * @param   integer|null  $length       number of elements to replace; null replaces up to the end
 * @return  string
 */
function _substrReplace($str, $replacement, $offset, $length = null)
{
    if (\Phalcana\UTF8::isAscii($str)) {
        // PHP's native function is substr_replace(); a camel-cased substrReplace() does not exist.
        return $length === null
            ? substr_replace($str, $replacement, $offset)
            : substr_replace($str, $replacement, $offset, $length);
    }

    // With no explicit length, replace everything from $offset onwards
    $length = $length === null ? \Phalcana\UTF8::strlen($str) : (int) $length;

    // Split both strings with the /u pattern so the splice below works on whole matches
    preg_match_all('/./us', $str, $str_array);
    preg_match_all('/./us', $replacement, $replacement_array);

    array_splice($str_array[0], $offset, $length, $replacement_array[0]);

    return implode('', $str_array[0]);
}
/**
 * UTF8::substr
 *
 * Returns part of a string. ASCII input is handed to PHP's native substr();
 * any other input is sliced by building a regular expression that skips
 * $offset matches of "." and captures the requested span, run with the /us
 * modifiers.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string        $str     input string
 * @param   integer       $offset  start position; a negative value counts back from the end
 * @param   integer|null  $length  span to return; a negative value stops that far before the end,
 *                                 null returns everything after $offset
 * @return  string
 */
function _substr($str, $offset, $length = null)
{
    if (\Phalcana\UTF8::isAscii($str)) {
        return $length === null ? substr($str, $offset) : substr($str, $offset, $length);
    }

    // Normalize params
    $str = (string) $str;
    $strlen = \Phalcana\UTF8::strlen($str);

    // Cast the offset itself before resolving it. The previous one-liner
    // "(int) ($offset < 0) ? ... : $offset" cast only the comparison result,
    // so the offset was never actually converted to an integer.
    $offset = (int) $offset;
    if ($offset < 0) {
        // Normalize to positive offset
        $offset = max(0, $strlen + $offset);
    }

    $length = $length === null ? null : (int) $length;

    // Impossible
    if ($length === 0 || $offset >= $strlen || ($length < 0 && $length <= $offset - $strlen)) {
        return '';
    }

    // Whole string
    if ($offset == 0 && ($length === null || $length >= $strlen)) {
        return $str;
    }

    // Build regex
    $regex = '^';

    // Create an offset expression
    if ($offset > 0) {
        // PCRE repeating quantifiers must be less than 65536, so repeat when necessary
        $x = (int) ($offset / 65535);
        $y = (int) ($offset % 65535);
        $regex .= $x == 0 ? '' : '(?:.{65535}){' . $x . '}';
        $regex .= $y == 0 ? '' : '.{' . $y . '}';
    }

    // Create a length expression
    if ($length === null) {
        // No length set, grab it all
        $regex .= '(.*)';
    } elseif ($length > 0) {
        // Find length from the left (positive length)
        // Reduce length so that it can't go beyond the end of the string
        $length = min($strlen - $offset, $length);

        $x = (int) ($length / 65535);
        $y = (int) ($length % 65535);
        $regex .= '(';
        $regex .= $x == 0 ? '' : '(?:.{65535}){' . $x . '}';
        $regex .= '.{' . $y . '})';
    } else {
        // Find length from the right (negative length); the greedy capture
        // takes as much as it can while leaving -$length matches after it
        $x = (int) (-$length / 65535);
        $y = (int) (-$length % 65535);
        $regex .= '(.*)';
        $regex .= $x == 0 ? '' : '(?:.{65535}){' . $x . '}';
        $regex .= '.{' . $y . '}';
    }

    preg_match('/' . $regex . '/us', $str, $matches);

    return $matches[1];
}
/**
 * UTF8::strrpos
 *
 * Finds the position of the last occurrence of $search in $str. When both
 * strings are ASCII the native strrpos() answers directly. Otherwise the
 * position is the \Phalcana\UTF8::strlen of everything that precedes the
 * final occurrence; a non-zero offset is handled by searching the substring
 * that starts at $offset and adding $offset back to the result.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str     haystack
 * @param   string   $search  needle
 * @param   integer  $offset  position to start searching from
 * @return  integer|false  position of the last occurrence, or false when there is none
 */
function _strrpos($str, $search, $offset = 0)
{
    $offset = (int) $offset;

    $ascii_only = \Phalcana\UTF8::isAscii($str) && \Phalcana\UTF8::isAscii($search);
    if ($ascii_only) {
        return strrpos($str, $search, $offset);
    }

    if ($offset != 0) {
        // Search only the part after the offset, then shift the hit back
        $tail = \Phalcana\UTF8::substr($str, $offset);
        $found = \Phalcana\UTF8::strrpos($tail, $search);

        if ($found === false) {
            return false;
        }

        return $found + $offset;
    }

    // A limit of -1 drops the piece after the last needle, so re-joining the
    // remaining pieces yields exactly the text in front of the last occurrence
    $pieces = explode($search, $str, -1);

    if (!isset($pieces[0])) {
        return false;
    }

    return \Phalcana\UTF8::strlen(implode($search, $pieces));
}
/**
 * UTF8::strSplit
 *
 * Splits a string into chunks of $split_length. ASCII input is handed to
 * PHP's native str_split(); any other input is chunked with a /us regular
 * expression.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str           input string
 * @param   integer  $split_length  maximum length of each chunk
 * @return  array|false  list of chunks; false for non-ASCII input when $split_length is below 1
 */
function _strSplit($str, $split_length = 1)
{
    $split_length = (int) $split_length;

    if (\Phalcana\UTF8::isAscii($str)) {
        // PHP's native function is str_split(); a camel-cased strSplit() does not exist.
        return str_split($str, $split_length);
    }

    if ($split_length < 1) {
        return false;
    }

    // The whole string fits into a single chunk
    if (\Phalcana\UTF8::strlen($str) <= $split_length) {
        return array($str);
    }

    // First alternative takes full-size chunks; the second picks up the
    // shorter remainder anchored at the end of the string
    preg_match_all('/.{' . $split_length . '}|[^\\x00]{1,' . $split_length . '}$/us', $str, $matches);

    return $matches[0];
}
/**
 * UTF8::strcspn
 *
 * Measures the leading part of $str that contains none of the characters in
 * $mask. ASCII input is delegated to the native strcspn(); otherwise the
 * mask is turned into a negated character class and the length of the match
 * is taken with \Phalcana\UTF8::strlen.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string        $str     input string
 * @param   string        $mask    characters that end the span
 * @param   integer|null  $offset  start position of the part of $str to examine
 * @param   integer|null  $length  length of the part of $str to examine
 * @return  integer  length of the initial span; 0 when $str or $mask is empty
 */
function _strcspn($str, $mask, $offset = null, $length = null)
{
    if ($str == '' || $mask == '') {
        return 0;
    }

    if (\Phalcana\UTF8::isAscii($str) && \Phalcana\UTF8::isAscii($mask)) {
        // Pass on only the optional arguments that were actually supplied
        if ($offset === null) {
            return strcspn($str, $mask);
        }

        if ($length === null) {
            return strcspn($str, $mask, $offset);
        }

        return strcspn($str, $mask, $offset, $length);
    }

    if (!($offset === null && $length === null)) {
        $str = \Phalcana\UTF8::substr($str, $offset, $length);
    }

    // Escape these characters:  - [ ] . : \ ^ /
    // The . and : are escaped to prevent possible warnings about POSIX regex elements
    $escaped_mask = preg_replace('#[-[\\].:\\\\^/]#', '\\\\$0', $mask);

    $pattern = '/^[^' . $escaped_mask . ']+/u';
    preg_match($pattern, $str, $span);

    if (!isset($span[0])) {
        return 0;
    }

    return \Phalcana\UTF8::strlen($span[0]);
}
/**
 * Replaces the given words with a string.
 *
 *     // Displays "What the #####, man!"
 *     echo Text::censor('What the frick, man!', array(
 *         'frick' => '#####',
 *     ));
 *
 * A "*" inside a bad word acts as a wildcard for a run of non-whitespace
 * characters. Matching is case-insensitive and uses the /u modifier.
 *
 * @param   string        $str                    phrase to replace words in
 * @param   array|string  $badwords               words to replace; a single word may be given as a string
 * @param   string        $replacement            replacement string
 * @param   boolean       $replace_partial_words  replace words across word boundaries (space, period, etc)
 * @return  string
 * @uses    UTF8::strlen
 */
public static function censor($str, $badwords, $replacement = '#', $replace_partial_words = true)
{
    // Normalise once. Casting only inside the foreach header left a scalar
    // $badwords untouched, so the loop wrote to string offsets and implode()
    // then received a string instead of an array.
    $badwords = (array) $badwords;

    // With no words the pattern would be "()", which matches the empty string
    // at every position and injects a multi-character replacement everywhere.
    if ($badwords === array()) {
        return $str;
    }

    foreach ($badwords as $key => $badword) {
        $badwords[$key] = str_replace('\\*', '\\S*?', preg_quote((string) $badword));
    }

    $regex = '(' . implode('|', $badwords) . ')';

    if ($replace_partial_words === false) {
        // Just using \b isn't sufficient when we need to replace a badword that
        // already contains word boundaries itself
        $regex = '(?<=\\b|\\s|^)' . $regex . '(?=\\b|\\s|$)';
    }

    $regex = '!' . $regex . '!ui';

    // if $replacement is a single character: replace each of the characters of the badword with $replacement
    if (UTF8::strlen($replacement) == 1) {
        return preg_replace_callback($regex, function ($matches) use ($replacement) {
            return str_repeat($replacement, UTF8::strlen($matches[1]));
        }, $str);
    }

    // if $replacement is not a single character, fully replace the badword with $replacement
    return preg_replace($regex, $replacement, $str);
}
/**
 * Tests UTF8::strlen
 *
 * Each data set supplies an input and the exact value (type included) that
 * UTF8::strlen must return for it.
 *
 * @test
 * @dataProvider provider_strlen
 */
public function test_strlen($input, $expected)
{
    $actual = UTF8::strlen($input);

    // assertSame compares with identity, so the returned type must match as well
    $this->assertSame($expected, $actual);
}