Example #1
0
/**
 * UTF8::strPad
 *
 * Pads a string to the requested length, counting characters rather than
 * bytes. When both the input and the padding are ASCII the native str_pad()
 * is used; otherwise the padding is assembled with the UTF-8 aware helpers.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str               string to pad
 * @param   integer  $final_str_length  desired length of the result, in characters
 * @param   string   $pad_str           string used as padding (repeated and truncated as needed)
 * @param   integer  $pad_type          STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
 * @return  string   the padded string, or $str unchanged when it is already long enough
 * @throws  \Phalcana\Exceptions\UTF8  when $pad_type is none of the three constants
 */
function _strPad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT)
{
    if (\Phalcana\UTF8::isAscii($str) && \Phalcana\UTF8::isAscii($pad_str)) {
        // The PHP builtin is str_pad(); a function named strPad() does not exist.
        return str_pad($str, $final_str_length, $pad_str, $pad_type);
    }
    $str_length = \Phalcana\UTF8::strlen($str);
    if ($final_str_length <= 0 || $final_str_length <= $str_length) {
        // Nothing to add
        return $str;
    }
    $pad_str_length = \Phalcana\UTF8::strlen($pad_str);
    $pad_length = $final_str_length - $str_length;
    if ($pad_type == STR_PAD_RIGHT) {
        // Over-produce padding, then cut the whole result to the target length
        $repeat = (int) ceil($pad_length / $pad_str_length);
        return \Phalcana\UTF8::substr($str . str_repeat($pad_str, $repeat), 0, $final_str_length);
    }
    if ($pad_type == STR_PAD_LEFT) {
        $repeat = (int) ceil($pad_length / $pad_str_length);
        return \Phalcana\UTF8::substr(str_repeat($pad_str, $repeat), 0, (int) floor($pad_length)) . $str;
    }
    if ($pad_type == STR_PAD_BOTH) {
        // When the padding cannot be split evenly the extra character goes on the right
        $pad_length /= 2;
        $pad_length_left = (int) floor($pad_length);
        $pad_length_right = (int) ceil($pad_length);
        $repeat_left = (int) ceil($pad_length_left / $pad_str_length);
        $repeat_right = (int) ceil($pad_length_right / $pad_str_length);
        $pad_left = \Phalcana\UTF8::substr(str_repeat($pad_str, $repeat_left), 0, $pad_length_left);
        $pad_right = \Phalcana\UTF8::substr(str_repeat($pad_str, $repeat_right), 0, $pad_length_right);
        return $pad_left . $str . $pad_right;
    }
    throw new \Phalcana\Exceptions\UTF8("Phalcana\\UTF8::strPad: Unknown padding type ({$pad_type})");
}
Example #2
0
/**
 * UTF8::strlen
 *
 * Returns the length of a string. ASCII input is measured directly; any other
 * input is passed through utf8_decode() first and the byte length of the
 * decoded string is returned.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  $str  string to measure
 * @return  integer
 */
function _strlen($str)
{
    // Pick what gets measured: the raw string for ASCII, the decoded form otherwise
    $measurable = \Phalcana\UTF8::isAscii($str) ? $str : utf8_decode($str);

    return strlen($measurable);
}
Example #3
0
 /**
  * Recursively cleans arrays, objects, and strings. Removes ASCII control
  * codes and converts to the requested charset while silently discarding
  * incompatible characters.
  *
  *     UTF8::clean($_GET); // Clean GET data
  *
  * Keys are cleaned as well as values. A cleaned entry is written under the
  * cleaned key; if cleaning changed the key, the entry under the original
  * key is left in place.
  *
  * @param   mixed   $var        variable to clean
  * @param   string  $charset    character set, defaults to Phalcana\Phalcana::$charset
  * @return  mixed
  * @uses    UTF8::clean
  * @uses    UTF8::stripAsciiCtrl
  * @uses    UTF8::isAscii
  */
 public static function clean($var, $charset = null)
 {
     if (!$charset) {
         // Use the application character set
         $charset = Phalcana::$charset;
     }
     if (is_array($var) || is_object($var)) {
         // Array syntax only works on arrays and ArrayAccess objects;
         // plain objects must be written through property access.
         $use_array_syntax = is_array($var) || $var instanceof \ArrayAccess;
         foreach ($var as $key => $val) {
             // Recursion! Pass the charset along so nested data is converted
             // to the charset the caller asked for, not the default one.
             $clean_key = UTF8::clean($key, $charset);
             $clean_val = UTF8::clean($val, $charset);
             if ($use_array_syntax) {
                 $var[$clean_key] = $clean_val;
             } else {
                 $var->{$clean_key} = $clean_val;
             }
         }
     } elseif (is_string($var) && $var !== '') {
         // Remove control characters
         $var = UTF8::stripAsciiCtrl($var);
         if (!UTF8::isAscii($var)) {
             // Temporarily save the mb_substitute_character() value into a variable
             $mb_substitute_character = mb_substitute_character();
             // Disable substituting illegal characters with the default '?' character
             mb_substitute_character('none');
             // convert encoding, this is expensive, used when $var is not ASCII
             $var = mb_convert_encoding($var, $charset, $charset);
             // Reset mb_substitute_character() value back to the original setting
             mb_substitute_character($mb_substitute_character);
         }
     }
     return $var;
 }
Example #4
0
/**
 * UTF8::trim
 *
 * Strips characters from both ends of a string. Without a character list the
 * native trim() is used; with one, the right side is trimmed first and then
 * the left side, both through the UTF8 helpers.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  $str       string to trim
 * @param   string  $charlist  characters to strip, or null for the native default set
 * @return  string
 */
function _trim($str, $charlist = null)
{
    if ($charlist !== null) {
        $right_trimmed = \Phalcana\UTF8::rtrim($str, $charlist);

        return \Phalcana\UTF8::ltrim($right_trimmed, $charlist);
    }

    return trim($str);
}
Example #5
0
/**
 * \Phalcana\UTF8::ucfirst
 *
 * Upper-cases the first character of a string. ASCII input goes to the native
 * ucfirst(); otherwise the string is split into its first character and the
 * remainder, and only the first character is upper-cased.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  $str  input string
 * @return  string
 */
function _ucfirst($str)
{
    if (!\Phalcana\UTF8::isAscii($str)) {
        // Group 1: the first character (possibly none), group 2: everything after it
        preg_match('/^(.?)(.*)$/us', $str, $parts);
        $head = \Phalcana\UTF8::strtoupper($parts[1]);

        return $head . $parts[2];
    }

    return ucfirst($str);
}
Example #6
0
/**
 * UTF8::strrev
 *
 * Reverses a string character by character. ASCII input is handed to the
 * native strrev(); other input is split with a UTF-8 regex and re-joined in
 * reverse order.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  $str  string to reverse
 * @return  string
 */
function _strrev($str)
{
    if (!\Phalcana\UTF8::isAscii($str)) {
        // One array element per character (the "s" flag lets "." match newlines too)
        preg_match_all('/./us', $str, $chars);
        $reversed = array_reverse($chars[0]);

        return implode('', $reversed);
    }

    return strrev($str);
}
Example #7
0
/**
 * UTF8::strcasecmp
 *
 * Case-insensitive string comparison. Two ASCII strings are compared with the
 * native strcasecmp(); otherwise both are lower-cased through the UTF8 helper
 * and compared with strcmp().
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  $str1  first string
 * @param   string  $str2  second string
 * @return  integer result of the underlying comparison function
 */
function _strcasecmp($str1, $str2)
{
    $both_ascii = \Phalcana\UTF8::isAscii($str1) && \Phalcana\UTF8::isAscii($str2);
    if (!$both_ascii) {
        return strcmp(\Phalcana\UTF8::strtolower($str1), \Phalcana\UTF8::strtolower($str2));
    }

    return strcasecmp($str1, $str2);
}
Example #8
0
/**
 * UTF8::substrReplace
 *
 * Replaces a range of characters in a string. ASCII input is delegated to the
 * native substr_replace(); otherwise the string and the replacement are split
 * into characters and spliced, so $offset and $length count characters.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str          input string
 * @param   string   $replacement  text to insert
 * @param   integer  $offset       position at which the replaced range starts
 * @param   integer  $length       number of characters to replace; null replaces up to the end
 * @return  string
 */
function _substrReplace($str, $replacement, $offset, $length = null)
{
    if (\Phalcana\UTF8::isAscii($str)) {
        // The PHP builtin is substr_replace(); a function named substrReplace() does not exist.
        return $length === null ? substr_replace($str, $replacement, $offset) : substr_replace($str, $replacement, $offset, $length);
    }
    // With no explicit length, replace everything from $offset onwards
    $length = $length === null ? \Phalcana\UTF8::strlen($str) : (int) $length;
    // Split both strings into arrays of single characters
    preg_match_all('/./us', $str, $str_array);
    preg_match_all('/./us', $replacement, $replacement_array);
    array_splice($str_array[0], $offset, $length, $replacement_array[0]);
    return implode('', $str_array[0]);
}
Example #9
0
/**
 * UTF8::rtrim
 *
 * Strips characters from the end of a string. The native rtrim() is used when
 * no character list is given or when the list is ASCII; otherwise the list is
 * escaped and used as a character class in a UTF-8 regex anchored at the end.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  $str       string to trim
 * @param   string  $charlist  characters to strip, or null for the native default set
 * @return  string
 */
function _rtrim($str, $charlist = null)
{
    if ($charlist !== null && !\Phalcana\UTF8::isAscii($charlist)) {
        // Escape characters that are special inside a regex character class
        $escaped = preg_replace('#[-\\[\\]:\\\\^/]#', '\\\\$0', $charlist);

        return preg_replace('/[' . $escaped . ']++$/uD', '', $str);
    }

    return $charlist === null ? rtrim($str) : rtrim($str, $charlist);
}
Example #10
0
/**
 * \Phalcana\UTF8::ucwords
 *
 * Upper-cases the first character of every word. ASCII input is passed to the
 * native ucwords(); otherwise each character that starts the string or follows
 * a whitespace character is upper-cased through the UTF8 helper.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  $str  input string
 * @return  string
 */
function _ucwords($str)
{
    if (\Phalcana\UTF8::isAscii($str)) {
        return ucwords($str);
    }

    // The class [\x0c\x09\x0b\x0a\x0d\x20] covers form feed, horizontal tab, vertical tab,
    // line feed, carriage return and space - the word separators described at http://php.net/ucwords.
    // The pattern matches one non-separator character preceded by the string start or a separator.
    $word_start = '/(?<=^|[\\x0c\\x09\\x0b\\x0a\\x0d\\x20])[^\\x0c\\x09\\x0b\\x0a\\x0d\\x20]/u';

    $upper_first = function ($hit) {
        return \Phalcana\UTF8::strtoupper($hit[0]);
    };

    return preg_replace_callback($word_start, $upper_first, $str);
}
Example #11
0
/**
 * UTF8::substr
 *
 * Returns part of a string, with $offset and $length counted in characters.
 * ASCII input is delegated to the native substr(); otherwise a regular
 * expression that skips $offset characters and captures the requested span
 * is built and applied.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str     input string
 * @param   integer  $offset  start position; negative values count from the end
 * @param   integer  $length  number of characters; negative values stop that many
 *                            characters before the end; null means "to the end"
 * @return  string
 */
function _substr($str, $offset, $length = null)
{
    if (\Phalcana\UTF8::isAscii($str)) {
        return $length === null ? substr($str, $offset) : substr($str, $offset, $length);
    }
    // Normalize params
    $str = (string) $str;
    $strlen = \Phalcana\UTF8::strlen($str);
    // Cast the offset itself. A cast written in front of the ternary binds to
    // the comparison only and leaves $offset un-normalized.
    $offset = (int) $offset;
    if ($offset < 0) {
        // Normalize to positive offset
        $offset = max(0, $strlen + $offset);
    }
    $length = $length === null ? null : (int) $length;
    // Impossible
    if ($length === 0 || $offset >= $strlen || $length < 0 && $length <= $offset - $strlen) {
        return '';
    }
    // Whole string
    if ($offset == 0 && ($length === null || $length >= $strlen)) {
        return $str;
    }
    // Build regex
    $regex = '^';
    // Create an offset expression
    if ($offset > 0) {
        // PCRE repeating quantifiers must be less than 65536, so repeat when necessary
        $x = (int) ($offset / 65535);
        $y = (int) ($offset % 65535);
        $regex .= $x == 0 ? '' : '(?:.{65535}){' . $x . '}';
        $regex .= $y == 0 ? '' : '.{' . $y . '}';
    }
    // Create a length expression
    if ($length === null) {
        // No length set, grab it all
        $regex .= '(.*)';
    } elseif ($length > 0) {
        // Find length from the left (positive length)
        // Reduce length so that it can't go beyond the end of the string
        $length = min($strlen - $offset, $length);
        $x = (int) ($length / 65535);
        $y = (int) ($length % 65535);
        $regex .= '(';
        $regex .= $x == 0 ? '' : '(?:.{65535}){' . $x . '}';
        $regex .= '.{' . $y . '})';
    } else {
        // Find length from the right (negative length)
        $x = (int) (-$length / 65535);
        $y = (int) (-$length % 65535);
        $regex .= '(.*)';
        $regex .= $x == 0 ? '' : '(?:.{65535}){' . $x . '}';
        $regex .= '.{' . $y . '}';
    }
    preg_match('/' . $regex . '/us', $str, $matches);
    return $matches[1];
}
Example #12
0
/**
 * UTF8::strrpos
 *
 * Finds the position of the last occurrence of a string, counted in
 * characters. Two ASCII strings are handled by the native strrpos().
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str     haystack
 * @param   string   $search  needle
 * @param   integer  $offset  position from which the search starts
 * @return  integer|false  position of the last occurrence, or false when not found
 */
function _strrpos($str, $search, $offset = 0)
{
    $offset = (int) $offset;
    if (\Phalcana\UTF8::isAscii($str) && \Phalcana\UTF8::isAscii($search)) {
        return strrpos($str, $search, $offset);
    }

    if ($offset != 0) {
        // Search the part after the offset, then shift the result back
        $tail = \Phalcana\UTF8::substr($str, $offset);
        $found = \Phalcana\UTF8::strrpos($tail, $search);
        if ($found === false) {
            return false;
        }

        return $found + $offset;
    }

    // Splitting on the needle and dropping the last piece leaves exactly the
    // text in front of the final occurrence; its length is the position.
    $pieces = explode($search, $str, -1);
    if (!isset($pieces[0])) {
        return false;
    }

    return \Phalcana\UTF8::strlen(implode($search, $pieces));
}
Example #13
0
/**
 * UTF8::strSplit
 *
 * Splits a string into chunks of $split_length characters. ASCII input is
 * delegated to the native str_split(); otherwise the chunks are collected
 * with a UTF-8 regular expression.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str           input string
 * @param   integer  $split_length  maximum number of characters per chunk
 * @return  array|false  list of chunks; false for a non-ASCII string with $split_length < 1
 */
function _strSplit($str, $split_length = 1)
{
    $split_length = (int) $split_length;
    if (\Phalcana\UTF8::isAscii($str)) {
        // The PHP builtin is str_split(); a function named strSplit() does not exist.
        return str_split($str, $split_length);
    }
    if ($split_length < 1) {
        return false;
    }
    if (\Phalcana\UTF8::strlen($str) <= $split_length) {
        // The whole string fits into a single chunk
        return array($str);
    }
    // First alternative: full-size chunks. Second alternative: the shorter remainder at the end.
    preg_match_all('/.{' . $split_length . '}|[^\\x00]{1,' . $split_length . '}$/us', $str, $matches);
    return $matches[0];
}
Example #14
0
/**
 * UTF8::stristr
 *
 * Case-insensitive search that returns the haystack from the first match
 * onwards. Two ASCII strings are handled by the native stristr().
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  $str     haystack
 * @param   string  $search  needle
 * @return  string|false  matched tail of $str, $str itself for an empty needle, or false when not found
 */
function _stristr($str, $search)
{
    if (\Phalcana\UTF8::isAscii($str) && \Phalcana\UTF8::isAscii($search)) {
        return stristr($str, $search);
    }
    if ($search == '') {
        return $str;
    }

    $haystack = \Phalcana\UTF8::strtolower($str);
    $needle = \Phalcana\UTF8::strtolower($search);

    // Capture the text in front of the first match in the lower-cased haystack;
    // its byte length is then used to cut the original string.
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/s', $haystack, $hit);

    return isset($hit[1]) ? substr($str, strlen($hit[1])) : false;
}
Example #15
0
/**
 * UTF8::strcspn
 *
 * Returns the length of the leading run of characters that are NOT in the
 * mask. An empty string or an empty mask yields 0. Two ASCII strings are
 * handled by the native strcspn().
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str     input string
 * @param   string   $mask    characters that end the run
 * @param   integer  $offset  start position of the examined part
 * @param   integer  $length  length of the examined part
 * @return  integer
 */
function _strcspn($str, $mask, $offset = null, $length = null)
{
    if ($str == '' || $mask == '') {
        return 0;
    }

    if (\Phalcana\UTF8::isAscii($str) && \Phalcana\UTF8::isAscii($mask)) {
        // Only forward the optional arguments that were actually supplied
        if ($offset === null) {
            return strcspn($str, $mask);
        }
        if ($length === null) {
            return strcspn($str, $mask, $offset);
        }

        return strcspn($str, $mask, $offset, $length);
    }

    if (!($offset === null && $length === null)) {
        $str = \Phalcana\UTF8::substr($str, $offset, $length);
    }

    // Escape these characters:  - [ ] . : \ ^ /
    // The . and : are escaped to prevent possible warnings about POSIX regex elements
    $escaped = preg_replace('#[-[\\].:\\\\^/]#', '\\\\$0', $mask);
    preg_match('/^[^' . $escaped . ']+/u', $str, $lead);
    if (!isset($lead[0])) {
        return 0;
    }

    return \Phalcana\UTF8::strlen($lead[0]);
}
Example #16
0
/**
 * UTF8::strtoupper
 *
 * Converts a string to upper case. ASCII input is handed to the native
 * strtoupper(). Other input is converted to an array of code points, each
 * code point found in the lower-to-upper table is replaced by its mapped
 * value, and the array is converted back to a string.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  $str  input string
 * @return  string|false  converted string, or false when UTF8::toUnicode() returns false
 */
function _strtoupper($str)
{
    if (\Phalcana\UTF8::isAscii($str)) {
        return strtoupper($str);
    }
    // The table is built once and kept in a static for subsequent calls
    static $utf8_lower_to_upper = null;
    if ($utf8_lower_to_upper === null) {
        // Lower-case code point => upper-case code point. A few keys appear more than once;
        // every repeat maps to the same value, so the duplicates have no effect.
        $utf8_lower_to_upper = array(0x61 => 0x41, 0x3c6 => 0x3a6, 0x163 => 0x162, 0xe5 => 0xc5, 0x62 => 0x42, 0x13a => 0x139, 0xe1 => 0xc1, 0x142 => 0x141, 0x3cd => 0x38e, 0x101 => 0x100, 0x491 => 0x490, 0x3b4 => 0x394, 0x15b => 0x15a, 0x64 => 0x44, 0x3b3 => 0x393, 0xf4 => 0xd4, 0x44a => 0x42a, 0x439 => 0x419, 0x113 => 0x112, 0x43c => 0x41c, 0x15f => 0x15e, 0x144 => 0x143, 0xee => 0xce, 0x45e => 0x40e, 0x44f => 0x42f, 0x3ba => 0x39a, 0x155 => 0x154, 0x69 => 0x49, 0x73 => 0x53, 0x1e1f => 0x1e1e, 0x135 => 0x134, 0x447 => 0x427, 0x3c0 => 0x3a0, 0x438 => 0x418, 0xf3 => 0xd3, 0x440 => 0x420, 0x454 => 0x404, 0x435 => 0x415, 0x449 => 0x429, 0x14b => 0x14a, 0x431 => 0x411, 0x459 => 0x409, 0x1e03 => 0x1e02, 0xf6 => 0xd6, 0xf9 => 0xd9, 0x6e => 0x4e, 0x451 => 0x401, 0x3c4 => 0x3a4, 0x443 => 0x423, 0x15d => 0x15c, 0x453 => 0x403, 0x3c8 => 0x3a8, 0x159 => 0x158, 0x67 => 0x47, 0xe4 => 0xc4, 0x3ac => 0x386, 0x3ae => 0x389, 0x167 => 0x166, 0x3be => 0x39e, 0x165 => 0x164, 0x117 => 0x116, 0x109 => 0x108, 0x76 => 0x56, 0xfe => 0xde, 0x157 => 0x156, 0xfa => 0xda, 0x1e61 => 0x1e60, 0x1e83 => 0x1e82, 0xe2 => 0xc2, 0x119 => 0x118, 0x146 => 0x145, 0x70 => 0x50, 0x151 => 0x150, 0x44e => 0x42e, 0x129 => 0x128, 0x3c7 => 0x3a7, 0x13e => 0x13d, 0x442 => 0x422, 0x7a => 0x5a, 0x448 => 0x428, 0x3c1 => 0x3a1, 0x1e81 => 0x1e80, 0x16d => 0x16c, 0xf5 => 0xd5, 0x75 => 0x55, 0x177 => 0x176, 0xfc => 0xdc, 0x1e57 => 0x1e56, 0x3c3 => 0x3a3, 0x43a => 0x41a, 0x6d => 0x4d, 0x16b => 0x16a, 0x171 => 0x170, 0x444 => 0x424, 0xec => 0xcc, 0x169 => 0x168, 0x3bf => 0x39f, 0x6b => 0x4b, 0xf2 => 0xd2, 0xe0 => 0xc0, 0x434 => 0x414, 0x3c9 => 0x3a9, 0x1e6b => 0x1e6a, 0xe3 => 0xc3, 0x44d => 0x42d, 0x436 => 0x416, 0x1a1 => 0x1a0, 0x10d => 0x10c, 0x11d => 0x11c, 0xf0 => 0xd0, 0x13c => 0x13b, 0x45f => 0x40f, 0x45a => 0x40a, 0xe8 => 0xc8, 0x3c5 => 0x3a5, 0x66 => 0x46, 0xfd => 0xdd, 0x63 => 0x43, 0x21b => 0x21a, 0xea => 0xca, 0x3b9 => 0x399, 0x17a => 0x179, 0xef => 0xcf, 0x1b0 => 0x1af, 0x65 => 0x45, 0x3bb => 0x39b, 0x3b8 => 
0x398, 0x3bc => 0x39c, 0x45c => 0x40c, 0x43f => 0x41f, 0x44c => 0x42c, 0xfe => 0xde, 0xf0 => 0xd0, 0x1ef3 => 0x1ef2, 0x68 => 0x48, 0xeb => 0xcb, 0x111 => 0x110, 0x433 => 0x413, 0x12f => 0x12e, 0xe6 => 0xc6, 0x78 => 0x58, 0x161 => 0x160, 0x16f => 0x16e, 0x3b1 => 0x391, 0x457 => 0x407, 0x173 => 0x172, 0xff => 0x178, 0x6f => 0x4f, 0x43b => 0x41b, 0x3b5 => 0x395, 0x445 => 0x425, 0x121 => 0x120, 0x17e => 0x17d, 0x17c => 0x17b, 0x3b6 => 0x396, 0x3b2 => 0x392, 0x3ad => 0x388, 0x1e85 => 0x1e84, 0x175 => 0x174, 0x71 => 0x51, 0x437 => 0x417, 0x1e0b => 0x1e0a, 0x148 => 0x147, 0x105 => 0x104, 0x458 => 0x408, 0x14d => 0x14c, 0xed => 0xcd, 0x79 => 0x59, 0x10b => 0x10a, 0x3ce => 0x38f, 0x72 => 0x52, 0x430 => 0x410, 0x455 => 0x405, 0x452 => 0x402, 0x127 => 0x126, 0x137 => 0x136, 0x12b => 0x12a, 0x3af => 0x38a, 0x44b => 0x42b, 0x6c => 0x4c, 0x3b7 => 0x397, 0x125 => 0x124, 0x219 => 0x218, 0xfb => 0xdb, 0x11f => 0x11e, 0x43e => 0x41e, 0x1e41 => 0x1e40, 0x3bd => 0x39d, 0x107 => 0x106, 0x3cb => 0x3ab, 0x446 => 0x426, 0xfe => 0xde, 0xe7 => 0xc7, 0x3ca => 0x3aa, 0x441 => 0x421, 0x432 => 0x412, 0x10f => 0x10e, 0xf8 => 0xd8, 0x77 => 0x57, 0x11b => 0x11a, 0x74 => 0x54, 0x6a => 0x4a, 0x45b => 0x40b, 0x456 => 0x406, 0x103 => 0x102, 0x3bb => 0x39b, 0xf1 => 0xd1, 0x43d => 0x41d, 0x3cc => 0x38c, 0xe9 => 0xc9, 0xf0 => 0xd0, 0x457 => 0x407, 0x123 => 0x122);
    }
    // Work on code points rather than bytes
    $uni = \Phalcana\UTF8::toUnicode($str);
    if ($uni === false) {
        return false;
    }
    // Replace every code point that has an entry in the table; others stay as they are
    for ($i = 0, $c = count($uni); $i < $c; $i++) {
        if (isset($utf8_lower_to_upper[$uni[$i]])) {
            $uni[$i] = $utf8_lower_to_upper[$uni[$i]];
        }
    }
    return \Phalcana\UTF8::fromUnicode($uni);
}
Example #17
0
/**
 * UTF8::strtolower
 *
 * Converts a string to lower case. ASCII input is handed to the native
 * strtolower(). Other input is converted to an array of code points, each
 * code point found in the upper-to-lower table is replaced by its mapped
 * value, and the array is converted back to a string.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  $str  input string
 * @return  string|false  converted string, or false when UTF8::toUnicode() returns false
 */
function _strtolower($str)
{
    if (\Phalcana\UTF8::isAscii($str)) {
        return strtolower($str);
    }
    // The table is built once and kept in a static for subsequent calls
    static $utf8_upper_to_lower = null;
    if ($utf8_upper_to_lower === null) {
        // Upper-case code point => lower-case code point. A few keys appear more than once;
        // every repeat maps to the same value, so the duplicates have no effect.
        $utf8_upper_to_lower = array(0x41 => 0x61, 0x3a6 => 0x3c6, 0x162 => 0x163, 0xc5 => 0xe5, 0x42 => 0x62, 0x139 => 0x13a, 0xc1 => 0xe1, 0x141 => 0x142, 0x38e => 0x3cd, 0x100 => 0x101, 0x490 => 0x491, 0x394 => 0x3b4, 0x15a => 0x15b, 0x44 => 0x64, 0x393 => 0x3b3, 0xd4 => 0xf4, 0x42a => 0x44a, 0x419 => 0x439, 0x112 => 0x113, 0x41c => 0x43c, 0x15e => 0x15f, 0x143 => 0x144, 0xce => 0xee, 0x40e => 0x45e, 0x42f => 0x44f, 0x39a => 0x3ba, 0x154 => 0x155, 0x49 => 0x69, 0x53 => 0x73, 0x1e1e => 0x1e1f, 0x134 => 0x135, 0x427 => 0x447, 0x3a0 => 0x3c0, 0x418 => 0x438, 0xd3 => 0xf3, 0x420 => 0x440, 0x404 => 0x454, 0x415 => 0x435, 0x429 => 0x449, 0x14a => 0x14b, 0x411 => 0x431, 0x409 => 0x459, 0x1e02 => 0x1e03, 0xd6 => 0xf6, 0xd9 => 0xf9, 0x4e => 0x6e, 0x401 => 0x451, 0x3a4 => 0x3c4, 0x423 => 0x443, 0x15c => 0x15d, 0x403 => 0x453, 0x3a8 => 0x3c8, 0x158 => 0x159, 0x47 => 0x67, 0xc4 => 0xe4, 0x386 => 0x3ac, 0x389 => 0x3ae, 0x166 => 0x167, 0x39e => 0x3be, 0x164 => 0x165, 0x116 => 0x117, 0x108 => 0x109, 0x56 => 0x76, 0xde => 0xfe, 0x156 => 0x157, 0xda => 0xfa, 0x1e60 => 0x1e61, 0x1e82 => 0x1e83, 0xc2 => 0xe2, 0x118 => 0x119, 0x145 => 0x146, 0x50 => 0x70, 0x150 => 0x151, 0x42e => 0x44e, 0x128 => 0x129, 0x3a7 => 0x3c7, 0x13d => 0x13e, 0x422 => 0x442, 0x5a => 0x7a, 0x428 => 0x448, 0x3a1 => 0x3c1, 0x1e80 => 0x1e81, 0x16c => 0x16d, 0xd5 => 0xf5, 0x55 => 0x75, 0x176 => 0x177, 0xdc => 0xfc, 0x1e56 => 0x1e57, 0x3a3 => 0x3c3, 0x41a => 0x43a, 0x4d => 0x6d, 0x16a => 0x16b, 0x170 => 0x171, 0x424 => 0x444, 0xcc => 0xec, 0x168 => 0x169, 0x39f => 0x3bf, 0x4b => 0x6b, 0xd2 => 0xf2, 0xc0 => 0xe0, 0x414 => 0x434, 0x3a9 => 0x3c9, 0x1e6a => 0x1e6b, 0xc3 => 0xe3, 0x42d => 0x44d, 0x416 => 0x436, 0x1a0 => 0x1a1, 0x10c => 0x10d, 0x11c => 0x11d, 0xd0 => 0xf0, 0x13b => 0x13c, 0x40f => 0x45f, 0x40a => 0x45a, 0xc8 => 0xe8, 0x3a5 => 0x3c5, 0x46 => 0x66, 0xdd => 0xfd, 0x43 => 0x63, 0x21a => 0x21b, 0xca => 0xea, 0x399 => 0x3b9, 0x179 => 0x17a, 0xcf => 0xef, 0x1af => 0x1b0, 0x45 => 0x65, 0x39b => 0x3bb, 0x398 => 
0x3b8, 0x39c => 0x3bc, 0x40c => 0x45c, 0x41f => 0x43f, 0x42c => 0x44c, 0xde => 0xfe, 0xd0 => 0xf0, 0x1ef2 => 0x1ef3, 0x48 => 0x68, 0xcb => 0xeb, 0x110 => 0x111, 0x413 => 0x433, 0x12e => 0x12f, 0xc6 => 0xe6, 0x58 => 0x78, 0x160 => 0x161, 0x16e => 0x16f, 0x391 => 0x3b1, 0x407 => 0x457, 0x172 => 0x173, 0x178 => 0xff, 0x4f => 0x6f, 0x41b => 0x43b, 0x395 => 0x3b5, 0x425 => 0x445, 0x120 => 0x121, 0x17d => 0x17e, 0x17b => 0x17c, 0x396 => 0x3b6, 0x392 => 0x3b2, 0x388 => 0x3ad, 0x1e84 => 0x1e85, 0x174 => 0x175, 0x51 => 0x71, 0x417 => 0x437, 0x1e0a => 0x1e0b, 0x147 => 0x148, 0x104 => 0x105, 0x408 => 0x458, 0x14c => 0x14d, 0xcd => 0xed, 0x59 => 0x79, 0x10a => 0x10b, 0x38f => 0x3ce, 0x52 => 0x72, 0x410 => 0x430, 0x405 => 0x455, 0x402 => 0x452, 0x126 => 0x127, 0x136 => 0x137, 0x12a => 0x12b, 0x38a => 0x3af, 0x42b => 0x44b, 0x4c => 0x6c, 0x397 => 0x3b7, 0x124 => 0x125, 0x218 => 0x219, 0xdb => 0xfb, 0x11e => 0x11f, 0x41e => 0x43e, 0x1e40 => 0x1e41, 0x39d => 0x3bd, 0x106 => 0x107, 0x3ab => 0x3cb, 0x426 => 0x446, 0xde => 0xfe, 0xc7 => 0xe7, 0x3aa => 0x3ca, 0x421 => 0x441, 0x412 => 0x432, 0x10e => 0x10f, 0xd8 => 0xf8, 0x57 => 0x77, 0x11a => 0x11b, 0x54 => 0x74, 0x4a => 0x6a, 0x40b => 0x45b, 0x406 => 0x456, 0x102 => 0x103, 0x39b => 0x3bb, 0xd1 => 0xf1, 0x41d => 0x43d, 0x38c => 0x3cc, 0xc9 => 0xe9, 0xd0 => 0xf0, 0x407 => 0x457, 0x122 => 0x123);
    }
    // Work on code points rather than bytes
    $uni = \Phalcana\UTF8::toUnicode($str);
    if ($uni === false) {
        return false;
    }
    // Replace every code point that has an entry in the table; others stay as they are
    for ($i = 0, $c = count($uni); $i < $c; $i++) {
        if (isset($utf8_upper_to_lower[$uni[$i]])) {
            $uni[$i] = $utf8_upper_to_lower[$uni[$i]];
        }
    }
    return \Phalcana\UTF8::fromUnicode($uni);
}
Example #18
0
/**
 * UTF8::strIreplace
 *
 * Case-insensitive replace. All-ASCII input is delegated to the native
 * str_ireplace(). Array subjects and array searches are unrolled into
 * recursive calls; the scalar case finds matches in a lower-cased copy of the
 * subject and applies the replacements to the original at the same byte
 * offsets.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string|array  $search   text to look for
 * @param   string|array  $replace  replacement text
 * @param   string|array  $str      subject
 * @param   integer       $count    incremented by the number of replacements made
 * @return  string|array
 */
function _strIreplace($search, $replace, $str, &$count = null)
{
    if (\Phalcana\UTF8::isAscii($search) && \Phalcana\UTF8::isAscii($replace) && \Phalcana\UTF8::isAscii($str)) {
        return str_ireplace($search, $replace, $str, $count);
    }
    if (is_array($str)) {
        // Process every subject on its own
        foreach ($str as $key => $val) {
            $str[$key] = \Phalcana\UTF8::strIreplace($search, $replace, $val, $count);
        }
        return $str;
    }
    if (is_array($search)) {
        $keys = array_keys($search);
        foreach ($keys as $k) {
            if (is_array($replace)) {
                if (array_key_exists($k, $replace)) {
                    $str = \Phalcana\UTF8::strIreplace($search[$k], $replace[$k], $str, $count);
                } else {
                    // No matching replacement: remove the search text
                    $str = \Phalcana\UTF8::strIreplace($search[$k], '', $str, $count);
                }
            } else {
                $str = \Phalcana\UTF8::strIreplace($search[$k], $replace, $str, $count);
            }
        }
        return $str;
    }
    if ((string) $search === '') {
        // An empty needle matches with zero width, so the loop below would never
        // advance. Nothing can be replaced; leave the subject untouched.
        $count += 0;
        return $str;
    }
    $search = \Phalcana\UTF8::strtolower($search);
    $str_lower = \Phalcana\UTF8::strtolower($str);
    $search_bytes = strlen($search);
    // Each replacement already applied shifts the remaining text of $str by this many bytes
    $shift_per_replacement = strlen($replace) - $search_bytes;
    $total_matched_strlen = 0;
    $i = 0;
    while (preg_match('/(.*?)' . preg_quote($search, '/') . '/s', $str_lower, $matches)) {
        $matched_strlen = strlen($matches[0]);
        // Consume the scanned part of the lower-cased copy
        $str_lower = substr($str_lower, $matched_strlen);
        // Position in the unmodified subject, corrected for earlier replacements
        $offset = $total_matched_strlen + strlen($matches[1]) + $i * $shift_per_replacement;
        $str = substr_replace($str, $replace, $offset, $search_bytes);
        $total_matched_strlen += $matched_strlen;
        $i++;
    }
    $count += $i;
    return $str;
}
Example #19
0
 /**
  * Checks that UTF8::ord returns the expected value for each data set
  * supplied by the provider.
  *
  * @test
  * @dataProvider provider_ord
  */
 public function test_ord($input, $expected)
 {
     $actual = UTF8::ord($input);

     $this->assertSame($expected, $actual);
 }
Example #20
0
 /**
  * Replaces the given words with a string.
  *
  *     // Displays "What the #####, man!"
  *     echo Text::censor('What the frick, man!', array('frick'));
  *
  * The words are taken from the values of $badwords; a single word may be
  * passed as a plain string. A "*" inside a word matches any run of
  * non-whitespace characters. Matching is case-insensitive.
  *
  * @param   string  $str                    phrase to replace words in
  * @param   array   $badwords               words to replace
  * @param   string  $replacement            replacement string; a single character is repeated
  *                                          once per character of the matched word, anything
  *                                          longer replaces the whole word
  * @param   boolean $replace_partial_words  when false, a word is only replaced if it is
  *                                          delimited by word boundaries, whitespace or the
  *                                          start/end of the phrase
  * @return  string
  * @uses    UTF8::strlen
  */
 public static function censor($str, $badwords, $replacement = '#', $replace_partial_words = true)
 {
     // Normalise once, so a single word given as a string is handled like a one-element list
     $badwords = (array) $badwords;
     if (empty($badwords)) {
         // An empty alternation would match the empty string at every position
         return $str;
     }
     foreach ($badwords as $key => $badword) {
         // Quote the word for the regex, then turn the (now escaped) "*" into a lazy wildcard
         $badwords[$key] = str_replace('\\*', '\\S*?', preg_quote((string) $badword));
     }
     $regex = '(' . implode('|', $badwords) . ')';
     if ($replace_partial_words === false) {
         // Just using \b isn't sufficient when we need to replace a badword that
         // already contains word boundaries itself
         $regex = '(?<=\\b|\\s|^)' . $regex . '(?=\\b|\\s|$)';
     }
     $regex = '!' . $regex . '!ui';
     // if $replacement is a single character: replace each of the characters of the badword with $replacement
     if (UTF8::strlen($replacement) == 1) {
         return preg_replace_callback($regex, function ($matches) use($replacement) {
             return str_repeat($replacement, UTF8::strlen($matches[1]));
         }, $str);
     }
     // if $replacement is not a single character, fully replace the badword with $replacement
     return preg_replace($regex, $replacement, $str);
 }