/**
 * UTF8::str_pad
 *
 * Pads $str with $pad_str until it is $final_str_length long, where lengths
 * are measured with UTF8::strlen(). When both strings pass UTF8::is_ascii()
 * the call is delegated to the native str_pad().
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str               input string
 * @param   integer  $final_str_length  desired length after padding
 * @param   string   $pad_str           string used for padding
 * @param   integer  $pad_type          STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
 * @return  string
 * @throws  UTF8_Exception  when $pad_type is not one of the three constants
 */
function _str_pad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT)
{
    if (UTF8::is_ascii($str) and UTF8::is_ascii($pad_str)) {
        return str_pad($str, $final_str_length, $pad_str, $pad_type);
    }

    $str_length = UTF8::strlen($str);

    // Nothing to pad: the target is not longer than the input
    if ($final_str_length <= 0 or $final_str_length <= $str_length) {
        return $str;
    }

    $pad_str_length = UTF8::strlen($pad_str);

    // An empty pad string cannot extend anything; bail out before it is
    // used as a divisor below (previously a division by zero).
    if ($pad_str_length < 1) {
        return $str;
    }

    $pad_length = $final_str_length - $str_length;

    if ($pad_type == STR_PAD_RIGHT) {
        $repeat = (int) ceil($pad_length / $pad_str_length);

        // Over-pad, then trim to the exact length
        return UTF8::substr($str . str_repeat($pad_str, $repeat), 0, $final_str_length);
    }

    if ($pad_type == STR_PAD_LEFT) {
        $repeat = (int) ceil($pad_length / $pad_str_length);

        return UTF8::substr(str_repeat($pad_str, $repeat), 0, (int) floor($pad_length)) . $str;
    }

    if ($pad_type == STR_PAD_BOTH) {
        // When the padding is odd, the extra character goes on the right
        $pad_length /= 2;
        $pad_length_left = (int) floor($pad_length);
        $pad_length_right = (int) ceil($pad_length);
        $repeat_left = (int) ceil($pad_length_left / $pad_str_length);
        $repeat_right = (int) ceil($pad_length_right / $pad_str_length);

        $pad_left = UTF8::substr(str_repeat($pad_str, $repeat_left), 0, $pad_length_left);
        $pad_right = UTF8::substr(str_repeat($pad_str, $repeat_right), 0, $pad_length_right);

        return $pad_left . $str . $pad_right;
    }

    throw new UTF8_Exception("UTF8::str_pad: Unknown padding type (:pad_type)", [':pad_type' => $pad_type]);
}
/**
 * Scores the answers posted in $_POST['answers'] and writes a JSON summary
 * to the response body.
 *
 * The payload is split on ',' into items and each item on '.' into a
 * question id and a variant id. The last character of the payload is
 * dropped before splitting (presumably a trailing comma; confirm against
 * the client that builds the string).
 *
 * Response shape: {"total": int, "right": int, "points": [{"quest": id, "right": 0|1}, ...]}
 *
 * @return void
 */
public function action_ent()
{
    $answers = Security::xss_clean(Arr::get($_POST, 'answers', ''));
    $answers = UTF8::substr($answers, 0, UTF8::strlen($answers) - 1);
    $list = explode(',', $answers);

    $total = 0;
    $right = 0;
    $points = array();

    foreach ($list as $item) {
        $total++;
        $e = explode('.', $item);

        // explode() always yields index 0, but index 1 is missing when the
        // item has no '.'. Treat that as variant id 0 instead of reading an
        // undefined offset on malformed input.
        $quest_id = (int) $e[0];
        $variant_id = isset($e[1]) ? (int) $e[1] : 0;

        $quest = ORM::factory('Ent_Quest', $quest_id);
        if ($quest->loaded()) {
            // The answer is correct only if the chosen variant belongs to
            // this question and is flagged as right
            $variant = ORM::factory('Quest_Variant')
                ->where('quest_id', '=', $quest->id)
                ->and_where('id', '=', $variant_id)
                ->and_where('right', '=', '1')
                ->find();

            if ($variant->loaded()) {
                $right++;
                $points[] = array('quest' => $quest->id, 'right' => 1);
            } else {
                $points[] = array('quest' => $quest->id, 'right' => 0);
            }
        }
    }

    $data = array('total' => $total, 'right' => $right, 'points' => $points);
    $this->response->body(json_encode($data));
}
/**
 * Builds a short plain-text description from $text and stores it in
 * $this->description.
 *
 * Tags are stripped, then whole space-separated words are appended until
 * adding the next one would exceed the 140-character budget. A trailing
 * comma is removed, and '...' is appended unless the text ends with one of
 * . ! ? : "
 *
 * @param   string  $text  source text, may contain HTML
 * @return  $this
 */
public function snippet($text = '')
{
    $description = '';
    $chars = array('.', '!', '?', ':', '"');

    if (!empty($text)) {
        $text = strip_tags($text);

        foreach (explode(' ', $text) as $v) {
            if (!empty($v)) {
                $countdescription = UTF8::strlen($description);
                $countword = UTF8::strlen($v);

                // -1 discounts the trailing space kept on $description
                if ($countdescription - 1 + $countword > 140) {
                    break;
                }

                $description .= $v . ' ';
            }
        }

        $description = rtrim($description);

        if (!empty($description)) {
            // Take the last *character*. The previous code used the
            // character count as a byte index, which picks a wrong byte as
            // soon as the text contains multibyte characters.
            $lastchar = UTF8::substr($description, -1);

            if ($lastchar === ',') {
                $description = UTF8::substr($description, 0, UTF8::strlen($description) - 1);
            }

            if (!in_array($lastchar, $chars, true)) {
                $description .= '...';
            }
        }
    }

    $this->description = $description;

    return $this;
}
/**
 * Outputs the Captcha image.
 *
 * Draws random semi-transparent white arcs, then the response text as one
 * string: white offset copies first (more of them at lower complexity),
 * then the black foreground text on top.
 *
 * @param boolean $html HTML output
 * @return mixed whatever $this->image_render($html) returns
 */
public function render($html = TRUE)
{
    // Creates a black image to start from
    $this->image_create(Captcha::$config['background']);

    // Add random white/gray arcs, amount depends on complexity setting
    // (complexity is capped at 10 for this calculation)
    $count = (Captcha::$config['width'] + Captcha::$config['height']) / 2;
    $count = $count / 5 * min(10, Captcha::$config['complexity']);
    for ($i = 0; $i < $count; $i++) {
        imagesetthickness($this->image, mt_rand(1, 2));
        $color = imagecolorallocatealpha($this->image, 255, 255, 255, mt_rand(0, 120));
        imagearc($this->image, mt_rand(-Captcha::$config['width'], Captcha::$config['width']), mt_rand(-Captcha::$config['height'], Captcha::$config['height']), mt_rand(-Captcha::$config['width'], Captcha::$config['width']), mt_rand(-Captcha::$config['height'], Captcha::$config['height']), mt_rand(0, 360), mt_rand(0, 360), $color);
    }

    // Use different fonts if available
    $font = Captcha::$config['fontpath'] . Captcha::$config['fonts'][array_rand(Captcha::$config['fonts'])];

    // Draw the character's white shadows
    // Font size is bounded by half the image height and by 80% of the width shared between the characters
    $size = (int) min(Captcha::$config['height'] / 2, Captcha::$config['width'] * 0.8 / UTF8::strlen($this->response));
    // The allowed rotation range narrows as the response gets longer
    // NOTE(review): for responses longer than 15 characters the lower bound exceeds the upper bound - confirm this cannot happen
    $angle = mt_rand(-15 + UTF8::strlen($this->response), 15 - UTF8::strlen($this->response));
    // NOTE(review): the upper bound is a float and can drop below 1 for small widths - confirm configured sizes
    $x = mt_rand(1, Captcha::$config['width'] * 0.9 - $size * UTF8::strlen($this->response));
    $y = (Captcha::$config['height'] - $size) / 2 + $size;
    $color = imagecolorallocate($this->image, 255, 255, 255);
    imagefttext($this->image, $size, $angle, $x + 1, $y + 1, $color, $font, $this->response);

    // Add more shadows for lower complexities
    Captcha::$config['complexity'] < 10 and imagefttext($this->image, $size, $angle, $x - 1, $y - 1, $color, $font, $this->response);
    Captcha::$config['complexity'] < 8 and imagefttext($this->image, $size, $angle, $x - 2, $y + 2, $color, $font, $this->response);
    Captcha::$config['complexity'] < 6 and imagefttext($this->image, $size, $angle, $x + 2, $y - 2, $color, $font, $this->response);
    Captcha::$config['complexity'] < 4 and imagefttext($this->image, $size, $angle, $x + 3, $y + 3, $color, $font, $this->response);
    Captcha::$config['complexity'] < 2 and imagefttext($this->image, $size, $angle, $x - 3, $y - 3, $color, $font, $this->response);

    // Finally draw the foreground characters
    $color = imagecolorallocate($this->image, 0, 0, 0);
    imagefttext($this->image, $size, $angle, $x, $y, $color, $font, $this->response);

    // Output
    return $this->image_render($html);
}
/**
 * Left-pads $word in place with $letter until it is $length long, measured
 * with UTF8::strlen(). Words already at or above $length are left untouched.
 *
 * @param   string   $word    word to pad (modified by reference)
 * @param   string   $letter  string repeated in front of the word
 * @param   integer  $length  minimum length
 * @return  void
 */
protected function _pad_word(&$word, $letter, $length = 4)
{
    $missing = $length - UTF8::strlen($word);

    if ($missing <= 0) {
        return;
    }

    $word = str_repeat($letter, $missing) . $word;
}
/**
 * UTF8::str_pad
 *
 * Pads $str with $pad_str until it is $final_str_length long, where lengths
 * are measured with UTF8::strlen(). When both strings pass UTF8::is_ascii()
 * the call is delegated to the native str_pad(). An unknown $pad_type
 * raises an E_USER_ERROR.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2010 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str               input string
 * @param   integer  $final_str_length  desired length after padding
 * @param   string   $pad_str           string used for padding
 * @param   integer  $pad_type          STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
 * @return  string
 */
function _str_pad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT)
{
    if (UTF8::is_ascii($str) and UTF8::is_ascii($pad_str)) {
        return str_pad($str, $final_str_length, $pad_str, $pad_type);
    }

    $str_length = UTF8::strlen($str);

    // Nothing to pad: the target is not longer than the input
    if ($final_str_length <= 0 or $final_str_length <= $str_length) {
        return $str;
    }

    $pad_str_length = UTF8::strlen($pad_str);

    // An empty pad string cannot extend anything; bail out before it is
    // used as a divisor below (previously a division by zero).
    if ($pad_str_length < 1) {
        return $str;
    }

    $pad_length = $final_str_length - $str_length;

    if ($pad_type == STR_PAD_RIGHT) {
        $repeat = (int) ceil($pad_length / $pad_str_length);

        // Over-pad, then trim to the exact length
        return UTF8::substr($str . str_repeat($pad_str, $repeat), 0, $final_str_length);
    }

    if ($pad_type == STR_PAD_LEFT) {
        $repeat = (int) ceil($pad_length / $pad_str_length);

        return UTF8::substr(str_repeat($pad_str, $repeat), 0, (int) floor($pad_length)) . $str;
    }

    if ($pad_type == STR_PAD_BOTH) {
        // When the padding is odd, the extra character goes on the right
        $pad_length /= 2;
        $pad_length_left = (int) floor($pad_length);
        $pad_length_right = (int) ceil($pad_length);
        $repeat_left = (int) ceil($pad_length_left / $pad_str_length);
        $repeat_right = (int) ceil($pad_length_right / $pad_str_length);

        $pad_left = UTF8::substr(str_repeat($pad_str, $repeat_left), 0, $pad_length_left);
        $pad_right = UTF8::substr(str_repeat($pad_str, $repeat_right), 0, $pad_length_right);

        return $pad_left . $str . $pad_right;
    }

    trigger_error('UTF8::str_pad: Unknown padding type (' . $pad_type . ')', E_USER_ERROR);
}
/**
 * UTF8::wordwrap
 * Taken and adapted from Zend Framework by Ivan Tcholakov, 2015.
 *
 * Walks the string one position at a time (positions and lengths come from
 * UTF8::strlen()/UTF8::substr()) and inserts $break where a line would
 * exceed $width.
 *
 * @param string The input string.
 * @param int The number of characters at which the string will be wrapped.
 * @param string The line is broken using the optional break parameter.
 * @param bool If the cut is set to TRUE, the string is always wrapped at or before the specified width.
 * @return string|false FALSE (with an E_USER_WARNING) when $break is empty or when $width is 0 and $cut is set.
 * @license http://framework.zend.com/license/new-bsd New BSD License
 */
function _wordwrap($string, $width = 75, $break = "\n", $cut = false)
{
    $string = @(string) $string;
    if ($string === '') {
        return '';
    }

    $break = @(string) $break;
    if ($break === '') {
        trigger_error('UTF8::wordwrap(): Break string cannot be empty.', E_USER_WARNING);
        return false;
    }

    $width = (int) $width;
    if ($width === 0 && $cut) {
        trigger_error('UTF8::wordwrap(): Cannot force cut when width is zero.', E_USER_WARNING);
        return false;
    }

    $string_length = UTF8::strlen($string);
    $break_length = UTF8::strlen($break);

    $result = '';
    // Start position of the line currently being assembled
    $last_start = 0;
    // Position of the most recent space seen on the current line
    $last_space = 0;

    for ($current = 0; $current < $string_length; $current++) {
        $char = UTF8::substr($string, $current, 1);

        // Look ahead by the break length to detect a break already present in the input
        $possible_break = $char;
        if ($break_length !== 1) {
            $possible_break = UTF8::substr($string, $current, $break_length);
        }

        // Existing break: copy the line including the break and start a new line after it
        if ($possible_break === $break) {
            $result .= UTF8::substr($string, $last_start, $current - $last_start + $break_length);
            $current += $break_length - 1;
            $last_start = $last_space = $current + 1;
            continue;
        }

        // Space: if the line is already full, break here (the space itself is dropped)
        if ($char === ' ') {
            if ($current - $last_start >= $width) {
                $result .= UTF8::substr($string, $last_start, $current - $last_start) . $break;
                $last_start = $current + 1;
            }
            $last_space = $current;
            continue;
        }

        // Line is full, no space seen on it and cutting is allowed: cut inside the word
        if ($current - $last_start >= $width && $cut && $last_start >= $last_space) {
            $result .= UTF8::substr($string, $last_start, $current - $last_start) . $break;
            $last_start = $last_space = $current;
            continue;
        }

        // Line is full and a space was seen on it: break at that space
        if ($current - $last_start >= $width && $last_start < $last_space) {
            $result .= UTF8::substr($string, $last_start, $last_space - $last_start) . $break;
            $last_start = $last_space = $last_space + 1;
            continue;
        }
    }

    // Append whatever remains after the last break
    if ($last_start !== $current) {
        $result .= UTF8::substr($string, $last_start, $current - $last_start);
    }

    return $result;
}
/**
 * Checks that $str is exactly $val long.
 *
 * The length comes from UTF8::strlen() when IS_UTF8_CHARSET is truthy and
 * from the native strlen() otherwise.
 *
 * @param   string  $str  value to measure
 * @param   mixed   $val  required length; must be numeric
 * @return  boolean       FALSE when $val is not numeric or the length differs
 */
public function exact_length($str, $val)
{
    if (is_numeric($val)) {
        $required = (int) $val;
        $actual = IS_UTF8_CHARSET ? UTF8::strlen($str) : strlen($str);

        return $actual === $required;
    }

    return false;
}
/**
 * Replaces every match of the pattern from $this->get_pattern() in
 * $this->string.
 *
 * A one-character $replacement is repeated to the length of the first
 * capture group, so the censored text keeps its length. Any other
 * $replacement is substituted as-is.
 *
 * The original implementation relied on the /e (PREG_REPLACE_EVAL)
 * modifier, which was removed in PHP 7 and which also ran addslashes() on
 * the match before measuring it. A callback is used instead.
 *
 * @param   string   $replacement            replacement string
 * @param   boolean  $replace_partial_words  forwarded to get_pattern()
 * @return  string
 */
public function clean($replacement = '#', $replace_partial_words = FALSE)
{
    // NOTE(review): assumes get_pattern() returns a complete delimited
    // pattern with modifiers, as the original appended a modifier to it.
    $regex = $this->get_pattern($replace_partial_words);

    if (UTF8::strlen($replacement) == 1) {
        return preg_replace_callback(
            $regex,
            function ($matches) use ($replacement) {
                // A missing first group behaves like an empty match
                $matched = isset($matches[1]) ? $matches[1] : '';

                return str_repeat($replacement, UTF8::strlen($matched));
            },
            $this->string
        );
    }

    return preg_replace($regex, $replacement, $this->string);
}
/**
 * UTF8::substr_replace
 *
 * Replaces part of $str with $replacement. Strings that pass
 * UTF8::is_ascii() go to the native substr_replace(); otherwise both
 * strings are split into characters with a /u regex and spliced.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str          input string
 * @param   string   $replacement  replacement string
 * @param   integer  $offset       start offset
 * @param   integer  $length       number of characters to replace; NULL replaces to the end
 * @return  string
 */
function _substr_replace($str, $replacement, $offset, $length = null)
{
    if (UTF8::is_ascii($str)) {
        if ($length === null) {
            return substr_replace($str, $replacement, $offset);
        }

        return substr_replace($str, $replacement, $offset, $length);
    }

    if ($length === null) {
        $length = UTF8::strlen($str);
    } else {
        $length = (int) $length;
    }

    // Split both strings into arrays of single characters
    preg_match_all('/./us', $str, $source_chars);
    preg_match_all('/./us', $replacement, $insert_chars);

    array_splice($source_chars[0], $offset, $length, $insert_chars[0]);

    return implode('', $source_chars[0]);
}
/**
 * Counts the space-separated words in $string, after stripping tags, whose
 * UTF8::strlen() is at least 2.
 *
 * @param   string   $string  text, may contain HTML
 * @return  integer
 */
private function countThis($string)
{
    $total = 0;

    foreach (explode(" ", strip_tags($string)) as $word) {
        if (UTF8::strlen($word) >= 2) {
            $total++;
        }
    }

    return $total;
}
/**
 * UTF8::substr
 *
 * Returns part of a UTF-8 string. Strings that pass UTF8::is_ascii() go to
 * the native substr(); otherwise a /u regex is built that skips $offset
 * characters and captures $length characters.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str     input string
 * @param   integer  $offset  start offset; negative counts from the end
 * @param   integer  $length  length; NULL means to the end, negative leaves that many characters off the end
 * @return  string
 */
function _substr($str, $offset, $length = NULL)
{
    if (UTF8::is_ascii($str)) {
        return $length === NULL ? substr($str, $offset) : substr($str, $offset, $length);
    }

    // Normalize params
    $str = (string) $str;
    $strlen = UTF8::strlen($str);

    // Normalize to a positive integer offset. The original wrote
    // "(int) ($offset < 0) ? ... : $offset", which cast the comparison
    // result rather than the offset and left non-negative offsets uncast.
    $offset = (int) $offset;
    if ($offset < 0) {
        $offset = max(0, $strlen + $offset);
    }

    $length = $length === NULL ? NULL : (int) $length;

    // Impossible
    if ($length === 0 or $offset >= $strlen or ($length < 0 and $length <= $offset - $strlen)) {
        return '';
    }

    // Whole string
    if ($offset == 0 and ($length === NULL or $length >= $strlen)) {
        return $str;
    }

    // Build regex
    $regex = '^';

    // Create an offset expression
    if ($offset > 0) {
        // PCRE repeating quantifiers must be less than 65536, so repeat when necessary
        $x = (int) ($offset / 65535);
        $y = (int) ($offset % 65535);
        $regex .= $x == 0 ? '' : '(?:.{65535}){' . $x . '}';
        $regex .= $y == 0 ? '' : '.{' . $y . '}';
    }

    // Create a length expression
    if ($length === NULL) {
        // No length set, grab it all
        $regex .= '(.*)';
    } elseif ($length > 0) {
        // Reduce length so that it can't go beyond the end of the string
        $length = min($strlen - $offset, $length);

        $x = (int) ($length / 65535);
        $y = (int) ($length % 65535);
        $regex .= '(';
        $regex .= $x == 0 ? '' : '(?:.{65535}){' . $x . '}';
        $regex .= '.{' . $y . '})';
    } else {
        // Negative length: capture everything except the trailing -$length characters
        $x = (int) (-$length / 65535);
        $y = (int) (-$length % 65535);
        $regex .= '(.*)';
        $regex .= $x == 0 ? '' : '(?:.{65535}){' . $x . '}';
        $regex .= '.{' . $y . '}';
    }

    preg_match('/' . $regex . '/us', $str, $matches);

    // preg_match() leaves $matches empty when it fails or does not match;
    // return an empty string rather than reading an undefined index.
    return isset($matches[1]) ? $matches[1] : '';
}
/**
 * UTF8::strrpos
 *
 * Finds the position of the last occurrence of $search in $str, measured
 * with UTF8::strlen(). Pure-ASCII input (per UTF8::is_ascii()) goes to the
 * native strrpos().
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str     haystack
 * @param   string   $search  needle
 * @param   integer  $offset  offset from which to start searching
 * @return  integer|boolean   position, or FALSE when not found
 */
function _strrpos($str, $search, $offset = 0)
{
    $offset = (int) $offset;

    if (UTF8::is_ascii($str) and UTF8::is_ascii($search)) {
        return strrpos($str, $search, $offset);
    }

    if ($offset != 0) {
        // Search the tail only, then shift the result back by the offset
        $tail = UTF8::substr($str, $offset);
        $found = UTF8::strrpos($tail, $search);

        if ($found === FALSE) {
            return FALSE;
        }

        return $found + $offset;
    }

    // With limit -1, explode() drops the piece after the last needle, so
    // re-joining the pieces yields everything before the last occurrence
    $pieces = explode($search, $str, -1);

    if (!isset($pieces[0])) {
        return FALSE;
    }

    return UTF8::strlen(implode($search, $pieces));
}
/**
 * UTF8::strpos
 *
 * Finds the position of the first occurrence of $search in $str, measured
 * with UTF8::strlen(). Pure-ASCII input (per UTF8::is_ascii()) goes to the
 * native strpos().
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str     haystack
 * @param   string   $search  needle
 * @param   integer  $offset  offset from which to start searching
 * @return  integer|boolean   position, or false when not found
 */
function _strpos($str, $search, $offset = 0)
{
    $offset = (int) $offset;

    if (UTF8::is_ascii($str) and UTF8::is_ascii($search)) {
        return strpos($str, $search, $offset);
    }

    if ($offset != 0) {
        // Search the tail only, then shift the result back by the offset
        $tail = UTF8::substr($str, $offset);
        $found = UTF8::strpos($tail, $search);

        if ($found === false) {
            return false;
        }

        return $found + $offset;
    }

    // Split once at the first needle: the length of the head is the position
    $parts = explode($search, $str, 2);

    if (!isset($parts[1])) {
        return false;
    }

    return UTF8::strlen($parts[0]);
}
/**
 * Generates a new Captcha challenge.
 *
 * Shuffles the configured word list and returns the first word whose
 * length is within one character of the configured complexity, upper-cased.
 * When no word qualifies, a random word from the list is returned.
 *
 * @return string The challenge answer
 */
public function generate_challenge()
{
    // Load the configured words and randomize their order
    $words = Config::get('captcha', 'words');
    shuffle($words);

    foreach ($words as $candidate) {
        $distance = abs(Captcha::$config['complexity'] - UTF8::strlen($candidate));

        // Skip words that are too far from the desired length
        if ($distance >= 2) {
            continue;
        }

        return UTF8::strtoupper($candidate);
    }

    // Final fallback: any random word
    $fallback_key = array_rand($words);

    return UTF8::strtoupper($words[$fallback_key]);
}
/**
 * UTF8::str_split
 *
 * Splits a string into chunks of $split_length characters. Strings that
 * pass UTF8::is_ascii() go to the native str_split(); otherwise a /u regex
 * collects full-size chunks plus the shorter remainder at the end.
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str           input string
 * @param   integer  $split_length  maximum chunk length
 * @return  array|boolean           chunks, or FALSE when $split_length is below 1 (non-ASCII path)
 */
function _str_split($str, $split_length = 1)
{
    $split_length = (int) $split_length;

    if (UTF8::is_ascii($str)) {
        return str_split($str, $split_length);
    }

    if ($split_length < 1) {
        return FALSE;
    }

    // The whole string fits into a single chunk
    if (!(UTF8::strlen($str) > $split_length)) {
        return array($str);
    }

    $pattern = '/.{' . $split_length . '}|[^\\x00]{1,' . $split_length . '}$/us';
    preg_match_all($pattern, $str, $chunks);

    return $chunks[0];
}
/**
 * Decides whether $token is acceptable.
 *
 * A token is rejected when it is falsy, longer than 32 characters (per
 * UTF8::strlen()), present as a key in $this->stopwords, or made up solely
 * of the characters [ ] < > - _ $ . \ + * /
 *
 * @param   string   $token  token to check (taken by reference, not modified)
 * @return  boolean
 */
protected function _is_valid(&$token)
{
    // Checks run in order and stop at the first one that rejects the token
    $rejected = !$token
        || UTF8::strlen($token) > 32
        || isset($this->stopwords[$token])
        || strspn($token, '[]<>-_$.\\+*/') == strlen($token);

    return !$rejected;
}
/**
 * UTF8::strpos
 *
 * Finds the position of the first occurrence of $search in $str, measured
 * with UTF8::strlen(). Pure-ASCII input (per UTF8::is_ascii()) goes to the
 * native strpos().
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2010 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str     haystack
 * @param   string   $search  needle
 * @param   integer  $offset  offset from which to start searching
 * @return  integer|boolean   position, or FALSE when not found
 */
function _strpos($str, $search, $offset = 0)
{
    $offset = (int) $offset;

    $ascii_only = (UTF8::is_ascii($str) AND UTF8::is_ascii($search));
    if ($ascii_only) {
        return strpos($str, $search, $offset);
    }

    if ($offset == 0) {
        // Split once at the first needle: the length of the head is the position
        $head_and_rest = explode($search, $str, 2);

        if (isset($head_and_rest[1])) {
            return UTF8::strlen($head_and_rest[0]);
        }

        return FALSE;
    }

    // Search the remainder after the offset and shift the result back
    $remainder = UTF8::substr($str, $offset);
    $relative = UTF8::strpos($remainder, $search);

    if ($relative === FALSE) {
        return FALSE;
    }

    return $relative + $offset;
}
/**
 * UTF8::strcspn
 *
 * Returns the length (per UTF8::strlen()) of the initial segment of $str
 * that contains none of the characters in $mask. Pure-ASCII input (per
 * UTF8::is_ascii()) goes to the native strcspn().
 *
 * @package    Kohana
 * @author     Kohana Team
 * @copyright  (c) 2007-2012 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   $str     input string
 * @param   string   $mask    characters to stop at
 * @param   integer  $offset  start offset
 * @param   integer  $length  length of the portion to examine
 * @return  integer
 */
function _strcspn($str, $mask, $offset = NULL, $length = NULL)
{
    if ($str == '' or $mask == '') {
        return 0;
    }

    if (UTF8::is_ascii($str) and UTF8::is_ascii($mask)) {
        // Pass only the arguments that were actually supplied
        if ($offset === NULL) {
            return strcspn($str, $mask);
        }

        if ($length === NULL) {
            return strcspn($str, $mask, $offset);
        }

        return strcspn($str, $mask, $offset, $length);
    }

    if (!($offset === NULL and $length === NULL)) {
        $str = UTF8::substr($str, $offset, $length);
    }

    // Escape these characters: - [ ] . : \ ^ /
    // The . and : are escaped to prevent possible warnings about POSIX regex elements
    $mask = preg_replace('#[-[\\].:\\\\^/]#', '\\\\$0', $mask);

    preg_match('/^[^' . $mask . ']+/u', $str, $leading);

    if (isset($leading[0])) {
        return UTF8::strlen($leading[0]);
    }

    return 0;
}
/**
 * Outputs the Captcha image.
 *
 * Optionally fills the image with a random gradient, overlays random
 * lines, then draws the response one character at a time, each with its
 * own font, color, rotation and size.
 *
 * @param boolean $html HTML output
 * @return mixed whatever $this->image_render($html) returns
 */
public function render($html = TRUE)
{
    // Creates $this->image
    $this->image_create(Captcha::$config['background']);

    // Add a random gradient (only when no background is configured)
    if (empty(Captcha::$config['background'])) {
        $color1 = imagecolorallocate($this->image, mt_rand(200, 255), mt_rand(200, 255), mt_rand(150, 255));
        $color2 = imagecolorallocate($this->image, mt_rand(200, 255), mt_rand(200, 255), mt_rand(150, 255));
        $this->image_gradient($color1, $color2);
    }

    // Add a few random lines running from the top edge to the bottom edge;
    // the upper bound of the line count is four times the complexity
    for ($i = 0, $count = mt_rand(5, Captcha::$config['complexity'] * 4); $i < $count; $i++) {
        $color = imagecolorallocatealpha($this->image, mt_rand(0, 255), mt_rand(0, 255), mt_rand(100, 255), mt_rand(50, 120));
        imageline($this->image, mt_rand(0, Captcha::$config['width']), 0, mt_rand(0, Captcha::$config['width']), Captcha::$config['height'], $color);
    }

    // Calculate character font-size and spacing
    $default_size = min(Captcha::$config['width'], Captcha::$config['height'] * 2) / (UTF8::strlen($this->response) + 1);
    // Each character gets an equal share of 90% of the image width
    $spacing = (int) (Captcha::$config['width'] * 0.9 / UTF8::strlen($this->response));

    // Draw each Captcha character with varying attributes
    for ($i = 0, $strlen = UTF8::strlen($this->response); $i < $strlen; $i++) {
        // Use different fonts if available
        $font = Captcha::$config['fontpath'] . Captcha::$config['fonts'][array_rand(Captcha::$config['fonts'])];

        // Allocate random color, size and rotation attributes to text
        $color = imagecolorallocate($this->image, mt_rand(0, 150), mt_rand(0, 150), mt_rand(0, 150));
        $angle = mt_rand(-40, 20);

        // Scale the character size on image height (80% to 120% of the default size)
        $size = $default_size / 10 * mt_rand(8, 12);
        $box = imageftbbox($size, $angle, $font, UTF8::substr($this->response, $i, 1));

        // Calculate character starting coordinates
        $x = $spacing / 4 + $i * $spacing;
        $y = Captcha::$config['height'] / 2 + ($box[2] - $box[5]) / 4;

        // Write text character to image
        imagefttext($this->image, $size, $angle, $x, $y, $color, $font, UTF8::substr($this->response, $i, 1));
    }

    // Output
    return $this->image_render($html);
}
/**
 * Helper for Kohana::dump(), handles recursion in arrays and objects.
 *
 * Returns an HTML fragment describing $var. Strings longer than $length
 * are truncated for display. Arrays are rendered up to 5 levels deep and
 * objects up to 10; recursion is detected with a marker key (arrays) and
 * the object hash (objects).
 *
 * @param mixed variable to dump
 * @param integer maximum length of strings
 * @param integer recursion level (internal)
 * @return string
 */
protected static function _dump(&$var, $length = 128, $level = 0)
{
    if ($var === NULL) {
        return '<small>NULL</small>';
    } elseif (is_bool($var)) {
        return '<small>bool</small> ' . ($var ? 'TRUE' : 'FALSE');
    } elseif (is_float($var)) {
        return '<small>float</small> ' . $var;
    } elseif (is_resource($var)) {
        if (($type = get_resource_type($var)) === 'stream' and $meta = stream_get_meta_data($var)) {
            $meta = stream_get_meta_data($var);

            if (isset($meta['uri'])) {
                $file = $meta['uri'];

                if (function_exists('stream_is_local')) {
                    // Only exists on PHP >= 5.2.4
                    if (stream_is_local($file)) {
                        $file = Kohana::debug_path($file);
                    }
                }

                return '<small>resource</small><span>(' . $type . ')</span> ' . htmlspecialchars($file, ENT_NOQUOTES, Kohana::$charset);
            }
            // NOTE(review): a stream without a 'uri' entry falls through and the function returns nothing
        } else {
            return '<small>resource</small><span>(' . $type . ')</span>';
        }
    } elseif (is_string($var)) {
        if (UTF8::strlen($var) > $length) {
            // Encode the truncated string
            $str = htmlspecialchars(UTF8::substr($var, 0, $length), ENT_NOQUOTES, Kohana::$charset) . ' …';
        } else {
            // Encode the string
            $str = htmlspecialchars($var, ENT_NOQUOTES, Kohana::$charset);
        }

        // The reported size is the byte length from strlen(), not the UTF8 length used for truncation
        return '<small>string</small><span>(' . strlen($var) . ')</span> "' . $str . '"';
    } elseif (is_array($var)) {
        $output = array();

        // Indentation for this variable
        $space = str_repeat($s = ' ', $level);

        static $marker;

        if ($marker === NULL) {
            // Make a unique marker
            $marker = uniqid("");
        }

        if (empty($var)) {
            // Do nothing
        } elseif (isset($var[$marker])) {
            // The marker key is already present: this array is being dumped further up the call stack
            $output[] = "(\n{$space}{$s}*RECURSION*\n{$space})";
        } elseif ($level < 5) {
            $output[] = "<span>(";

            // Tag the array while its elements are dumped so that recursion can be detected
            $var[$marker] = TRUE;
            foreach ($var as $key => &$val) {
                if ($key === $marker) {
                    continue;
                }
                if (!is_int($key)) {
                    $key = '"' . htmlspecialchars($key, ENT_NOQUOTES, self::$charset) . '"';
                }

                $output[] = "{$space}{$s}{$key} => " . Kohana::_dump($val, $length, $level + 1);
            }
            unset($var[$marker]);

            $output[] = "{$space})</span>";
        } else {
            // Depth too great
            $output[] = "(\n{$space}{$s}...\n{$space})";
        }

        return '<small>array</small><span>(' . count($var) . ')</span> ' . implode("\n", $output);
    } elseif (is_object($var)) {
        // Copy the object as an array
        $array = (array) $var;

        $output = array();

        // Indentation for this variable
        $space = str_repeat($s = ' ', $level);

        $hash = spl_object_hash($var);

        // Objects that are being dumped
        static $objects = array();

        if (empty($var)) {
            // Do nothing
        } elseif (isset($objects[$hash])) {
            $output[] = "{\n{$space}{$s}*RECURSION*\n{$space}}";
        } elseif ($level < 10) {
            $output[] = "<code>{";

            $objects[$hash] = TRUE;
            foreach ($array as $key => &$val) {
                // NOTE(review): the literal compared with $key[0] (and searched for by strrpos() below)
                // appears as an empty string here; it may originally have been a NUL byte,
                // which prefixes non-public property names in an (array) cast - confirm against the real file
                if ($key[0] === "") {
                    // Determine if the access is protected or private
                    $access = '<small>' . ($key[1] === '*' ? 'protected' : 'private') . '</small>';

                    // Remove the access level from the variable name
                    $key = substr($key, strrpos($key, "") + 1);
                } else {
                    $access = '<small>public</small>';
                }

                $output[] = "{$space}{$s}{$access} {$key} => " . Kohana::_dump($val, $length, $level + 1);
            }
            unset($objects[$hash]);

            $output[] = "{$space}}</code>";
        } else {
            // Depth too great
            $output[] = "{\n{$space}{$s}...\n{$space}}";
        }

        return '<small>object</small> <span>' . get_class($var) . '(' . count($array) . ')</span> ' . implode("\n", $output);
    } else {
        // Any other type: print its type name and print_r() output
        return '<small>' . gettype($var) . '</small> ' . htmlspecialchars(print_r($var, TRUE), ENT_NOQUOTES, Kohana::$charset);
    }
}
/**
 * Replaces the given words with a string.
 *
 * A '*' inside a bad word acts as a wildcard for a run of non-whitespace
 * characters. A one-character $replacement is repeated to the length of
 * each matched word; any other $replacement is substituted as-is.
 *
 * Changes from the previous implementation:
 *  - the /e (PREG_REPLACE_EVAL) modifier, removed in PHP 7, is replaced by
 *    preg_replace_callback();
 *  - bad words are quoted with the '!' delimiter, so a word containing '!'
 *    no longer breaks the pattern;
 *  - patterns are collected in a separate array, so a scalar $badwords is
 *    handled correctly;
 *  - an empty word list returns $str unchanged instead of matching the
 *    empty string everywhere.
 *
 * @param string phrase to replace words in
 * @param array words to replace
 * @param string replacement string
 * @param boolean replace words across word boundaries (space, period, etc)
 * @return string
 */
public static function censor($str, $badwords, $replacement = '#', $replace_partial_words = TRUE)
{
    $patterns = array();
    foreach ((array) $badwords as $key => $badword) {
        // Quote the word for use in a regex, then turn the escaped '*' into a lazy wildcard
        $patterns[$key] = str_replace('\\*', '\\S*?', preg_quote((string) $badword, '!'));
    }

    if (empty($patterns)) {
        return $str;
    }

    $regex = '(' . implode('|', $patterns) . ')';

    if ($replace_partial_words === FALSE) {
        // Just using \b isn't sufficient when we need to replace a badword that already contains word boundaries itself
        $regex = '(?<=\\b|\\s|^)' . $regex . '(?=\\b|\\s|$)';
    }

    $regex = '!' . $regex . '!ui';

    if (UTF8::strlen($replacement) == 1) {
        return preg_replace_callback(
            $regex,
            function ($matches) use ($replacement) {
                return str_repeat($replacement, UTF8::strlen($matches[1]));
            },
            $str
        );
    }

    return preg_replace($regex, $replacement, $str);
}
/**
 * Generates a username that passes $this->unique_username().
 *
 * When $id is given, 'user' . $id is tried first and used if it fits the
 * configured length limits and is unique. Otherwise random lowercase
 * a-z strings of one randomly chosen length are generated until one is
 * unique.
 *
 * @param   mixed   $id  optional id used to build the preferred username
 * @return  string
 */
public function create_username($id = null)
{
    $min_length = $this->username_min_length();
    $max_length = $this->username_max_length();

    if ($id != '') {
        $result = 'user' . $id;
        if (UTF8::strlen($result) >= $min_length && UTF8::strlen($result) <= $max_length && $this->unique_username($result, null, true)) {
            return $result;
        }
    }

    $length = rand($min_length, $max_length);
    $chars = 'abcdefghijklmnopqrstuvwxyz';

    // mt_rand() bounds are inclusive, so the highest valid index is
    // strlen - 1. The previous upper bound of strlen could index one past
    // the end of the alphabet and yield a shorter username.
    $last_index = strlen($chars) - 1;

    do {
        $result = '';
        for ($i = 0; $i < $length; $i++) {
            $result .= $chars[mt_rand(0, $last_index)];
        }
    } while (!$this->unique_username($result, null, true));

    return $result;
}
/**
 * Checks a word against several "implausible letter sequence" rules.
 *
 * Returns true when the lowercased word contains four consecutive
 * consonants or three consecutive vowels that are not whitelisted for the
 * language, or when any two-character window of the word is a key of
 * $this->bigrams. Words listed in $this->words_exceptions[$lang] always
 * return false.
 *
 * @param   string  $word  word to check
 * @param   string  $lang  language code used to index the lookup tables
 * @return  boolean
 */
private function _bigram_exists($word, $lang)
{
    if ($lang === 'en') {
        $word = strtolower($word);
    } else {
        $word = UTF8::lowercase($word);
    }

    # Step 0.
    # Skip words found in the list of exception words
    if (array_key_exists($word, $this->words_exceptions[$lang])) {
        return false;
    }

    # Step 1.
    # Look for 4 consonants in a row; examples: больши{нств}о, юрисконсу{льтс}тво
    $consonant_run = '/(?:' . $this->consonant_lc[$lang] . '){4}/sxSX';
    if (preg_match($consonant_run, $word, $m)) {
        if (!array_key_exists($m[0], $this->consonants4_lc[$lang])) {
            return true;
        }
    }

    # Step 2.
    # Look for 3 vowels in a row; examples: длиннош{еее}, зм{еео}бразный
    $vowel_run = '/(?:' . $this->vowel_lc[$lang] . '){3}/sxSX';
    if (preg_match($vowel_run, $word, $m)) {
        if (!array_key_exists($m[0], $this->vowels3_lc[$lang])) {
            return true;
        }
    }

    # Step 3.
    # Slide a two-character window over the word
    $limit = UTF8::strlen($word) - 1;
    $pos = 0;
    while ($pos < $limit) {
        /*
        TODO The quality of the non-existent bigram check could be improved a little
             by taking every bigram position in the word into account,
             not only the beginning and the end of the word.
        */
        $bigram = UTF8::substr($word, $pos, 2);

        if ($pos === 0) {
            # beginning of word
            $bigram = ' ' . $bigram;
        } elseif ($pos === $limit - 1) {
            # ending of word
            $bigram = $bigram . ' ';
        }

        if (array_key_exists($bigram, $this->bigrams)) {
            return true;
        }

        $pos++;
    }

    return false;
}
/**
 * Grammatical parsing of HTML code into sentences and words
 *
 * @param   string      $s           HTML text
 * @param   array|null  $words       Array of all words:
 *                                   array(<abs. word position> => <word>, ...)
 * @param   array|null  $sentences   Array of sentences:
 *                                   array(
 *                                       <sentence number> => array(
 *                                           <abs. word position> => <word>,
 *                                           ...
 *                                       ),
 *                                       ...
 *                                   )
 *                                   Note: values are stored by reference:
 *                                   $sentences[$sentence_pos][$abs_pos] =& $words[$abs_pos];
 * @param   array|null  $uniques     Array of unique words, sorted by key.
 *                                   Keys are the words in lowercase, values are the number of times they occur in the text.
 * @param   array|null  $offset_map  Map of absolute word positions to absolute byte positions in the normalized text:
 *                                   array(<abs. word position> => <byte position of word>, ...)
 * @return  string      Normalized text
 */
public function parse($s, array &$words = null, array &$sentences = null, array &$uniques = null, array &$offset_map = null)
{
    $s = $this->normalize($s);

    /*
    Rosenthal:
      "(?)" is placed after a word to express doubt or bewilderment
      "(!)" is placed after a word to express the author's attitude to someone else's text (agreement, approval or irony, indignation)
    */
    # NOTE(review): the pattern uses the x flag with inline # comments, which only end at a line break
    # inside the literal; as shown here the literal is on a single line - confirm the real file keeps it multi-line.
    # Group 1 captures words (letters), group 2 captures numbers; the remaining alternatives match
    # paragraph and sentence terminators without a capture group.
    preg_match_all('~(?>#1 letters ( #\\p{L}++ (?>' . $this->re_langs . ') #special (?> \\# (?!\\p{L}|\\d) #programming languages: C# | \\+\\+?+ (?!\\p{L}|\\d) #programming languages: C++, T++, B+ trees, Европа+; but not C+E, C+5 )?+ ) #2 numbers | ( \\d++ #digits (?> % (?!\\p{L}|\\d) )?+ #brand names: 120% ) #| \\p{Nd}++ #decimal number #| \\p{Nl}++ #letter number #| \\p{No}++ #other number #paragraph (see self::normalize()) | \\r\\r #sentence end by dot | \\. (?=[\\x20' . " " . '] (?!\\p{Ll}) #following symbol not letter in lowercase ) #sentence end by other | (?<!\\() #previous symbol not bracket [!?;…]++ #sentence end #following symbol not (?!["\\)' . "»" . "”" . "’" . "“" . '] ) ) ~sxuSX', $s, $m, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);

    #cleanup
    $words = array();
    $sentences = array();
    $uniques = array();
    $offset_map = array();

    #init
    $sentence_pos = 0; #sentence number
    $abs_pos = 0; #absolute position number of the word in the text
    $w_prev = false; #previous word

    foreach ($m as $i => $a) {
        $is_alpha = $is_digit = false;
        if ($is_digit = array_key_exists(2, $a)) {
            list($w, $pos) = $a[2];
        } elseif ($is_alpha = array_key_exists(1, $a)) {
            list($w, $pos) = $a[1];
        } else {
            # neither a word nor a number: a paragraph or sentence terminator
            list($w, $pos) = $a[0];
            if ($w !== '.') {
                # any terminator other than a dot closes the current sentence if it has words
                if (!empty($sentences[$sentence_pos])) {
                    $w_prev = false;
                    $sentence_pos++;
                }
                continue;
            }
            if (!empty($sentences[$sentence_pos])) {
                $tmp = $w_prev;
                $w_prev = false;
                # a dot does not end the sentence when there is no previous word, when the previous word
                # is a single non-digit character, or when it is a key of $this->dot_reductions
                if ($tmp === false || UTF8::strlen($tmp) < 2 && !ctype_digit($tmp) || is_array($this->dot_reductions) && array_key_exists(UTF8::lowercase($tmp), $this->dot_reductions)) {
                    continue;
                }
                $sentence_pos++;
            }
            continue;
        }
        $w_prev = $w;
        $words[$abs_pos] = $w;
        $sentences[$sentence_pos][$abs_pos] =& $words[$abs_pos];
        $offset_map[$abs_pos] = $pos;
        $abs_pos++;
    }

    # lowercase all words in a single call by joining them with PHP_EOL, then count occurrences
    $uniques = array_count_values(explode(PHP_EOL, UTF8::lowercase(implode(PHP_EOL, $words))));
    ksort($uniques, SORT_REGULAR);

    #d($words, $sentences, $uniques, $offset_map);
    return $s;
}
/**
 * Check an email address for correct format.
 *
 * Addresses longer than 254 characters (per UTF8::strlen()) are rejected
 * outright. In strict mode the pattern is assembled from RFC 822 building
 * blocks; otherwise a simpler pattern is used.
 *
 * @link  http://www.iamcal.com/publish/articles/php/parsing_email/
 * @link  http://www.w3.org/Protocols/rfc822/
 *
 * @param   string   $email   email address
 * @param   boolean  $strict  strict RFC compatibility
 *
 * @return  boolean
 */
public static function email($email, $strict = false)
{
    if (UTF8::strlen($email) > 254) {
        return false;
    }

    if ($strict !== true) {
        // Relaxed check: dotted local part, then a host name with a 2-6 letter TLD or a dotted quad
        $expression = '/^[-_a-z0-9\'+*$^&%=~!?{}]++(?:\\.[-_a-z0-9\'+*$^&%=~!?{}]+)*+@(?:(?![-.])[-a-z0-9.]+(?<![-.])\\.[a-z]{2,6}|\\d{1,3}(?:\\.\\d{1,3}){3})$/iD';
    } else {
        // Character-level building blocks
        $qtext = '[^\\x0d\\x22\\x5c\\x80-\\xff]';
        $dtext = '[^\\x0d\\x5b-\\x5d\\x80-\\xff]';
        $atom = '[^\\x00-\\x20\\x22\\x28\\x29\\x2c\\x2e\\x3a-\\x3c\\x3e\\x40\\x5b-\\x5d\\x7f-\\xff]+';
        $pair = '\\x5c[\\x00-\\x7f]';

        // Composite elements
        $domain_literal = "\\x5b({$dtext}|{$pair})*\\x5d";
        $quoted_string = "\\x22({$qtext}|{$pair})*\\x22";
        $sub_domain = "({$atom}|{$domain_literal})";
        $word = "({$atom}|{$quoted_string})";

        // local-part "@" domain
        $domain = "{$sub_domain}(\\x2e{$sub_domain})*";
        $local_part = "{$word}(\\x2e{$word})*";

        $expression = "/^{$local_part}\\x40{$domain}\$/D";
    }

    $matched = preg_match($expression, (string) $email);

    return (bool) $matched;
}
/**
 * Checks that a field is exactly the right length.
 *
 * The comparison is strict, so $length must be an integer for the check
 * to succeed.
 *
 * @param string value
 * @param integer exact length required
 * @return boolean
 */
public static function exact_length($value, $length)
{
    $actual = UTF8::strlen($value);

    return $actual === $length;
}
/**
 * Helper for Debug::dump(), handles recursion in arrays and objects.
 *
 * Returns an HTML fragment describing $var. Strings are cleaned with
 * UTF8::clean() and truncated to $length for display. Arrays and objects
 * are rendered down to $limit levels; recursion is detected with a marker
 * key (arrays) and the object hash (objects).
 *
 * @param   mixed    $var     Variable to dump
 * @param   integer  $length  Maximum length of strings [Optional]
 * @param   integer  $limit   Recursion limit [Optional]
 * @param   integer  $level   Current recursion level (internal usage only!) [Optional]
 *
 * @return  string
 */
protected static function _dump(&$var, $length = 128, $limit = 10, $level = 0)
{
    if ($var === NULL) {
        return '<small style="color: #3465a4">NULL</small>';
    } elseif (is_bool($var)) {
        return '<small>bool</small> <span style="color:#4e9a06">' . ($var ? 'TRUE' : 'FALSE') . '</span>';
    } elseif (is_float($var)) {
        return '<small>float</small> <span style="color:#4e9a06">' . $var . '</span>';
    } elseif (is_integer($var)) {
        return '<small>int</small> <span style="color:#4e9a06">' . $var . '</span>';
    } elseif (is_resource($var)) {
        if (($type = get_resource_type($var)) === 'stream' and $meta = stream_get_meta_data($var)) {
            $meta = stream_get_meta_data($var);

            if (isset($meta['uri'])) {
                $file = $meta['uri'];

                if (stream_is_local($file)) {
                    $file = Debug::path($file);
                }

                return '<small>resource</small><span>(' . $type . ')</span> ' . htmlspecialchars($file, ENT_NOQUOTES, Kohana::$charset);
            }
            // NOTE(review): a stream without a 'uri' entry falls through and the function returns nothing
        } else {
            return '<small>resource</small><span>(' . $type . ')</span>';
        }
    } elseif (is_string($var)) {
        // Clean invalid multibyte characters. iconv is only invoked
        // if there are non ASCII characters in the string, so this
        // isn't too much of a hit.
        // Note that $var is a reference, so the caller's variable receives the cleaned string as well.
        $var = UTF8::clean($var, Kohana::$charset);

        if (UTF8::strlen($var) > $length) {
            // Encode the truncated string
            $str = htmlspecialchars(UTF8::substr($var, 0, $length), ENT_NOQUOTES, Kohana::$charset) . ' …';
        } else {
            // Encode the string
            $str = htmlspecialchars($var, ENT_NOQUOTES, Kohana::$charset);
        }

        // The reported length is the byte length from strlen(), not the UTF8 length used for truncation
        return '<small>string</small> <span style="color:#cc0000">\'' . $str . '\'</span>(<span style="font-style:italic">length=' . strlen($var) . '</span>)';
    } elseif (is_array($var)) {
        $output = array();

        // Indentation for this variable
        $space = str_repeat($s = ' ', $level);

        static $marker;

        if ($marker === NULL) {
            // Make a unique marker
            $marker = uniqid("");
        }

        if (empty($var)) {
            // Do nothing
        } elseif (isset($var[$marker])) {
            // The marker key is already present: this array is being dumped further up the call stack
            $output[] = "\n{$space}{$s}*RECURSION*\n{$space}";
        } elseif ($level < $limit) {
            $output[] = "<span>";

            // Tag the array while its elements are dumped so that recursion can be detected
            $var[$marker] = TRUE;
            foreach ($var as $key => &$val) {
                if ($key === $marker) {
                    continue;
                }
                if (!is_int($key)) {
                    $key = '"' . htmlspecialchars($key, ENT_NOQUOTES, Kohana::$charset) . '"';
                }

                $output[] = "{$space}{$s}{$key} => " . Debug::_dump($val, $length, $limit, $level + 1);
            }
            unset($var[$marker]);

            $output[] = "{$space}</span>";
        } else {
            // Depth too great
            $output[] = "\n{$space}{$s}...\n{$space}";
        }

        return '<strong>array</strong> <span style="font-style:italic">(size=' . count($var) . ')</span> ' . implode(PHP_EOL, $output);
    } elseif (is_object($var)) {
        // Copy the object as an array
        $array = (array) $var;

        $output = array();

        // Indentation for this variable
        $space = str_repeat($s = ' ', $level);

        $hash = spl_object_hash($var);

        // Objects that are being dumped
        static $objects = array();

        if (empty($var)) {
            // Do nothing
        } elseif (isset($objects[$hash])) {
            $output[] = "{\n{$space}{$s}*RECURSION*\n{$space}}";
        } elseif ($level < $limit) {
            $output[] = "<code>";

            $objects[$hash] = TRUE;
            foreach ($array as $key => &$val) {
                // NOTE(review): the literal compared with $key[0] (and searched for by strrpos() below)
                // appears as an empty string here; it may originally have been a NUL byte,
                // which prefixes non-public property names in an (array) cast - confirm against the real file
                if ($key[0] === "") {
                    // Determine if the access is protected or private
                    $access = '<span style="font-style:italic">' . ($key[1] === '*' ? 'protected' : 'private') . '</span>';

                    // Remove the access level from the variable name
                    $key = substr($key, strrpos($key, "") + 1);
                } else {
                    $access = '<span style="font-style:italic">public</span>';
                }

                $output[] = "{$space}{$s}{$access} '{$key}' <span style='color:#888a85'>=></span> " . Debug::_dump($val, $length, $limit, $level + 1);
            }
            unset($objects[$hash]);

            $output[] = "{$space}</code>";
        } else {
            // Depth too great
            $output[] = "{\n{$space}{$s}...\n{$space}}";
        }

        return '<strong>object</strong>(<span style="font-style:italic">' . get_class($var) . '</span>)' . '[<span style="font-style:italic">' . count($array) . '</span>]' . implode(PHP_EOL, $output);
    } else {
        // Any other type: print its type name and print_r() output
        return '<small>' . gettype($var) . '</small> ' . htmlspecialchars(print_r($var, TRUE), ENT_NOQUOTES, Kohana::$charset);
    }
}
/**
 * Tests UTF8::strlen
 *
 * The assertion is run twice, the second time with UTF8::$server_utf8
 * inverted, after which the flag is inverted back.
 *
 * @test
 * @dataProvider provider_strlen
 */
public function test_strlen($input, $expected)
{
    // First pass: flag as configured
    $actual = UTF8::strlen($input);
    $this->assertSame($expected, $actual);

    // Second pass: flag inverted
    UTF8::$server_utf8 = !UTF8::$server_utf8;
    $actual = UTF8::strlen($input);
    $this->assertSame($expected, $actual);

    // Invert the flag back
    UTF8::$server_utf8 = !UTF8::$server_utf8;
}
/**
 * Find position of last occurrence of a string in another string
 * Compatible with mb_strrpos(), an UTF-8 friendly replacement for strrpos()
 *
 * The byte position from the native strrpos() is converted by measuring
 * the preceding part of the haystack with UTF8::strlen().
 *
 * @param   string  $haystack  string to search in
 * @param   string  $needle    string to search for
 * @return  integer|boolean    position, or false when the needle is not found
 */
function strrpos($haystack, $needle)
{
    $byte_pos = strrpos($haystack, $needle);

    if ($byte_pos !== false) {
        $prefix = substr($haystack, 0, $byte_pos);

        return UTF8::strlen($prefix);
    }

    return false;
}