/**
 * Converts a chunk of text through the configured encoding converter.
 *
 * The very first chunk passed to this object is first run through
 * Utf8::instance()->trim() (presumably to strip a leading BOM or similar
 * prefix; confirm against the Utf8 class). Later chunks are converted as-is.
 *
 * @param string $string chunk of text to convert
 * @return mixed whatever $this->encoding_converter->convert() returns
 */
function convert($string)
{
    // The flag makes the trim a one-time step: only the first chunk is trimmed.
    if (!$this->started) {
        $this->started = true;
        $string = Utf8::instance()->trim($string);
    }

    return $this->encoding_converter->convert($string);
}
/**
 * Upper-cases the first letter of every word, with special handling for "i".
 *
 * A lower-case "i" at a word boundary is replaced by the dotted capital "İ"
 * before the string is handed to the parent implementation.
 *
 * @param string $s text to capitalise
 * @return mixed result of parent::ucwords()
 */
static function ucwords($s)
{
    // Skip the regex entirely when the string holds no "i" at all.
    $prepared = (strpos($s, 'i') === false)
        ? $s
        : preg_replace('/\\bi/u', 'İ', $s);

    return parent::ucwords($prepared);
}
/**
 * transcode to ISO 8859
 *
 * To be used only when there is no other alternative.
 *
 * Known Unicode entities are first swapped for their ISO counterparts using
 * the two tables returned by Utf8::get_iso8859(). Every remaining numeric
 * entity (&#NNN;) is then turned into a single byte when its value fits in
 * one byte (0..255), and into an underscore otherwise.
 *
 * @link http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT ISO/IEC 8859-15:1999 to Unicode
 *
 * @param string $utf a complex string using unicode entities
 * @param string $options optional characters to accept (not used by this implementation)
 * @return string a ISO 8859 string
 *
 * @see feeds/flash/slashdot.php
 */
public static function &to_iso8859($utf, $options = '')
{
    // mapping tables (iso-8859-15 + Microsoft extensions cp1252, per the original note)
    list($iso_entities, $unicode_entities) = Utf8::get_iso8859();

    // transcode Unicode entities to iso 8859
    $text = str_replace($unicode_entities, $iso_entities, $utf);

    // With PREG_SPLIT_DELIM_CAPTURE the result alternates: even keys hold the
    // text between entities, odd keys hold the captured decimal value.
    $areas = preg_split('/&#(\\d+);/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

    $text = '';
    foreach ($areas as $index => $area) {
        // plain text before/after an entity
        if ($index % 2 === 0) {
            $text .= $area;
            continue;
        }

        // the entity itself: keep it only if it fits in one byte.
        // 0xff is included, since code point 255 is still a single byte.
        $unicode = intval($area);
        $text .= ($unicode <= 0xff) ? chr($unicode) : '_';
    }

    // done
    return $text;
}
/**
 * Matches a URI against the routing table.
 *
 * Each rule is an array: index 0 is the rule pattern, index 1 (optional) maps
 * group names to custom character-class expressions, and index 2 (optional)
 * holds default values handed to parse() on a match.
 *
 * @param string $uri request URI to match; an empty string selects the default route
 * @return mixed result of $this->parse() for the default route or the first matching rule
 * @throws Ada_Exception when the routing table is empty or no rule matches
 */
public function matchs($uri)
{
    if ($this->routes) {
        // Empty URI: let parse() resolve the default route.
        if ($uri == '') {
            return $this->parse(NULL, NULL);
        }

        // A URI with multi-byte characters is transcoded to UTF-8. This is done
        // once, before the loop, so the URI is not re-converted for every rule.
        if (!Utf8::isAscii($uri)) {
            $uri = iconv('gb2312', 'utf-8', $uri);
        }

        // Walk the rules in order; the first match wins.
        foreach ($this->routes as $rule) {
            // Turn group markers into named groups,
            // e.g. (<action>)-(<category>) => (?<action>)-(?<category>)
            $pattern = preg_replace('/(?<=[(])(?=[<])/', '?', $rule[0]);

            // Give every named group the default character range,
            // e.g. (?<action>) => (?<action>[\w]+)
            $pattern = preg_replace('/(?<=[>])(?=[)])/', '[\\w]+', $pattern);

            // Replace the default range with rule-specific ranges where given.
            if (!empty($rule[1]) && is_array($rule[1])) {
                foreach ($rule[1] as $k => $v) {
                    $pattern = preg_replace('/(?<=(' . $k . ')[>])\\[\\\\w\\]\\+(?=[)])/', $v, $pattern);
                }
            }

            // Try the compiled rule against the whole URI.
            $matchs = array();
            if (preg_match("~^{$pattern}\$~u", $uri, $matchs)) {
                // Matched: pass the captures and the rule defaults to parse().
                $default = !empty($rule[2]) ? $rule[2] : array();

                return $this->parse($matchs, $default);
            }
        }
    }

    // No routes configured, or no rule matched.
    throw new Ada_Exception('Unable to find a route to match');
}
/**
 * Finally
 *
 * Replaces the two-character sequences "a~" and "o~" in the current word
 * with the single letters "ã" and "õ".
 */
public function finish()
{
    $sequences = array('a~', 'o~');
    $letters = array('ã', 'õ');

    $this->word = Utf8::str_replace($sequences, $letters, $this->word);
}
/**
 * Tells whether a short syllable starts at the given position of the word.
 *
 * A short syllable is either
 * (a) a vowel followed by a non-vowel other than w, x or Y and preceded by a non-vowel, or
 * (b) a vowel at the beginning of the word followed by a non-vowel.
 *
 * So rap, trap, entrap end with a short syllable, and ow, on, at are classed as short syllables.
 * But uproot, bestow, disturb do not end with a short syllable.
 *
 * @param int $from     start position; a negative value counts from the end of the word
 * @param int $nbLetters 2 to test form (b) (only valid at position 0), otherwise form (a)
 * @return bool true when a short syllable is found at that position
 */
private function searchShortSyllabe($from, $nbLetters)
{
    $wordLength = Utf8::strlen($this->word);

    // Negative offsets are relative to the end of the word, clamped to 0.
    if ($from < 0) {
        $from = max(0, $wordLength + $from);
    }

    // Form (b) only makes sense at the very beginning of the word.
    $twoLetterForm = ($nbLetters == 2);
    if ($twoLetterForm && $from != 0) {
        return false;
    }

    $letterOne = Utf8::substr($this->word, $from, 1);
    $letterTwo = Utf8::substr($this->word, $from + 1, 1);
    $oneIsVowel = in_array($letterOne, self::$vowels);
    $twoIsVowel = in_array($letterTwo, self::$vowels);

    // (b) vowel followed by a non-vowel
    if ($twoLetterForm && $oneIsVowel && !$twoIsVowel) {
        return true;
    }

    // (a) needs non-vowel + vowel in the first two positions
    if ($oneIsVowel || !$twoIsVowel) {
        return false;
    }

    // ... followed by something that is neither a vowel nor x, Y, w
    $letterThree = Utf8::substr($this->word, $from + 2, 1);
    $excluded = array_merge(self::$vowels, array('x', 'Y', 'w'));

    return !in_array($letterThree, $excluded);
}
/**
 * Step 3:
 * Search for the longest among the following suffixes in R1, and perform the action indicated.
 *
 *  - lig, ig, els : delete the suffix
 *  - löst         : replace with lös
 *  - fullt        : replace with full
 *
 * @return bool|null true when a suffix was handled; nothing is returned otherwise
 */
private function step3()
{
    // lig ig els => delete
    $cutAt = $this->searchIfInR1(array('lig', 'ig', 'els'));
    if ($cutAt !== false) {
        $this->word = Utf8::substr($this->word, 0, $cutAt);

        return true;
    }

    // löst => lös, fullt => full: both simply lose their final letter
    foreach (array('löst', 'fullt') as $suffix) {
        if ($this->searchIfInR1(array($suffix)) !== false) {
            $this->word = Utf8::substr($this->word, 0, -1);

            return true;
        }
    }
}
/**
 * Gives access to the Utf8 encoding object.
 *
 * @return Utf8 the object returned by Utf8::instance()
 */
public static function utf8()
{
    $encoding = Utf8::instance();

    return $encoding;
}
/**
 * Utf8::chr() must produce the expected UTF-8 string for a code point.
 *
 * @dataProvider provideCodePointAndUtf8
 */
public function testUtf8Chr($codePoint, $utf8)
{
    $actual = Utf8::chr($codePoint);

    $this->assertEquals($utf8, $actual);
}
/**
 * Finally
 *
 * Turns U and Y back into lower case, and removes the umlaut accent
 * from a, o and u in the current word.
 */
public function finish()
{
    // search => replacement
    $substitutions = array(
        'U' => 'u',
        'Y' => 'y',
        'ä' => 'a',
        'ü' => 'u',
        'ö' => 'o',
    );

    $this->word = Utf8::str_replace(
        array_keys($substitutions),
        array_values($substitutions),
        $this->word
    );
}
/**
 * Finally
 *
 * Lower-cases every I and U in the current word (back into i and u).
 */
public function finish()
{
    $upper = array('I', 'U');
    $lower = array('i', 'u');

    $this->word = Utf8::str_replace($upper, $lower, $this->word);
}
/**
 * Prepares a piece of text for ISO 8859 output.
 *
 * The first character is upper-cased, every whitespace character is replaced
 * by a plain space, surrounding spaces are trimmed, and the result is passed
 * through Utf8::to_unicode() and then Utf8::to_iso8859().
 *
 * @param string $text text to filter
 * @return mixed value returned by Utf8::to_iso8859()
 */
function filter($text)
{
    // capitalise, then flatten all whitespace to single-byte spaces
    $capitalised = ucfirst($text);
    $flattened = preg_replace('/(\\s| )/', ' ', $capitalised);
    $trimmed = trim($flattened);

    // transcode in two steps
    $entities = Utf8::to_unicode($trimmed);
    $converted = Utf8::to_iso8859($entities);

    return $converted;
}
/**
 * Computes the RV region of the current word and stores it in $this->rv / $this->rvIndex.
 *
 * If the word begins with two vowels, RV is the region after the third letter,
 * otherwise the region after the first vowel not at the beginning of the word,
 * or the end of the word if these positions cannot be found.
 * (Exceptionally, par, col or tap, at the beginning of a word is also taken to
 * define RV as the region to their right.)
 *
 * @return bool false when the word has at least three letters but no RV start
 *              was found; true otherwise
 */
protected function rv()
{
    $wordLength = Utf8::strlen($this->word);

    // Default: RV is empty and starts at the end of the word.
    $this->rv = '';
    $this->rvIndex = $wordLength;

    if ($wordLength < 3) {
        return true;
    }

    // Two leading vowels, or one of the special prefixes: RV starts after the third letter.
    $startsWithTwoVowels = in_array(Utf8::substr($this->word, 0, 1), self::$vowels)
        && in_array(Utf8::substr($this->word, 1, 1), self::$vowels);
    $hasSpecialPrefix = in_array(Utf8::substr($this->word, 0, 3), array('par', 'col', 'tap'));

    if ($startsWithTwoVowels || $hasSpecialPrefix) {
        $this->rvIndex = 3;
        $this->rv = Utf8::substr($this->word, 3);

        return true;
    }

    // Otherwise: RV starts right after the first vowel that is not the first letter.
    $position = 1;
    while ($position < $wordLength) {
        if (in_array(Utf8::substr($this->word, $position, 1), self::$vowels)) {
            $this->rvIndex = $position + 1;
            $this->rv = Utf8::substr($this->word, $position + 1);

            return true;
        }
        $position++;
    }

    return false;
}
/**
 * Step 3:
 * Search for the longest among the following suffixes in R1, and if found, delete.
 *
 * leg eleg ig eig lig elig els lov elov slov hetslov
 */
private function step3()
{
    $suffixes = array('hetslov', 'eleg', 'elov', 'slov', 'elig', 'eig', 'lig', 'els', 'lov', 'leg', 'ig');

    $position = $this->searchIfInR1($suffixes);
    if ($position === false) {
        // nothing to strip
        return;
    }

    // keep everything before the suffix
    $this->word = Utf8::substr($this->word, 0, $position);
}
/**
 * Finally
 *
 * Lower-cases every I and Y in the current word.
 */
private function finish()
{
    // search => replacement
    $lowerCased = array('I' => 'i', 'Y' => 'y');

    $this->word = Utf8::str_replace(
        array_keys($lowerCased),
        array_values($lowerCased),
        $this->word
    );
}
<?php namespace Vensko; Utf8::$asciiRange = array_fill_keys(range(0, 127), true); class Utf8 { const UTF16BE_BOM = "þÿ"; const UTF16LE_BOM = "ÿþ"; const UTF8_BOM = ""; const UTF7_BOM = "+/v"; const UTF32BE_BOM = "þÿ"; const UTF32LE_BOM = "ÿþ"; const ASCII = 'ASCII'; const UTF8 = 'UTF-8'; const UTF7 = 'UTF-7'; const UTF16BE = 'UTF-16BE'; const UTF16LE = 'UTF-16LE'; const UTF32BE = 'UTF-32BE'; const UTF32LE = 'UTF-32LE'; const CP1250 = 'Windows-1250'; const CP1251 = 'Windows-1251'; const CP1252 = 'Windows-1252'; public static $asciiRange = []; /** * Ensures that a string is UTF-8 encoded * * @param string|$str * @param string|null $fromEncoding * @param bool $force * @return string
/**
 * Transliterates a UTF-8 value to ASCII by delegating to Utf8::toAscii().
 *
 * @param  string  $value
 * @return string
 */
public static function ascii($value)
{
    $transliterated = Utf8::toAscii($value);

    return $transliterated;
}
/**
 * And finally:
 * Remove acute accents (á, í, ó, é, ú become a, i, o, e, u) from the current word.
 */
private function finish()
{
    $accented = array('á', 'í', 'ó', 'é', 'ú');
    $plain = array('a', 'i', 'o', 'e', 'u');

    $this->word = Utf8::str_replace($accented, $plain, $this->word);
}
/**
 * Computes the RV region of the current word and stores it in $this->rv / $this->rvIndex.
 *
 * Used by spanish, italian, portuguese, etc (but not by french)
 *
 * If the second letter is a consonant, RV is the region after the next following vowel,
 * or if the first two letters are vowels, RV is the region after the next consonant,
 * and otherwise (consonant-vowel case) RV is the region after the third letter.
 * But RV is the end of the word if these positions cannot be found.
 *
 * @return bool|null true when the word is shorter than three letters or an RV
 *                   start was found; nothing is returned when the scan finds none
 */
protected function rv()
{
    $wordLength = Utf8::strlen($this->word);

    // Default: RV is empty and starts at the end of the word.
    $this->rv = '';
    $this->rvIndex = $wordLength;

    if ($wordLength < 3) {
        return true;
    }

    $firstIsVowel = in_array(Utf8::substr($this->word, 0, 1), static::$vowels);
    $secondIsVowel = in_array(Utf8::substr($this->word, 1, 1), static::$vowels);

    if ($secondIsVowel && !$firstIsVowel) {
        // consonant-vowel case: RV is the region after the third letter
        $this->rvIndex = 3;
        $this->rv = Utf8::substr($this->word, 3);

        return true;
    }

    // Second letter is a consonant: look for the next vowel.
    // First two letters are vowels: look for the next consonant.
    $stopOnVowel = !$secondIsVowel;

    for ($position = 2; $position < $wordLength; $position++) {
        $letterIsVowel = in_array(Utf8::substr($this->word, $position, 1), static::$vowels);

        if ($letterIsVowel === $stopOnVowel) {
            $this->rv = Utf8::substr($this->word, $position + 1);
            $this->rvIndex = $position + 1;

            return true;
        }
    }
}
foreach ($categories->execute() as $category) { ?> <li<?php echo strpos(Request::current()->url(), Route::url('category', array('link' => $category->link))) !== FALSE ? ' class="active"' : NULL; ?> ><a href="<?php echo Route::url('category', array('link' => $category->link)); ?> " id="sub-<?php echo $category->link; ?> " title="<?php echo $category->title; ?> "><?php echo Utf8::ucfirst($category->name); ?> </a></li> <?php } ?> </ul> </div> </li> <li<?php echo Arr::path($active, 'home'); ?> ><a href="<?php echo Route::url('home'); ?> " id="main-home" title="O mnie">O mnie</a></li>