function convert($string)
 {
     /*
      * Convert a chunk of text with the configured encoding converter.
      *
      * The first chunk seen by this object (while $this->started is still
      * false) is passed through Utf8::instance()->trim() before conversion;
      * every later chunk is converted unchanged.
      * NOTE(review): presumably the trim strips a leading marker such as a
      * BOM from the start of a stream -- confirm against Utf8::trim().
      *
      * @param string $string chunk to convert
      * @return mixed whatever $this->encoding_converter->convert() returns
      */
     if (!$this->started) {
         $this->started = true;
         $string = Utf8::instance()->trim($string);
     }

     // Single exit point: the original repeated this call in both branches
     // and carried an unreachable "return $string;" after them.
     return $this->encoding_converter->convert($string);
 }
Example #2
0
 /**
  * Upper-case the first letter of every word.
  *
  * Before delegating to parent::ucwords(), every 'i' that sits at a word
  * boundary is replaced by the dotted capital 'İ'.
  *
  * @param string $s text to convert
  * @return mixed result of parent::ucwords() on the adjusted text
  */
 static function ucwords($s)
 {
     $containsLowerI = strpos($s, 'i') !== false;

     // The regex is skipped entirely when there is no 'i' to rewrite.
     $prepared = $containsLowerI ? preg_replace('/\\bi/u', 'İ', $s) : $s;

     return parent::ucwords($prepared);
 }
Example #3
0
File: utf8.php Project: rair/yacs
 /**
  * transcode to ISO 8859
  *
  * To be used only when there is no other alternative.
  *
  * Entities listed in the table returned by Utf8::get_iso8859() are replaced
  * first. Every remaining decimal entity (&#NNN;) is then turned into the
  * single byte with that value when it fits in one byte (0 to 255), and into
  * an underscore otherwise.
  *
  * @link http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT ISO/IEC 8859-15:1999 to Unicode
  *
  * @param string a complex string using unicode entities
  * @param string optional characters to accept (not read by this function)
  * @return string a ISO 8859 string (returned by reference)
  *
  * @see feeds/flash/slashdot.php
  */
 public static function &to_iso8859($utf, $options = '')
 {
     // iso-8859-15 + Microsoft extensions cp1252
     list($iso_entities, $unicode_entities) = Utf8::get_iso8859();

     // transcode Unicode entities to iso 8859
     $text = str_replace($unicode_entities, $iso_entities, $utf);

     // split around decimal entities; with PREG_SPLIT_DELIM_CAPTURE the even
     // offsets hold plain text and the odd offsets hold the captured digits
     $areas = preg_split('/&#(\\d+);/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

     $text = '';
     foreach ($areas as $index => $area) {
         // text between entities is copied verbatim
         if ($index % 2 === 0) {
             $text .= $area;
             continue;
         }

         // the entity itself: get the integer value
         $unicode = intval($area);

         // any value up to and including 0xff fits in one byte; the previous
         // strict comparison wrongly turned &#255; into an underscore
         if ($unicode <= 0xff) {
             $text .= chr($unicode);
         } else {
             $text .= '_';
         }
     }

     // done
     return $text;
 }
Example #4
0
 /**
  * Match a URI against the routing table.
  *
  * An empty URI is handed to parse() without match data (the original note
  * says this selects the default rule, the last entry of the routing table).
  * Otherwise every rule is expanded into a regular expression and tried in
  * order; the first rule that matches is handed to parse().
  *
  * @param string $uri request URI to match
  * @return mixed whatever $this->parse() returns
  * @throws Ada_Exception when the routing table is empty or no rule matches
  */
 public function matchs($uri)
 {
     if ($this->routes) {
         if ($uri == '') {
             // empty uri: use the default routing rule
             return $this->parse(NULL, NULL);
         }

         // The uri contains multi-byte characters, so it has to be transcoded.
         // This is done once, before the loop: transcoding inside the loop
         // re-encoded the already converted uri on every further rule.
         // NOTE(review): the source encoding is assumed to be gb2312 -- confirm.
         if (!Utf8::isAscii($uri)) {
             $uri = iconv('gb2312', 'utf-8', $uri);
         }

         // walk the routing table and stop at the first rule that matches
         foreach ($this->routes as $rule) {
             // turn capture groups into named groups,
             // e.g. (<action>)-(<category>) => (?<action>)-(?<category>)
             $pattern = preg_replace('/(?<=[(])(?=[<])/', '?', $rule[0]);

             // give every named group a default character range,
             // e.g. (?<action>)-(?<category>) => (?<action>[\w]+)-(?<category>[\w]+)
             $pattern = preg_replace('/(?<=[>])(?=[)])/', '[\\w]+', $pattern);

             // custom character ranges, keyed by group name
             if (!empty($rule[1]) && is_array($rule[1])) {
                 foreach ($rule[1] as $k => $v) {
                     $pattern = preg_replace('/(?<=(' . $k . ')[>])\\[\\\\w\\]\\+(?=[)])/', $v, $pattern);
                 }
             }

             // match the current rule against the uri
             $matchs = array();
             if (preg_match("~^{$pattern}\$~u", $uri, $matchs)) {
                 // matched: let parse() handle it, together with the rule's
                 // default values when it declares any
                 $default = !empty($rule[2]) ? $rule[2] : array();

                 return $this->parse($matchs, $default);
             }
         }
     }

     // reaching this point means no rule matched (or there are no routes)
     throw new Ada_Exception('Unable to find a route to match');
 }
Example #5
0
 /**
  * Finally
  *
  * Replace the two-character sequences 'a~' and 'o~' in the word with the
  * single letters 'ã' and 'õ'.
  */
 public function finish()
 {
     $replacements = array('a~' => 'ã', 'o~' => 'õ');

     $this->word = Utf8::str_replace(array_keys($replacements), array_values($replacements), $this->word);
 }
Example #6
0
 /**
  * Tell whether a short syllable starts at the given offset of the word.
  *
  * A short syllable is either
  *  (a) a vowel followed by a non-vowel other than w, x or Y and preceded by a non-vowel, or
  *  (b) a vowel at the beginning of the word followed by a non-vowel.
  *
  * So rap, trap, entrap end with a short syllable, and ow, on, at are classed as short syllables.
  * But uproot, bestow, disturb do not end with a short syllable.
  *
  * @param int $from      offset of the first letter to inspect; a negative value counts from the end of the word
  * @param int $nbLetters 2 asks for form (b) first, any other value checks form (a) only
  * @return bool true when a short syllable is found at that offset
  */
 private function searchShortSyllabe($from, $nbLetters)
 {
     $wordLength = Utf8::strlen($this->word);

     // A negative offset is relative to the end of the word, clamped to its start.
     if ($from < 0) {
         $from = max(0, $wordLength + $from);
     }

     $wantsTwoLetterForm = $nbLetters == 2;

     // Form (b) only exists at the very beginning of the word.
     if ($wantsTwoLetterForm && $from != 0) {
         return false;
     }

     $firstIsVowel = in_array(Utf8::substr($this->word, $from, 1), self::$vowels);
     $secondIsVowel = in_array(Utf8::substr($this->word, $from + 1, 1), self::$vowels);

     // Form (b): vowel, then non-vowel.
     if ($wantsTwoLetterForm && $firstIsVowel && !$secondIsVowel) {
         return true;
     }

     // Form (a): non-vowel, vowel, then a non-vowel that is not x, Y or w.
     // It is also tried when form (b) was requested but did not apply.
     $closingLetter = Utf8::substr($this->word, $from + 2, 1);
     $rejectedClosers = array_merge(self::$vowels, array('x', 'Y', 'w'));

     return !$firstIsVowel && $secondIsVowel && !in_array($closingLetter, $rejectedClosers);
 }
Example #7
0
 /**
  * Step 3:
  * Search for the longest among the following suffixes in R1, and perform the action indicated.
  *
  * @return bool true when a suffix was found and the word was changed, false otherwise
  */
 private function step3()
 {
     // lig   ig   els
     //      delete
     if (($position = $this->searchIfInR1(array('lig', 'ig', 'els'))) !== false) {
         $this->word = Utf8::substr($this->word, 0, $position);
         return true;
     }
     // löst
     //      replace with lös (drop the final letter)
     if ($this->searchIfInR1(array('löst')) !== false) {
         $this->word = Utf8::substr($this->word, 0, -1);
         return true;
     }
     // fullt
     //      replace with full (drop the final letter)
     if ($this->searchIfInR1(array('fullt')) !== false) {
         $this->word = Utf8::substr($this->word, 0, -1);
         return true;
     }
     // nothing matched: return an explicit boolean instead of falling off
     // the end of the function
     return false;
 }
 /**
  * Gives access to the Utf8 object obtained from Utf8::instance().
  *
  * @return Utf8
  */
 public static function utf8()
 {
     $encoding = Utf8::instance();

     return $encoding;
 }
Example #9
0
 /**
  * Utf8::chr() must produce the expected string for each code point
  * supplied by the data provider.
  *
  * @dataProvider provideCodePointAndUtf8
  */
 public function testUtf8Chr($codePoint, $utf8)
 {
     $actual = Utf8::chr($codePoint);

     $this->assertEquals($utf8, $actual);
 }
Example #10
0
 /**
  * Finally
  *
  * Lower-case the letters U and Y and strip the umlaut from ä, ü and ö.
  */
 public function finish()
 {
     // search => replacement, applied in this order
     $replacements = array('U' => 'u', 'Y' => 'y', 'ä' => 'a', 'ü' => 'u', 'ö' => 'o');

     $this->word = Utf8::str_replace(array_keys($replacements), array_values($replacements), $this->word);
 }
Example #11
0
 /**
  * Finally
  *
  * Lower-case every I and U found in the word.
  */
 public function finish()
 {
     $upperCased = array('I', 'U');
     $lowerCased = array('i', 'u');

     $this->word = Utf8::str_replace($upperCased, $lowerCased, $this->word);
 }
Example #12
0
 /**
  * Prepare a text: capitalise it, flatten its white space, then pass it
  * through Utf8::to_unicode() and Utf8::to_iso8859().
  *
  * @param string $text the text to filter
  * @return mixed the value returned by Utf8::to_iso8859()
  */
 function filter($text)
 {
     // upper-case the first character (ucfirst() works on bytes)
     $capitalised = ucfirst($text);

     // every white-space character and every literal &nbsp; becomes one plain
     // space, then the ends are trimmed
     $flattened = trim(preg_replace('/(\\s|&nbsp;)/', ' ', $capitalised));

     $asUnicode = Utf8::to_unicode($flattened);

     return Utf8::to_iso8859($asUnicode);
 }
Example #13
0
 /**
  * Compute the RV region of the word and store it in $this->rv / $this->rvIndex.
  *
  *  If the word begins with two vowels, RV is the region after the third letter,
  *  otherwise the region after the first vowel not at the beginning of the word,
  *  or the end of the word if these positions cannot be found.
  *  (Exceptionally, par, col or tap, at the beginning of a word is also taken to define RV as the region to their right.)
  *
  * @return bool false only when the word has at least three letters and no position was found
  */
 protected function rv()
 {
     $wordLength = Utf8::strlen($this->word);

     // Default: RV is empty and starts at the end of the word.
     $this->rv = '';
     $this->rvIndex = $wordLength;

     if ($wordLength < 3) {
         return true;
     }

     $letterOne = Utf8::substr($this->word, 0, 1);
     $letterTwo = Utf8::substr($this->word, 1, 1);
     $startsWithTwoVowels = in_array($letterOne, self::$vowels) && in_array($letterTwo, self::$vowels);

     $start = null;
     if ($startsWithTwoVowels || in_array(Utf8::substr($this->word, 0, 3), array('par', 'col', 'tap'))) {
         // Two leading vowels, or one of the exceptional prefixes: RV follows the third letter.
         $start = 3;
     } else {
         // Otherwise RV follows the first vowel that is not the first letter.
         for ($offset = 1; $offset < $wordLength; $offset++) {
             if (in_array(Utf8::substr($this->word, $offset, 1), self::$vowels)) {
                 $start = $offset + 1;
                 break;
             }
         }
     }

     if ($start === null) {
         return false;
     }

     $this->rv = Utf8::substr($this->word, $start);
     $this->rvIndex = $start;

     return true;
 }
Example #14
0
 /**
  * Step 3:
  * Search for the longest among the following suffixes in R1, and if found, delete.
  *
  * leg   eleg   ig   eig   lig   elig   els   lov   elov   slov   hetslov
  */
 private function step3()
 {
     $suffixes = array('hetslov', 'eleg', 'elov', 'slov', 'elig', 'eig', 'lig', 'els', 'lov', 'leg', 'ig');

     $position = $this->searchIfInR1($suffixes);
     if ($position === false) {
         // no suffix found: the word is left untouched
         return;
     }

     // keep only the part of the word that precedes the suffix
     $this->word = Utf8::substr($this->word, 0, $position);
 }
Example #15
0
 /**
  * Finally
  *
  * Lower-case every I and Y found in the word.
  */
 private function finish()
 {
     $replacements = array('I' => 'i', 'Y' => 'y');

     $this->word = Utf8::str_replace(array_keys($replacements), array_values($replacements), $this->word);
 }
Example #16
0
File: Utf8.php Project: vensko/utf8
<?php

namespace Vensko;

Utf8::$asciiRange = array_fill_keys(range(0, 127), true);
class Utf8
{
    const UTF16BE_BOM = "þÿ";
    const UTF16LE_BOM = "ÿþ";
    const UTF8_BOM = "";
    const UTF7_BOM = "+/v";
    const UTF32BE_BOM = "þÿ";
    const UTF32LE_BOM = "ÿþ";
    const ASCII = 'ASCII';
    const UTF8 = 'UTF-8';
    const UTF7 = 'UTF-7';
    const UTF16BE = 'UTF-16BE';
    const UTF16LE = 'UTF-16LE';
    const UTF32BE = 'UTF-32BE';
    const UTF32LE = 'UTF-32LE';
    const CP1250 = 'Windows-1250';
    const CP1251 = 'Windows-1251';
    const CP1252 = 'Windows-1252';
    public static $asciiRange = [];
    /**
     * Ensures that a string is UTF-8 encoded
     *
     * @param string|$str
     * @param string|null $fromEncoding
     * @param bool $force
     * @return string
Example #17
0
 /**
  * Transliterate a UTF-8 value to ASCII.
  *
  * @param  string  $value
  * @return string
  */
 public static function ascii($value)
 {
     return Utf8::toAscii($value);
 }
Example #18
0
 /**
  * And finally:
  * Remove acute accents
  */
 private function finish()
 {
     $this->word = Utf8::str_replace(array('á', 'í', 'ó', 'é', 'ú'), array('a', 'i', 'o', 'e', 'u'), $this->word);
 }
Example #19
0
 /**
  * Used by spanish, italian, portuguese, etc (but not by french)
  *
  * If the second letter is a consonant, RV is the region after the next following vowel,
  * or if the first two letters are vowels, RV is the region after the next consonant,
  * and otherwise (consonant-vowel case) RV is the region after the third letter.
  * But RV is the end of the word if these positions cannot be found.
  */
 protected function rv()
 {
     $length = Utf8::strlen($this->word);
     $this->rv = '';
     $this->rvIndex = $length;
     if ($length < 3) {
         return true;
     }
     $first = Utf8::substr($this->word, 0, 1);
     $second = Utf8::substr($this->word, 1, 1);
     // If the second letter is a consonant, RV is the region after the next following vowel,
     if (!in_array($second, static::$vowels)) {
         for ($i = 2; $i < $length; $i++) {
             $letter = Utf8::substr($this->word, $i, 1);
             if (in_array($letter, static::$vowels)) {
                 $this->rvIndex = $i + 1;
                 $this->rv = Utf8::substr($this->word, $i + 1);
                 return true;
             }
         }
     }
     // or if the first two letters are vowels, RV is the region after the next consonant,
     if (in_array($first, static::$vowels) && in_array($second, static::$vowels)) {
         for ($i = 2; $i < $length; $i++) {
             $letter = Utf8::substr($this->word, $i, 1);
             if (!in_array($letter, static::$vowels)) {
                 $this->rvIndex = $i + 1;
                 $this->rv = Utf8::substr($this->word, $i + 1);
                 return true;
             }
         }
     }
     // and otherwise (consonant-vowel case) RV is the region after the third letter.
     if (!in_array($first, static::$vowels) && in_array($second, static::$vowels)) {
         $this->rv = Utf8::substr($this->word, 3);
         $this->rvIndex = 3;
         return true;
     }
 }
Example #20
0
// Render one <li> menu entry per category returned by $categories->execute().
foreach ($categories->execute() as $category) {
    ?>
							<li<?php 
    // Add class="active" when the current request URL contains this category's URL.
    echo strpos(Request::current()->url(), Route::url('category', array('link' => $category->link))) !== FALSE ? ' class="active"' : NULL;
    ?>
><a href="<?php 
    echo Route::url('category', array('link' => $category->link));
    ?>
" id="sub-<?php 
    // NOTE(review): link and title are echoed into HTML attributes without escaping -- confirm they are sanitised upstream.
    echo $category->link;
    ?>
" title="<?php 
    echo $category->title;
    ?>
"><?php 
    // Visible label: the category name passed through Utf8::ucfirst().
    echo Utf8::ucfirst($category->name);
    ?>
</a></li>
						<?php 
}
?>
						</ul>
					</div>
				</li>
				<li<?php 
echo Arr::path($active, 'home');
?>
><a href="<?php 
echo Route::url('home');
?>
" id="main-home" title="O mnie">O mnie</a></li>