Example #1
1
 /**
  * Transliterates a string to ASCII with the intl Transliterator class.
  *
  * The transliterator is created on first use and cached in
  * self::$_transliterator.
  *
  * @param string $str  Input string.
  *
  * @return mixed  Transliterated string, or false when the transliterator
  *                could not be created or the transliteration failed.
  */
 protected static function _intlToAscii($str)
 {
     if (!isset(self::$_transliterator)) {
         self::$_transliterator = Transliterator::create('Any-Latin; Latin-ASCII');
     }
     // Transliterator::create() yields null on failure; calling a method on
     // null is a fatal error, so report the failure the same way
     // transliterate() itself does.
     if (is_null(self::$_transliterator)) {
         return false;
     }
     return self::$_transliterator->transliterate($str);
 }
Example #2
0
 /**
  * Creates a slug to be used for pretty URLs.
  *
  * The locale is switched to en_US.UTF-8 while the slug is built and is
  * always restored afterwards, including when an exception is thrown.
  *
  * @param  string $string    Text to convert.
  * @param  array  $replace   Optional search => replacement pairs applied before transliteration.
  * @param  string $delimiter Separator placed between the words of the slug.
  * @return string
  *
  * @throws \Phalcon\Exception
  */
 public static function generate($string, $replace = [], $delimiter = '-')
 {
     if (!extension_loaded('intl')) {
         throw new Exception('intl module not loaded');
     }
     // Save the old locale and set the new locale to UTF-8
     $oldLocale = setlocale(LC_ALL, '0');
     setlocale(LC_ALL, 'en_US.UTF-8');
     try {
         // Better to replace given $replace array as index => value
         // Example $replace['ı' => 'i', 'İ' => 'i'];
         if (!empty($replace) && is_array($replace)) {
             $string = str_replace(array_keys($replace), array_values($replace), $string);
         }
         $transliterator = \Transliterator::create('Any-Latin; Latin-ASCII');
         // create() returns null on failure; fail with the documented
         // exception type instead of a fatal "call on null".
         if ($transliterator === null) {
             throw new Exception('Unable to create the transliterator');
         }
         $string = $transliterator->transliterate(mb_convert_encoding(htmlspecialchars_decode($string), 'UTF-8', 'auto'));
         // replace non letter or non digits by -
         $string = preg_replace('#[^\\pL\\d]+#u', '-', $string);
         // Trim trailing -
         $string = trim($string, '-');
         $clean = preg_replace('~[^-\\w]+~', '', $string);
         $clean = strtolower($clean);
         $clean = preg_replace('#[\\/_|+ -]+#', $delimiter, $clean);
         return trim($clean, $delimiter);
     } finally {
         // Revert back to the old locale, even if something above threw
         setlocale(LC_ALL, $oldLocale);
     }
 }
Example #3
0
 /**
  * Transliterates a string with the configured rules and normalises separators.
  *
  * Runs of hyphens and whitespace in the transliterated text are replaced by
  * the configured delimiter, which is also trimmed from both ends.
  *
  * @param string $stringToSantize
  * @return string
  * @throws CException when the transliterator cannot be created or the
  *                    transliteration produces no text
  */
 private function sanitize($stringToSantize)
 {
     $transliterator = Transliterator::create($this->rules);
     // create() returns null for rules it cannot compile.
     if ($transliterator === null) {
         throw new CException("Unable to create transliterator. Check the transliteration rules.");
     }
     $slug = $transliterator->transliterate($stringToSantize);
     // Compare explicitly: empty() would also reject the legitimate result "0".
     if ($slug === false || $slug === null || $slug === '') {
         throw new CException("Empty santized result. Check input string for encoding, UTF-8 is required.");
     }
     return trim(preg_replace('/[-\\s]+/', $this->delimiter, $slug), $this->delimiter);
 }
 /**
  * Enables intl-based transliteration when it is actually usable.
  *
  * The use_transliterator flag is raised only when the Transliterator class
  * exists and the transliterator could be created.
  */
 public function __construct()
 {
     if (class_exists('Transliterator')) {
         // Use Any-Latin to munge Cyrillic, Kanji, etc
         // Then convert anything outside the ISO-8859-1 range to nearest ASCII
         $this->transliterator = Transliterator::create('Any-Latin; [^a-ÿ] Latin-ASCII');
         // create() returns null on failure; do not advertise a
         // transliterator that does not exist.
         if ($this->transliterator !== null) {
             $this->use_transliterator = true;
         }
     }
 }
Example #5
0
 /**
  * Builds a lowercase, dash-separated slug from a string.
  *
  * Selected lowercase Cyrillic letters are first replaced by fixed Latin
  * spellings, the rest is transliterated with ICU (accents and punctuation
  * removed, lowercased), every run of non-word characters becomes a single
  * dash and non-printable bytes are dropped.
  *
  * @param string $string Text to convert.
  * @return string
  */
 public static function slugify($string)
 {
     $prepared = str_replace(array('я', 'ю', 'ї', 'є', 'ж', 'ч', 'ш', 'щ', 'ь'), array('ya', 'yu', 'yi', 'ye', 'zh', 'ch', 'sh', 'sch', ''), $string);
     $transliterated = \Transliterator::create('Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();')->transliterate($prepared);
     // Collapse each whole run of non-word characters into ONE dash. The
     // previous single pass of str_replace('--', '-') only halved runs, so
     // three separators in a row still produced "--".
     $clean = preg_replace('/\\W+/', '-', $transliterated);
     return preg_replace('/[[:^print:]]/', '', $clean);
 }
Example #6
0
 /**
  * Reduces a string to the characters A-Z, a-z, 0-9, "_" and ".".
  *
  * The text is first transliterated with intl when it is available and the
  * transliterator can be created, otherwise with static::toAscii().
  *
  * @param string $string
  * @return string
  */
 private static function transliterate($string)
 {
     $icu = function_exists('transliterator_transliterate')
         ? \Transliterator::create('Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC;')
         : null;
     $ascii = $icu ? $icu->transliterate($string) : static::toAscii($string);
     return preg_replace('/[^A-Za-z0-9_.]/u', '', $ascii);
 }
 /**
  * Stores the transliteration rules and builds the matching Transliterator.
  *
  * @param string $transliterateOptions Rule string handed to \Transliterator::create().
  *
  * @throws TransliterateException when intl is not loaded, the options are
  *                                empty, or no Transliterator could be built
  */
 public function __construct($transliterateOptions = 'Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFKC;')
 {
     if (!extension_loaded('intl')) {
         throw new TransliterateException('Intl extension not loaded.');
     }
     if (!$transliterateOptions) {
         throw new TransliterateException('Invalid $transliterateOptions.');
     }
     $this->transliterateOptions = $transliterateOptions;

     $instance = \Transliterator::create($this->transliterateOptions);
     if ($instance instanceof \Transliterator) {
         $this->transliterator = $instance;
         return;
     }
     throw new TransliterateException('$transliterator is not instance of Transliterator');
 }
 /**
  * Handles the project form: renames an existing index entry or adds a new one.
  *
  * Nothing happens unless both `filename` and `projectname` were posted.
  * When `$_GET['id']` names an existing index entry only its project name is
  * updated; otherwise a new entry is added, and on failure the form values
  * plus an error message are handed to the view.
  */
 public function edit()
 {
     if (empty($_POST['filename']) || empty($_POST['projectname'])) {
         return;
     }
     // NOTE(review): mysql_escape_string() was removed in PHP 7.0, so this
     // method cannot run on a current PHP. The call is kept because the right
     // replacement depends on how webStorageIndex persists these values.
     $filename = mysql_escape_string($_POST['filename']);
     $projectname = mysql_escape_string($_POST['projectname']);
     $transliterator = \Transliterator::create('NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();');
     $id = str_replace(' ', '_', $transliterator->transliterate($projectname));
     // Build the stored filename from the posted filename. It used to be
     // derived from the project name, which silently discarded the input.
     $filename = str_replace(' ', '_', $transliterator->transliterate($filename));
     if (!empty($_GET['id']) && $this->webStorageIndex->indexHasId($_GET['id'])) {
         $this->webStorageIndex->setIndexProjectname($_GET['id'], $projectname);
         $this->webStorageIndex->save();
     } elseif (!$this->webStorageIndex->addIndex($id, $projectname, $filename)) {
         // The length test used to be inverted: any value LONGER than three
         // characters produced the "min length" message.
         // NOTE(review): the threshold of 3 comes from the old comparison;
         // confirm it against the rule addIndex() enforces.
         $tooShort = strlen($id) <= 3 || strlen($filename) <= 3 || strlen($projectname) <= 3;
         $this->view->setParameter('timesheet', array('projectname' => $projectname, 'filename' => $filename, 'error' => $tooShort ? 'Min length is not right' : 'This project or filename already exists'));
     } else {
         $this->webStorageIndex->save();
     }
 }
 /**
  * Slugify a string
  *
  * @param string $text      Text to slugify
  * @param string $default   Default return value (override when slugify would return an empty string)
  * @param array  $replace   Strings replaced by a space before transliteration
  * @param string $delimiter Separator placed between the words of the slug
  *
  * @return string
  */
 public function slugify($text, $default = '', $replace = array("'"), $delimiter = '-')
 {
     if (!empty($replace)) {
         $text = str_replace($replace, ' ', $text);
     }
     // transliterate
     if (class_exists('Transliterator')) {
         $text = (string) $text;
         // Only guess the source encoding when the text is not valid UTF-8
         // already; guessing across every known encoding can misdetect (and
         // thereby corrupt) perfectly good UTF-8 or ASCII input.
         if (!mb_check_encoding($text, 'UTF-8')) {
             $text = mb_convert_encoding($text, 'UTF-8', mb_list_encodings());
         }
         $transliterator = \Transliterator::create('Any-Latin; Latin-ASCII');
         if ($transliterator !== null) {
             $converted = $transliterator->transliterate($text);
             // Keep the untransliterated text when ICU reports a failure.
             if ($converted !== false) {
                 $text = $converted;
             }
         }
     }
     $text = preg_replace("/[^a-zA-Z0-9\\/_|+ -]/", '', $text);
     $text = strtolower($text);
     $text = preg_replace("/[\\/_|+ -]+/", $delimiter, $text);
     // Trim only after separators were turned into the delimiter; trimming
     // first left a delimiter at either end for input such as " hello ".
     $text = trim($text, $delimiter);
     // Explicit comparison so that the valid slug "0" is not treated as empty.
     if ($text === '') {
         return empty($default) ? '' : $default;
     }
     return $text;
 }
Example #10
0
 /**
  * Builds an ASCII-only identifier from a title.
  *
  * The title is transliterated with intl (trying 'Any-Latin; Latin-ASCII'
  * first, then an accent-stripping rule) or, without intl, with iconv when
  * it exists. A conversion that fails leaves the title as it was. Remaining
  * bytes outside \x20-\x7F are removed and the result is passed to
  * sanitize_title_with_dashes().
  *
  * @param string $title
  * @return string
  */
 public static function sanitize_id($title)
 {
     if (class_exists('Transliterator')) {
         $transliterator = Transliterator::create('Any-Latin; Latin-ASCII');
         if (!is_a($transliterator, 'Transliterator')) {
             // Second attempt with a rule set that does not need Latin-ASCII.
             $transliterator = Transliterator::create('Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC');
         }
         if (is_a($transliterator, 'Transliterator')) {
             $converted = $transliterator->transliterate($title);
             if ($converted !== false) {
                 $title = $converted;
             }
         }
     } elseif (function_exists('iconv')) {
         $converted = iconv('UTF-8', 'ASCII//TRANSLIT', $title);
         // iconv() returns false on failure; keep the title so the byte
         // filter below still has something to work with.
         if ($converted !== false) {
             $title = $converted;
         }
     }
     $title = preg_replace('/[^(\\x20-\\x7F)]*/', '', $title);
     return sanitize_title_with_dashes($title);
 }
Example #11
0
 /**
  * Turns a text into a lowercase, dash-separated slug.
  *
  * warning requires `yum -y install php-intl`
  * for transliterator to work; without it an iconv based fallback is used.
  * Both paths trim dashes from the ends and return 'n-a' when nothing is left.
  *
  * @param $text
  *
  * @return mixed|string
  */
 public static function slugify($text)
 {
     // The transliterator is built once per request and reused; $probed
     // distinguishes "not tried yet" from "tried and unavailable".
     static $probed = false;
     static $transliterator = null;
     if (empty($text)) {
         return 'n-a';
     }
     if (!$probed) {
         $probed = true;
         if (function_exists('transliterator_transliterate')) {
             // Probe with the very rule that is used for the conversion, so
             // availability is never decided on a different rule set.
             $transliterator = \Transliterator::create('Any-Latin; Latin-ASCII; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();');
         }
     }
     if ($transliterator !== null) {
         $slug = $transliterator->transliterate($text);
         if ($slug !== false) {
             $slug = trim(preg_replace('#[ -]+#', '-', $slug), '-');
             return $slug === '' ? 'n-a' : $slug;
         }
         // Transliteration failed: continue with the iconv fallback below.
     }
     // replace non letter or digits by -
     $text = preg_replace('~[^\\pL\\d]+~u', '-', $text);
     $text = trim($text, '-');
     $ascii = iconv('utf-8', 'us-ascii//TRANSLIT', $text);
     // iconv() returns false on failure; keep the text and let the filter
     // below drop whatever is not ASCII.
     if ($ascii !== false) {
         $text = $ascii;
     }
     $text = strtolower($text);
     $text = preg_replace('~[^-\\w]+~', '', $text);
     if (empty($text)) {
         return 'n-a';
     }
     return $text;
 }
Example #12
0
 /**
  * Build a transliterator for an ICU transform identifier.
  * See http://userguide.icu-project.org/transforms/general for $identifier.
  *
  * @param   string  $identifier    Identifier.
  * @return  \Transliterator|null   Null when the Transliterator class does not exist;
  *                                 otherwise whatever \Transliterator::create() returns.
  */
 public static function getTransliterator($identifier)
 {
     return class_exists('Transliterator')
         ? \Transliterator::create($identifier)
         : null;
 }
Example #13
0
 /**
  *	Shared transliterator instance, created on first request.
  *	@return \Touchbase\Utils\Transliterator
  */
 public static function transliterator()
 {
     if (self::$transliterator !== null) {
         return self::$transliterator;
     }
     self::$transliterator = Transliterator::create();
     return self::$transliterator;
 }
 /**
  * Lowercases a string with the ICU 'Any-Lower' transform.
  *
  * The transliterator is created on first use and kept in self::$to_lower.
  *
  * @param string $str
  * @return string|false Result of Transliterator::transliterate().
  */
 private static function toLowerMb($str)
 {
     $lowercaser = self::$to_lower;
     if ($lowercaser === null) {
         $lowercaser = self::$to_lower = \Transliterator::create('Any-Lower');
     }
     return $lowercaser->transliterate($str);
 }
Example #15
0
<?php

// Ask for a transliterator with a 20000-character ID, dump what create()
// returns, then dump the intl error message recorded for the call.
$oversizedId = str_repeat("x", 20000);
$created = Transliterator::create($oversizedId);
var_dump($created);
var_dump(intl_get_error_message());
Example #16
0
<?php

// Rule chain: to Latin, decompose, drop combining marks, recompose,
// drop punctuation, lowercase.
$rules = "Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();";
$input = "garçon-étudiant-où-L'école";
echo Transliterator::create($rules)->transliterate($input);
// garconetudiantoulecole
Example #17
0
<?php

ini_set("intl.error_level", E_WARNING);
$input = "a U+4E07";

// Print the ID and the conversion result of the original transliterator...
$original = Transliterator::create("hex-any");
echo $original->id, ": ", $original->transliterate($input), "\n";

// ...and then the same two values for a clone of it.
$copy = clone $original;
echo $copy->id, ": ", $copy->transliterate($input), "\n";
echo "Done.\n";
Example #18
0
 /**
  * Applies an ICU transform to a string.
  *
  * Creates a Transliterator from $transform, runs $string through it and
  * returns the result when is_cstring() accepts it; otherwise an assertion
  * is raised and an empty string is returned.
  *
  * NOTE(review): is_cstring() and vs() are helpers defined outside this
  * block; presumably is_cstring() is a "plain string" type check - confirm.
  * NOTE(review): the result of Transliterator::create() is not checked, so a
  * $transform that create() rejects (null result) fails on the next line.
  *
  * @param string $string    Text to transform.
  * @param string $transform Transform identifier passed to Transliterator::create().
  * @return string The transformed text, or "" when the result is rejected.
  *
  * @ignore
  */
 protected static function handleTransform($string, $transform)
 {
     $translit = Transliterator::create($transform);
     $string = $translit->transliterate($string);
     if (is_cstring($string)) {
         return $string;
     } else {
         // The description argument captures every local variable via
         // get_defined_vars(), so the set and names of locals here matter.
         // NOTE(review): string assertions ('false') are a legacy assert()
         // form - confirm the PHP version this targets.
         assert('false', vs(isset($this), get_defined_vars()));
         return "";
     }
 }
Example #19
0
 /**
  * Converts a string into a lowercase slug with dashes instead of spaces.
  *
  * With intl available the text is transliterated (accents and punctuation
  * removed, lowercased) and every whitespace character becomes a dash.
  * Otherwise a fixed table of accented Latin letters is mapped to ASCII,
  * everything except a-z, 0-9, "_", "-" and whitespace is removed, and
  * whitespace runs become a single dash.
  *
  * @param string $str    Text to convert.
  * @param bool   $isUtf8 Whether $str is UTF-8 (fallback path only); when false
  *                       it is used as-is, presumably as ISO-8859-1 - confirm.
  * @return string
  */
 function slug($str, $isUtf8 = true)
 {
     // create() returns null on failure; in that case use the table-based
     // fallback instead of calling a method on null.
     $translit = class_exists('Transliterator')
         ? \Transliterator::create('Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();')
         : null;
     if ($translit !== null) {
         return preg_replace('/\\s/', '-', $translit->transliterate($str));
     }

     $accented = utf8_decode("ÀÁÂÃÄÅàáâãäåÇçÒÓÔÕÖØòóôõöøÈÉÊËèéêëÌÍÎÏìíîïÙÚÛÜùúûüÿÑñ");
     $plain = "AAAAAAaaaaaaCcOOOOOOooooooEEEEeeeeIIIIiiiiUUUUuuuuyNn";
     $str = strtr($isUtf8 ? utf8_decode($str) : $str, $accented, $plain);
     // The hyphen has to be the last character of the class: written as
     // "_-\s" it forms a range ending in a class escape, which PCRE2
     // rejects as "invalid range in character class".
     $str = preg_replace('/[^a-z0-9_\\s-]/', '', strtolower($str));
     $str = preg_replace('/[\\s]+/', ' ', trim($str));
     return str_replace(' ', '-', $str);
 }
<?php

ini_set("intl.error_level", E_WARNING);

// Forward direction: Katakana to Latin.
$forward = Transliterator::create("Katakana-Latin");
$katakana = "オーシャンビュー";
$latin = $forward->transliterate($katakana);

// Inverse obtained through the method...
$inverseFromMethod = $forward->createInverse();
$backFromMethod = $inverseFromMethod->transliterate($latin);

// ...and through the procedural function.
$inverseFromFunction = transliterator_create_inverse($forward);
$backFromFunction = $inverseFromFunction->transliterate($latin);

echo $katakana, "\n";
echo $latin, "\n";
echo $backFromMethod, "\n";
// Loose comparison of (original == first round trip) with the second round trip.
var_dump(($katakana == $backFromMethod) == $backFromFunction);
echo "Done.\n";
 /**
  * Transliterates the input string to an ASCII equivalent string.
  * Transliteration is language dependent.
  *
  * The string passes through self::cyr_to_lat(), then (when intl is usable)
  * a Transliterator chosen by language, then (under CodeIgniter) the
  * "foreign_chars" replacement tables and finally iconv() when it is
  * installed. A stage that fails leaves the result of the previous stage
  * untouched, so the method always returns a string.
  *
  * @param string $string        The input string, UTF-8.
  * @param string $language      Language identificator ('bg', 'ru', ...)
  * @return string
  */
 public static function to_ascii($string, $language = null)
 {
     $language = self::detect_language($language);
     $string = self::cyr_to_lat($string, $language);
     static $transliterator_ids = array();
     if (INTL_INSTALLED && class_exists('Transliterator', false)) {
         if (empty($transliterator_ids)) {
             $transliterator_ids = Transliterator::listIDs();
             // listIDs() returns false on failure; keep the cache an array.
             if (!is_array($transliterator_ids)) {
                 $transliterator_ids = array();
             }
         }
         $transliterator_id = null;
         switch ($language) {
             case 'ar':
                 $transliterator_id = 'Arabic-Latin';
                 break;
             case 'el':
                 $transliterator_id = 'Greek-Latin';
                 break;
             case 'mk':
                 $transliterator_id = 'Macedonian-Latin/BGN';
                 break;
             case 'sr':
                 $transliterator_id = 'Serbian-Latin/BGN';
                 break;
             case 'uk':
                 $transliterator_id = 'Ukrainian-Latin/BGN';
                 break;
             case 'ko':
                 $transliterator_id = 'Korean-Latin/BGN';
                 break;
             case 'th':
                 $transliterator_id = 'Thai-Latin';
                 break;
             case 'gu':
                 $transliterator_id = 'Gujarati-Latin';
                 break;
             case 'ta':
                 $transliterator_id = 'Tamil-Latin';
                 break;
             case 'az':
                 $transliterator_id = 'Azerbaijani-Latin/BGN';
                 break;
         }
         // Drop a language specific ID that this ICU build does not provide.
         if ($transliterator_id !== null && !in_array($transliterator_id, $transliterator_ids, true)) {
             $transliterator_id = null;
         }
         if ($transliterator_id === null) {
             $transliterator_id = 'Any-Latin; Latin-ASCII';
         } else {
             $transliterator_id .= '; Any-Latin; Latin-ASCII';
         }
         $transliterator = Transliterator::create($transliterator_id);
         // create() returns null on failure, and "@" cannot suppress the
         // error raised by calling a method on null.
         if ($transliterator !== null) {
             $new_string = @$transliterator->transliterate($string);
             if ($new_string !== false) {
                 $string = $new_string;
             }
             unset($new_string);
         }
     }
     static $search;
     static $replace;
     if (IS_CODEIGNITER) {
         if (!isset($search) || !is_array($search)) {
             // Added by Ivan Tcholakov, 03-OCT-2013.
             if (file_exists(COMMONPATH . 'config/foreign_chars.php')) {
                 include COMMONPATH . 'config/foreign_chars.php';
             }
             if (file_exists(COMMONPATH . 'config/' . ENVIRONMENT . '/foreign_chars.php')) {
                 include COMMONPATH . 'config/' . ENVIRONMENT . '/foreign_chars.php';
             }
             //
             if (file_exists(APPPATH . 'config/foreign_chars.php')) {
                 include APPPATH . 'config/foreign_chars.php';
             }
             if (file_exists(APPPATH . 'config/' . ENVIRONMENT . '/foreign_chars.php')) {
                 include APPPATH . 'config/' . ENVIRONMENT . '/foreign_chars.php';
             }
             // The included files are expected to define $foreign_characters
             // as a pattern => replacement map.
             if (empty($foreign_characters) || !is_array($foreign_characters)) {
                 $search = array();
                 $replace = array();
             } else {
                 $search = array_keys($foreign_characters);
                 $replace = array_values($foreign_characters);
             }
         }
         $replaced = preg_replace($search, $replace, $string);
         // preg_replace() returns null on a pattern error.
         if ($replaced !== null) {
             $string = $replaced;
         }
     }
     if (ICONV_INSTALLED) {
         $converted = iconv('UTF-8', 'ASCII//TRANSLIT', $string);
         // iconv() returns false on failure; the documented return type is
         // string, so fall back to what we have.
         if ($converted !== false) {
             $string = $converted;
         }
     }
     return $string;
 }
Example #22
0
 /**
  * Converts to ASCII.
  *
  * The characters ` ' " ^ ~ ? are swapped for the control characters
  * \x01-\x06 before conversion and swapped back at the very end. In between,
  * every occurrence of those six characters is deleted - presumably because
  * iconv's //TRANSLIT emits them as stand-ins for accents and unconvertible
  * characters - so only the ones present in the input survive.
  *
  * NOTE(review): the placeholder strings, the typographic-quote list and the
  * Windows-1250 byte table were unreadable (empty or mis-decoded) in the
  * reviewed text. They are restored here as explicit escapes; the byte table
  * was rebuilt to line up one-to-one with the 91-character replacement
  * string. Please verify against the upstream implementation.
  *
  * @param  string  UTF-8 encoding
  * @return string  ASCII
  */
 public static function toAscii($s)
 {
     $s = preg_replace('#[^\\x09\\x0A\\x0D\\x20-\\x7E\\xA0-\\x{2FF}\\x{370}-\\x{10FFFF}]#u', '', $s);
     // Protect characters that must survive the clean-up further down.
     $s = strtr($s, '`\'"^~?', "\x01\x02\x03\x04\x05\x06");
     // Typographic double quotes -> ", single quotes -> ', degree sign -> ^
     // (expressed through the placeholders above).
     $s = str_replace(
         array("\xE2\x80\x9E", "\xE2\x80\x9C", "\xE2\x80\x9D", "\xE2\x80\x9A", "\xE2\x80\x98", "\xE2\x80\x99", "\xC2\xB0"),
         array("\x03", "\x03", "\x03", "\x02", "\x02", "\x02", "\x04"),
         $s
     );
     if (class_exists('Transliterator') && ($transliterator = \Transliterator::create('Any-Latin; Latin-ASCII'))) {
         $s = $transliterator->transliterate($s);
     }
     if (ICONV_IMPL === 'glibc') {
         $s = str_replace(array("»", "«", "…", "™", "©", "®"), array('>>', '<<', '...', 'TM', '(c)', '(R)'), $s);
         $s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT//IGNORE', $s);
         // intentionally @
         // Map the remaining Windows-1250 bytes (>= 0x80) to ASCII look-alikes.
         $s = strtr(
             $s,
             "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
             . "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
             . "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
             . "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe"
             . "\x96\xa0\x8b\x97\x9b\xa6\xad\xb7",
             "ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-."
         );
         $s = preg_replace('#[^\\x00-\\x7F]++#', '', $s);
     } else {
         $s = @iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
         // intentionally @
     }
     // Remove the stand-ins, then restore the protected originals.
     $s = str_replace(array('`', "'", '"', '^', '~', '?'), '', $s);
     return strtr($s, "\x01\x02\x03\x04\x05\x06", '`\'"^~?');
 }
<?php

ini_set("intl.error_level", E_WARNING);
$mirroredToHex = Transliterator::create("[\\p{Bidi_Mirrored}] Hex");

// Three calls with unexpected argument lists: the function without its
// argument, the method with an extra one, the function with an array.
echo transliterator_get_error_code(), "\n";
echo $mirroredToHex->getErrorCode(null), "\n";
echo transliterator_get_error_code(array()), "\n";
 /**
  * Converts a string into a URL-friendly, lowercase slug.
  *
  * Entities and tags are removed, a few symbols are spelled out, decimal
  * points between digits become spaces, and the text is transliterated with
  * intl (or iconv when intl is missing). Runs of whitespace and dashes are
  * replaced by $spacer. An empty result is delegated to
  * PerchUtil::urlify_non_translit().
  *
  * @param string $string Text to convert.
  * @param string $spacer Separator placed between words.
  * @return string
  */
 public static function urlify($string, $spacer = '-')
 {
     $string = strip_tags(htmlspecialchars_decode(trim($string), ENT_QUOTES));
     $string = str_replace(array('$', '£', '€', '™', '®', '|'), array('', 'GBP ', 'EUR ', 'tm', 'r', ''), $string);
     // make sure numbers with decimals don't mislead, e.g. 2.5 -> 25
     $string = preg_replace('#(\\d)\\.(\\d)#', '$1 $2', $string);

     // Include Latin-ASCII in the rule chain only when this ICU build lists it.
     $hasLatinAscii = function_exists('transliterator_list_ids')
         && in_array('Latin-ASCII', transliterator_list_ids());
     $rule = $hasLatinAscii
         ? 'Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();'
         : 'Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();';

     if (function_exists('transliterator_transliterate')) {
         $string = str_replace('-', ' ', $string);
         $slug = transliterator_transliterate($rule, $string);
     } elseif (class_exists('Transliterator')) {
         $string = str_replace('-', ' ', $string);
         $slug = Transliterator::create($rule)->transliterate($string);
     } else {
         $slug = strtolower(iconv('UTF-8', 'ASCII//TRANSLIT', $string));
         $slug = preg_replace('/[^a-z0-9\\-\\s]/', '', $slug);
     }

     $slug = preg_replace('/[\\s\\-]+/', $spacer, $slug);
     return strlen($slug) > 0 ? $slug : PerchUtil::urlify_non_translit($string);
 }
<?php

ini_set("intl.error_level", E_WARNING);

// Create the same transliterator through the static method and through the
// procedural function, printing the ID each one reports.
$fromMethod = Transliterator::create("any-latin");
echo $fromMethod->id, "\n";
$fromFunction = transliterator_create("any-latin");
echo $fromFunction->id, "\n";
echo "Done.\n";
Example #26
0
 /**
  * Converts UTF8 into Latin.
  *
  * intl ('Any-Latin; Latin-ASCII') is preferred; when it is missing or the
  * transliterator cannot be created, \URLify::transliterate() is used.
  *
  * @param $value
  *
  * @return mixed
  */
 public static function transliterate($value)
 {
     // Use intl by default
     if (function_exists('transliterator_transliterate')) {
         $icu = \Transliterator::create('Any-Latin; Latin-ASCII');
         if ($icu) {
             return $icu->transliterate($value);
         }
     }
     return \URLify::transliterate($value);
 }
Example #27
0
 /**
  * Returns an enum value identifier based on the given value name.
  *
  * The name is transliterated to lowercase ASCII, spaces and dashes become
  * underscores, every other character outside [a-z0-9_] is dropped and
  * repeated underscores are collapsed. Identifiers longer than
  * self::MAX_ENUM_VALUE_ID_LENGTH are cut and suffixed with "_" plus a CRC32
  * hash of the full identifier.
  *
  * @param string $enumValueName
  * @param bool   $throwExceptionIfInvalidName
  *
  * @return string The enum value identifier. Can be empty string if $throwExceptionIfInvalidName = false
  *
  * @throws \InvalidArgumentException
  */
 public static function buildEnumValueId($enumValueName, $throwExceptionIfInvalidName = true)
 {
     if (strlen($enumValueName) === 0) {
         if (!$throwExceptionIfInvalidName) {
             return '';
         }
         throw new \InvalidArgumentException('$enumValueName must not be empty.');
     }
     // Work on a copy so the error message below can quote the caller's
     // original input rather than its transliterated form.
     $normalizedName = $enumValueName;
     $tr = \Transliterator::create('Latin; Latin-ASCII; Lower');
     if ($tr) {
         $normalizedName = $tr->transliterate($enumValueName);
     }
     $result = preg_replace(['/ +/', '/-+/', '/[^a-z0-9_]+/i', '/_{2,}/'], ['_', '_', '', '_'], trim($normalizedName));
     if ($result === '_') {
         $result = '';
     }
     if (strlen($result) > self::MAX_ENUM_VALUE_ID_LENGTH) {
         $hash = dechex(crc32($result));
         $result = substr($result, 0, self::MAX_ENUM_VALUE_ID_LENGTH - strlen($hash) - 1) . '_' . $hash;
     }
     if ($throwExceptionIfInvalidName && strlen($result) === 0) {
         throw new \InvalidArgumentException(sprintf('The conversion of "%s" to enum value id produces empty string.', $enumValueName));
     }
     return $result;
 }
Example #28
0
 /**
  * Returns this object's transliterator, creating one on demand when none is
  * set and self::$default_use_transliterator is enabled.
  *
  * @return Transliterator|NULL
  */
 function getTransliterator()
 {
     if ($this->transliterator !== null || !self::$default_use_transliterator) {
         return $this->transliterator;
     }
     $this->transliterator = Transliterator::create();
     return $this->transliterator;
 }
Example #29
0
 /**
  * Transliterates Cyrillic text to Latin with the ICU 'Cyrillic-Latin' transform.
  *
  * @param string $str
  * @return string|false Result of Transliterator::transliterate().
  */
 public static function translit($str)
 {
     return \Transliterator::create('Cyrillic-Latin')->transliterate($str);
 }
Example #30
-1
 /**
  * Transliterate to ASCII through the intl Transliterator class.
  *
  * The transliterator is created lazily and kept in self::$_transliterator.
  *
  * @param string $str  Input string (UTF-8).
  *
  * @return mixed  Transliterated string (UTF-8), or false on error (class
  *                missing, transliterator not created, or conversion failed).
  */
 protected static function _intlToAscii($str)
 {
     if (!class_exists('Transliterator')) {
         return false;
     }
     if (!isset(self::$_transliterator)) {
         self::$_transliterator = Transliterator::create('Any-Latin; Latin-ASCII');
     }
     /* transliterate() itself returns false on error. */
     return is_null(self::$_transliterator)
         ? false
         : self::$_transliterator->transliterate($str);
 }