/**
 * Transliterates a string to ASCII using the intl Transliterator.
 *
 * The transliterator ('Any-Latin; Latin-ASCII') is created on first use and
 * cached in self::$_transliterator.
 *
 * @param string $str Input string.
 *
 * @return string|false Transliterated string, or false when the transliterator
 *                      could not be created or the transliteration failed.
 */
protected static function _intlToAscii($str)
{
    if (!isset(self::$_transliterator)) {
        self::$_transliterator = Transliterator::create('Any-Latin; Latin-ASCII');
    }

    // Transliterator::create() yields null on failure; calling a method on
    // null would be a fatal error, so report the failure to the caller instead.
    if (!self::$_transliterator instanceof Transliterator) {
        return false;
    }

    return self::$_transliterator->transliterate($str);
}
/**
 * Creates a slug to be used for pretty URLs.
 *
 * The process locale is temporarily switched to en_US.UTF-8 while the slug is
 * built and restored before returning.
 *
 * @param string $string    Text to turn into a slug.
 * @param array  $replace   Optional map of search => replacement applied before
 *                          transliteration, e.g. ['ı' => 'i', 'İ' => 'i'].
 * @param string $delimiter Separator placed between words.
 * @return string
 *
 * @throws \Phalcon\Exception When the intl extension is missing or the
 *                            transliterator cannot be created.
 */
public static function generate($string, $replace = [], $delimiter = '-')
{
    if (!extension_loaded('intl')) {
        throw new Exception('intl module not loaded');
    }

    // Create the transliterator BEFORE touching the locale so that a failure
    // here can never leave the process with a modified locale.
    $transliterator = \Transliterator::create('Any-Latin; Latin-ASCII');
    if (!$transliterator instanceof \Transliterator) {
        throw new Exception('Unable to create the transliterator');
    }

    // Save the old locale and set the new locale to UTF-8
    $oldLocale = setlocale(LC_ALL, '0');
    setlocale(LC_ALL, 'en_US.UTF-8');

    // Better to replace given $replace array as index => value
    // Example $replace['ı' => 'i', 'İ' => 'i'];
    if (!empty($replace) && is_array($replace)) {
        $string = str_replace(array_keys($replace), array_values($replace), $string);
    }

    $string = $transliterator->transliterate(
        mb_convert_encoding(htmlspecialchars_decode($string), 'UTF-8', 'auto')
    );

    // replace non letter or non digits by -
    $string = preg_replace('#[^\\pL\\d]+#u', '-', $string);

    // Trim trailing -
    $string = trim($string, '-');

    $clean = preg_replace('~[^-\\w]+~', '', $string);
    $clean = strtolower($clean);
    $clean = preg_replace('#[\\/_|+ -]+#', $delimiter, $clean);
    $clean = trim($clean, $delimiter);

    // Revert back to the old locale
    setlocale(LC_ALL, $oldLocale);

    return $clean;
}
/**
 * Transliterates the input with the configured rules and normalises
 * separators.
 *
 * Runs of hyphens and whitespace are collapsed into $this->delimiter, and the
 * delimiter is trimmed from both ends of the result.
 *
 * @param string $stringToSantize
 * @return string
 * @throws CException When the transliterator cannot be created or the
 *                    transliteration yields no output.
 */
private function sanitize($stringToSantize)
{
    $transliterator = Transliterator::create($this->rules);
    if (!$transliterator instanceof Transliterator) {
        // create() returns null for rules it cannot compile.
        throw new CException("Unable to create a transliterator from the configured rules.");
    }

    $slug = $transliterator->transliterate($stringToSantize);

    // Deliberately not empty(): the string "0" is a valid, non-empty result.
    if (!is_string($slug) || $slug === '') {
        throw new CException("Empty santized result. Check input string for encoding, UTF-8 is required.");
    }

    return trim(preg_replace('/[-\\s]+/', $this->delimiter, $slug), $this->delimiter);
}
/**
 * Prepares the optional intl transliterator.
 *
 * When the Transliterator class is available, a transliterator is created and
 * stored; use_transliterator is switched on only if that creation succeeded.
 */
public function __construct()
{
    if (class_exists('Transliterator')) {
        // Use Any-Latin to munge Cyrillic, Kanji, etc
        // Then convert anything outside the ISO-8859-1 range to nearest ASCII
        $transliterator = Transliterator::create('Any-Latin; [^a-ÿ] Latin-ASCII');
        $this->transliterator = $transliterator;

        // create() returns null on failure; only advertise transliterator
        // support when a usable instance actually exists.
        if ($transliterator instanceof Transliterator) {
            $this->use_transliterator = true;
        }
    }
}
/**
 * Builds a lower-case, dash-separated slug from a string.
 *
 * A handful of Cyrillic letters are mapped by hand first, then the text is
 * transliterated to Latin, stripped of combining marks and punctuation and
 * lower-cased. Every non-word character becomes a dash, runs of dashes are
 * collapsed to one, and non-printable characters are removed.
 *
 * @param string $string Text to slugify.
 * @return string
 */
public static function slugify($string)
{
    $prepared = str_replace(
        array('я', 'ю', 'ї', 'є', 'ж', 'ч', 'ш', 'щ', 'ь'),
        array('ya', 'yu', 'yi', 'ye', 'zh', 'ch', 'sh', 'sch', ''),
        $string
    );

    $transliterator = \Transliterator::create(
        'Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();'
    );
    // create() returns null on failure; fall back to the hand-mapped text
    // rather than dying on a null method call.
    $transliterated = $transliterator instanceof \Transliterator
        ? $transliterator->transliterate($prepared)
        : $prepared;

    $clean = preg_replace('/\\W/i', '-', $transliterated);

    // Collapse ANY run of dashes. A single str_replace('--', '-') pass only
    // halves a run, so three spaces used to leave "--" behind.
    $replaced = preg_replace('/-{2,}/', '-', $clean);

    $result = preg_replace('/[[:^print:]]/', '', $replaced);

    return $result;
}
/**
 * Reduces a string to the characters A-Z, a-z, 0-9, underscore and dot.
 *
 * The intl transliterator is used when it is available and can be created;
 * otherwise static::toAscii() performs the conversion.
 *
 * @param string $string
 * @return string|null Result of the final preg_replace().
 */
private static function transliterate($string)
{
    $rules = 'Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC;';

    $intl = null;
    if (function_exists('transliterator_transliterate')) {
        $intl = \Transliterator::create($rules);
    }

    $ascii = $intl
        ? $intl->transliterate($string)
        : static::toAscii($string);

    return preg_replace('/[^A-Za-z0-9_.]/u', '', $ascii);
}
/**
 * Stores the transliteration rules and builds the matching transliterator.
 *
 * @param string $transliterateOptions Rule string handed to Transliterator::create().
 *
 * @throws TransliterateException When the intl extension is missing, the
 *                                options are empty, or no transliterator
 *                                can be created from them.
 */
public function __construct($transliterateOptions = 'Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFKC;')
{
    if (extension_loaded('intl') === false) {
        throw new TransliterateException('Intl extension not loaded.');
    }

    if (!$transliterateOptions) {
        throw new TransliterateException('Invalid $transliterateOptions.');
    }
    $this->transliterateOptions = $transliterateOptions;

    $instance = \Transliterator::create($this->transliterateOptions);
    if (!($instance instanceof \Transliterator)) {
        throw new TransliterateException('$transliterator is not instance of Transliterator');
    }

    $this->transliterator = $instance;
}
/**
 * Handles the project edit/create form.
 *
 * Does nothing unless both POST fields 'filename' and 'projectname' are
 * non-empty. When GET 'id' names an existing index entry, that entry's project
 * name is updated and saved. Otherwise a new index entry is added; if adding
 * fails, the submitted values and an error message are handed to the view.
 *
 * @return void
 */
public function edit()
{
    if (empty($_POST['filename']) || empty($_POST['projectname'])) {
        return;
    }

    // NOTE(review): mysql_escape_string() was removed in PHP 7.0 and nothing
    // here builds SQL; kept only to preserve the stored values. Confirm and drop.
    $filename = mysql_escape_string($_POST['filename']);
    $projectname = mysql_escape_string($_POST['projectname']);

    $transliterator = \Transliterator::create('NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();');
    $id = str_replace(' ', '_', $transliterator->transliterate($projectname));
    // Fixed: normalise the submitted filename. The previous code transliterated
    // $projectname here, silently discarding the filename the user entered.
    $filename = str_replace(' ', '_', $transliterator->transliterate($filename));

    if (!empty($_GET['id']) && $this->webStorageIndex->indexHasId($_GET['id'])) {
        $this->webStorageIndex->setIndexProjectname($_GET['id'], $projectname);
        $this->webStorageIndex->save();
    } elseif (!$this->webStorageIndex->addIndex($id, $projectname, $filename)) {
        // NOTE(review): "Min length is not right" is chosen when a length is
        // GREATER than 3, which looks inverted — confirm against addIndex().
        $this->view->setParameter('timesheet', array(
            'projectname' => $projectname,
            'filename' => $filename,
            'error' => strlen($id) > 3 || strlen($filename) > 3 || strlen($projectname) > 3
                ? 'Min length is not right'
                : 'This project or filename already exists',
        ));
    } else {
        $this->webStorageIndex->save();
    }
}
/**
 * Slugify a string
 *
 * Steps: optional character replacement, transliteration to ASCII (when the
 * intl Transliterator class exists), removal of everything except letters,
 * digits and the separator characters "/ _ | + space -", lower-casing, and
 * collapsing separator runs into $delimiter.
 *
 * @param string       $text      Text to slugify
 * @param string       $default   Default return value (override when slugify would return an empty string)
 * @param array|string $replace   Characters replaced by a space before slugifying
 * @param string       $delimiter Separator placed between words
 *
 * @return string
 */
public function slugify($text, $default = '', $replace = array("'"), $delimiter = '-')
{
    if (!empty($replace)) {
        $text = str_replace($replace, ' ', $text);
    }
    $text = (string) $text;

    // transliterate
    if (class_exists('Transliterator')) {
        // Only guess at the source encoding when the text is not already valid
        // UTF-8; re-guessing valid input risks picking a wrong encoding.
        if (!mb_check_encoding($text, 'UTF-8')) {
            $text = mb_convert_encoding($text, 'UTF-8', mb_list_encodings());
        }

        $transliterator = \Transliterator::create('Any-Latin; Latin-ASCII');
        if ($transliterator instanceof \Transliterator) {
            $converted = $transliterator->transliterate($text);
            if ($converted !== false) {
                $text = $converted;
            }
        }
    }

    $text = preg_replace("/[^a-zA-Z0-9\\/_|+ -]/", '', $text);

    // Trim the separator characters themselves. Trimming $delimiter here left
    // leading/trailing separators in place and, for a letter delimiter, ate
    // real characters from the text.
    $text = strtolower(trim($text, '/_|+ -'));
    $text = preg_replace("/[\\/_|+ -]+/", $delimiter, $text);

    // Not empty(): the slug "0" is a legitimate result.
    if ($text === null || $text === '') {
        return empty($default) ? '' : $default;
    }

    return $text;
}
/**
 * Converts a title into an ASCII identifier.
 *
 * With the Transliterator class available, the first rule set that can be
 * instantiated ('Any-Latin; Latin-ASCII', then a mark-stripping variant) is
 * applied. Without it, iconv's ASCII//TRANSLIT is used when iconv exists.
 * The pattern below is then applied and the result is passed through
 * sanitize_title_with_dashes().
 *
 * @param string $title
 * @return string
 */
public static function sanitize_id($title)
{
    if (!class_exists('Transliterator')) {
        if (function_exists('iconv')) {
            $title = iconv('UTF-8', 'ASCII//TRANSLIT', $title);
        }
    } else {
        $ruleSets = array(
            'Any-Latin; Latin-ASCII',
            'Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC',
        );

        // Try each rule set in order and stop at the first usable one.
        foreach ($ruleSets as $rules) {
            $candidate = Transliterator::create($rules);
            if (is_a($candidate, 'Transliterator')) {
                $title = $candidate->transliterate($title);
                break;
            }
        }
    }

    $title = preg_replace('/[^(\\x20-\\x7F)]*/', '', $title);

    return sanitize_title_with_dashes($title);
}
/**
 * Builds a lower-case, dash-separated ASCII slug.
 *
 * warning requires `yum -y install php-intl`
 * for transliterator to work; without it an iconv based fallback is used.
 *
 * Returns 'n-a' when the input is empty or nothing usable remains.
 *
 * @param string $text
 *
 * @return string
 */
public static function slugify($text)
{
    static $transliteratorExists;

    // One rule string for both the availability probe and the real call.
    $rule = 'Any-Latin; Latin-ASCII; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();';

    // Not empty(): "0" and 0 are valid inputs and must not become 'n-a'.
    if ($text === null || $text === false || $text === '' || $text === array()) {
        return 'n-a';
    }

    if (is_null($transliteratorExists)) {
        // The old probe assigned the result of "create(...) !== null" to an
        // unused variable because of operator precedence.
        $transliteratorExists = function_exists('transliterator_transliterate')
            && \Transliterator::create($rule) !== null;
    }

    if ($transliteratorExists === true) {
        $slug = transliterator_transliterate($rule, $text);
        if ($slug !== false) {
            // Same clean-up as the fallback: trim dashes, never return ''.
            $slug = trim(preg_replace('#[ -]+#', '-', $slug), '-');

            return $slug === '' ? 'n-a' : $slug;
        }
        // Transliteration failed: fall through to the iconv path.
    }

    // replace non letter or digits by -
    $text = preg_replace('~[^\\pL\\d]+~u', '-', $text);
    $text = trim($text, '-');

    // iconv() returns false on failure; keep the text in that case and let
    // the final filter strip whatever is not ASCII word characters.
    $converted = iconv('utf-8', 'us-ascii//TRANSLIT', $text);
    if ($converted !== false) {
        $text = $converted;
    }

    $text = strtolower($text);
    $text = preg_replace('~[^-\\w]+~', '', $text);

    if ($text === null || $text === '') {
        return 'n-a';
    }

    return $text;
}
/**
 * Get transliterator.
 * See http://userguide.icu-project.org/transforms/general for $identifier.
 *
 * @param string $identifier Identifier.
 * @return \Transliterator|null Null when the Transliterator class is not
 *                              available; otherwise whatever
 *                              \Transliterator::create() returns.
 */
public static function getTransliterator($identifier)
{
    return class_exists('Transliterator')
        ? \Transliterator::create($identifier)
        : null;
}
/**
 * Returns the shared transliterator, creating it on first access.
 *
 * @return \Touchbase\Utils\Transliterator
 */
public static function transliterator()
{
    $instance = self::$transliterator;

    if ($instance === null) {
        $instance = Transliterator::create();
        self::$transliterator = $instance;
    }

    return $instance;
}
/**
 * Lower-cases a string with the 'Any-Lower' transliterator.
 *
 * The transliterator is created on first use and kept in self::$to_lower.
 *
 * @param string $str
 * @return string|false Result of Transliterator::transliterate().
 */
private static function toLowerMb($str)
{
    if (null === self::$to_lower) {
        self::$to_lower = \Transliterator::create('Any-Lower');
    }

    $lowerCaser = self::$to_lower;

    return $lowerCaser->transliterate($str);
}
<?php
// Dumps what Transliterator::create() returns for a 20000-character ID,
// followed by the resulting intl error message.
$oversizedId = str_repeat("x", 20000);
$created = Transliterator::create($oversizedId);

var_dump($created);
var_dump(intl_get_error_message());
<?php
// Demonstrates stripping accents and punctuation and lower-casing a phrase.
$rules = "Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();";
$phrase = "garçon-étudiant-où-L'école";

echo Transliterator::create($rules)->transliterate($phrase); // garconetudiantoulecole
<?php
// Prints the id and output of a "hex-any" transliterator and of its clone.
ini_set("intl.error_level", E_WARNING);

$input = "a U+4E07";

$original = Transliterator::create("hex-any");
printf("%s: %s\n", $original->id, $original->transliterate($input));

$copy = clone $original;
printf("%s: %s\n", $copy->id, $copy->transliterate($input));

echo "Done.\n";
/**
 * Applies the transliterator identified by $transform to $string.
 *
 * Returns the transliterated value when is_cstring() accepts it; otherwise an
 * assertion is raised and an empty string is returned. The local variables are
 * passed to the assertion via get_defined_vars(), so their names are part of
 * what the assertion reports.
 *
 * NOTE(review): assert() is given the string 'false'; string assertions are no
 * longer evaluated as code in PHP 8, so this may never fire there — confirm.
 * NOTE(review): the result of Transliterator::create() is used without a null
 * check.
 *
 * @ignore
 */ protected static function handleTransform($string, $transform) { $translit = Transliterator::create($transform); $string = $translit->transliterate($string); if (is_cstring($string)) { return $string; } else { assert('false', vs(isset($this), get_defined_vars())); return ""; } }
/**
 * Turns a string into a lower-case, dash-separated slug.
 *
 * Uses the intl Transliterator when it is available and can be created.
 * Otherwise a Latin-1 accent table is applied, everything except a-z, 0-9,
 * underscore, hyphen and whitespace is dropped, and whitespace runs become a
 * single dash.
 *
 * @param string $str    Text to slugify.
 * @param bool   $isUtf8 Whether $str is UTF-8 (true) or already single-byte
 *                       (false); only used by the fallback path.
 * @return string
 */
function slug($str, $isUtf8 = true)
{
    if (class_exists('Transliterator')) {
        $translit = \Transliterator::create('Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();');
        // create() returns null on failure; fall through to the table-based
        // path instead of calling a method on null.
        if ($translit instanceof \Transliterator) {
            return preg_replace('/\\s/', '-', $translit->transliterate($str));
        }
    }

    $accented = utf8_decode("ÀÁÂÃÄÅàáâãäåÇçÒÓÔÕÖØòóôõöøÈÉÊËèéêëÌÍÎÏìíîïÙÚÛÜùúûüÿÑñ");
    $plain = "AAAAAAaaaaaaCcOOOOOOooooooEEEEeeeeIIIIiiiiUUUUuuuuyNn";

    // The accent table is single-byte, so UTF-8 input is decoded first.
    $singleByte = $isUtf8 ? utf8_decode($str) : $str;
    $str = strtr($singleByte, $accented, $plain);

    // The hyphen sits at the end of the class. The previous "_-\s" is an
    // invalid range under PCRE2 (PHP 7.3+), making preg_replace() return null.
    $str = preg_replace('/[^a-z0-9_\\s-]/', '', strtolower($str));
    $str = preg_replace('/[\\s]+/', ' ', trim($str));
    $str = str_replace(' ', '-', $str);

    return $str;
}
<?php
// Round-trips a Katakana string through Katakana-Latin and its inverse,
// obtaining the inverse both via the method and via the procedural function.
ini_set("intl.error_level", E_WARNING);

$forward = Transliterator::create("Katakana-Latin");
$source = "オーシャンビュー";
$romanized = $forward->transliterate($source);

$inverseFromMethod = $forward->createInverse();
$restoredA = $inverseFromMethod->transliterate($romanized);

$inverseFromFunction = transliterator_create_inverse($forward);
$restoredB = $inverseFromFunction->transliterate($romanized);

echo $source, "\n";
echo $romanized, "\n";
echo $restoredA, "\n";
var_dump(($source == $restoredA) == $restoredB);
echo "Done.\n";
/**
 * Transliterates the input string to an ASCII equivalent string.
 * Transliteration is language dependent.
 *
 * Pipeline: self::cyr_to_lat(), then the intl Transliterator (when
 * INTL_INSTALLED and the class is loaded), then the CodeIgniter
 * foreign_chars.php replacement tables (when IS_CODEIGNITER), then iconv
 * ASCII//TRANSLIT (when ICONV_INSTALLED). A stage that fails leaves the string
 * from the previous stage untouched.
 *
 * @param string $string The input string, UTF-8.
 * @param string $language Language identificator ('bg', 'ru', ...)
 * @return string
 */
public static function to_ascii($string, $language = null)
{
    $language = self::detect_language($language);
    $string = self::cyr_to_lat($string, $language);

    static $transliterator_ids = array();

    if (INTL_INSTALLED && class_exists('Transliterator', false)) {
        if (empty($transliterator_ids)) {
            $transliterator_ids = Transliterator::listIDs();
            // listIDs() returns false on failure; keep an array for in_array().
            if (!is_array($transliterator_ids)) {
                $transliterator_ids = array();
            }
        }

        $transliterator_id = null;

        switch ($language) {
            case 'ar':
                $transliterator_id = 'Arabic-Latin';
                break;
            case 'el':
                $transliterator_id = 'Greek-Latin';
                break;
            case 'mk':
                $transliterator_id = 'Macedonian-Latin/BGN';
                break;
            case 'sr':
                $transliterator_id = 'Serbian-Latin/BGN';
                break;
            case 'uk':
                $transliterator_id = 'Ukrainian-Latin/BGN';
                break;
            case 'ko':
                $transliterator_id = 'Korean-Latin/BGN';
                break;
            case 'th':
                $transliterator_id = 'Thai-Latin';
                break;
            case 'gu':
                $transliterator_id = 'Gujarati-Latin';
                break;
            case 'ta':
                $transliterator_id = 'Tamil-Latin';
                break;
            case 'az':
                $transliterator_id = 'Azerbaijani-Latin/BGN';
                break;
        }

        // Drop a language-specific ID that this ICU build does not provide.
        if (!in_array($transliterator_id, $transliterator_ids)) {
            $transliterator_id = null;
        }

        if ($transliterator_id == '') {
            $transliterator_id = 'Any-Latin; Latin-ASCII';
        } else {
            $transliterator_id .= '; Any-Latin; Latin-ASCII';
        }

        $transliterator = Transliterator::create($transliterator_id);

        // create() returns null on failure. The @ operator does not suppress
        // the fatal error of calling a method on null, so check explicitly.
        if ($transliterator instanceof Transliterator) {
            $new_string = @$transliterator->transliterate($string);

            if ($new_string !== false) {
                $string = $new_string;
            }

            unset($new_string);
        }
    }

    static $search;
    static $replace;

    if (IS_CODEIGNITER) {
        if (!isset($search) || !is_array($search)) {
            // The included files are expected to define $foreign_characters
            // in this scope; later files override earlier ones.

            // Added by Ivan Tcholakov, 03-OCT-2013.
            if (file_exists(COMMONPATH . 'config/foreign_chars.php')) {
                include COMMONPATH . 'config/foreign_chars.php';
            }

            if (file_exists(COMMONPATH . 'config/' . ENVIRONMENT . '/foreign_chars.php')) {
                include COMMONPATH . 'config/' . ENVIRONMENT . '/foreign_chars.php';
            }
            //

            if (file_exists(APPPATH . 'config/foreign_chars.php')) {
                include APPPATH . 'config/foreign_chars.php';
            }

            if (file_exists(APPPATH . 'config/' . ENVIRONMENT . '/foreign_chars.php')) {
                include APPPATH . 'config/' . ENVIRONMENT . '/foreign_chars.php';
            }

            if (empty($foreign_characters) || !is_array($foreign_characters)) {
                $search = array();
                $replace = array();
            } else {
                $search = array_keys($foreign_characters);
                $replace = array_values($foreign_characters);
            }
        }

        $string = preg_replace($search, $replace, $string);
    }

    if (ICONV_INSTALLED) {
        // iconv() returns false on failure; do not let that replace the string.
        $iconv_result = iconv('UTF-8', 'ASCII//TRANSLIT', $string);

        if ($iconv_result !== false) {
            $string = $iconv_result;
        }
    }

    return $string;
}
/**
 * Converts to ASCII.
 *
 * Visible steps: characters outside the listed code-point ranges are removed;
 * a fixed set of strings is replaced; when the Transliterator class exists and
 * 'Any-Latin; Latin-ASCII' can be created, it is applied; then iconv converts
 * to WINDOWS-1250 (when ICONV_IMPL === 'glibc', followed by a character table
 * and removal of remaining non-ASCII bytes) or to ASCII//TRANSLIT//IGNORE
 * otherwise. Finally the characters ` ' " ^ ~ ? are removed. Both iconv calls
 * are deliberately silenced with @.
 *
 * NOTE(review): several string literals here look mis-encoded (e.g. the
 * str_replace() search lists and the long strtr() table), and both strtr()
 * calls have an empty string as one of their maps, which makes them no-ops.
 * This looks like encoding/control-character loss — verify against the
 * upstream source before relying on this function.
 * NOTE(review): the definition is collapsed onto a single line, so each
 * "// intentionally @" comment swallows the code that follows it — confirm
 * the original line structure.
 *
 * @param string UTF-8 encoding
 * @return string ASCII
 */ public static function toAscii($s) { $s = preg_replace('#[^\\x09\\x0A\\x0D\\x20-\\x7E\\xA0-\\x{2FF}\\x{370}-\\x{10FFFF}]#u', '', $s); $s = strtr($s, '`\'"^~?', ""); $s = str_replace(array("β€�", "β€�", "β€�", "β€�", "β€�", "’", "Β°"), array("", "", "", "", "", "", ""), $s); if (class_exists('Transliterator') && ($transliterator = \Transliterator::create('Any-Latin; Latin-ASCII'))) { $s = $transliterator->transliterate($s); } if (ICONV_IMPL === 'glibc') { $s = str_replace(array("Β»", "Β«", "…", "β„Ά", "Β©", "Β®"), array('>>', '<<', '...', 'TM', '(c)', '(R)'), $s); $s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT//IGNORE', $s); // intentionally @ $s = strtr($s, "¥£Ό�§�����―Ή³Ύ��Ί���" . "ΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ�Σ" . "ΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθ" . "ικλμνξοπρςστυφψωϊϋόύώ" . "– ‹—›¦·", "ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-."); $s = preg_replace('#[^\\x00-\\x7F]++#', '', $s); } else { $s = @iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s); // intentionally @ } $s = str_replace(array('`', "'", '"', '^', '~', '?'), '', $s); return strtr($s, "", '`\'"^~?'); }
<?php /* Creates a transliterator and prints an error code three ways: the procedural function with no argument, the method with a null argument, and the procedural function with an array argument. NOTE(review): these argument shapes look like deliberate bad-argument cases whose diagnostics are the point of the script — confirm before changing the layout. */ ini_set("intl.error_level", E_WARNING); $t = Transliterator::create("[\\p{Bidi_Mirrored}] Hex"); echo transliterator_get_error_code(), "\n"; echo $t->getErrorCode(null), "\n"; echo transliterator_get_error_code(array()), "\n";
/**
 * Converts a string into a URL-safe slug.
 *
 * The input is trimmed, HTML-decoded and stripped of tags; a few symbols are
 * spelled out or removed; decimal points between digits become a space. The
 * text is then transliterated (intl when available, iconv otherwise) and runs
 * of whitespace/hyphens are replaced by $spacer. When nothing remains,
 * PerchUtil::urlify_non_translit() is used instead.
 *
 * @param string $string Text to convert.
 * @param string $spacer Separator placed between words.
 * @return string
 */
public static function urlify($string, $spacer = '-')
{
    $string = trim($string);
    $string = htmlspecialchars_decode($string, ENT_QUOTES);
    $string = strip_tags($string);

    $string = str_replace(array('$', '£', '€', '™', '®', '|'), array('', 'GBP ', 'EUR ', 'tm', 'r', ''), $string);

    $string = preg_replace('#(\\d)\\.(\\d)#', '$1 $2', $string); // make sure numbers with decimals don't mislead, e.g. 2.5 -> 25

    $tranliterator_rule = 'Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();';

    // Prefer the Latin-ASCII variant when this ICU build offers it.
    if (function_exists('transliterator_list_ids')) {
        if (in_array('Latin-ASCII', transliterator_list_ids())) {
            $tranliterator_rule = 'Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();';
        }
    }

    if (function_exists('transliterator_transliterate')) {
        // Hyphens are punctuation and would be removed; turn them into word breaks.
        $string = str_replace('-', ' ', $string);
        $s = transliterator_transliterate($tranliterator_rule, $string);
    } elseif (class_exists('Transliterator')) {
        $string = str_replace('-', ' ', $string);
        $T = Transliterator::create($tranliterator_rule);
        $s = $T->transliterate($string);
    } else {
        $s = iconv('UTF-8', 'ASCII//TRANSLIT', $string);
        $s = strtolower($s);
        $s = preg_replace('/[^a-z0-9\\-\\s]/', '', $s);
    }

    // A failed transliteration yields false/null; treat it as "nothing left".
    if (!is_string($s)) {
        $s = '';
    }

    // Trim separators before collapsing them: a replaced symbol such as a
    // trailing "€" ("EUR ") used to leave a dangling spacer on the slug.
    $s = trim($s, " \t\n\r\0\x0B\f-");
    $s = preg_replace('/[\\s\\-]+/', $spacer, $s);

    if (strlen($s) > 0) {
        return $s;
    }

    return PerchUtil::urlify_non_translit($string);
}
<?php
// Creates an "any-latin" transliterator through the class method and through
// the procedural function, printing the id of each.
ini_set("intl.error_level", E_WARNING);

$fromMethod = Transliterator::create("any-latin");
echo $fromMethod->id, "\n";

$fromFunction = transliterator_create("any-latin");
echo $fromFunction->id, "\n";

echo "Done.\n";
/**
 * Converts UTF8 into Latin.
 *
 * The intl transliterator is preferred; \URLify::transliterate() is the
 * fallback when intl is unavailable or the transliterator cannot be created.
 *
 * @param $value
 *
 * @return mixed
 */
public static function transliterate($value)
{
    if (!function_exists('transliterator_transliterate')) {
        return \URLify::transliterate($value);
    }

    // Use intl by default
    $intl = \Transliterator::create('Any-Latin; Latin-ASCII');

    return $intl
        ? $intl->transliterate($value)
        : \URLify::transliterate($value);
}
/**
 * Returns an enum value identifier based on the given value name.
 *
 * The name is transliterated to lower-case ASCII when a transliterator can be
 * created; spaces and hyphens become underscores, other characters outside
 * a-z, 0-9 and underscore are dropped, and repeated underscores are collapsed.
 * An identifier longer than self::MAX_ENUM_VALUE_ID_LENGTH is truncated and
 * suffixed with a CRC32 hash of the full identifier.
 *
 * @param string $enumValueName
 * @param bool   $throwExceptionIfInvalidName
 *
 * @return string The enum value identifier. Can be empty string if $throwExceptionIfInvalidName = false
 *
 * @throws \InvalidArgumentException
 */
public static function buildEnumValueId($enumValueName, $throwExceptionIfInvalidName = true)
{
    if (strlen($enumValueName) === 0) {
        if (!$throwExceptionIfInvalidName) {
            return '';
        }
        throw new \InvalidArgumentException('$enumValueName must not be empty.');
    }

    // Work on a copy so the error message below can still quote the name the
    // caller passed in, not its transliterated form.
    $normalizedName = $enumValueName;

    $tr = \Transliterator::create('Latin; Latin-ASCII; Lower');
    if ($tr) {
        $transliterated = $tr->transliterate($enumValueName);
        // transliterate() returns false on failure; keep the input then.
        if ($transliterated !== false) {
            $normalizedName = $transliterated;
        }
    }

    $result = preg_replace(
        ['/ +/', '/-+/', '/[^a-z0-9_]+/i', '/_{2,}/'],
        ['_', '_', '', '_'],
        trim($normalizedName)
    );
    if ($result === '_') {
        $result = '';
    }

    if (strlen($result) > self::MAX_ENUM_VALUE_ID_LENGTH) {
        // Keep the identifier unique after truncation by appending a hash.
        $hash = dechex(crc32($result));
        $result = substr($result, 0, self::MAX_ENUM_VALUE_ID_LENGTH - strlen($hash) - 1) . '_' . $hash;
    }

    if ($throwExceptionIfInvalidName && strlen($result) === 0) {
        throw new \InvalidArgumentException(
            sprintf('The conversion of "%s" to enum value id produces empty string.', $enumValueName)
        );
    }

    return $result;
}
/**
 * Returns this object's transliterator, creating it on demand.
 *
 * A new instance is created only when none is set yet and
 * self::$default_use_transliterator is truthy.
 *
 * @return Transliterator|NULL
 */
function getTransliterator()
{
    $needsCreation = $this->transliterator === null && self::$default_use_transliterator;

    if ($needsCreation) {
        $this->transliterator = Transliterator::create();
    }

    return $this->transliterator;
}
/**
 * Transliterates a string with the 'Cyrillic-Latin' transliterator.
 *
 * @param string $str
 * @return string|false Result of Transliterator::transliterate().
 */
public static function translit($str)
{
    return \Transliterator::create('Cyrillic-Latin')->transliterate($str);
}
/**
 * Transliterate using the Transliterator package.
 *
 * The transliterator is created on first use and cached in
 * self::$_transliterator.
 *
 * @param string $str  Input string (UTF-8).
 *
 * @return mixed  Transliterated string (UTF-8), or false on error.
 */
protected static function _intlToAscii($str)
{
    if (!class_exists('Transliterator')) {
        return false;
    }

    if (!isset(self::$_transliterator)) {
        self::$_transliterator = Transliterator::create('Any-Latin; Latin-ASCII');
    }

    if (is_null(self::$_transliterator)) {
        return false;
    }

    /* Returns false on error. */
    return self::$_transliterator->transliterate($str);
}