/**
 * Transliterates a string to ASCII with the intl Transliterator.
 *
 * The 'Any-Latin; Latin-ASCII' transliterator is created on first use and
 * cached in self::$_transliterator.
 *
 * @param string $str  Input string.
 *
 * @return string|false  Transliterated string, or false when the intl
 *                       Transliterator is unavailable, could not be
 *                       created, or the transliteration itself failed.
 */
protected static function _intlToAscii($str)
{
    if (!class_exists('Transliterator')) {
        return false;
    }

    if (!isset(self::$_transliterator)) {
        self::$_transliterator = Transliterator::create('Any-Latin; Latin-ASCII');
    }

    // Transliterator::create() yields null on failure; calling a method on
    // null would be a fatal error, so report the failure to the caller.
    if (!(self::$_transliterator instanceof Transliterator)) {
        return false;
    }

    return self::$_transliterator->transliterate($str);
}
/**
 * Runs the wrapped transliterator over the given text.
 *
 * @param string $string Text to transliterate.
 *
 * @return string The transliterated text.
 *
 * @throws TransliterateException When the wrapped transliterator returns
 *                                false; carries its error message and code.
 */
public function transliterate($string)
{
    $result = $this->transliterator->transliterate($string);

    if ($result !== false) {
        return $result;
    }

    throw new TransliterateException(
        $this->transliterator->getErrorMessage(),
        $this->transliterator->getErrorCode()
    );
}
/**
 * Builds a lowercase, delimiter-separated slug suitable for pretty URLs.
 *
 * The process locale is switched to en_US.UTF-8 while the slug is built and
 * put back to its previous value before returning.
 *
 * @param string $string    Text to convert.
 * @param array  $replace   Optional map of search => replacement applied
 *                          before transliteration, e.g. ['ı' => 'i'].
 * @param string $delimiter Separator placed between words.
 * @return string
 *
 * @throws \Phalcon\Exception When the intl extension is not loaded.
 */
public static function generate($string, $replace = [], $delimiter = '-')
{
    if (!extension_loaded('intl')) {
        throw new Exception('intl module not loaded');
    }

    // Remember the active locale so it can be restored at the end.
    $previousLocale = setlocale(LC_ALL, '0');
    setlocale(LC_ALL, 'en_US.UTF-8');

    // Caller-supplied substitutions run first, keyed as search => replacement.
    if (is_array($replace) && !empty($replace)) {
        $string = str_replace(array_keys($replace), array_values($replace), $string);
    }

    $utf8  = mb_convert_encoding(htmlspecialchars_decode($string), 'UTF-8', 'auto');
    $ascii = \Transliterator::create('Any-Latin; Latin-ASCII')->transliterate($utf8);

    // Every run of non-letter, non-digit characters becomes one hyphen,
    // then hyphens at either end are dropped.
    $hyphenated = trim(preg_replace('#[^\\pL\\d]+#u', '-', $ascii), '-');

    // Strip what is neither hyphen nor word character, lowercase, and
    // finally swap separator runs for the requested delimiter.
    $slug = strtolower(preg_replace('~[^-\\w]+~', '', $hyphenated));
    $slug = trim(preg_replace('#[\\/_|+ -]+#', $delimiter, $slug), $delimiter);

    setlocale(LC_ALL, $previousLocale);

    return $slug;
}
/**
 * Transliterates the input with the configured rules and joins the result
 * with the configured delimiter.
 *
 * Runs of hyphens and whitespace are collapsed into $this->delimiter, and
 * delimiter characters are trimmed from both ends.
 *
 * @param string $stringToSantize Text to sanitize.
 * @return string
 * @throws CException When the transliterator cannot be created from
 *                    $this->rules, or when transliteration fails or
 *                    produces an empty string.
 */
private function sanitize($stringToSantize)
{
    $transliterator = Transliterator::create($this->rules);
    if ($transliterator === null) {
        // create() returns null for invalid rules; fail with the declared
        // exception type instead of a fatal "member function on null".
        throw new CException("Unable to create a transliterator from the configured rules.");
    }

    $slug = $transliterator->transliterate($stringToSantize);

    // Strict checks on purpose: empty() would also reject the valid result "0".
    if ($slug === false || $slug === null || $slug === '') {
        throw new CException("Empty santized result. Check input string for encoding, UTF-8 is required.");
    }

    return trim(preg_replace('/[-\\s]+/', $this->delimiter, $slug), $this->delimiter);
}
/**
 * Converts a string into a lowercase, hyphen-separated slug.
 *
 * A handful of lowercase Cyrillic letters are mapped by hand first; the
 * rest is handled by an ICU transliterator that also strips combining marks
 * and punctuation and lowercases the text.
 *
 * @param string $string Text to slugify.
 * @return string
 */
public static function slugify($string)
{
    $prepared = str_replace(
        array('я', 'ю', 'ї', 'є', 'ж', 'ч', 'ш', 'щ', 'ь'),
        array('ya', 'yu', 'yi', 'ye', 'zh', 'ch', 'sh', 'sch', ''),
        $string
    );

    $transliterated = \Transliterator::create('Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();')
        ->transliterate($prepared);

    // Every non-word character becomes a hyphen.
    $clean = preg_replace('/\\W/i', '-', $transliterated);

    // Collapse whole runs of hyphens. A single str_replace('--', '-') pass
    // only halves a run, leaving "--" behind for three or more hyphens.
    $replaced = preg_replace('/-{2,}/', '-', $clean);

    return preg_replace('/[[:^print:]]/', '', $replaced);
}
/**
 * Enables ICU transliteration when the Transliterator class is available.
 *
 * Without that class the instance is left untouched.
 */
public function __construct()
{
    if (!class_exists('Transliterator')) {
        return;
    }

    $this->use_transliterator = true;

    // Any-Latin romanises Cyrillic, Kanji, etc.; Latin-ASCII is then applied
    // only to characters outside the a..ÿ range.
    $this->transliterator = Transliterator::create('Any-Latin; [^a-ÿ] Latin-ASCII');
}
/**
 * Reduces a string to the characters A-Z, a-z, 0-9, underscore and dot.
 *
 * The text is transliterated through intl when it is usable, otherwise
 * through static::toAscii(); anything outside the allowed set is dropped.
 *
 * @param string $string Text to transliterate.
 * @return string|null Result of the final preg_replace().
 */
private static function transliterate($string)
{
    $transId = 'Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC;';

    // Only attempt to build the ICU transliterator when intl is present.
    $icu = function_exists('transliterator_transliterate')
        ? \Transliterator::create($transId)
        : null;

    $ascii = $icu ? $icu->transliterate($string) : static::toAscii($string);

    return preg_replace('/[^A-Za-z0-9_.]/u', '', $ascii);
}
/**
 * Builds the slug from the non-empty values of the sluggable fields.
 *
 * @param array $values Field values keyed by field name.
 * @return string
 *
 * @throws \UnexpectedValueException When none of the values is usable.
 */
private function generateSlugValue($values)
{
    $usableValues = [];
    foreach ($values as $fieldValue) {
        if (!empty($fieldValue)) {
            $usableValues[] = $fieldValue;
        }
    }

    if (count($usableValues) < 1) {
        // implode() takes the separator first; the reversed legacy order
        // is a TypeError on PHP 8.
        throw new \UnexpectedValueException(
            'Sluggable expects to have at least one usable (non-empty) field from the following: [ '
            . implode(',', array_keys($values)) . ' ]'
        );
    }

    // generate the slug itself
    $sluggableText = implode(' ', $usableValues);

    $transliterator = new Transliterator();
    $sluggableText = $transliterator->transliterate($sluggableText, $this->getSlugDelimiter());

    $urlized = strtolower(trim(preg_replace("/[^a-zA-Z0-9\\/_|+ -]/", '', $sluggableText), $this->getSlugDelimiter()));
    $urlized = preg_replace("/[\\/_|+ -]+/", $this->getSlugDelimiter(), $urlized);

    return $urlized;
}
/**
 * Fills in slug, title and head title when they are not set yet, and sets
 * the display-date flag from the submitted data.
 *
 * @param array $data Submitted values. 'title' is read only when one of the
 *                    three values is missing; the mere presence of
 *                    'display_date' switches the flag to 1.
 */
public function updateFields($data)
{
    if (!$this->getSlug()) {
        $this->setSlug(Transliterator::slugify($data['title']));
    }

    if (!$this->getTitle()) {
        $this->setTitle($data['title']);
    }

    if (!$this->getHeadTitle()) {
        $this->setHeadTitle($data['title']);
    }

    $this->setDisplayDate(isset($data['display_date']) ? 1 : 0);
}
/**
 * Handles submission of the project form.
 *
 * Does nothing unless both $_POST['filename'] and $_POST['projectname'] are
 * non-empty. When $_GET['id'] names an existing index entry, its project
 * name is updated and saved. Otherwise a new entry is added and saved; if
 * adding fails, the submitted values and an error message are handed to the
 * view under the 'timesheet' parameter.
 */
public function edit()
{
    if (empty($_POST['filename']) || empty($_POST['projectname'])) {
        return;
    }

    // NOTE(review): mysql_escape_string() was removed in PHP 7.0, and no
    // SQL is built in this method - confirm whether the escaping is still
    // wanted before running this on PHP 7+.
    $filename = mysql_escape_string($_POST['filename']);
    $projectname = mysql_escape_string($_POST['projectname']);

    $transliterator = \Transliterator::create('NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();');

    $id = str_replace(' ', '_', $transliterator->transliterate($projectname));
    // Normalise the submitted filename itself. It used to be overwritten
    // with a value derived from the project name, which discarded the
    // user's input entirely.
    $filename = str_replace(' ', '_', $transliterator->transliterate($filename));

    if (!empty($_GET['id']) && $this->webStorageIndex->indexHasId($_GET['id'])) {
        $this->webStorageIndex->setIndexProjectname($_GET['id'], $projectname);
        $this->webStorageIndex->save();
        return;
    }

    if ($this->webStorageIndex->addIndex($id, $projectname, $filename)) {
        $this->webStorageIndex->save();
        return;
    }

    // The length message belongs to values that are too SHORT; the old
    // "> 3" test picked it for long values and blamed short ones on a
    // duplicate. NOTE(review): confirm the exact threshold against addIndex().
    $tooShort = strlen($id) <= 3 || strlen($filename) <= 3 || strlen($projectname) <= 3;

    $this->view->setParameter('timesheet', array(
        'projectname' => $projectname,
        'filename' => $filename,
        'error' => $tooShort ? 'Min length is not right' : 'This project or filename already exists',
    ));
}
/**
 * Slugify a string
 *
 * @param string       $text      Text to slugify
 * @param string       $default   Default return value (override when slugify would return an empty string)
 * @param array|string $replace   Character(s) replaced by a space before slugifying
 * @param string       $delimiter Separator placed between words
 *
 * @return string
 */
public function slugify($text, $default = '', $replace = array("'"), $delimiter = '-')
{
    if (!empty($replace)) {
        $text = str_replace($replace, ' ', $text);
    }

    // transliterate
    if (class_exists('Transliterator')) {
        $text = (string) $text;
        // Only guess the source encoding when the text is not valid UTF-8
        // already; running detection on good UTF-8 can only mis-detect it.
        if (!mb_check_encoding($text, 'UTF-8')) {
            $text = mb_convert_encoding($text, 'UTF-8', mb_list_encodings());
        }
        $transliterator = \Transliterator::create('Any-Latin; Latin-ASCII');
        $text = $transliterator->transliterate($text);
    }

    $text = preg_replace("/[^a-zA-Z0-9\\/_|+ -]/", '', $text);
    $text = strtolower($text);
    $text = preg_replace("/[\\/_|+ -]+/", $delimiter, $text);

    // Trim after collapsing separators; trimming first left a delimiter at
    // either end whenever the text started or ended with a space.
    $text = trim((string) $text, $delimiter);

    // Strict comparison: empty() would also discard the valid slug "0".
    if ($text === '') {
        return empty($default) ? '' : $default;
    }

    return $text;
}
/**
 * Turns a title into an ASCII identifier.
 *
 * With intl available the title is transliterated by the first rule set
 * that yields a Transliterator; without intl, iconv is used when present.
 * The result is then filtered with a regular expression and passed to
 * sanitize_title_with_dashes().
 *
 * @param string $title Title to convert.
 * @return string
 */
public static function sanitize_id($title)
{
    if (!class_exists('Transliterator')) {
        if (function_exists('iconv')) {
            $title = iconv('UTF-8', 'ASCII//TRANSLIT', $title);
        }
    } else {
        // Preferred rule first, then the variant without Latin-ASCII.
        $rule_sets = array(
            'Any-Latin; Latin-ASCII',
            'Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC',
        );

        foreach ($rule_sets as $rule_set) {
            $candidate = Transliterator::create($rule_set);
            if ($candidate instanceof Transliterator) {
                $title = $candidate->transliterate($title);
                break;
            }
        }
    }

    $title = preg_replace('/[^(\\x20-\\x7F)]*/', '', $title);

    return sanitize_title_with_dashes($title);
}
/**
 * Converts text into a lowercase, hyphen-separated ASCII slug.
 *
 * warning requires `yum -y install php-intl`
 * for transliterator to work; without it an iconv based fallback is used.
 *
 * @param string $text
 *
 * @return string The slug, or 'n-a' when nothing usable remains.
 */
public static function slugify($text)
{
    // null = not probed yet, false = intl transliteration unavailable.
    static $transliterator;

    // "0" is a legitimate input; only truly empty values short-circuit.
    if (empty($text) && !is_numeric($text)) {
        return 'n-a';
    }

    if ($transliterator === null) {
        // Probe with the very rule set that is applied below, and keep the
        // instance. The old probe stored only a boolean, built from a
        // different rule string than the one actually used.
        $created = function_exists('transliterator_transliterate')
            ? \Transliterator::create('Any-Latin; Latin-ASCII; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();')
            : null;
        $transliterator = ($created instanceof \Transliterator) ? $created : false;
    }

    if ($transliterator !== false) {
        $slug = $transliterator->transliterate((string) $text);
        if ($slug !== false) {
            // Same guarantees as the fallback below: no hyphens at either
            // end, and never an empty result.
            $slug = trim(preg_replace('#[ -]+#', '-', $slug), '-');

            return $slug === '' ? 'n-a' : $slug;
        }
        // Transliteration failed: fall through to the iconv based path.
    }

    // replace non letter or digits by -
    $text = preg_replace('~[^\\pL\\d]+~u', '-', $text);
    $text = trim($text, '-');
    $text = iconv('utf-8', 'us-ascii//TRANSLIT', $text);
    $text = strtolower($text);
    $text = preg_replace('~[^-\\w]+~', '', $text);

    if ($text === null || $text === '') {
        return 'n-a';
    }

    return $text;
}
/**
 * Converts to ASCII.
 *
 * Steps, in order: strip code points outside an allowed set, run the intl
 * 'Any-Latin; Latin-ASCII' transliterator when it can be created, convert
 * with iconv (with extra handling when ICONV_IMPL is 'glibc'), and finally
 * remove the characters ` ' " ^ ~ ? from the result.
 *
 * NOTE(review): several string literals below contain U+FFFD or otherwise
 * look mis-encoded, and some translation tables are empty. The file may
 * have been re-saved in the wrong encoding - compare with the upstream
 * source before relying on those steps.
 *
 * @param  string UTF-8 encoding
 * @return string ASCII
 */
public static function toAscii($s)
{
    // Keep tab, LF, CR, printable ASCII, U+00A0-U+02FF and U+0370-U+10FFFF;
    // every other code point is removed.
    $s = preg_replace('#[^\\x09\\x0A\\x0D\\x20-\\x7E\\xA0-\\x{2FF}\\x{370}-\\x{10FFFF}]#u', '', $s);
    // NOTE(review): the target string is empty, so this strtr() call leaves
    // $s unchanged. It presumably mapped these characters to placeholder
    // bytes that have been lost - confirm.
    $s = strtr($s, '`\'"^~?', "");
    // NOTE(review): the search strings look like mis-encoded typographic
    // quotes / degree sign; as written, the matches are simply deleted.
    $s = str_replace(array("β€�", "β€�", "β€�", "β€�", "β€�", "’", "Β°"), array("", "", "", "", "", "", ""), $s);
    // Prefer ICU transliteration when intl is installed and the
    // transliterator can be created.
    if (class_exists('Transliterator') && ($transliterator = \Transliterator::create('Any-Latin; Latin-ASCII'))) {
        $s = $transliterator->transliterate($s);
    }
    if (ICONV_IMPL === 'glibc') {
        // Spell out a few symbols as ASCII text before the conversion.
        $s = str_replace(array("Β»", "Β«", "…", "β„Ά", "Β©", "Β®"), array('>>', '<<', '...', 'TM', '(c)', '(R)'), $s);
        // Conversion errors are deliberately silenced.
        $s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT//IGNORE', $s); // intentionally @
        // Byte-to-ASCII table applied to the WINDOWS-1250 output.
        // NOTE(review): the source half of the table appears mis-encoded.
        $s = strtr($s, "¥£Ό�§�����―Ή³Ύ��Ί���" . "ΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ�Σ" . "ΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθ" . "ικλμνξοπρςστυφψωϊϋόύώ" . "– ‹—›¦·", "ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-.");
        // Whatever is still outside 7-bit ASCII is removed.
        $s = preg_replace('#[^\\x00-\\x7F]++#', '', $s);
    } else {
        // Conversion errors are deliberately silenced.
        $s = @iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s); // intentionally @
    }
    // Remove the characters ` ' " ^ ~ ? from the converted text.
    $s = str_replace(array('`', "'", '"', '^', '~', '?'), '', $s);
    // NOTE(review): the source string is empty, so this strtr() is a no-op.
    // It presumably restored the placeholder bytes mentioned above - confirm.
    return strtr($s, "", '`\'"^~?');
}
/**
 * Converts arbitrary text into a lowercase URL fragment.
 *
 * The input is trimmed, entity-decoded and stripped of tags; some currency
 * and trademark symbols are spelled out; decimals such as "2.5" are split
 * so they do not collapse into "25". The text is then transliterated with
 * intl when available, or iconv otherwise, and runs of whitespace and
 * hyphens are replaced by $spacer. When nothing is left, the result of
 * PerchUtil::urlify_non_translit() is returned instead.
 *
 * @param string $string Text to convert.
 * @param string $spacer Separator placed between words.
 * @return string
 */
public static function urlify($string, $spacer = '-')
{
    $string = strip_tags(htmlspecialchars_decode(trim($string), ENT_QUOTES));
    $string = str_replace(
        array('$', '£', '€', '™', '®', '|'),
        array('', 'GBP ', 'EUR ', 'tm', 'r', ''),
        $string
    );
    // make sure numbers with decimals don't mislead, e.g. 2.5 -> 25
    $string = preg_replace('#(\\d)\\.(\\d)#', '$1 $2', $string);

    // Add Latin-ASCII to the rule chain only when this ICU build lists it.
    $tranliterator_rule = 'Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();';
    if (function_exists('transliterator_list_ids') && in_array('Latin-ASCII', transliterator_list_ids())) {
        $tranliterator_rule = 'Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();';
    }

    $has_function = function_exists('transliterator_transliterate');

    if ($has_function || class_exists('Transliterator')) {
        // Hyphens count as punctuation and would be removed by the rule;
        // turn them into spaces so they survive as word breaks.
        $string = str_replace('-', ' ', $string);
        $s = $has_function
            ? transliterator_transliterate($tranliterator_rule, $string)
            : Transliterator::create($tranliterator_rule)->transliterate($string);
    } else {
        $s = strtolower(iconv('UTF-8', 'ASCII//TRANSLIT', $string));
        $s = preg_replace('/[^a-z0-9\\-\\s]/', '', $s);
    }

    $s = preg_replace('/[\\s\\-]+/', $spacer, $s);

    return strlen($s) > 0 ? $s : PerchUtil::urlify_non_translit($string);
}
/**
 * Transliterates the input string to an ASCII equivalent string.
 * Transliteration is language dependent.
 *
 * Stages, in order: self::cyr_to_lat(); the intl Transliterator (with a
 * language-specific rule in front when ICU lists it); under CodeIgniter the
 * regex map from the foreign_chars.php config files; and finally iconv.
 *
 * @param string $string The input string, UTF-8.
 * @param string $language Language identificator ('bg', 'ru', ...)
 * @return string
 */
public static function to_ascii($string, $language = null)
{
    // Per-request caches: the ICU id list and the CodeIgniter regex map.
    static $transliterator_ids = array();
    static $search;
    static $replace;

    $language = self::detect_language($language);
    $string = self::cyr_to_lat($string, $language);

    if (INTL_INSTALLED && class_exists('Transliterator', false)) {
        if (empty($transliterator_ids)) {
            $transliterator_ids = Transliterator::listIDs();
        }

        // Language-specific rule to run ahead of the generic chain.
        $transliterator_id = null;
        switch ($language) {
            case 'ar': $transliterator_id = 'Arabic-Latin'; break;
            case 'el': $transliterator_id = 'Greek-Latin'; break;
            case 'mk': $transliterator_id = 'Macedonian-Latin/BGN'; break;
            case 'sr': $transliterator_id = 'Serbian-Latin/BGN'; break;
            case 'uk': $transliterator_id = 'Ukrainian-Latin/BGN'; break;
            case 'ko': $transliterator_id = 'Korean-Latin/BGN'; break;
            case 'th': $transliterator_id = 'Thai-Latin'; break;
            case 'gu': $transliterator_id = 'Gujarati-Latin'; break;
            case 'ta': $transliterator_id = 'Tamil-Latin'; break;
            case 'az': $transliterator_id = 'Azerbaijani-Latin/BGN'; break;
        }

        // Use the specific rule only when this ICU build actually has it.
        $is_listed = in_array($transliterator_id, $transliterator_ids);
        $transliterator_id = ($is_listed && $transliterator_id != '')
            ? $transliterator_id . '; Any-Latin; Latin-ASCII'
            : 'Any-Latin; Latin-ASCII';

        $transliterator = Transliterator::create($transliterator_id);
        $converted = @$transliterator->transliterate($string);
        if ($converted !== false) {
            $string = $converted;
        }
        unset($converted);
    }

    if (IS_CODEIGNITER) {
        if (!is_array($search)) {
            // Later files win: common before application, generic before
            // environment-specific. Each may define $foreign_characters.
            $config_files = array(
                COMMONPATH . 'config/foreign_chars.php',
                COMMONPATH . 'config/' . ENVIRONMENT . '/foreign_chars.php',
                APPPATH . 'config/foreign_chars.php',
                APPPATH . 'config/' . ENVIRONMENT . '/foreign_chars.php',
            );

            foreach ($config_files as $config_file) {
                if (file_exists($config_file)) {
                    include $config_file;
                }
            }

            if (!empty($foreign_characters) && is_array($foreign_characters)) {
                $search = array_keys($foreign_characters);
                $replace = array_values($foreign_characters);
            } else {
                $search = array();
                $replace = array();
            }
        }

        $string = preg_replace($search, $replace, $string);
    }

    if (ICONV_INSTALLED) {
        $string = iconv('UTF-8', 'ASCII//TRANSLIT', $string);
    }

    return $string;
}
<?php
// Round-trip check: text transliterated with Katakana-Latin must come back
// unchanged through both Transliterator::createInverse() and
// transliterator_create_inverse().
ini_set("intl.error_level", E_WARNING);

$tr = Transliterator::create("Katakana-Latin");
$orstr = "オーシャンビュー";
$new_str = $tr->transliterate($orstr);

$revtr = $tr->createInverse();
$recovstr = $revtr->transliterate($new_str);

$revtr2 = transliterator_create_inverse($tr);
$recovstr2 = $revtr2->transliterate($new_str);

echo $orstr, "\n";
echo $new_str, "\n";
echo $recovstr, "\n";

// Both recovered strings must equal the original. The previous
// `($a == $b) == $c` compared a boolean with a string, so any non-empty
// $recovstr2 passed.
var_dump($orstr === $recovstr && $orstr === $recovstr2);
echo "Done.\n";
<?php
// Prints the id and output of a "hex-any" transliterator, then does the
// same for a clone of it.
ini_set("intl.error_level", E_WARNING);

$input = "a U+4E07";

$original = Transliterator::create("hex-any");
echo $original->id, ": ", $original->transliterate($input), "\n";

$copy = clone $original;
echo $copy->id, ": ", $copy->transliterate($input), "\n";

echo "Done.\n";
/**
 * Applies the named ICU transform to a string.
 *
 * Returns the transformed string when is_cstring() accepts it; otherwise
 * raises an assertion and returns an empty string.
 *
 * @ignore
 */
protected static function handleTransform($string, $transform)
{
    // Local names are kept as they are: get_defined_vars() below exposes them.
    $translit = Transliterator::create($transform);
    $string = $translit->transliterate($string);

    if (!is_cstring($string)) {
        assert('false', vs(isset($this), get_defined_vars()));
        return "";
    }

    return $string;
}
/**
 * Converts the input into an extension-less filename that is valid inside
 * archives.
 *
 * @param string $filenameWithoutExtension Filename without its extension.
 * @return string A randomly generated name when the conversion leaves
 *     nothing, e.g. when the input consisted only of control or whitespace
 *     characters.
 */
public function convertToValidFilenameWithoutExtensionInArchives(string $filenameWithoutExtension) : string
{
    // Normalise only when the input is not already normalised.
    $normalized = \Normalizer::isNormalized($filenameWithoutExtension)
        ? $filenameWithoutExtension
        : \Normalizer::normalize($filenameWithoutExtension);

    // Drop separators at both ends and control-category characters anywhere.
    $stripped = preg_replace('/^\\p{Z}+|\\p{C}+|\\p{Z}+$/u', '', $normalized);

    $latin = Transliterator::translateUsingLatinAlphabet($stripped);
    $dasherized = \Stringy\StaticStringy::dasherize($latin);

    // Anything other than 0-9, "_" and a-z collapses into a single hyphen;
    // hyphens at either end are removed before the length cap is applied.
    $hyphenated = trim(preg_replace('/[^0-9_a-z]+/u', '-', $dasherized), '-');
    $truncated = mb_substr($hyphenated, 0, static::MAX_LENGTH);

    $asciiString = $this->preventWindowsReserved($truncated);

    return $asciiString === '' ? $this->generateRandomFilename() : $asciiString;
}
/**
 * Checks that translateUsingLatinAlphabet() yields the expected Latin text.
 *
 * @param string $kanjiAndKanaString  Input passed to the transliterator.
 * @param string $latinAlphabetString Expected output.
 * @dataProvider stringProvider
 */
public function testTranslateUsingLatinAlphabet(string $kanjiAndKanaString, string $latinAlphabetString)
{
    $actual = Transliterator::translateUsingLatinAlphabet($kanjiAndKanaString);

    $this->assertSame($latinAlphabetString, $actual);
}
/**
 * Builds a lowercase, hyphen-separated slug.
 *
 * Uses the intl Transliterator when it is available; otherwise falls back
 * to a fixed accent table and regular expressions.
 *
 * @param string $str    Text to convert.
 * @param bool   $isUtf8 Whether $str is UTF-8 (true) or already a
 *                       single-byte Latin string (false). Only the fallback
 *                       path consults it.
 * @return string
 */
function slug($str, $isUtf8 = true)
{
    if (class_exists('Transliterator')) {
        $translit = \Transliterator::create('Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();');
        // create() returns null on failure; use the fallback below rather
        // than calling a method on null.
        if ($translit !== null) {
            return preg_replace('/\\s/', '-', $translit->transliterate($str));
        }
    }

    // NOTE(review): utf8_decode() is deprecated as of PHP 8.2.
    $accented = utf8_decode("ÀÁÂÃÄÅàáâãäåÇçÒÓÔÕÖØòóôõöøÈÉÊËèéêëÌÍÎÏìíîïÙÚÛÜùúûüÿÑñ");
    $plain = "AAAAAAaaaaaaCcOOOOOOooooooEEEEeeeeIIIIiiiiUUUUuuuuyNn";

    // The table is byte based, so UTF-8 input is decoded first.
    $str = strtr($isUtf8 ? utf8_decode($str) : $str, $accented, $plain);

    // The hyphen must come last in the class: "_-\s" is rejected as an
    // invalid range by PCRE2 (PHP >= 7.3), which made preg_replace() return
    // null and the slug come out empty.
    $str = preg_replace('/[^a-z0-9_\\s-]/', '', strtolower($str));
    $str = preg_replace('/[\\s]+/', ' ', trim($str));
    $str = str_replace(' ', '-', $str);

    return $str;
}
<?php
// Dumps what Transliterator::create() returns for a 20000-character id,
// followed by the intl error message recorded for that call.
$oversizedId = str_repeat("x", 20000);
$result = Transliterator::create($oversizedId);

var_dump($result);
var_dump(intl_get_error_message());
<?php
// Romanise, strip combining marks and punctuation, then lowercase.
$string = "garçon-étudiant-où-L'école";
$id = "Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();";

echo Transliterator::create($id)->transliterate($string); // garconetudiantoulecole
<?php
// Prints the id reported by a transliterator built through the static
// factory and through the procedural function.
ini_set("intl.error_level", E_WARNING);

$viaClass = Transliterator::create("any-latin");
echo $viaClass->id, "\n";

$viaFunction = transliterator_create("any-latin");
echo $viaFunction->id, "\n";

echo "Done.\n";
<?php
// Strips combining marks using a transliterator built from a rule string,
// then prints the input and the result separated by an HTML line break.
$rules = ':: NFD; :: [:Nonspacing Mark:] Remove; :: NFC;';
$input = 'àòùìéëü';

$output = Transliterator::createFromRules($rules)->transliterate($input);

echo $input, '<br>', $output;
/**
 * Returns an enum value identifier based on the given value name.
 *
 * The name is transliterated and lowercased when a transliterator can be
 * created; spaces and hyphens become underscores, every other character
 * outside [a-z0-9_] is removed, and over-long results are shortened and
 * suffixed with a CRC32 hash.
 *
 * @param string $enumValueName
 * @param bool   $throwExceptionIfInvalidName
 *
 * @return string The enum value identifier. Can be empty string if $throwExceptionIfInvalidName = false
 *
 * @throws \InvalidArgumentException
 */
public static function buildEnumValueId($enumValueName, $throwExceptionIfInvalidName = true)
{
    if (strlen($enumValueName) === 0) {
        if (!$throwExceptionIfInvalidName) {
            return '';
        }

        throw new \InvalidArgumentException('$enumValueName must not be empty.');
    }

    // Work on a copy so the error message below can still show the name
    // exactly as the caller passed it.
    $normalizedName = $enumValueName;

    $tr = \Transliterator::create('Latin; Latin-ASCII; Lower');
    if ($tr) {
        $transliterated = $tr->transliterate($enumValueName);
        // transliterate() returns false on failure; keep the raw name then.
        if ($transliterated !== false) {
            $normalizedName = $transliterated;
        }
    }

    $result = preg_replace(
        ['/ +/', '/-+/', '/[^a-z0-9_]+/i', '/_{2,}/'],
        ['_', '_', '', '_'],
        trim($normalizedName)
    );
    if ($result === '_') {
        $result = '';
    }

    if (strlen($result) > self::MAX_ENUM_VALUE_ID_LENGTH) {
        // Keep the id unique after truncation by appending a hash of the
        // full-length value.
        $hash = dechex(crc32($result));
        $result = substr($result, 0, self::MAX_ENUM_VALUE_ID_LENGTH - strlen($hash) - 1) . '_' . $hash;
    }

    if ($throwExceptionIfInvalidName && strlen($result) === 0) {
        throw new \InvalidArgumentException(
            sprintf('The conversion of "%s" to enum value id produces empty string.', $enumValueName)
        );
    }

    return $result;
}
/**
 * Returns the transliterator of this instance, creating it on first use
 * when self::$default_use_transliterator is enabled.
 *
 * @return Transliterator|NULL
 */
function getTransliterator()
{
    // Nothing to create: either one exists already or creation is disabled.
    if ($this->transliterator !== null || !self::$default_use_transliterator) {
        return $this->transliterator;
    }

    $this->transliterator = Transliterator::create();

    return $this->transliterator;
}
/**
 * Transliterates Cyrillic text to Latin with ICU's 'Cyrillic-Latin' rule.
 *
 * @param string $str Text to transliterate.
 * @return string|false Result of Transliterator::transliterate().
 */
public static function translit($str)
{
    return \Transliterator::create('Cyrillic-Latin')->transliterate($str);
}
/**
 * Transliterate using the Transliterator package.
 *
 * The 'Any-Latin; Latin-ASCII' transliterator is created on first use and
 * kept in self::$_transliterator.
 *
 * @param string $str  Input string (UTF-8).
 *
 * @return mixed  Transliterated string (UTF-8), or false on error.
 */
protected static function _intlToAscii($str)
{
    if (!class_exists('Transliterator')) {
        return false;
    }

    if (!isset(self::$_transliterator)) {
        self::$_transliterator = Transliterator::create('Any-Latin; Latin-ASCII');
    }

    if (is_null(self::$_transliterator)) {
        return false;
    }

    /* transliterate() itself returns false on error. */
    return self::$_transliterator->transliterate($str);
}