/**
 * {@inheritdoc}
 *
 * Strips tags, transliterates to lower-case ASCII, drops everything except
 * letters, digits and the word-boundary characters "/ _ | + space -", then
 * collapses each run of boundary characters into the separator.
 *
 * @param string      $string    Text to slugify.
 * @param string|null $separator Separator to use; when null the class-level
 *                               separator is used, and "-" if that is null too.
 *
 * @return string
 */
public static function slugify($string, $separator = null)
{
    // Resolve the separator: explicit argument, then class default, then "-".
    if (null === $separator) {
        $separator = null !== self::$separator ? self::$separator : '-';
    }

    $plain = trim(strip_tags($string));
    $ascii = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII; Lower();', $plain);
    $filtered = preg_replace("/[^a-zA-Z0-9\\/_|+ -]/", '', $ascii);
    $joined = preg_replace("/[\\/_|+ -]+/", $separator, $filtered);

    return trim($joined, $separator);
}
/**
 * {@inheritdoc}
 *
 * Same pipeline as a classic slugifier: strip tags, transliterate to
 * lower-case ASCII, keep only letters/digits/boundary characters, and
 * collapse boundary runs into the separator.
 *
 * @param string      $string    Text to slugify.
 * @param string|null $separator Separator; any falsy value falls back to
 *                               the instance separator.
 *
 * @return string
 */
public function slugify($string, $separator = null)
{
    if (!$separator) {
        $separator = $this->separator;
    }

    $text = trim(strip_tags($string));
    $text = transliterator_transliterate("NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII; Lower();", $text);

    // First drop disallowed characters, then fold boundary runs into the separator.
    $text = preg_replace("/[\\/_|+ -]+/", $separator, preg_replace("/[^a-zA-Z0-9\\/_|+ -]/", '', $text));

    return trim($text, $separator);
}
/**
 * Builds a slug from the given string.
 *
 * With the intl extension loaded the string is transliterated (punctuation
 * and non-ASCII characters removed) and runs of "-", "=" and whitespace
 * become the replacement. Without intl the class transliteration map is
 * applied and every run of non-letter/non-digit characters is replaced.
 *
 * @param string $string      Input text.
 * @param string $replacement Word separator.
 * @param bool   $lowercase   Whether to lower-case the result.
 *
 * @return string
 */
public static function slug($string, $replacement = '-', $lowercase = true)
{
    if (extension_loaded('intl') === true) {
        $rules = 'Any-Latin; NFKD; [:Punctuation:] Remove; [^\\u0000-\\u007E] Remove';
        $string = preg_replace('/[-=\\s]+/', $replacement, transliterator_transliterate($rules, $string));
    } else {
        $map = static::$transliteration;
        $string = str_replace(array_keys($map), $map, $string);
        $string = preg_replace('/[^\\p{L}\\p{Nd}]+/u', $replacement, $string);
    }

    $string = trim($string, $replacement);

    if ($lowercase) {
        return strtolower($string);
    }

    return $string;
}
/**
 * Removes diacritical marks from a string, using the best facility available.
 *
 * Tried in order: the intl transliterator, the intl Normalizer (decompose,
 * drop non-spacing marks, recompose), glibc iconv transliteration, and
 * finally an HTML-entity based fallback that only covers Latin-1 letters.
 *
 * @param string $in UTF-8 input.
 *
 * @return string|false The stripped string; the intl and iconv calls may
 *                      return false on failure.
 */
function strip_diacriticals($in)
{
    if (function_exists('transliterator_transliterate')) {
        // PHP 5.4 + intl
        return transliterator_transliterate('Any-Latin; Latin-ASCII', $in);
    }

    if (function_exists('normalizer_normalize')) {
        // PHP 5.3 + intl
        return normalizer_normalize(preg_replace('/\\p{Mn}+/u', '', normalizer_normalize($in, Normalizer::FORM_D)), Normalizer::FORM_C);
    }

    if (function_exists('iconv') && ICONV_IMPL === 'glibc') {
        return iconv('UTF-8', 'ASCII//TRANSLIT', $in);
    }

    // Incomplete version (limited to latin1): encode to HTML entities, then
    // reduce "&eacute;"-style entities to their base letter(s).
    // The letter classes are [A-Za-z]; the previous [A-za-z] range also
    // covered the punctuation that sits between "Z" and "a" in ASCII.
    $patterns = array(
        '~&([A-Za-z])(?:grave|acute|circ|tilde|uml|ring|cedil|slash|caron);~' => '\\1',
        '~&([A-Za-z]{2})lig;~' => '\\1',
        '~&[^;]+;~' => '',
    );

    $out = htmlentities($in, ENT_NOQUOTES, 'UTF-8');
    $out = preg_replace(array_keys($patterns), array_values($patterns), $out);

    return $out;
}
/**
 * Transliterates a string to an ASCII-ish, dash-separated form.
 *
 * Letters, digits, "." and "_" are kept; every other run of characters
 * becomes a single "-", and leading/trailing dashes are removed.
 *
 * @param string $string Input text.
 *
 * @return string
 */
public static function transliterate($string)
{
    if (!function_exists('transliterator_transliterate')) {
        // No intl extension: separate words first, then let iconv (when
        // present) do the transliteration.
        $string = preg_replace('~[^\\pL0-9_\\.]+~u', '-', $string);
        $string = trim($string, '-');

        if (function_exists('iconv')) {
            $string = iconv('utf-8', 'us-ascii//TRANSLIT', $string);
        }

        // Keep only letters, numbers, "_", "." and the separator.
        $string = preg_replace('~[^-a-zA-Z0-9_\\.]+~', '', $string);

        return trim($string, '-');
    }

    $latin = transliterator_transliterate("Any-Latin; Latin-ASCII; [\\u0100-\\u7fff] remove", $string);
    $dashed = preg_replace('/[^\\pL\\d._]+/u', '-', $latin);
    $collapsed = preg_replace('/[-\\s]+/', '-', $dashed);

    return trim($collapsed, '-');
}
/**
 * Turns an identifier into a lower-case, dash-separated ASCII token.
 *
 * The value is transliterated (accents and punctuation removed, lower-cased),
 * every remaining non-alphanumeric character becomes "-", dash runs are
 * collapsed and outer dashes trimmed.
 *
 * @param string $id Raw identifier.
 *
 * @return string
 */
protected function formatReadableId($id)
{
    $ascii = transliterator_transliterate("Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();", $id);
    $dashed = preg_replace('/-+/u', '-', preg_replace('/([^a-z0-9])/iu', '-', $ascii));

    return trim($dashed, '-');
}
/**
 * Transliterates a string to Latin/ASCII and removes any remaining
 * characters in the U+0100..U+7FFF range.
 *
 * The intl transliterator is used instead of iconv on purpose: iconv
 * depends on the linked library (it behaves differently on OSX and Linux),
 * and iconv combined with setlocale is not thread safe, so it can break
 * under IIS, php-fpm, fastcgi and similar setups.
 *
 * @param string $string Input text.
 *
 * @return string|false
 */
public static function transliterate($string)
{
    $rules = 'Any-Latin; Latin-ASCII; [\\u0100-\\u7fff] remove';

    return transliterator_transliterate($rules, $string);
}
/**
 * Transliterates a string and strips everything except ASCII letters,
 * digits, "_" and ".".
 *
 * Uses the intl transliterator when available, otherwise the class's own
 * toAscii() fallback.
 *
 * @param string $string Input text.
 *
 * @return string
 */
private static function transliterate($string)
{
    $transString = function_exists('transliterator_transliterate')
        ? transliterator_transliterate("Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC; Lower();", $string)
        : static::toAscii($string);

    return preg_replace('/[^A-Za-z0-9_.]/u', '', $transString);
}
/**
 * Transliterates a string to ASCII.
 *
 * The intl transliterator is used only when both the required functions and
 * the "Any-Latin" and "Latin-ASCII" transliterators exist; in every other
 * case the work is delegated to simpleTransform().
 *
 * @param string $string Input text.
 *
 * @return string
 */
public static function transform($string)
{
    $intlReady = function_exists('transliterator_transliterate') && function_exists('transliterator_list_ids');

    if ($intlReady) {
        $available = transliterator_list_ids();

        if (in_array('Any-Latin', $available) && in_array('Latin-ASCII', $available)) {
            return transliterator_transliterate('Any-Latin; Latin-ASCII; [\\u0100-\\u7fff] remove', $string);
        }
    }

    return self::simpleTransform($string);
}
/**
 * Creates a slug to be used for pretty URLs.
 *
 * With intl available the string is transliterated to lower-case ASCII with
 * punctuation removed. Without intl, everything except ASCII letters, digits
 * and whitespace is dropped and the rest lower-cased. In both cases runs of
 * whitespace/dashes are then replaced by the delimiter and the ends trimmed.
 *
 * @param string $string    Text to convert.
 * @param string $delimiter Word separator.
 *
 * @return string
 */
public static function generate($string, $delimiter = '-')
{
    if (function_exists('transliterator_transliterate')) {
        $string = transliterator_transliterate('Any-Latin; Latin-ASCII; [:Punctuation:] Remove; Lower()', $string);
    } else {
        // Keep whitespace here: the fallback used to strip it together with
        // the punctuation, so words were glued together and the delimiter
        // below was never inserted.
        $string = mb_strtolower(preg_replace('/[^a-zA-Z0-9\\s]/', '', $string));
    }

    $string = preg_replace('/[-\\s]+/', $delimiter, $string);
    $string = preg_replace('/^[-\\s]+/', '', $string);
    $string = preg_replace('/[-\\s]+$/', '', $string);

    return trim($string, $delimiter);
}
/**
 * Builds a slug from the given string.
 *
 * With the intl extension loaded the string is transliterated, everything
 * except ASCII letters, digits, "=", whitespace and dashes (including em and
 * en dashes) is removed, and runs of those separators become the
 * replacement. Without intl the class transliteration map is applied and
 * every run of non-letter/non-digit characters is replaced.
 *
 * @param string $string      Input text.
 * @param string $replacement Word separator.
 * @param bool   $lowercase   Whether to lower-case the result.
 *
 * @return string
 */
public static function slug($string, $replacement = '-', $lowercase = true)
{
    if (extension_loaded('intl') === true) {
        $latin = transliterator_transliterate('Any-Latin; NFKD', $string);
        $kept = preg_replace('/[^a-zA-Z0-9=\\s—–-]+/u', '', $latin);
        $string = preg_replace('/[=\\s—–-]+/u', $replacement, $kept);
    } else {
        $map = static::$transliteration;
        $string = str_replace(array_keys($map), $map, $string);
        $string = preg_replace('/[^\\p{L}\\p{Nd}]+/u', $replacement, $string);
    }

    $string = trim($string, $replacement);

    if ($lowercase) {
        return strtolower($string);
    }

    return $string;
}
/**
 * {@inheritDoc}
 *
 * Builds a "+"-joined list of service names for use in URLs. Each name is
 * transliterated (accents and punctuation removed, lower-cased) and has its
 * spaces removed.
 *
 * @param array|\Traversable|\ArrayAccess|\Countable|null $services Items exposing getName().
 *
 * @return string Empty string when $services loosely equals null,
 *                "noservices" when iteration yields nothing.
 *
 * @throws \InvalidArgumentException When $services is none of the accepted kinds.
 */
public function transformForSeo($services = null)
{
    if (null == $services) {
        return "";
    }

    $isAccepted = $services instanceof \ArrayAccess
        || is_array($services)
        || $services instanceof \Traversable
        || $services instanceof \Countable;

    if (!$isAccepted) {
        throw new \InvalidArgumentException('The given argument must be array accessible');
    }

    $rules = "Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();";
    $slugs = array();

    foreach ($services as $service) {
        $name = transliterator_transliterate($rules, $service->getName());
        $slugs[] = str_replace(' ', '', $name);
    }

    if (count($slugs) === 0) {
        return 'noservices';
    }

    return implode('+', $slugs);
}
/**
 * Convert to ASCII.
 *
 * Arrays and objects are walked recursively and converted in place; any
 * other value is cast to string and transliterated.
 *
 * @since 150424 Multibyte support.
 *
 * @param mixed $value Any input value.
 *
 * @return string|array|object Output value.
 */
public function toAscii($value)
{
    if (!is_array($value) && !is_object($value)) {
        $string = (string) $value;

        if (!$string) {
            return $string; // Nothing to do.
        }

        return (string) transliterator_transliterate('Any-Latin; Latin-ASCII', $string);
    }

    // The by-reference loop variable goes out of scope with the return below.
    foreach ($value as $_key => &$_value) {
        $_value = $this->toAscii($_value);
    }

    return $value;
}
/**
 * Transliterates a string to Latin/ASCII, optionally changing its case.
 *
 * An option is considered set when it appears either as a key or as a value
 * of $options. When both 'UPPER' and 'LOWER' are set, 'LOWER' wins.
 *
 * @param string $string  Input text.
 * @param array  $options Case options: 'UPPER' and/or 'LOWER'.
 *
 * @return string The transliterated, trimmed string.
 */
public static function transliterate($string, $options = ['UPPER'])
{
    // Strict comparison matters: with a loose in_array(), a value such as
    // true (e.g. ['UPPER' => true]) also "matched" 'LOWER' and silently
    // lower-cased the result.
    $isSet = function ($name) use ($options) {
        return array_key_exists($name, $options) || in_array($name, $options, true);
    };

    $op = '';
    if ($isSet('UPPER')) {
        $op = '; Upper()';
    }
    if ($isSet('LOWER')) {
        $op = '; Lower()';
    }

    $string = transliterator_transliterate('Any-Latin; Latin-ASCII; [\\u0100-\\u7fff] remove' . $op, $string);

    // TODO verify if we should remove "-"
    return trim($string);
}
/**
 * Converts a string into a lower-case ASCII token whose words are joined by
 * $replaceChar. Non-string values are returned untouched.
 *
 * Ampersands (plain or HTML-encoded) become the word "and"; the text is then
 * transliterated, every run of non-alphanumeric characters is replaced by
 * $replaceChar, and the result is trimmed and lower-cased.
 *
 * @param mixed  $value       Value to filter.
 * @param string $replaceChar Word separator.
 *
 * @return mixed The filtered string, or $value itself when it is not a string.
 */
public function filter($value, $replaceChar = '-')
{
    // Do not filter non-string values (and do not touch the locale for them).
    if (!is_string($value)) {
        return $value;
    }

    // Setting en_US locale so iconv can replace diacritic characters.
    setlocale(LC_CTYPE, 'en_US.UTF-8');

    // The encoded form must come first, otherwise "&amp;" would turn into "andamp;".
    $string = str_replace(array('&amp;', '&'), 'and', $value);
    $string = transliterator_transliterate('Any-Latin;Latin-ASCII;', $string);

    // Best-effort second pass; keep the transliterated text if iconv fails
    // instead of letting a false result blank the string.
    $converted = @iconv('UTF-8', 'ASCII//TRANSLIT', $string);
    if ($converted !== false) {
        $string = $converted;
    }

    $string = preg_replace('/[^a-z0-9]+/i', $replaceChar, $string);
    $string = preg_replace('/\\_+/', $replaceChar, $string);
    $string = trim($string, $replaceChar);

    return strtolower($string);
}
/**
 * Renames a file by transliterating its name.
 *
 * With intl available the name is transliterated to Latin/ASCII; otherwise a
 * built-in Cyrillic map is applied, followed by iconv (when present) and a
 * final ASCII whitelist. In both cases runs of characters other than
 * letters, digits and "." become a single "-", outer dashes are trimmed and
 * the result is lower-cased.
 *
 * @param \Iphp\FileStoreBundle\Mapping\PropertyMapping $propertyMapping Not used by this method.
 * @param string                                        $name            Original file name.
 *
 * @return string
 */
public function translitRename(PropertyMapping $propertyMapping, $name)
{
    $hasIntl = function_exists('transliterator_transliterate');

    if ($hasIntl) {
        $name = transliterator_transliterate("Any-Latin; Latin-ASCII; [\\u0100-\\u7fff] remove", $name);
    } else {
        $cyrillicMap = array(
            "Є" => "YE", "І" => "I", "Ѓ" => "G", "і" => "i", "№" => "N", "є" => "ye", "ѓ" => "g",
            "А" => "A", "Б" => "B", "В" => "V", "Г" => "G", "Д" => "D", "Е" => "E", "Ё" => "e",
            "Ж" => "z", "З" => "Z", "И" => "I", "Й" => "J", "К" => "K", "Л" => "L", "М" => "M",
            "Н" => "N", "О" => "O", "П" => "P", "Р" => "R", "С" => "S", "Т" => "T", "У" => "U",
            "Ф" => "F", "Х" => "H", "Ц" => "C", "Ч" => "C", "Ш" => "S", "Щ" => "s", "Ъ" => "",
            "Ы" => "Y", "Ь" => "", "Э" => "E", "Ю" => "U", "Я" => "a",
            "а" => "a", "б" => "b", "в" => "v", "г" => "g", "д" => "d", "е" => "e", "ё" => "e",
            "ж" => "z", "з" => "z", "и" => "i", "й" => "j", "к" => "k", "л" => "l", "м" => "m",
            "н" => "n", "о" => "o", "п" => "p", "р" => "r", "с" => "s", "т" => "t", "у" => "u",
            "ф" => "f", "х" => "h", "ц" => "c", "ч" => "c", "ш" => "s", "щ" => "s", "ъ" => "",
            "ы" => "y", "ь" => "", "э" => "e", "ю" => "u", "я" => "a",
            "«" => "", "»" => "", "—" => "-",
        );
        $name = strtr($name, $cyrillicMap);
    }

    // Shared by both paths: separate words with a single dash.
    $name = preg_replace('/[^\\pL\\d.]+/u', '-', $name);
    $name = preg_replace('/[-\\s]+/', '-', $name);

    if (!$hasIntl) {
        // transliterate
        if (function_exists('iconv')) {
            $name = iconv('utf-8', 'ASCII//TRANSLIT//IGNORE', $name);
        }
        $name = preg_replace("/[^0-9A-Za-z-_ .]/", "", $name);
    }

    return strtolower(trim($name, '-'));
}
/**
 * Creates a lower-case, dash-separated slug.
 *
 * Warning: requires the intl extension (`yum -y install php-intl`) for the
 * transliterator path; without it an iconv-based fallback is used.
 *
 * Both paths return slugs without leading/trailing dashes and return 'n-a'
 * when nothing usable is left. Note that empty() is used on the input, so
 * the string "0" is also treated as empty.
 *
 * @param string $text Input text.
 *
 * @return string The slug, or 'n-a' for empty input/result.
 */
public static function slugify($text)
{
    // Cached across calls: whether the intl transliterator is usable.
    static $transliteratorExists;

    if (empty($text)) {
        return 'n-a';
    }

    if (is_null($transliteratorExists)) {
        $transliteratorExists = function_exists('transliterator_transliterate')
            && \Transliterator::create("Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC; Lower();") !== null;
    }

    if ($transliteratorExists === true) {
        $ascii = transliterator_transliterate('Any-Latin; Latin-ASCII; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();', $text);

        // Trim the dashes produced by surrounding spaces and fall back to
        // 'n-a' on an empty result, exactly as the iconv path below does.
        $slug = trim((string) preg_replace('#[ -]+#', '-', (string) $ascii), '-');

        return $slug === '' ? 'n-a' : $slug;
    }

    // replace non letter or digits by -
    $text = preg_replace('~[^\\pL\\d]+~u', '-', $text);
    $text = trim($text, '-');
    $text = iconv('utf-8', 'us-ascii//TRANSLIT', $text);
    $text = strtolower($text);
    $text = preg_replace('~[^-\\w]+~', '', $text);

    if (empty($text)) {
        return 'n-a';
    }

    return $text;
}
/**
 * Transliterate string.
 *
 * @param string      $string           String to transliterate.
 * @param string|null $transliteratorId Transliterator identifier. Any falsy
 *                                      value selects the class default
 *                                      ($_defaultTransliteratorId).
 * @return string
 * @see http://php.net/manual/en/transliterator.transliterate.php
 */
public static function transliterate($string, $transliteratorId = null)
{
    if (!$transliteratorId) {
        $transliteratorId = static::$_defaultTransliteratorId;
    }

    return transliterator_transliterate($transliteratorId, $string);
}
/**
 * Produces a lower-case ASCII file name with words joined by single dashes.
 *
 * The name is transliterated and lower-cased, everything except digits,
 * a-z, ".", "-" and whitespace is removed, and runs of dashes/whitespace
 * are collapsed into one "-".
 *
 * @param string $originalName Uploaded/original file name.
 *
 * @return string
 */
private function cleanFileName($originalName)
{
    $transliteratedName = transliterator_transliterate('Any-Latin; Latin-ASCII; Lower()', $originalName);
    $strippedName = preg_replace('/[^\\da-z.\\-\\s]+/', '', $transliteratedName);

    // Trim BEFORE collapsing: once whitespace has been turned into dashes a
    // plain trim() has nothing left to remove, so " my file.txt " used to
    // come out as "-my-file.txt-".
    return preg_replace('/[-\\s]+/', '-', trim($strippedName));
}
/**
 * Converts accented Latin characters to their plain ASCII counterparts.
 *
 * When the intl transliterator is available it is used (arrays are walked
 * element by element, recursively). If intl is missing, or the
 * transliterator reports failure, a built-in replacement table is applied
 * with str_replace().
 *
 * @param string|array $subject String, or array of strings, to convert.
 *
 * @return string|array The converted value, same shape as the input.
 */
public static function utf8_latin_to_ascii($subject)
{
    // Replacement table for the fallback path; built lazily, once.
    static $CHARS = NULL;

    if (function_exists('transliterator_transliterate')) {
        if (is_array($subject)) {
            array_walk($subject, function (&$string) {
                $string = WFUtility::utf8_latin_to_ascii($string);
            });

            return $subject;
        }

        $transformed = transliterator_transliterate('Any-Latin; Latin-ASCII;', $subject);

        if ($transformed !== false) {
            return $transformed;
        }
        // Transliteration failed: fall through to the table below.
    }

    if (is_null($CHARS)) {
        $CHARS = array(
            'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'AE', 'Ç' => 'C',
            'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I',
            'Ð' => 'D', 'Ñ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', 'Ø' => 'O',
            'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U', 'Ý' => 'Y', 'ß' => 's',
            'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'æ' => 'ae', 'ç' => 'c',
            'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
            'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o',
            'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u', 'ý' => 'y', 'ÿ' => 'y',
            'Ā' => 'A', 'ā' => 'a', 'Ă' => 'A', 'ă' => 'a', 'Ą' => 'A', 'ą' => 'a',
            'Ć' => 'C', 'ć' => 'c', 'Ĉ' => 'C', 'ĉ' => 'c', 'Ċ' => 'C', 'ċ' => 'c', 'Č' => 'C', 'č' => 'c',
            'Ď' => 'D', 'ď' => 'd', 'Đ' => 'D', 'đ' => 'd',
            'Ē' => 'E', 'ē' => 'e', 'Ĕ' => 'E', 'ĕ' => 'e', 'Ė' => 'E', 'ė' => 'e', 'Ę' => 'E', 'ę' => 'e', 'Ě' => 'E', 'ě' => 'e',
            'Ĝ' => 'G', 'ĝ' => 'g', 'Ğ' => 'G', 'ğ' => 'g', 'Ġ' => 'G', 'ġ' => 'g', 'Ģ' => 'G', 'ģ' => 'g',
            'Ĥ' => 'H', 'ĥ' => 'h', 'Ħ' => 'H', 'ħ' => 'h',
            'Ĩ' => 'I', 'ĩ' => 'i', 'Ī' => 'I', 'ī' => 'i', 'Ĭ' => 'I', 'ĭ' => 'i', 'Į' => 'I', 'į' => 'i', 'İ' => 'I', 'ı' => 'i',
            'IJ' => 'IJ', 'ij' => 'ij', 'Ĵ' => 'J', 'ĵ' => 'j', 'Ķ' => 'K', 'ķ' => 'k',
            'Ĺ' => 'L', 'ĺ' => 'l', 'Ļ' => 'L', 'ļ' => 'l', 'Ľ' => 'L', 'ľ' => 'l', 'Ŀ' => 'L', 'ŀ' => 'l',
            // Upper-case L with stroke maps to upper-case "L" (it used to map to "l").
            'Ł' => 'L', 'ł' => 'l',
            'Ń' => 'N', 'ń' => 'n', 'Ņ' => 'N', 'ņ' => 'n', 'Ň' => 'N', 'ň' => 'n', 'ʼn' => 'n',
            'Ō' => 'O', 'ō' => 'o', 'Ŏ' => 'O', 'ŏ' => 'o', 'Ő' => 'O', 'ő' => 'o', 'Œ' => 'OE', 'œ' => 'oe',
            'Ŕ' => 'R', 'ŕ' => 'r', 'Ŗ' => 'R', 'ŗ' => 'r', 'Ř' => 'R', 'ř' => 'r',
            'Ś' => 'S', 'ś' => 's', 'Ŝ' => 'S', 'ŝ' => 's', 'Ş' => 'S', 'ş' => 's', 'Š' => 'S', 'š' => 's',
            'Ţ' => 'T', 'ţ' => 't', 'Ť' => 'T', 'ť' => 't', 'Ŧ' => 'T', 'ŧ' => 't',
            'Ũ' => 'U', 'ũ' => 'u', 'Ū' => 'U', 'ū' => 'u', 'Ŭ' => 'U', 'ŭ' => 'u', 'Ů' => 'U', 'ů' => 'u',
            'Ű' => 'U', 'ű' => 'u', 'Ų' => 'U', 'ų' => 'u',
            'Ŵ' => 'W', 'ŵ' => 'w', 'Ŷ' => 'Y', 'ŷ' => 'y', 'Ÿ' => 'Y',
            'Ź' => 'Z', 'ź' => 'z', 'Ż' => 'Z', 'ż' => 'z', 'Ž' => 'Z', 'ž' => 'z',
            'ſ' => 's', 'ƒ' => 'f', 'Ơ' => 'O', 'ơ' => 'o', 'Ư' => 'U', 'ư' => 'u',
            'Ǎ' => 'A', 'ǎ' => 'a', 'Ǐ' => 'I', 'ǐ' => 'i', 'Ǒ' => 'O', 'ǒ' => 'o', 'Ǔ' => 'U', 'ǔ' => 'u',
            'Ǖ' => 'U', 'ǖ' => 'u', 'Ǘ' => 'U', 'ǘ' => 'u', 'Ǚ' => 'U', 'ǚ' => 'u', 'Ǜ' => 'U', 'ǜ' => 'u',
            'Ǻ' => 'A', 'ǻ' => 'a', 'Ǽ' => 'AE', 'ǽ' => 'ae', 'Ǿ' => 'O', 'ǿ' => 'o',
        );
    }

    return str_replace(array_keys($CHARS), array_values($CHARS), $subject);
}
/**
 * Cleanup filename
 *
 * Optionally converts spaces, optionally transliterates to ASCII (keeping
 * only letters, digits and ". [ ] _ | space -"), then removes quotes,
 * slashes and HTML tags. A resulting file name that starts with "." gets a
 * "file" prefix so that e.g. "UNKNOWN_LANGUAGE.jpg" never collapses to
 * ".jpg"; folder names are exempt from this.
 *
 * @param string $str             Original name.
 * @param bool   $transliteration Whether to transliterate to ASCII.
 * @param bool   $convert_spaces  Whether to replace spaces.
 * @param string $replace_with    Replacement for spaces.
 * @param bool   $is_folder       True when $str is a folder name.
 *
 * @return string
 */
function fix_filename($str, $transliteration, $convert_spaces = false, $replace_with = "_", $is_folder = false)
{
    if ($convert_spaces) {
        $str = str_replace(' ', $replace_with, $str);
    }

    if ($transliteration) {
        if (!mb_detect_encoding($str, 'UTF-8', true)) {
            // Same ISO-8859-1 to UTF-8 conversion utf8_encode() performed,
            // without relying on the deprecated function.
            $str = mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
        }

        if (function_exists('transliterator_transliterate')) {
            $str = transliterator_transliterate('Any-Latin; Latin-ASCII', $str);
        } else {
            // iconv returns false on failure; keep the name in that case and
            // let the whitelist below strip what cannot be represented.
            $converted = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $str);
            if ($converted !== false) {
                $str = $converted;
            }
        }

        $str = preg_replace("/[^a-zA-Z0-9\\.\\[\\]_| -]/", '', $str);
    }

    $str = str_replace(array('"', "'", "/", "\\"), "", $str);

    // Trim before the leading-dot check so that " .jpg" is recognised too.
    $str = trim(strip_tags($str));

    // Empty or incorrectly transliterated filename: add a default 'file' name.
    if (strpos($str, '.') === 0 && $is_folder === false) {
        $str = 'file' . $str;
    }

    return $str;
}
/**
 * Runs the hotel search once and caches the outcome in $this->results.
 *
 * A first query collects the ids of active, adherent hotels that have a
 * non-empty reservitId and match the configured filters (brands, radius
 * around a city or coordinates, department, region, country). When no
 * location filter applies, the hotel name is matched against the
 * transliterated search string. A second query then loads the matching
 * hotels with their relations.
 *
 * @return array SearchResult objects keyed by the hotel's reservitId.
 */
public function getResults()
{
    if ($this->results !== null) {
        return $this->results;
    }

    $em = $this->entityManager;
    $hotelRepo = $em->getRepository('SehBundle:Hotel');
    $hasGeoFilter = ($this->radius && $this->longitude && $this->latitude);

    $hotelPreQuery = $em->createQueryBuilder('h')
        ->select('h.id')
        ->from('SehBundle:Hotel', 'h')
        ->where('h.reservitId is not null')
        ->andWhere('h.active = :active')
        ->andWhere('h.adherent = :adherent')
        ->andWhere('h.reservitId <> \'\'')
        ->setParameter(':active', true)
        ->setParameter(':adherent', true);

    if ($this->brands) {
        // Bind the ids instead of concatenating them into the DQL string.
        $hotelPreQuery
            ->join('h.brand', 'b', Expr\Join::WITH, 'b.id in (:brands)')
            ->setParameter(':brands', array_values($this->brands));
    }

    if ($hasGeoFilter) {
        if ($this->city) {
            $hotelPreQuery
                ->join('h.cityDistances', 'ciDi', 'WITH', 'ciDi.city = :city AND ciDi.distance <= :radius')
                ->setParameter(':radius', $this->radius)
                ->setParameter(':city', $this->city);
        } else {
            $this->addDistanceCriteria($hotelPreQuery, $this->latitude, $this->longitude, $this->radius);
        }
    }

    if ($this->department) {
        $hotelPreQuery
            ->join('h.department', 'd')
            ->andWhere('d.id = :department')
            ->setParameter(':department', $this->department);
    }

    // The region and country filters both need the "r" alias; join it only
    // once, otherwise the alias would be declared twice when both are set.
    $regionJoined = false;

    if ($this->region) {
        $hotelPreQuery
            ->join('h.region', 'r')
            ->andWhere('r.id = :region')
            ->setParameter(':region', $this->region);
        $regionJoined = true;
    }

    if ($this->country) {
        if (!$regionJoined) {
            $hotelPreQuery->join('h.region', 'r');
        }
        $hotelPreQuery
            ->join('r.country', 'co')
            ->andWhere('co.id = :country')
            ->setParameter(':country', $this->country);
    }

    if (!$hasGeoFilter && !$this->department && !$this->region && !$this->country) {
        // No location filter at all: search on the hotel name.
        $searchString = transliterator_transliterate("Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();", $this->searchString);
        $hotelPreQuery
            ->andWhere('h.name LIKE :search')
            ->setParameter(':search', sprintf('%%%s%%', $searchString));
    }

    $hotelIds = array();
    foreach ($hotelPreQuery->getQuery()->getResult() as $hotelId) {
        $hotelIds[] = $hotelId['id'];
    }

    $searchResults = array();

    if ($hotelIds) {
        $hotelQuery = $hotelRepo->createQueryBuilder('h');
        $hotelQuery
            ->select('h as hotel, b, r, co, c, d, hl, ht, hs')
            ->join('h.brand', 'b')
            ->join('h.region', 'r')
            ->join('r.country', 'co')
            ->join('h.city', 'c')
            ->leftJoin('h.department', 'd')
            ->leftJoin('h.highlights', 'hl')
            ->leftJoin('h.thematics', 'ht')
            ->leftJoin('h.services', 'hs')
            ->where('h.id in (:hotelIds)')
            ->setParameter(':hotelIds', $hotelIds);

        if ($hasGeoFilter) {
            $hotelQuery = $this->addDistanceSelect($hotelQuery, $this->latitude, $this->longitude, $this->radius);
        }

        foreach ($hotelQuery->getQuery()->getResult() as $result) {
            $hotel = $result['hotel'];

            // The distance column is only selected for geo searches.
            $searchResults[$hotel->getReservitId()] = $hasGeoFilter
                ? new SearchResult($hotel, $result['distance'])
                : new SearchResult($hotel);
        }
    }

    $this->results = $searchResults;

    return $this->results;
}
/**
 * Replaces non-ASCII characters with an ASCII approximation.
 *
 * With the default rule set, characters in the U+0080..U+7FFF range that
 * survive the Latin/ASCII conversion are removed.
 *
 * @param string $string         Text to transliterate.
 * @param string $transliterator Transliterator rule string.
 * @return string
 */
public static function transliterate($string, $transliterator = "Any-Latin; Latin-ASCII; [\\u0080-\\u7fff] remove;")
{
    $result = transliterator_transliterate($transliterator, $string);

    return $result;
}
/**
 * Create a slug for use in URLs from a given string. Any non-alphanumeric characters will be converted to "-".
 *
 * Strings containing anything outside the basic ASCII range are
 * transliterated first (via intl when available, otherwise through an
 * HTML-entity based fallback). Plugins may alter the string through the
 * "slug" event before it is lower-cased, dashed and cut to 63 characters.
 *
 * @param string $string The string to convert.
 * @return string The slug.
 *
 * @package esoTalk
 */
function slug($string)
{
    // If there are any characters other than basic alphanumeric, space, punctuation, then we need to attempt transliteration.
    // The range is spelled with escapes: the character class as it appeared
    // here ("[^ -]") matched everything except space and hyphen, so this
    // guard was true for practically every string.
    if (preg_match('/[^\x20-\x7f]/', $string)) {

        // Thanks to krakos for this code! http://esotalk.org/forum/582-unicode-in-usernames-and-url-s
        if (function_exists('transliterator_transliterate')) {

            // Unicode decomposition rules states that these cannot be decomposed, hence
            // we have to deal with them manually. Note: even though “scharfes s” is commonly
            // transliterated as “sz”, in this context “ss” is preferred, as it's the most popular
            // method among German speakers.
            $src = array('œ', 'æ', 'đ', 'ø', 'ł', 'ß', 'Œ', 'Æ', 'Đ', 'Ø', 'Ł');
            $dst = array('oe', 'ae', 'd', 'o', 'l', 'ss', 'OE', 'AE', 'D', 'O', 'L');
            $string = str_replace($src, $dst, $string);

            // Using transliterator to get rid of accents and convert non-Latin to Latin
            $string = transliterator_transliterate("Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();", $string);

        } else {

            // A fallback to old method.
            // Convert special Latin letters and other characters to HTML entities.
            $string = htmlentities($string, ENT_NOQUOTES, "UTF-8");

            // With those HTML entities, either convert them back to a normal letter, or remove them.
            $string = preg_replace(array("/&([a-z]{1,2})(acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml|caron);/i", "/&[^;]{2,6};/"), array("\$1", " "), $string);
        }
    }

    // Allow plugins to alter the slug.
    ET::trigger("slug", array(&$string));

    // Now replace non-alphanumeric characters with a hyphen, and remove multiple hyphens.
    $slug = str_replace(' ', '-', trim(preg_replace('~[^\\pL\\d]+~u', ' ', mb_strtolower($string, "UTF-8"))));

    return mb_substr($slug, 0, 63, "UTF-8");
}
/**
 * Reduces a string to lower-case letters and digits only.
 *
 * With intl the string is transliterated (accents and punctuation removed,
 * lower-cased, converted to ASCII where possible). Without intl only
 * non-letter/non-digit characters are stripped, the rest is lower-cased and
 * cut to at most 64 bytes.
 *
 * @param string $str Input text.
 *
 * @return string
 */
function common_slugify($str)
{
    // php5-intl is highly recommended...
    if (!function_exists('transliterator_transliterate')) {
        $str = preg_replace('/[^\\pL\\pN]/u', '', $str);
        $str = mb_convert_case($str, MB_CASE_LOWER, 'UTF-8');

        // Same 64-byte budget as before, but cut on a character boundary so a
        // multibyte character is never split in half.
        return mb_strcut($str, 0, 64, 'UTF-8');
    }

    $str = transliterator_transliterate(
        'Any-Latin;' .
        'NFD;' .
        '[:Nonspacing Mark:] Remove;' .
        'NFC;' .
        '[:Punctuation:] Remove;' .
        'Lower();' .
        'Latin-ASCII;',
        $str
    );

    // "u" flag: characters left untransliterated are still UTF-8 and must be
    // matched as whole characters, not byte by byte.
    return preg_replace('/[^\\pL\\pN]/u', '', $str);
}
/**
 * Returns transliterated version of a string.
 *
 * If intl extension isn't available uses fallback that converts latin characters only
 * and removes the rest. You may customize characters map via $transliteration property
 * of the helper.
 *
 * @param string $string input string
 * @param string|\Transliterator $transliterator either a [[Transliterator]] or a string
 * from which a [[Transliterator]] can be built. When null, the helper's
 * $transliterator property is used.
 * @return string
 * @since 2.0.7 this method is public.
 */
public static function transliterate($string, $transliterator = null)
{
    if (!static::hasIntl()) {
        $map = static::$transliteration;

        return str_replace(array_keys($map), $map, $string);
    }

    $rules = $transliterator === null ? static::$transliterator : $transliterator;

    return transliterator_transliterate($rules, $string);
}
/**
 * Cleans some string to be used as filename
 *
 * Steps: optional replacement of the given substrings by spaces, optional
 * transliteration to ASCII, mapping of common accented letters, removal of
 * characters outside "a-z A-Z 0-9 @ . / _ | + space -" (together with a dot
 * directly following a removed character), lower-casing, and folding runs
 * of "/ | + space -" into the delimiter. A result starting with "." is
 * prefixed with the current timestamp.
 *
 * @param string $str                the string to be cleaned
 * @param bool   $useTransLiteration whether to transliterate to ASCII first
 * @param array  $replace            substrings to turn into spaces
 * @param string $delimiter          word separator
 *
 * @return string
 */
public function cleanString($str, $useTransLiteration = false, $replace = array(), $delimiter = '_')
{
    // Locale is set so that iconv's //TRANSLIT can transliterate.
    setlocale(LC_ALL, 'en_US.UTF8');

    if (!empty($replace)) {
        $str = str_replace((array) $replace, ' ', $str);
    }

    if ($useTransLiteration) {
        // "Any-Latin; Latin-ASCII" converts to plain ASCII; the previously
        // used "Accents-Any" transform does not strip accents.
        $converted = function_exists('transliterator_transliterate')
            ? transliterator_transliterate('Any-Latin; Latin-ASCII', $str)
            : iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $str);

        // Both calls return false on failure; keep the input in that case.
        if ($converted !== false) {
            $str = $converted;
        }
    }

    // Array form of strtr(): the three-argument form works byte by byte and
    // corrupts multibyte UTF-8 characters.
    $str = strtr($str, array(
        'À' => 'A', 'Â' => 'A', 'Á' => 'A', 'á' => 'a', 'à' => 'a', 'â' => 'a',
        'Ô' => 'O', 'Ò' => 'O', 'Ó' => 'O', 'ó' => 'o', 'ô' => 'o', 'ò' => 'o',
        'Ê' => 'E', 'È' => 'E', 'É' => 'E', 'é' => 'e', 'è' => 'e', 'ê' => 'e',
        'Í' => 'I', 'Î' => 'I', 'Ì' => 'I', 'í' => 'i', 'î' => 'i', 'ì' => 'i',
        'Û' => 'U', 'Ù' => 'U', 'Ú' => 'U', 'ú' => 'u', 'û' => 'u', 'ù' => 'u',
        'ŷ' => 'y', 'Ñ' => 'N', 'ñ' => 'n',
    ));

    $str = preg_replace("/[^a-zA-Z0-9@\\.\\/_|+ -](\\.)?/is", '', $str);
    $str = strtolower(trim($str, '-'));
    $str = preg_replace("/[\\/|+ -]+/", $delimiter, $str);
    $str = str_replace(array('"', "'", "/", "\\"), "", $str);
    $str = strip_tags($str);

    // Avoid producing a hidden/extension-only file name.
    if (strpos($str, '.') === 0) {
        $str = time() . $str;
    }

    return trim($str);
}
/**
 * Transliterates strings using the PHP 5.4.0+ built-in transliterator.
 *
 * The text is converted to Latin script and its non-spacing marks
 * (accents) are removed.
 *
 * @param string $string The string to transliterate
 *
 * @return string The transliterated string
 */
public function nativeTransliteration($string)
{
    $rules = "Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC;";

    return transliterator_transliterate($rules, $string);
}
/**
 * Cleans up a file name.
 *
 * Optionally transliterates to ASCII (keeping only letters, digits and
 * ". [ ] _ | space -"), then removes quotes, slashes and HTML tags. A name
 * that starts with "." gets a "file" prefix so that e.g.
 * "UNKNOWN_LANGUAGE.jpg" never collapses to ".jpg".
 *
 * @param string $str             Original file name.
 * @param bool   $transliteration Whether to transliterate to ASCII.
 *
 * @return string
 */
function fix_filename($str, $transliteration)
{
    if ($transliteration) {
        // "Any-Latin; Latin-ASCII" converts to plain ASCII; the previously
        // used "Accents-Any" transform does not strip accents.
        $converted = function_exists('transliterator_transliterate')
            ? transliterator_transliterate('Any-Latin; Latin-ASCII', $str)
            : iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $str);

        // Both calls return false on failure; keep the name in that case and
        // let the whitelist below strip what cannot be represented.
        if ($converted !== false) {
            $str = $converted;
        }

        $str = preg_replace("/[^a-zA-Z0-9\\.\\[\\]_| -]/", '', $str);
    }

    $str = str_replace(array('"', "'", "/", "\\"), "", $str);
    $str = strip_tags($str);

    // Empty or incorrectly transliterated filename: add a default 'file' name.
    if (strpos($str, '.') === 0) {
        $str = 'file' . $str;
    }

    return trim($str);
}
/**
 * Converts a string into a URL-friendly, lower-case token.
 *
 * The input is trimmed, entity-decoded and stripped of tags; a few symbols
 * are spelled out or dropped; decimal points between digits become spaces
 * (so "2.5" does not turn into "25"). The text is then transliterated with
 * intl when available, or with iconv otherwise, and runs of whitespace and
 * dashes are replaced by the spacer. When nothing is left, the work is
 * handed to PerchUtil::urlify_non_translit().
 *
 * @param string $string Input text.
 * @param string $spacer Word separator.
 *
 * @return string
 */
public static function urlify($string, $spacer = '-')
{
    $string = strip_tags(htmlspecialchars_decode(trim($string), ENT_QUOTES));
    $string = str_replace(array('$', '£', '€', '™', '®', '|'), array('', 'GBP ', 'EUR ', 'tm', 'r', ''), $string);

    // make sure numbers with decimals don't mislead, e.g. 2.5 -> 25
    $string = preg_replace('#(\\d)\\.(\\d)#', '$1 $2', $string);

    // Prefer the rule set with Latin-ASCII when that transliterator exists.
    $rule = 'Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();';
    if (function_exists('transliterator_list_ids') && in_array('Latin-ASCII', transliterator_list_ids())) {
        $rule = 'Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();';
    }

    if (function_exists('transliterator_transliterate')) {
        // Dashes would be removed as punctuation, so turn them into spaces first.
        $string = str_replace('-', ' ', $string);
        $s = transliterator_transliterate($rule, $string);
    } elseif (class_exists('Transliterator')) {
        $string = str_replace('-', ' ', $string);
        $T = Transliterator::create($rule);
        $s = $T->transliterate($string);
    } else {
        $s = strtolower(iconv('UTF-8', 'ASCII//TRANSLIT', $string));
        $s = preg_replace('/[^a-z0-9\\-\\s]/', '', $s);
    }

    $s = preg_replace('/[\\s\\-]+/', $spacer, $s);

    if (strlen($s) > 0) {
        return $s;
    }

    return PerchUtil::urlify_non_translit($string);
}