/**
 * Transliterates a string using the intl extension when it is usable,
 * otherwise delegates to self::simpleTransform().
 *
 * The ICU rule chain converts to Latin script ('Any-Latin'), then to ASCII
 * ('Latin-ASCII'), and finally removes any characters left in the range
 * U+0100..U+7FFF.
 *
 * @param string $string Text to transliterate.
 *
 * @return mixed The transliterated string, or whatever self::simpleTransform()
 *               returns when intl is missing, lacks the required
 *               transliterators, or fails on this input.
 */
public static function transform($string)
{
    // Without both intl functions there is nothing to try; use the fallback.
    if (!function_exists('transliterator_transliterate') || !function_exists('transliterator_list_ids')) {
        return self::simpleTransform($string);
    }

    // transliterator_list_ids() returns false on failure, so make sure we
    // actually have a list before searching it.
    $transliteratorIds = transliterator_list_ids();
    if (!is_array($transliteratorIds)
        || !in_array('Any-Latin', $transliteratorIds, true)
        || !in_array('Latin-ASCII', $transliteratorIds, true)
    ) {
        return self::simpleTransform($string);
    }

    $transliterated = transliterator_transliterate('Any-Latin; Latin-ASCII; [\\u0100-\\u7fff] remove', $string);

    // transliterator_transliterate() returns false on failure; never leak
    // that boolean to callers, use the same fallback as above instead.
    if ($transliterated === false) {
        return self::simpleTransform($string);
    }

    return $transliterated;
}
/**
 * Builds a URL-friendly slug from arbitrary text.
 *
 * The input is trimmed, HTML-entity decoded, stripped of tags, has a few
 * symbols spelled out or dropped, and is then transliterated (intl when
 * present, iconv otherwise). Runs of whitespace and hyphens are collapsed
 * into $spacer. If the outcome is empty, the work is handed over to
 * PerchUtil::urlify_non_translit().
 *
 * @param string $string Text to convert.
 * @param string $spacer Replacement for each run of whitespace/hyphens.
 *
 * @return mixed The slug, or the result of PerchUtil::urlify_non_translit().
 */
public static function urlify($string, $spacer = '-')
{
    // Clean-up: trim, decode entities (including quotes), drop markup.
    $string = strip_tags(htmlspecialchars_decode(trim($string), ENT_QUOTES));

    // '$' and '|' are dropped; the remaining symbols are spelled out.
    $string = str_replace(
        array('$', '£', '€', '™', '®', '|'),
        array('', 'GBP ', 'EUR ', 'tm', 'r', ''),
        $string
    );

    // Keep the two halves of a decimal apart so that 2.5 does not end up as 25.
    $string = preg_replace('#(\\d)\\.(\\d)#', '$1 $2', $string);

    // Include the Latin-ASCII step only when this ICU build lists it.
    $hasLatinAscii = function_exists('transliterator_list_ids')
        && in_array('Latin-ASCII', transliterator_list_ids());

    $rule = $hasLatinAscii
        ? 'Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();'
        : 'Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();';

    if (function_exists('transliterator_transliterate')) {
        // The rule removes punctuation, so turn hyphens into spaces first
        // to keep them as word separators.
        $string = str_replace('-', ' ', $string);
        $slug   = transliterator_transliterate($rule, $string);
    } elseif (class_exists('Transliterator')) {
        $string = str_replace('-', ' ', $string);
        $slug   = Transliterator::create($rule)->transliterate($string);
    } else {
        // No intl available: approximate with iconv, lower-case, and keep
        // only a-z, 0-9, hyphens and whitespace.
        $slug = preg_replace(
            '/[^a-z0-9\\-\\s]/',
            '',
            strtolower(iconv('UTF-8', 'ASCII//TRANSLIT', $string))
        );
    }

    // Collapse every run of whitespace/hyphens into the spacer.
    $slug = preg_replace('/[\\s\\-]+/', $spacer, $slug);

    if (strlen($slug) > 0) {
        return $slug;
    }

    // Nothing survived transliteration; defer to the non-transliterating variant.
    return PerchUtil::urlify_non_translit($string);
}
<?php
// Set the intl.error_level ini option to E_WARNING for this script.
ini_set("intl.error_level", E_WARNING);

// Both the procedural function and the static method are expected to report
// more than 100 transliterator IDs; each line dumps the boolean outcome.
var_dump(100 < count(transliterator_list_ids()));
var_dump(100 < count(Transliterator::listIDs()));

print "Done.\n";
<?php
// Set the intl.error_level ini option to E_WARNING for this script.
ini_set("intl.error_level", E_WARNING);

// transliterator_list_ids() is documented as taking no parameters; an argument
// is passed here anyway (presumably to exercise its argument-error handling)
// and whatever comes back is dumped.
var_dump(transliterator_list_ids([]));

print "Done.\n";
/**
 * Converts arbitrary UTF-8 text into a lower-case ASCII slug made only of
 * a-z, 0-9, underscore and dash.
 *
 * Processing order:
 *  1. a hand-maintained replacement table is applied first, so that some
 *     characters get more readable forms than a generic transliterator would
 *     give them (e.g. "100℃" becomes "100degc" rather than "100oc");
 *  2. the remainder is transliterated with the intl extension when the
 *     required ICU transliterators are present, otherwise with iconv when
 *     available, otherwise it is only lower-cased;
 *  3. every character outside [_a-z0-9-] becomes a dash, runs of dashes are
 *     collapsed, and a leading/trailing dash is removed.
 *
 * @param string $text UTF-8 input text.
 *
 * @return string The slug; '_' when nothing usable remains, so the result is
 *                never an empty string.
 */
function safeTransliterate($text)
{
    /* manual transliteration list:
       ---------------------------------------------------------------------------------------------------------- */
    /* this list is meant to be practical, not comprehensive, representing:
       1. the most common accents and special letters that get typed, and
       2. the most practical transliterations for readability.

       this data was produced with the help of:
       http://www.unicode.org/charts/normalization/
       http://www.yuiblog.com/sandbox/yui/3.3.0pr3/api/text-data-accentfold.js.html
       http://www.utf8-chartable.de/

       Keys are the replacement text, values the character class to replace. The entries are applied in
       order, one after another. */
    static $translit = array(
        'a' => '/[ÀÁÂẦẤẪẨÃĀĂẰẮẴȦẲǠẢÅÅǺǍȀȂẠẬẶḀĄẚàáâầấẫẩãāăằắẵẳȧǡảåǻǎȁȃạậặḁą]/u',
        'b' => '/[ḂḄḆḃḅḇ]/u',
        'c' => '/[ÇĆĈĊČḈçćĉċčḉ]/u',
        'd' => '/[ÐĎḊḌḎḐḒďḋḍḏḑḓð]/u',
        'e' => '/[ÈËĒĔĖĘĚȄȆȨḔḖḘḚḜẸẺẼẾỀỂỄỆèëēĕėęěȅȇȩḕḗḙḛḝẹẻẽếềểễệ]/u',
        'f' => '/[Ḟḟ]/u',
        'g' => '/[ĜĞĠĢǦǴḠĝğġģǧǵḡ]/u',
        'h' => '/[ĤȞḢḤḦḨḪĥȟḣḥḧḩḫẖ]/u',
        'i' => '/[ÌÏĨĪĬĮİǏȈȊḬḮỈỊiìïĩīĭįǐȉȋḭḯỉị]/u',
        'j' => '/[Ĵĵǰ]/u',
        'k' => '/[ĶǨḰḲḴKķǩḱḳḵ]/u',
        'l' => '/[ĹĻĽĿḶḸḺḼĺļľŀḷḹḻḽ]/u',
        'm' => '/[ḾṀṂḿṁṃ]/u',
        'n' => '/[ÑŃŅŇǸṄṆṈṊñńņňǹṅṇṉṋ]/u',
        'o' => '/[ÒŌŎŐƠǑǪǬȌȎȬȮȰṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢØǾòōŏőơǒǫǭȍȏȭȯȱṍṏṑṓọỏốồổỗộớờởỡợøǿ]/u',
        'p' => '/[ṔṖṕṗ]/u',
        'r' => '/[ŔŖŘȐȒṘṚṜṞŕŗřȑȓṙṛṝṟ]/u',
        's' => '/[ŚŜŞŠȘṠṢṤṦṨſśŝşšșṡṣṥṧṩ]/u',
        'ss' => '/[ß]/u',
        't' => '/[ŢŤȚṪṬṮṰţťțṫṭṯṱẗ]/u',
        'th' => '/[Þþ]/u',
        'u' => '/[ÙŨŪŬŮŰŲƯǓȔȖṲṴṶṸṺỤỦỨỪỬỮỰùũūŭůűųưǔȕȗṳṵṷṹṻụủứừửữựµ]/u',
        'v' => '/[ṼṾṽṿ]/u',
        'w' => '/[ŴẀẂẄẆẈŵẁẃẅẇẉẘ]/u',
        'x' => '/[ẊẌẋẍ×]/u',
        'y' => '/[ÝŶŸȲẎỲỴỶỸýÿŷȳẏẙỳỵỷỹ]/u',
        'z' => '/[ŹŻŽẐẒẔźżžẑẓẕ]/u',
        'ae' => '/[ÄǞÆǼǢäǟæǽǣ]/u',
        'oe' => '/[ÖȪŒöȫœ]/u',
        // Single-character digraphs and ligatures. These are written as code-point escapes on purpose:
        // spelled out as plain letters inside a character class they would match (and expand) every
        // ordinary ASCII d, f, i, j, l, n, s, t and z in the input.
        'dz' => '/[\x{01C4}\x{01C5}\x{01C6}\x{01F1}\x{01F2}\x{01F3}]/u', // U+01C4..01C6 (DZ with caron), U+01F1..01F3 (DZ)
        'ff' => '/[\x{FB00}]/u',                                          // U+FB00 ligature ff
        'ffi' => '/[\x{FB03}]/u',                                         // U+FB03 ligature ffi
        'fi' => '/[\x{FB01}]/u',                                          // U+FB01 ligature fi
        'ffl' => '/[\x{FB04}]/u',                                         // U+FB04 ligature ffl
        'fl' => '/[\x{FB02}]/u',                                          // U+FB02 ligature fl
        'ij' => '/[\x{0132}\x{0133}]/u',                                  // U+0132/0133 ligature IJ
        'lj' => '/[\x{01C7}\x{01C8}\x{01C9}]/u',                          // U+01C7..01C9 LJ
        'nj' => '/[\x{01CA}\x{01CB}\x{01CC}]/u',                          // U+01CA..01CC NJ
        'st' => '/[\x{FB05}\x{FB06}]/u',                                  // U+FB05 long-s t, U+FB06 st
        'ue' => '/[ÜǕǗǙǛüǖǘǚǜ]/u',
        'eur' => '/[€]/u',
        'cents' => '/[¢]/u',
        'lira' => '/[₤]/u',
        'dollars' => '/[$]/u',
        'won' => '/[₩]/u',
        'rs' => '/[₨]/u',
        'yen' => '/[¥]/u',
        'pounds' => '/[£]/u',
        'pts' => '/[₧]/u',
        'degc' => '/[℃]/u',
        'degf' => '/[℉]/u',
        'no' => '/[№]/u',
        '-tm' => '/[™]/u'
    );

    // do the manual transliteration first
    $text = preg_replace(array_values($translit), array_keys($translit), $text);

    // preg_replace() yields null on error (for instance when the input is not valid UTF-8, which the /u
    // patterns require). Every later step would turn that into an empty string, so answer with the
    // blank placeholder right away instead of passing null along.
    if ($text === null) {
        return '_';
    }

    // Use ICU only when every transliterator the rule chain relies on is installed.
    $icuUsable = function_exists('transliterator_transliterate')
        && count(array_intersect(
            array('Any-NFKD', 'Any-Latin', 'Latin-ASCII', 'Any-Remove', 'Any-Lower'),
            transliterator_list_ids()
        )) === 5;

    if ($icuUsable) {
        $ascii = transliterator_transliterate(
            'NFKD; Latin; Latin/US-ASCII; [:Nonspacing Mark:] Remove; Lower',
            $text
        );
    } elseif (function_exists('iconv')) {
        // Remove the quote/accent marks from iconv's output (presumably what its TRANSLIT emits for accents).
        // iconv() returns false on failure; the cast turns that into an empty string.
        $ascii = str_replace(
            array("'", '"', '`', '^', '~'),
            '',
            strtolower((string) iconv('UTF-8', 'US-ASCII//IGNORE//TRANSLIT', $text))
        );
    } else {
        $ascii = strtolower($text);
    }

    // flatten the text down to just a-z0-9 underscore and dash for spaces
    // (<www.mattcutts.com/blog/dashes-vs-underscores/>)
    // (a false result from the transliterator is treated as an empty string)
    $text = preg_replace(
        array('/[^_a-z0-9-]/i', '/-{2,}/', '/^-|-$/'),
        array('-', '-', ''),
        (string) $ascii
    );

    // old iconv versions and certain inputs may cause an empty string; don't allow a blank response.
    // Compare explicitly: a plain truthiness test would also reject the valid result '0'.
    return ($text === null || $text === '') ? '_' : $text;
}