Beispiel #1
0
 /**
  * Transliterates a string using the intl extension, with a fallback.
  *
  * Applies the "Any-Latin" and "Latin-ASCII" transliterators and then removes any
  * remaining characters in the U+0100 to U+7FFF range. self::simpleTransform() is
  * used instead when the intl functions are missing, when the required
  * transliterator IDs are not available, or when the transliteration itself fails.
  *
  * @param string $string Text to transliterate.
  *
  * @return mixed The transliterated string, or whatever self::simpleTransform() returns.
  */
 public static function transform($string)
 {
     if (!function_exists('transliterator_transliterate') || !function_exists('transliterator_list_ids')) {
         return self::simpleTransform($string);
     }

     // transliterator_list_ids() returns false on failure; treat that like "IDs not available"
     // rather than handing a non-array to in_array().
     $availableIds = transliterator_list_ids();
     if (!is_array($availableIds)
         || !in_array('Any-Latin', $availableIds, true)
         || !in_array('Latin-ASCII', $availableIds, true)
     ) {
         return self::simpleTransform($string);
     }

     $transliterated = transliterator_transliterate('Any-Latin; Latin-ASCII; [\\u0100-\\u7fff] remove', $string);

     // transliterator_transliterate() returns false on failure; never leak that to the caller.
     if ($transliterated === false) {
         return self::simpleTransform($string);
     }

     return $transliterated;
 }
 /**
  * Builds a URL-friendly slug from a string.
  *
  * Steps: trim, decode HTML entities, strip tags, replace a few currency/trademark
  * symbols, split digits around a decimal point, transliterate to lower-case Latin
  * without punctuation, then collapse every run of whitespace/hyphens into $spacer.
  * When nothing is left, the result of PerchUtil::urlify_non_translit() is returned.
  *
  * @param string $string Text to convert.
  * @param string $spacer Separator placed between words (used as a preg_replace replacement).
  *
  * @return mixed The slug, or whatever PerchUtil::urlify_non_translit() returns.
  */
 public static function urlify($string, $spacer = '-')
 {
     $string = trim($string);
     $string = htmlspecialchars_decode($string, ENT_QUOTES);
     $string = strip_tags($string);
     $string = str_replace(array('$', '£', '€', '™', '®', '|'), array('', 'GBP ', 'EUR ', 'tm', 'r', ''), $string);

     // Make sure numbers with decimals don't mislead: without this the punctuation
     // removal below would turn 2.5 into 25.
     $string = preg_replace('#(\\d)\\.(\\d)#', '$1 $2', $string);

     // Prefer the rule that includes Latin-ASCII when that transliterator is available.
     $rule = 'Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();';
     if (function_exists('transliterator_list_ids')) {
         $availableIds = transliterator_list_ids();
         // transliterator_list_ids() returns false on failure.
         if (is_array($availableIds) && in_array('Latin-ASCII', $availableIds, true)) {
             $rule = 'Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();';
         }
     }

     if (function_exists('transliterator_transliterate')) {
         // Hyphens count as punctuation and would be removed by the rule; turn them into spaces first.
         $string = str_replace('-', ' ', $string);
         $s = transliterator_transliterate($rule, $string);
     } elseif (class_exists('Transliterator')) {
         $string = str_replace('-', ' ', $string);
         // Transliterator::create() returns null on failure; calling a method on it would be fatal.
         $transliterator = Transliterator::create($rule);
         $s = $transliterator === null ? false : $transliterator->transliterate($string);
     } else {
         $s = iconv('UTF-8', 'ASCII//TRANSLIT', $string);
         $s = strtolower($s === false ? '' : $s);
         $s = preg_replace('/[^a-z0-9\\-\\s]/', '', $s);
     }

     // Any failed conversion above (false/null) is treated as an empty result.
     if (!is_string($s)) {
         $s = '';
     }

     $s = preg_replace('/[\\s\\-]+/', $spacer, $s);
     if (is_string($s) && $s !== '') {
         return $s;
     }

     return PerchUtil::urlify_non_translit($string);
 }
<?php

// Have the intl extension report its errors at E_WARNING level.
ini_set('intl.error_level', E_WARNING);

// Procedural API: dump whether more than 100 transliterator IDs are listed.
$proceduralIdCount = count(transliterator_list_ids());
var_dump($proceduralIdCount > 100);

// Object-oriented API: same check through Transliterator::listIDs().
$objectIdCount = count(Transliterator::listIDs());
var_dump($objectIdCount > 100);

print "Done.\n";
<?php

// Have the intl extension report its errors at E_WARNING level.
ini_set('intl.error_level', E_WARNING);

// Call transliterator_list_ids() with an empty-array argument and dump the result.
// NOTE(review): presumably exercises how the function handles an unexpected argument - confirm.
$extraArgument = array();
$listing = transliterator_list_ids($extraArgument);
var_dump($listing);

print "Done.\n";
Beispiel #5
0
/**
 * Reduces UTF-8 text to a lower-case ASCII slug made of a-z, 0-9, "_" and "-".
 *
 * If available, this function uses PHP 5.4's transliterator, which is capable of converting
 * Arabic, Hebrew, Greek, Chinese, Japanese and more into ASCII. However, the manual (and crude)
 * list below is applied *first*, because it takes the liberty of transliterating some things
 * into more readable ASCII-friendly forms, e.g. "100℃" > "100degc" instead of "100oc".
 *
 * Processing order:
 *   1. manual replacement list,
 *   2. intl transliteration when all required transliterator IDs exist, otherwise iconv,
 *      otherwise a plain strtolower(),
 *   3. every character outside [_a-z0-9-] becomes "-", runs of "-" are collapsed and a
 *      leading/trailing "-" is removed.
 *
 * @param string $text UTF-8 text to transliterate.
 *
 * @return string The slug, or "_" when nothing is left (a blank response is never returned).
 */
function safeTransliterate($text)
{
    /* manual transliteration list:
       -------------------------------------------------------------------------------------------------------------- */
    /* this list is supposed to be practical, not comprehensive, representing:
          1. the most common accents and special letters that get typed, and
          2. the most practical transliterations for readability;

          given that I know nothing of other languages, I will need your assistance to improve this list,
          mail <*****@*****.**> with help and suggestions.

          this data was produced with the help of:
          http://www.unicode.org/charts/normalization/
          http://www.yuiblog.com/sandbox/yui/3.3.0pr3/api/text-data-accentfold.js.html
          http://www.utf8-chartable.de/
       */
    static $translit = array(
        'a' => '/[ÀÁÂẦẤẪẨÃĀĂẰẮẴȦẲǠẢÅÅǺǍȀȂẠẬẶḀĄẚàáâầấẫẩãāăằắẵẳȧǡảåǻǎȁȃạậặḁą]/u',
        'b' => '/[ḂḄḆḃḅḇ]/u',
        'c' => '/[ÇĆĈĊČḈçćĉċčḉ]/u',
        'd' => '/[ÐĎḊḌḎḐḒďḋḍḏḑḓð]/u',
        'e' => '/[ÈËĒĔĖĘĚȄȆȨḔḖḘḚḜẸẺẼẾỀỂỄỆèëēĕėęěȅȇȩḕḗḙḛḝẹẻẽếềểễệ]/u',
        'f' => '/[Ḟḟ]/u',
        'g' => '/[ĜĞĠĢǦǴḠĝğġģǧǵḡ]/u',
        'h' => '/[ĤȞḢḤḦḨḪĥȟḣḥḧḩḫẖ]/u',
        'i' => '/[ÌÏĨĪĬĮİǏȈȊḬḮỈỊiìïĩīĭįǐȉȋḭḯỉị]/u',
        'j' => '/[Ĵĵǰ]/u',
        'k' => '/[ĶǨḰḲḴKķǩḱḳḵ]/u',
        'l' => '/[ĹĻĽĿḶḸḺḼĺļľŀḷḹḻḽ]/u',
        'm' => '/[ḾṀṂḿṁṃ]/u',
        'n' => '/[ÑŃŅŇǸṄṆṈṊñńņňǹṅṇṉṋ]/u',
        'o' => '/[ÒŌŎŐƠǑǪǬȌȎȬȮȰṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢØǾòōŏőơǒǫǭȍȏȭȯȱṍṏṑṓọỏốồổỗộớờởỡợøǿ]/u',
        'p' => '/[ṔṖṕṗ]/u',
        'r' => '/[ŔŖŘȐȒṘṚṜṞŕŗřȑȓṙṛṝṟ]/u',
        's' => '/[ŚŜŞŠȘṠṢṤṦṨſśŝşšșṡṣṥṧṩ]/u',
        'ss' => '/[ß]/u',
        't' => '/[ŢŤȚṪṬṮṰţťțṫṭṯṱẗ]/u',
        'th' => '/[Þþ]/u',
        'u' => '/[ÙŨŪŬŮŰŲƯǓȔȖṲṴṶṸṺỤỦỨỪỬỮỰùũūŭůűųưǔȕȗṳṵṷṹṻụủứừửữựµ]/u',
        'v' => '/[ṼṾṽṿ]/u',
        'w' => '/[ŴẀẂẄẆẈŵẁẃẅẇẉẘ]/u',
        'x' => '/[ẊẌẋẍ×]/u',
        'y' => '/[ÝŶŸȲẎỲỴỶỸýÿŷȳẏẙỳỵỷỹ]/u',
        'z' => '/[ŹŻŽẐẒẔźżžẑẓẕ]/u',
        'ae' => '/[ÄǞÆǼǢäǟæǽǣ]/u',
        'oe' => '/[ÖȪŒöȫœ]/u',
        // The digraph/ligature entries below are written as explicit code points. Spelled as
        // literal characters they are easily flattened into plain ASCII letters by text
        // normalisation, which would make the classes match (and multiply) every ordinary
        // "f", "i", "l", "j", "s", "t", ... in the input.
        'dz' => '/[\x{01C4}\x{01C5}\x{01F1}\x{01F2}\x{01C6}\x{01F3}]/u', // DZ-caron and DZ digraphs
        'ff' => '/\x{FB00}/u',                                           // ff ligature
        'ffi' => '/\x{FB03}/u',                                          // ffi ligature
        'fi' => '/\x{FB01}/u',                                           // fi ligature
        'ffl' => '/\x{FB04}/u',                                          // ffl ligature
        'fl' => '/\x{FB02}/u',                                           // fl ligature
        'ij' => '/[\x{0132}\x{0133}]/u',                                 // IJ digraphs
        'lj' => '/[\x{01C7}\x{01C8}\x{01C9}]/u',                         // LJ digraphs
        'nj' => '/[\x{01CA}\x{01CB}\x{01CC}]/u',                         // NJ digraphs
        'st' => '/[\x{FB05}\x{FB06}]/u',                                 // long-s-t and st ligatures
        'ue' => '/[ÜǕǗǙǛüǖǘǚǜ]/u',
        'eur' => '/[€]/u',
        'cents' => '/[¢]/u',
        'lira' => '/[₤]/u',
        'dollars' => '/[$]/u',
        'won' => '/[₩]/u',
        'rs' => '/[₨]/u',
        'yen' => '/[¥]/u',
        'pounds' => '/[£]/u',
        'pts' => '/[₧]/u',
        'degc' => '/[℃]/u',
        'degf' => '/[℉]/u',
        'no' => '/[№]/u',
        '-tm' => '/[™]/u',
    );

    // do the manual transliteration first
    $text = preg_replace(array_values($translit), array_keys($translit), $text);

    // use the intl transliterator only when every transliterator ID we rely on is present
    $requiredIds = array('Any-NFKD', 'Any-Latin', 'Latin-ASCII', 'Any-Remove', 'Any-Lower');
    $useIntl = false;
    if (function_exists('transliterator_transliterate') && function_exists('transliterator_list_ids')) {
        // transliterator_list_ids() returns false on failure
        $availableIds = transliterator_list_ids();
        $useIntl = is_array($availableIds)
            && count(array_intersect($requiredIds, $availableIds)) === count($requiredIds);
    }

    if ($useIntl) {
        $ascii = transliterator_transliterate(
            'NFKD; ' . 'Latin; ' . 'Latin/US-ASCII; ' . '[:Nonspacing Mark:] Remove; ' . 'Lower',
            $text
        );
    } elseif (function_exists('iconv')) {
        $ascii = iconv('UTF-8', 'US-ASCII//IGNORE//TRANSLIT', $text);
        if ($ascii !== false) {
            // drop the quote/accent-like marks; presumably leftovers of iconv's transliteration
            $ascii = str_replace(array("'", '"', '`', '^', '~'), '', strtolower($ascii));
        }
    } else {
        $ascii = strtolower($text);
    }

    // a failed conversion (false) is treated as an empty string
    if (!is_string($ascii)) {
        $ascii = '';
    }

    // flatten the text down to just a-z0-9 underscore and dash for spaces
    // (<www.mattcutts.com/blog/dashes-vs-underscores/>)
    $slug = preg_replace(array('/[^_a-z0-9-]/i', '/-{2,}/', '/^-|-$/'), array('-', '-', ''), $ascii);

    // old iconv versions and certain inputs may cause a null-string. don't allow a blank response;
    // compare against '' explicitly so that the valid result "0" is not mistaken for blank
    return ($slug === null || $slug === '') ? '_' : $slug;
}