PHP UtfNormal::toNFKD Examples

Programming Language: PHP

Class/Type: UtfNormal

Method/Function: toNFKD

Examples at hotexamples.com: 3

PHP UtfNormal::toNFKD - 3 examples found. These are the top-rated real-world PHP examples of UtfNormal::toNFKD, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

cleanUp(30)

toNFC(7)

quickIsNFCVerify(6)

toNFD(5)

fastCombiningSort(3)

fastDecompose(3)

toNFKD(3)

NFC(2)

NFKC(2)

loadData(2)

quickIsNFC(2)

NFD(1)

NFKD(1)

fastCompose(1)

quickisNFCVerify(1)

toNFKC(1)

Example #1

Show file

File: AntiSpoof_body.php Project: jasonthebomb/mediawiki-extensions-AntiSpoof

 /**
  * Validate a name and reduce it to a canonical form for spoof comparison.
  *
  * Steps, in order: sanity-check the input, reject blacklisted characters
  * (both before and after NFKD decomposition), reject a leading combining
  * mark, strip combining marks, reject unassigned/deprecated characters,
  * check the mix of scripts, map characters through self::equivString(),
  * merge the "VV" and "RN" pairs, strip ASCII punctuation and spaces, and
  * finally prefix the result with "v2:".
  *
  * TODO: does too much in one routine, refactor...
  *
  * @param $testName Name to check; a non-string or empty string yields an ERROR result
  * @return array array("OK", $canonicalName) on success, array("ERROR", $message)
  *   for failures reported directly here, or whatever self::badCharErr()
  *   returns for a rejected character (presumably also an ERROR pair --
  *   confirm against badCharErr)
  * @throws MWException if $wgAntiSpoofBlacklist is not an array
  */
 public static function checkUnicodeString($testName)
 {
     global $wgAntiSpoofBlacklist;
     # Start with some sanity checking
     # A misconfigured blacklist is a setup error, so it throws instead of
     # returning an ERROR result
     if (!is_array($wgAntiSpoofBlacklist)) {
         throw new MWException('$wgAntiSpoofBlacklist should be an array!');
     }
     if (!is_string($testName)) {
         return array("ERROR", wfMessage('antispoof-badtype')->text());
     }
     if (strlen($testName) == 0) {
         return array("ERROR", wfMessage('antispoof-empty')->text());
     }
     # First blacklist pass: runs on the name exactly as supplied
     foreach (self::stringToList($testName) as $char) {
         if (in_array($char, $wgAntiSpoofBlacklist)) {
             return self::badCharErr('antispoof-blacklisted', $char);
         }
     }
     # Perform Unicode _compatibility_ decomposition
     $testName = UtfNormal::toNFKD($testName);
     $testChars = self::stringToList($testName);
     # Be paranoid: check again, just in case Unicode normalization code changes...
     # (second blacklist pass, this time over the decomposed characters)
     foreach ($testChars as $char) {
         if (in_array($char, $wgAntiSpoofBlacklist)) {
             return self::badCharErr('antispoof-blacklisted', $char);
         }
     }
     # Check for this: should not happen in any valid Unicode string
     # (only the first decomposed character is tested for being a combining mark)
     if (self::getScriptCode($testChars[0]) == "SCRIPT_COMBINING_MARKS") {
         return self::badCharErr('antispoof-combining', $testChars[0]);
     }
     # Strip all combining characters in order to crudely strip accents
     # Note: NFKD normalization should have decomposed all accented chars earlier
     $testChars = self::stripScript($testChars, "SCRIPT_COMBINING_MARKS");
     # One script code per remaining character
     $testScripts = array_map(array('AntiSpoof', 'getScriptCode'), $testChars);
     # array_search() returns the matching key or false; the key may be 0,
     # hence the strict !== comparison below
     $unassigned = array_search("SCRIPT_UNASSIGNED", $testScripts);
     if ($unassigned !== False) {
         return self::badCharErr('antispoof-unassigned', $testChars[$unassigned]);
     }
     $deprecated = array_search("SCRIPT_DEPRECATED", $testScripts);
     if ($deprecated !== False) {
         return self::badCharErr('antispoof-deprecated', $testChars[$deprecated]);
     }
     # From here on only the set of distinct scripts matters
     $testScripts = array_unique($testScripts);
     # We don't mind ASCII punctuation or digits
     $testScripts = array_diff($testScripts, array("SCRIPT_ASCII_PUNCTUATION", "SCRIPT_ASCII_DIGITS"));
     # Nothing left means the name consisted only of punctuation and/or digits
     if (!$testScripts) {
         return array("ERROR", wfMessage('antispoof-noletters')->text());
     }
     # More than one script is accepted only if isAllowedScriptCombination() approves
     if (count($testScripts) > 1 && !self::isAllowedScriptCombination($testScripts)) {
         return array("ERROR", wfMessage('antispoof-mixedscripts')->text());
     }
     # At this point, we should probably check for BiDi violations if they aren't
     # caught above...
     # Replace characters in confusables set with equivalence chars
     $testChars = self::equivString($testChars);
     # Do very simple sequence processing: "vv" -> "w", "rn" -> "m"...
     # Not exhaustive, but ups the ante...
     # Do this _after_ canonicalization: looks weird, but needed for consistency
     # (the pair and its replacement are themselves passed through equivString(),
     # so they are expressed in the same equivalence characters as $testChars)
     $testChars = self::mergePairs($testChars, self::equivString(self::stringToList("VV")), self::equivString(self::stringToList("W")));
     $testChars = self::mergePairs($testChars, self::equivString(self::stringToList("RN")), self::equivString(self::stringToList("M")));
     # Squeeze out all punctuation chars
     # TODO: almost the same code occurs twice, refactor into own routine
     $testChars = self::stripScript($testChars, "SCRIPT_ASCII_PUNCTUATION");
     $testName = self::listToString($testChars);
     # Remove all remaining spaces, just in case any have snuck through...
     # NOTE(review): hardjoin() presumably concatenates the pieces with no
     # separator -- confirm against its definition
     $testName = self::hardjoin(explode(" ", $testName));
     # Reduce repeated char sequences to single character
     # BUG: TODO: implement this
     # The stripping above may have left nothing behind
     if (strlen($testName) < 1) {
         return array("ERROR", wfMessage('antispoof-tooshort')->text());
     }
     # Don't ASCIIfy: we assume we are UTF-8 capable on output
     # Prepend version string, for futureproofing if this algorithm changes
     $testName = "v2:" . $testName;
     # And return the canonical version of the name
     return array("OK", $testName);
 }

Example #2

Show file

File: UtfNormalTest2.php Project: seedbank/old-repo

/**
 * Thin wrapper that runs a value through UtfNormal::toNFKD().
 *
 * NOTE(review): the extra "php" argument is passed along as-is; what (if
 * anything) UtfNormal::toNFKD() does with it is not visible here -- confirm
 * against the UtfNormal implementation in use.
 *
 * @param $c Value handed to UtfNormal::toNFKD() unchanged
 * @return mixed Whatever UtfNormal::toNFKD() returns
 */
function normalize_form_kd_php($c)
{
    $decomposed = UtfNormal::toNFKD($c, "php");

    return $decomposed;
}

Example #3

Show file

File: class.ilShibboleth.php Project: Walid-Synakene/ilias

 /**
  * Replaces any non-ASCII character by its linguistically most logical substitution
  *
  * The string is NFKD-normalized first, German umlauts become the base
  * letter plus "e", a fixed set of letters that have no Unicode
  * decomposition (ae ligature, thorn, eth, d with stroke, sharp s) is
  * transliterated, and finally every byte that is not an ASCII letter or a
  * hyphen is removed.
  *
  * @access private
  * @param string $string A Shibboleth attribute or other string
  * @return string ascii-version of attribute
  */
 function toAscii($string)
 {
     require_once 'include/Unicode/UtfNormal.php';
     // Normalize to NFKD.
     // This separates letters from combining marks.
     // See http://unicode.org/reports/tr15
     $string = UtfNormal::toNFKD($string);
     // Replace german usages of diaeresis by appending an e.
     // After NFKD an umlaut is the base letter followed by
     // U+0308 COMBINING DIAERESIS (UTF-8 bytes CC 88).
     $string = preg_replace('/([aouAOU])\\xcc\\x88/', '\\1e', $string);
     // Letters without a decomposition survive NFKD untouched, so they are
     // transliterated explicitly. Keys are the raw UTF-8 byte sequences.
     // None of the replacements contains a key, so a single strtr() pass
     // gives the same result as replacing them one after another.
     $string = strtr($string, array(
         "\xc3\x86" => 'AE', // U+00C6 LATIN CAPITAL LETTER AE
         "\xc3\xa6" => 'ae', // U+00E6 LATIN SMALL LETTER AE
         "\xc3\x9e" => 'TH', // U+00DE LATIN CAPITAL LETTER THORN
         "\xc3\xbe" => 'th', // U+00FE LATIN SMALL LETTER THORN
         "\xc3\x90" => 'D',  // U+00D0 LATIN CAPITAL LETTER ETH
         "\xc3\xb0" => 'd',  // U+00F0 LATIN SMALL LETTER ETH (was missing and got stripped)
         "\xc4\x90" => 'D',  // U+0110 LATIN CAPITAL LETTER D WITH STROKE
         "\xc4\x91" => 'd',  // U+0111 LATIN SMALL LETTER D WITH STROKE
         "\xc3\x9f" => 'ss', // U+00DF LATIN SMALL LETTER SHARP S
     ));
     // Get rid of everything except the characters a to z and the hyphen
     // (both cases are already listed in the class, so no /i flag is needed)
     $string = preg_replace('/[^a-zA-Z\\-]/', '', $string);
     return $string;
 }