PHP UtfNormal::loadData Examples

Programming Language: PHP

Class/Type: UtfNormal

Method/Function: loadData

Examples at hotexamples.com: 2

PHP UtfNormal::loadData - 2 examples found. These are the top-rated real-world PHP examples of UtfNormal::loadData extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

cleanUp(30)

toNFC(7)

quickIsNFCVerify(6)

toNFD(5)

fastCombiningSort(3)

fastDecompose(3)

toNFKD(3)

NFC(2)

NFKC(2)

loadData(2)

quickIsNFC(2)

NFD(1)

NFKD(1)

fastCompose(1)

quickisNFCVerify(1)

toNFKC(1)

Example #1

Show file

File: UtfNormalMemStress.php Project: Grprashanthkumar/ColfusionWeb

// This benchmark prints plain-text progress, so refuse to run under any
// SAPI other than the command line.
if (PHP_SAPI !== 'cli') {
    exit("Run me from the command line please.\n");
}

// Passing --icu on the command line loads php_utfnormal.so before the
// library files below are included.
if (in_array('--icu', isset($_SERVER['argv']) ? $_SERVER['argv'] : array())) {
    dl('php_utfnormal.so');
}

require_once 'UtfNormalDefines.php';
require_once 'UtfNormalUtil.php';
require_once 'UtfNormal.php';

define('BENCH_CYCLES', 1);
// 10m: each test file is repeated until it is at least this many bytes.
define('BIGSIZE', 10 * 1024 * 1024);

// Allow the big buffer plus 120 MiB of headroom.
ini_set('memory_limit', BIGSIZE + 1024 * 1024 * 120);

// Test corpus: path => human-readable description.
$testfiles = array(
    'testdata/washington.txt' => 'English text',
    'testdata/berlin.txt' => 'German text',
    'testdata/bulgakov.txt' => 'Russian text',
    'testdata/tokyo.txt' => 'Japanese text',
    'testdata/young.txt' => 'Korean text',
);

$normalizer = new UtfNormal();
// Load the normalization tables up front, before any test file is processed.
UtfNormal::loadData();

foreach ($testfiles as $file => $desc) {
    benchmarkTest($normalizer, $file, $desc);
}
# -------
function benchmarkTest(&$u, $filename, $desc)
{
    print "Testing {$filename} ({$desc})...\n";
    $data = file_get_contents($filename);
    $all = $data;
    while (strlen($all) < BIGSIZE) {
        $all .= $all;
    }
    $data = $all;
    echo "Data is " . strlen($data) . " bytes.\n";
    $forms = array('quickIsNFCVerify', 'cleanUp');

Example #2

Show file

File: UtfNormal.php Project: GodelDesign/Godel

 /**
  * Produces canonically composed sequences, i.e. normal form C or KC.
  *
  * The input is walked one UTF-8 sequence at a time. The current starter is
  * held in $startChar and any combining characters that could not be merged
  * into it are queued in $combining; both are flushed to the output when the
  * next starter is found. Pairs are composed through the
  * self::$utfCanonicalComp lookup table, and Hangul jamo are composed with
  * arithmetic performed directly on the UTF-8 bytes.
  *
  * @private
  * @param $string String: a valid UTF-8 string in sorted normal form D or KD. Input is not validated.
  * @return string a UTF-8 string with canonical precomposed characters used where possible
  */
 static function fastCompose($string)
 {
     UtfNormal::loadData();
     $len = strlen($string);
     $out = '';
     # Combining class of the previously seen character; -1 until one is seen.
     $lastClass = -1;
     # Set to 1 right after a trailing jamo has been merged into $startChar,
     # so that a second trailing jamo is not merged into the same syllable.
     $lastHangul = 0;
     $startChar = '';
     $combining = '';
     # Lead bytes of the first vowel jamo and the last trailing jamo; used
     # below as a cheap one-byte pre-filter before the string comparisons.
     $x1 = ord(substr(UTF8_HANGUL_VBASE, 0, 1));
     $x2 = ord(substr(UTF8_HANGUL_TEND, 0, 1));
     for ($i = 0; $i < $len; $i++) {
         $c = $string[$i];
         $n = ord($c);
         if ($n < 0x80) {
             # No combining characters here...
             $out .= $startChar;
             $out .= $combining;
             $startChar = $c;
             $combining = '';
             $lastClass = 0;
             continue;
         } elseif ($n >= 0xf0) {
             # Lead byte of a 4-byte sequence; skip its continuation bytes.
             $c = substr($string, $i, 4);
             $i += 3;
         } elseif ($n >= 0xe0) {
             # Lead byte of a 3-byte sequence.
             $c = substr($string, $i, 3);
             $i += 2;
         } elseif ($n >= 0xc0) {
             # Lead byte of a 2-byte sequence.
             $c = substr($string, $i, 2);
             $i++;
         }
         # Lookup key for the composition table: current starter + this char.
         $pair = $startChar . $c;
         if ($n > 0x80) {
             if (isset(self::$utfCombiningClass[$c])) {
                 # A combining char; see what we can do with it
                 $class = self::$utfCombiningClass[$c];
                 if (!empty($startChar) && $lastClass < $class && $class > 0 && isset(self::$utfCanonicalComp[$pair])) {
                     # Merged into the starter; the result counts as class 0.
                     $startChar = self::$utfCanonicalComp[$pair];
                     $class = 0;
                 } else {
                     $combining .= $c;
                 }
                 $lastClass = $class;
                 $lastHangul = 0;
                 continue;
             }
         }
         # New start char
         if ($lastClass == 0) {
             if (isset(self::$utfCanonicalComp[$pair])) {
                 $startChar = self::$utfCanonicalComp[$pair];
                 $lastHangul = 0;
                 continue;
             }
             if ($n >= $x1 && $n <= $x2) {
                 # WARNING: Hangul code is painfully slow.
                 # I apologize for this ugly, ugly code; however
                 # performance is even more teh suck if we call
                 # out to nice clean functions. Lookup tables are
                 # marginally faster, but require a lot of space.
                 #
                 if ($c >= UTF8_HANGUL_VBASE && $c <= UTF8_HANGUL_VEND && $startChar >= UTF8_HANGUL_LBASE && $startChar <= UTF8_HANGUL_LEND) {
                     # Leading consonant + vowel => LV syllable.
                     #
                     #$lIndex = utf8ToCodepoint( $startChar ) - UNICODE_HANGUL_LBASE;
                     #$vIndex = utf8ToCodepoint( $c ) - UNICODE_HANGUL_VBASE;
                     $lIndex = ord($startChar[2]) - 0x80;
                     $vIndex = ord($c[2]) - 0xa1;
                     $hangulPoint = UNICODE_HANGUL_FIRST + UNICODE_HANGUL_TCOUNT * (UNICODE_HANGUL_VCOUNT * $lIndex + $vIndex);
                     # Hardcode the limited-range UTF-8 conversion:
                     $startChar = chr($hangulPoint >> 12 & 0xf | 0xe0) . chr($hangulPoint >> 6 & 0x3f | 0x80) . chr($hangulPoint & 0x3f | 0x80);
                     $lastHangul = 0;
                     continue;
                 } elseif ($c > UTF8_HANGUL_TBASE && $c <= UTF8_HANGUL_TEND && $startChar >= UTF8_HANGUL_FIRST && $startChar <= UTF8_HANGUL_LAST && !$lastHangul) {
                     # Syllable + trailing consonant => LVT syllable.
                     # The lower bound is exclusive: TBASE itself has index 0,
                     # so composing it would add nothing to the syllable and
                     # silently drop the character from the output. The
                     # Unicode algorithm requires 0 < TIndex < TCount.
                     #
                     # $tIndex = utf8ToCodepoint( $c ) - UNICODE_HANGUL_TBASE;
                     $tIndex = ord($c[2]) - 0xa7;
                     if ($tIndex < 0) {
                         # The jamo crossed into the next 64-code-point UTF-8
                         # block (U+11C0 and up), so rebase on that block.
                         $tIndex = ord($c[2]) - 0x80 + (0x11c0 - 0x11a7);
                     }
                     # Increment the code point by $tIndex, without
                     # the function overhead of decoding and recoding UTF-8
                     #
                     $tail = ord($startChar[2]) + $tIndex;
                     if ($tail > 0xbf) {
                         # Carry out of the last continuation byte...
                         $tail -= 0x40;
                         $mid = ord($startChar[1]) + 1;
                         if ($mid > 0xbf) {
                             # ...and, if needed, out of the middle byte too.
                             $startChar[0] = chr(ord($startChar[0]) + 1);
                             $mid -= 0x40;
                         }
                         $startChar[1] = chr($mid);
                     }
                     $startChar[2] = chr($tail);
                     # If there's another jamo char after this, *don't* try to merge it.
                     $lastHangul = 1;
                     continue;
                 }
             }
         }
         # Nothing composed: flush the previous starter with its pending
         # combining characters and begin a new run with this character.
         $out .= $startChar;
         $out .= $combining;
         $startChar = $c;
         $combining = '';
         $lastClass = 0;
         $lastHangul = 0;
     }
     # Flush whatever is still pending at end of input.
     $out .= $startChar . $combining;
     return $out;
 }