示例#1
0
 /**
  * Per-character callback: records an implicit collation weight for one
  * character record, unless the character is filtered out.
  *
  * Reads the keys 'gc', 'cp', 'block', 'UIdeo' and 'dm' from $data.
  * 'cp' is the code point as a hexadecimal string. The other keys
  * presumably mirror the Unicode Character Database attributes of the
  * same names (general category, block, Unified_Ideograph, decomposition
  * mapping) -- confirm against the caller that builds $data.
  *
  * Side effects: may set $this->weights[$cp] and $this->mappedChars[$cp],
  * and prints the hex code point for every code point divisible by 4096
  * that is not filtered out.
  *
  * @param array $data Character record, see above.
  * @return void
  */
 function charCallback($data)
 {
     // Only keep characters whose major category is L, N, P or S.
     // The plain space (U+0020) is let through as well, because people
     // like to use it as a fake "no header" symbol.
     $majorCategory = substr($data['gc'], 0, 1);
     $isPrintable = strpos('LNPS', $majorCategory) !== false;
     if (!($isPrintable || $data['cp'] === '0020')) {
         return;
     }

     $cp = hexdec($data['cp']);

     // Two kinds of characters are dropped here:
     //  - the CJK ideograph blocks, as an optimisation measure (UCA doesn't
     //    sort them properly anyway, without tailoring);
     //  - the composed Hangul syllables, since the bare Jamo are used as
     //    first letters instead.
     if (IcuCollation::isCjk($cp) || $data['block'] == 'Hangul Syllables') {
         return;
     }

     // Implicit weight per UTS #10 v6.0.0, sec 7.1.3: the base depends on
     // whether the character is a unified ideograph and, if so, on its block.
     if ($data['UIdeo'] !== 'Y') {
         $base = 0xFBC0;
     } else {
         $inCoreHanBlock = $data['block'] == 'CJK Unified Ideographs'
             || $data['block'] == 'CJK Compatibility Ideographs';
         $base = $inCoreHanBlock ? 0xFB40 : 0xFB80;
     }

     // First element carries the bits above the low 15; the second carries
     // the low 15 bits with the top bit forced on.
     $firstWeight = $base + ($cp >> 15);
     $secondWeight = ($cp & 0x7FFF) | 0x8000;
     $this->weights[$cp] = sprintf(".%04X.%04X", $firstWeight, $secondWeight);

     // '#' presumably marks "no decomposition mapping" -- verify against the
     // data source. Anything else is flagged as a mapped character.
     if ($data['dm'] !== '#') {
         $this->mappedChars[$cp] = true;
     }

     // Progress output: one line for each 4096-aligned code point that reaches here.
     if ($cp % 4096 === 0) {
         print $data['cp'] . "\n";
     }
 }