/**
 * Per-character callback: records an implicit collation weight for every
 * character that survives the filters below.
 *
 * Reads the 'gc', 'cp', 'block', 'UIdeo' and 'dm' entries of $data. On a
 * character that is kept, it writes $this->weights[$cp] and, depending on
 * 'dm', $this->mappedChars[$cp]. It also prints the code point as a progress
 * marker whenever the code point is a multiple of 4096.
 *
 * @param array $data Character properties keyed by short property name;
 *   'cp' is the code point as a hexadecimal string.
 * @return void
 */
function charCallback($data) {
    // Skip non-printable characters,
    // but do not skip a normal space (U+0020) since
    // people like to use that as a fake no header symbol.
    //
    // A strict in_array() is used instead of strpos('LNPS', ...): on PHP 8,
    // strpos() with an empty needle returns 0 rather than false, which would
    // let a record with an empty category slip through this filter.
    $category = substr($data['gc'], 0, 1);
    $isPrintable = in_array($category, ['L', 'N', 'P', 'S'], true);
    if (!$isPrintable && $data['cp'] !== '0020') {
        return;
    }

    $cp = hexdec($data['cp']);

    // Skip the CJK ideograph blocks, as an optimisation measure.
    // UCA doesn't sort them properly anyway, without tailoring.
    if (IcuCollation::isCjk($cp)) {
        return;
    }

    // Skip the composed Hangul syllables, we will use the bare Jamo
    // as first letters.
    if ($data['block'] === 'Hangul Syllables') {
        return;
    }

    // Calculate implicit weight per UTS #10 v6.0.0, sec 7.1.3.
    // The base depends on whether the character is a unified ideograph and,
    // if so, on which block it lives in.
    if ($data['UIdeo'] === 'Y') {
        if ($data['block'] === 'CJK Unified Ideographs'
            || $data['block'] === 'CJK Compatibility Ideographs'
        ) {
            $base = 0xfb40;
        } else {
            $base = 0xfb80;
        }
    } else {
        $base = 0xfbc0;
    }

    // First word: base plus the bits of the code point above the low 15.
    // Second word: the low 15 bits with the top bit (0x8000) forced on.
    $a = $base + ($cp >> 15);
    $b = ($cp & 0x7fff) | 0x8000;

    $this->weights[$cp] = sprintf(".%04X.%04X", $a, $b);

    // NOTE(review): 'dm' is presumably the decomposition mapping, with '#'
    // meaning "maps to itself" -- confirm against the data source.
    if ($data['dm'] !== '#') {
        $this->mappedChars[$cp] = true;
    }

    // Progress output: one line every 4096 code points.
    if ($cp % 4096 === 0) {
        print "{$data['cp']}\n";
    }
}