# Tail of the row-selection loop (its header lies above this chunk): an element
# is kept only when its id is "e-" followed by three word characters and it
# contains exactly seven <td> cells.
    if (!preg_match('/^e-\\w{3}$/', $item->getAttribute('id'))) {
        continue;
    }
    if (7 !== count(get_elts_by_tag($item, 'td'))) {
        continue;
    }
    $trs[] = $item;
}
fwrite(STDERR, "trs count:" . count($trs) . "\n");

#
# Walk the selected rows and turn each one into a record; the seven cells are
# read positionally (0..6) and handed to the matching parse_* helper.
#
$items = array();
foreach ($trs as $row) {
    $cells = get_elts_by_tag($row, 'td');

    $item = array();
    $item['mapid'] = parse_mapid($cells[0]);
    $item['unicode'] = parse_unicode($cells[1]);
    $item['char_name'] = parse_char_name($cells[2]);
    $item['docomo'] = parse_mobile($cells[3]);
    $item['au'] = parse_mobile($cells[4]);
    $item['softbank'] = parse_mobile($cells[5]);
    $item['google'] = parse_google($cells[6]);

    $items[] = $item;
}
fwrite(STDERR, "codepoint count:" . count($items) . "\n");

#
# Drop invalid codepoints via filter_only_kaomoji(), reporting the count again
# afterwards so the effect of the filter is visible on stderr.
#
fwrite(STDERR, "filter only_kaomoji ; like e-554 -> [A] -> [A] -> [A] -> [A]\n");
$items = filter_only_kaomoji($items);
fwrite(STDERR, "codepoint count:" . count($items) . "\n");

#
# Emit the catalog on stdout as a PHP file assigning $catalog. The open/close
# tags are assembled from pieces, exactly as before, so this source never
# contains a literal PHP tag inside a string.
#
echo "<" . "?php \$catalog = " . var_export($items, true) . "; ?" . ">";
// Read the HTML table named by the first command-line argument.
// These checks are explicit because the previous "@" prefix silenced a missing
// argument and an unreadable file alike, leaving the script to report zero rows.
if (!isset($argv[1])) {
    fprintf(STDERR, "usage: php %s <input.html>\n", isset($argv[0]) ? $argv[0] : 'script.php');
    exit(1);
}
$source_html = file_get_contents($argv[1]);
if ($source_html === false || $source_html === '') {
    // An empty document cannot be parsed (PHP 8 raises ValueError for it).
    fprintf(STDERR, "cannot read input file (or it is empty): %s\n", $argv[1]);
    exit(1);
}

// Every <br> variant becomes a newline before parsing.
// NOTE(review): presumably so cell text keeps its line breaks - confirm against the parse_* helpers.
$source_html = str_ireplace(array('<br>', '<br/>', '<br />'), "\n", $source_html);

// Collect libxml's complaints about sloppy markup internally instead of
// emitting warnings, then restore whatever setting was active before.
$doc = new DOMDocument();
$libxml_previous = libxml_use_internal_errors(true);
$loaded = $doc->loadHTML($source_html);
libxml_clear_errors();
libxml_use_internal_errors($libxml_previous);
if ($loaded === false) {
    fprintf(STDERR, "cannot parse HTML from: %s\n", $argv[1]);
    exit(1);
}

// Select the <tr> elements to process. The three predicates are handed to
// filter_els(); presumably a row must satisfy all of them - verify in filter_els.
$trs = get_elts_by_tag($doc, 'tr');
$trs = filter_els($trs, array(
    function ($item) {
        return cond_attr_not($item, 'class', 'not_in_proposal');
    },
    function ($item) {
        return cond_attr_match($item, 'id', '/^e-\\w{3}$/');
    },
    function ($item) {
        // The loop below reads cells 0..6, so exactly seven <td>s are required.
        return 7 === count(get_elts_by_tag($item, 'td'));
    },
));
fprintf(STDERR, "trs count:" . count($trs) . "\n");

// Convert each selected row into one mapping record, reading cells by position.
$mapping = array();
foreach ($trs as $tr) {
    $tds = get_elts_by_tag($tr, 'td');
    $map = array(
        'mapid' => parse_mapid($tds[0]),
        'unicode' => parse_unicode($tds[1]),
        'char_name' => parse_char_name($tds[2]),
        'docomo' => parse_mobile($tds[3]),
        'au' => parse_mobile($tds[4]),
        'softbank' => parse_mobile($tds[5]),
        'google' => parse_google($tds[6]),
    );
    $mapping[] = $map;
}
fprintf(STDERR, "mapping count:" . count($mapping) . "\n");

// Filter invalid mappings; each filter announces itself on stderr first.
fprintf(STDERR, "filter only_kaomoji ; like e-554 -> [A] -> [A] -> [A] -> [A]\n");
$mapping = filter_only_kaomoji($mapping);
fprintf(STDERR, "mapping count:" . count($mapping) . "\n");

fprintf(STDERR, "filter chars-group ; like #44+#139\n");
$mapping = filter_chars_group($mapping);