/**
 * Converts the hiragana and kanji in a string to katakana wherever possible
 * and joins the resulting pieces with a hyphen-minus.
 *
 * The input is split by the shared Igo tagger; every element returned by
 * parse() contributes exactly one piece to the output.
 *
 * @param string $kanjiAndKanaString Text to convert.
 * @return string The converted pieces joined with "-".
 */
protected static function translateUsingKatakana(string $kanjiAndKanaString) : string
{
    // Create the tagger on first use and keep it in the static property for later calls.
    if (!static::$igo) {
        static::$igo = new \Igo\Tagger(['dict_dir' => __DIR__ . '/../../naist-jdic']);
    }

    $toKatakana = static function ($morpheme) {
        $feature = explode(',', $morpheme->feature);

        // Without a ninth feature field there is nothing to substitute, so keep the text as written.
        // NOTE(review): the ninth field is presumably the katakana pronunciation in naist-jdic; confirm.
        if (!isset($feature[8])) {
            return $morpheme->surface;
        }

        // Morphemes whose second feature field is "数" (number) or "アルファベット" (alphabet)
        // keep their surface form. The strict flag keeps this a plain string comparison.
        if (in_array($feature[1], ['数', 'アルファベット'], true)) {
            return $morpheme->surface;
        }

        return $feature[8];
    };

    return implode('-', array_map($toKatakana, static::$igo->parse($kanjiAndKanaString)));
}
<?php

use Igo\Tagger;

/*
 * Example autoloader: turns a class name into a file path below ../lib/.
 * Namespace separators, and underscores in the final name segment, both
 * become directory separators.
 */
spl_autoload_register(function ($className) {
    $segments = explode('\\', $className);

    // Underscores in the last segment are treated as directory separators
    // so that non-namespaced classes are supported as well.
    $lastIndex = count($segments) - 1;
    $segments[$lastIndex] = str_replace('_', DIRECTORY_SEPARATOR, $segments[$lastIndex]);

    $resolved = stream_resolve_include_path(
        '../lib/' . implode(DIRECTORY_SEPARATOR, $segments) . '.php'
    );

    // Nothing is loaded when the file cannot be resolved.
    if ($resolved === false) {
        return;
    }

    require $resolved;
});

// Options handed to the tagger: dictionary directory and output encoding.
$options = array(
    'dict_dir' => '../jdic',
    'output_encoding' => 'Shift_JIS',
);
$tagger = new Tagger($options);

// Run parse() on the sample sentence and dump whatever it returns.
$morphemes = $tagger->parse("すもももももももものうち");
print_r($morphemes);

// Report the peak memory usage of this run.
echo memory_get_peak_usage() . "\n";
<?php

use Igo\Tagger;

/*
 * Example autoloader: builds a path below ../lib/ from the class name.
 * Each namespace part is one path component, and underscores in the last
 * part are replaced with directory separators.
 */
spl_autoload_register(function ($className) {
    $pieces = explode('\\', $className);

    // Rewrite only the final piece; this is what supports non-namespaced classes.
    $tail = str_replace('_', DIRECTORY_SEPARATOR, array_pop($pieces));
    $pieces[] = $tail;

    $relativePath = '../lib/' . implode(DIRECTORY_SEPARATOR, $pieces);
    $resolved = stream_resolve_include_path($relativePath . '.php');

    // Nothing is loaded when the file cannot be resolved.
    if ($resolved === false) {
        return;
    }

    require $resolved;
});

// Options handed to the tagger: dictionary directory, reduce mode on, output encoding.
$options = array(
    'dict_dir' => '../ipadic',
    'reduce_mode' => true,
    'output_encoding' => 'Shift_JIS',
);
$tagger = new Tagger($options);

// Run wakati() on the sample sentence and dump whatever it returns.
$words = $tagger->wakati("にわにはにわのにわとりがいる");
print_r($words);

// Report the peak memory usage of this run.
echo memory_get_peak_usage() . "\n";
$parts = explode('\\', $class); # Support for non-namespaced classes. $parts[] = str_replace('_', DIRECTORY_SEPARATOR, array_pop($parts)); //$path = implode(DIRECTORY_SEPARATOR, $parts); $path = '../lib/' . implode(DIRECTORY_SEPARATOR, $parts); $file = stream_resolve_include_path($path . '.php'); if ($file !== false) { require $file; } }); use Igo\Tagger; $encode = "UTF-8"; ini_set("memory_limit", "1073741824"); //1024^3 $text = file_get_contents("./yoshinoya.txt"); $igo = new Tagger(array('dict_dir' => '../jdic', 'reduce_mode' => false)); $bench = new benchmark(); $bench->start(); $result = $igo->parse($text); $bench->end(); print_r("score: " . $bench->score); print_r("\n"); $fp = fopen("./php-igo.result", "w"); foreach ($result as $res) { $buf = ""; $buf .= $res->surface; $buf .= ","; $buf .= $res->feature; $buf .= ","; $buf .= $res->start; $buf .= "\r\n";