/**
 * Dumps the grammatical tables of a graminfo file into one serialized file.
 *
 * Reads every part of speech, grammem and ancode through phpMorphy_GramInfo
 * and writes serialize(array('poses' => ..., 'grammems' => ..., 'ancodes' => ...))
 * to "<outDir>/gramtab[_txt].<lowercased locale>.bin". The "_txt" infix is
 * present only when $asText is truthy.
 *
 * @param mixed  $graminfoFile Graminfo file, handed as-is to the storage factory.
 * @param string $outDir       Target directory; a '/' separator is appended here.
 * @param mixed  $asText       When truthy, each ancode's 'pos_id' and every entry
 *                             of its 'grammem_ids' is replaced by the matching
 *                             'name' from the poses / grammems tables.
 *
 * @throws Exception When an ancode references an unknown pos_id or grammem_id,
 *                   or when the output file cannot be written.
 */
function extract_gramtab($graminfoFile, $outDir, $asText) {
    $factory = new phpMorphy_Storage_Factory();
    $graminfo = phpMorphy_GramInfo::create(
        $factory->open(PHPMORPHY_STORAGE_FILE, $graminfoFile, false),
        false
    );

    $poses = $graminfo->readAllPartOfSpeech();
    $grammems = $graminfo->readAllGrammems();
    $ancodes = $graminfo->readAllAncodes();

    if ($asText) {
        // Only the ancodes are rewritten to names; the poses and grammems
        // tables themselves are emitted exactly as they were read.
        foreach ($ancodes as &$ancode) {
            $pos_id = $ancode['pos_id'];

            if (!isset($poses[$pos_id])) {
                throw new Exception("Unknown pos_id '{$pos_id}' found");
            }

            $ancode['pos_id'] = $poses[$pos_id]['name'];

            foreach ($ancode['grammem_ids'] as &$grammem_id) {
                if (!isset($grammems[$grammem_id])) {
                    throw new Exception("Unknown grammem_id '{$grammem_id}' found");
                }

                $grammem_id = $grammems[$grammem_id]['name'];
            }
            // Break the reference so it does not stay bound to the last
            // element of this ancode's grammem_ids array.
            unset($grammem_id);
        }
        // Same for the outer loop: without this, $ancode would keep aliasing
        // the last element of $ancodes for the rest of the function.
        unset($ancode);
    }

    $result = array(
        'poses' => $poses,
        'grammems' => $grammems,
        'ancodes' => $ancodes
    );

    $type = $asText ? '_txt' : '';
    $out_file = 'gramtab' . $type . '.' . strtolower($graminfo->getLocale()) . '.bin';
    $out_file = $outDir . '/' . $out_file;

    if (false === file_put_contents($out_file, serialize($result))) {
        throw new Exception("Can`t write '{$out_file}'");
    }
}
#!/usr/bin/php <?php if (2 == (ini_get('mbstring.func_overload') & 2)) { die("don`t overload string functions in mbstring extension, see mbstring.func_overload option"); } if ($argc < 3) { echo "Usage " . $argv[0] . " MORPH_DATA_FILE OUT_DIR"; exit; } require_once dirname(__FILE__) . '/../src/common.php'; $file = $argv[1]; $out_dir = $argv[2]; $use_references = true; try { $factory = new phpMorphy_Storage_Factory(); $graminfo = phpMorphy_GramInfo::create($factory->open(PHPMORPHY_STORAGE_FILE, $file, false), false); $ancodes_map = new Map('ancodes'); $flexias_map = new Map('affixes'); $i = 0; foreach ($graminfo->readAllFlexia() as $id => $flexia) { $offset = $flexia['header']['offset']; // + $graminfo->getGramInfoHeaderSize(); $ancodes_map->update($flexia, $offset); //$flexias_map->update($flexia, $offset); $i++; } echo "Total flexias = {$i}, unique ancodes = " . count($ancodes_map->getMap()) . ', unique flexias = ' . count($flexias_map->getMap()) . PHP_EOL; $out_file_format = $out_dir . '/%s.' . strtolower($graminfo->getLocale()) . '.bin'; file_put_contents(sprintf($out_file_format, 'morph_data_ancodes_cache'), serialize($ancodes_map->compose($use_references))); //file_put_contents(sprintf($out_file_format, 'morph_data_flexias_cache'), serialize($flexias_map->compose($use_references))); } catch (Exception $e) {
/**
 * Magic accessor that builds the `info` property on demand.
 *
 * On a read of `info` that reaches this handler, a phpMorphy_GramInfo is
 * created from $this->storage, stored in $this->info, and $this->storage is
 * unset. Any other property name is rejected.
 *
 * @param string $propName Name of the inaccessible property being read.
 * @return mixed The value now held in $this->info.
 * @throws phpMorphy_Exception When $propName is anything other than 'info'.
 */
function __get($propName) {
    // Guard first: 'info' is the only lazily built property.
    if ($propName != 'info') {
        throw new phpMorphy_Exception("Unknown prop name '{$propName}'");
    }

    $gramInfo = phpMorphy_GramInfo::create($this->storage, false);
    $this->info = $gramInfo;

    // The storage handle is dropped from the object once info exists.
    unset($this->storage);

    return $this->info;
}