#!/usr/bin/env php
<?php
/**
 * Command-line tool: reads a phpMorphy XML dictionary and hands it to
 * phpMorphy_Dict_Writer_Csv, which is given six target paths inside OUT_DIR
 * (part_of_speech.csv, grammems.csv, ancodes.csv, flexia_models.csv,
 * prefixes.csv, lemmas.csv).
 *
 * Usage: SCRIPT XML_FILE OUT_DIR
 *
 * Exit status: 0 on success, 1 on bad arguments, when OUT_DIR cannot be
 * created, or when an exception is raised while reading/writing.
 */
set_include_path(__DIR__ . '/../../src/' . PATH_SEPARATOR . get_include_path());
require 'phpMorphy.php';

if ($argc < 3) {
    echo "Usage " . $argv[0] . " XML_FILE OUT_DIR" . PHP_EOL;
    // Non-zero status so shell callers can tell the tool was misused.
    exit(1);
}

$xml_file = $argv[1];
// Read by get_abs_filename() through $GLOBALS, so the name must stay $out_dir.
$out_dir = $argv[2];

// Create the output directory if needed. The second is_dir() check covers
// the case where the directory appears between the first check and mkdir().
// NOTE(review): mode 0744 gives group/others no execute (search) bit on the
// directory; confirm whether 0755 was intended.
if (!is_dir($out_dir) && !@mkdir($out_dir, 0744, true) && !is_dir($out_dir)) {
    fwrite(STDERR, "Can't create output directory '" . $out_dir . "'" . PHP_EOL);
    exit(1);
}

try {
    $source = new phpMorphy_Dict_Source_Xml($xml_file);

    $writer = new phpMorphy_Dict_Writer_Csv(
        get_abs_filename('part_of_speech.csv'),
        get_abs_filename('grammems.csv'),
        get_abs_filename('ancodes.csv'),
        get_abs_filename('flexia_models.csv'),
        get_abs_filename('prefixes.csv'),
        get_abs_filename('lemmas.csv')
    );

    // Progress messages from the writer are routed to log_msg() (stdout).
    $writer->setObserver(new phpMorphy_Dict_Writer_Observer_Standart('log_msg'));
    $writer->write($source);
} catch (Exception $e) {
    // Report on stderr and fail with a non-zero status; die() with a string
    // argument would print to stdout and exit with status 0.
    fwrite(STDERR, (string) $e . PHP_EOL);
    exit(1);
}

/**
 * Builds the path of a file located inside the output directory.
 *
 * @param string $name File name relative to the output directory.
 * @return string Global $out_dir joined with $name by DIRECTORY_SEPARATOR.
 */
function get_abs_filename($name)
{
    return $GLOBALS['out_dir'] . DIRECTORY_SEPARATOR . $name;
}

/**
 * Prints a single message followed by a line break to standard output.
 *
 * @param string $msg Message text.
 * @return void
 */
function log_msg($msg)
{
    echo $msg, PHP_EOL;
}
#!/usr/bin/env php
<?php
/**
 * Command-line tool: walks every flexia model of an XML dictionary, builds a
 * signature string from the prefix/suffix pairs of its flexias and reports
 * how many models there are in total versus how many distinct signatures
 * were seen.
 *
 * Usage: SCRIPT IN_XML OUT_XML
 *
 * At least two arguments are required by the argument check, but only the
 * first one (IN_XML) is read by this script.
 */
set_include_path(__DIR__ . '/../../src/' . PATH_SEPARATOR . get_include_path());
require 'phpMorphy.php';

if ($argc < 3) {
    echo 'Usage ' . $argv[0] . ' IN_XML OUT_XML';
    exit(1);
}

try {
    $dictionary = new phpMorphy_Dict_Source_Xml($argv[1]);

    // Signature => 1; used as a set, so identical signatures collapse.
    $seen = array();
    $processed = 0;

    foreach ($dictionary->getFlexias() as $model) {
        // One "<prefix>suffix|" piece per flexia, in iteration order.
        $pieces = array();
        foreach ($model->getFlexias() as $item) {
            $pieces[] = '<' . $item->getPrefix() . '>' . $item->getSuffix() . '|';
        }

        $seen[implode('', $pieces)] = 1;
        $processed++;

        echo $processed, " done\n";
    }

    echo 'orig = ', $processed, ', new = ', count($seen), PHP_EOL;
} catch (Exception $e) {
    echo $e;
    exit(1);
}
#!/usr/bin/env php <?php require_once __DIR__ . '/../init.php'; if ($argc < 2) { die("Usage {$argv[0]} DICT.xml\n"); } $xml = $argv[1]; $formatter = new phpMorphy_Paradigm_Formatter(); $source = new phpMorphy_Dict_Source_Xml($xml); $flexias = remap_to_ids($source->getFlexias()); $ancodes = remap_to_ids($source->getAncodes()); $prefixes = remap_to_ids($source->getPrefixes()); echo "{$xml}: {", PHP_EOL; $para_no = 1; /** @var phpMorphy_Dict_Lemma $lemma */ foreach ($source->getLemmas() as $lemma) { $common_grammems = array(); if ($lemma->hasAncodeId()) { /** @var phpMorphy_Dict_Ancode $common_ancode */ $common_ancode = $ancodes[$lemma->getAncodeId()]; $common_grammems = $common_ancode->getGrammems(); } $flexia_model = $flexias[$lemma->getFlexiaId()]; $paradigm = new phpMorphy_Paradigm_ArrayBased(false); /** @var phpMorphy_Dict_Flexia $flexia */ foreach ($flexia_model as $flexia) { /** @var phpMorphy_Dict_Ancode $ancode */ $ancode = $ancodes[$flexia->getAncodeId()]; $wf = new phpMorphy_WordForm_WordForm(); $wf->setBase($lemma->getBase()); $wf->setFormPrefix($flexia->getPrefix());