Ejemplo n.º 1
0
#!/usr/bin/env php
<?php 
set_include_path(__DIR__ . '/../../src/' . PATH_SEPARATOR . get_include_path());
require 'phpMorphy.php';
// Entry point: convert a phpMorphy XML dictionary into a set of CSV files.
// Two positional arguments are required: the XML file to read and the
// directory that receives the generated CSV files.
if ($argc < 3) {
    // Usage errors go to STDERR with a non-zero status so that calling
    // shell scripts can detect the failure.
    fwrite(STDERR, "Usage " . $argv[0] . " XML_FILE OUT_DIR" . PHP_EOL);
    exit(1);
}
$xml_file = $argv[1];
// $out_dir must stay a global: get_abs_filename() reads it via $GLOBALS.
$out_dir = $argv[2];
// Create the output directory (with parents) and stop early when that is
// impossible, instead of silently continuing and failing later inside the
// writer. 0755 keeps the search bit for group/other; a directory without
// it can be listed but its files cannot be opened. The trailing is_dir()
// tolerates another process creating the directory concurrently.
if (!is_dir($out_dir) && !mkdir($out_dir, 0755, true) && !is_dir($out_dir)) {
    fwrite(STDERR, "Unable to create output directory " . $out_dir . PHP_EOL);
    exit(1);
}
try {
    $source = new phpMorphy_Dict_Source_Xml($xml_file);
    // One CSV file per dictionary section, all placed inside $out_dir.
    $writer = new phpMorphy_Dict_Writer_Csv(
        get_abs_filename('part_of_speech.csv'),
        get_abs_filename('grammems.csv'),
        get_abs_filename('ancodes.csv'),
        get_abs_filename('flexia_models.csv'),
        get_abs_filename('prefixes.csv'),
        get_abs_filename('lemmas.csv')
    );
    // The observer is handed the name of log_msg(), which echoes each
    // message it is given on its own line.
    $writer->setObserver(new phpMorphy_Dict_Writer_Observer_Standart('log_msg'));
    $writer->write($source);
} catch (Exception $e) {
    // die($string) would print to STDOUT and exit with status 0; report the
    // failure on STDERR and signal it through the exit status instead.
    fwrite(STDERR, (string) $e . PHP_EOL);
    exit(1);
}
/**
 * Builds the path of a file located inside the output directory.
 *
 * The directory is taken from the global variable $out_dir. Despite the
 * function name no path resolution is performed: the result is absolute
 * only if $out_dir itself is.
 *
 * @param string $name File name to place under the output directory.
 * @return string $out_dir, DIRECTORY_SEPARATOR and $name joined together.
 */
function get_abs_filename($name)
{
    $directory = $GLOBALS['out_dir'];

    return implode(DIRECTORY_SEPARATOR, array($directory, $name));
}
/**
 * Writes a message to standard output followed by a line break.
 *
 * Passed by name to the dictionary writer's observer as its logging
 * callback.
 *
 * @param string $msg Text to print.
 * @return void
 */
function log_msg($msg)
{
    print $msg . PHP_EOL;
}
Ejemplo n.º 2
0
#!/usr/bin/env php
<?php 
set_include_path(__DIR__ . '/../../src/' . PATH_SEPARATOR . get_include_path());
require 'phpMorphy.php';
// Entry point: report how many flexia models of a dictionary are distinct
// when only the prefix/suffix of their flexias is compared.
// NOTE(review): OUT_XML is required by the argument check but never read
// anywhere in this script - confirm whether writing output was intended.
if ($argc < 3) {
    // Terminate the usage line with a newline and send it to STDERR so it
    // does not run into the shell prompt or pollute piped output.
    fwrite(STDERR, "Usage {$argv[0]} IN_XML OUT_XML" . PHP_EOL);
    exit(1);
}
try {
    $source = new phpMorphy_Dict_Source_Xml($argv[1]);
    // Used as a set: each key is the signature of one flexia model, so
    // models with identical signatures collapse into a single entry.
    $mapping = array();
    $total_models = 0;
    foreach ($source->getFlexias() as $flexia_model) {
        // Signature = concatenation of "<prefix>suffix|" for every flexia
        // of the model, in iteration order.
        $hash = '';
        foreach ($flexia_model->getFlexias() as $flexia) {
            $prefix = $flexia->getPrefix();
            $suffix = $flexia->getSuffix();
            $hash .= "<{$prefix}>{$suffix}|";
        }
        $mapping[$hash] = 1;
        ++$total_models;
        // Progress indicator, one line per processed model.
        echo "{$total_models} done\n";
    }
    // orig = number of models read, new = number of distinct signatures.
    echo "orig = {$total_models}, new = " . count($mapping) . PHP_EOL;
} catch (Exception $e) {
    // Report the failure on STDERR, newline-terminated, with a non-zero
    // exit status.
    fwrite(STDERR, (string) $e . PHP_EOL);
    exit(1);
}
#!/usr/bin/env php
<?php 
require_once __DIR__ . '/../init.php';
// Entry point setup: the path of a dictionary XML file is the only
// required argument.
if ($argc < 2) {
    // die() with a string message prints to STDOUT and exits with status 0,
    // which hides the usage error from calling scripts; write to STDERR
    // and exit non-zero instead.
    fwrite(STDERR, "Usage {$argv[0]} DICT.xml\n");
    exit(1);
}
$xml = $argv[1];
$formatter = new phpMorphy_Paradigm_Formatter();
$source = new phpMorphy_Dict_Source_Xml($xml);
// remap_to_ids() is defined outside this excerpt. The lemma loop that
// follows indexes $flexias by flexia-model id and $ancodes by ancode id, so
// the results are presumably id-keyed lookup tables - confirm against the
// helper's definition.
$flexias = remap_to_ids($source->getFlexias());
$ancodes = remap_to_ids($source->getAncodes());
$prefixes = remap_to_ids($source->getPrefixes());
// Opens the "<file>: {" section of the printed report.
echo "{$xml}: {", PHP_EOL;
// Counter initialised to 1 before the lemma loop.
$para_no = 1;
/** @var phpMorphy_Dict_Lemma $lemma */
foreach ($source->getLemmas() as $lemma) {
    $common_grammems = array();
    if ($lemma->hasAncodeId()) {
        /** @var phpMorphy_Dict_Ancode $common_ancode */
        $common_ancode = $ancodes[$lemma->getAncodeId()];
        $common_grammems = $common_ancode->getGrammems();
    }
    $flexia_model = $flexias[$lemma->getFlexiaId()];
    $paradigm = new phpMorphy_Paradigm_ArrayBased(false);
    /** @var phpMorphy_Dict_Flexia $flexia */
    foreach ($flexia_model as $flexia) {
        /** @var phpMorphy_Dict_Ancode $ancode */
        $ancode = $ancodes[$flexia->getAncodeId()];
        $wf = new phpMorphy_WordForm_WordForm();
        $wf->setBase($lemma->getBase());
        $wf->setFormPrefix($flexia->getPrefix());