コード例 #1
0
ファイル: extract_gramtab.php プロジェクト: 4otaku/4otaku
/**
 * Dumps the grammar tables of a morphology data file as one serialized PHP array.
 *
 * The parts of speech, grammems and ancodes are read from $graminfoFile and
 * written to "<outDir>/gramtab[_txt].<lowercased locale>.bin" as
 * serialize(array('poses' => ..., 'grammems' => ..., 'ancodes' => ...)).
 *
 * @param string $graminfoFile Path of the morphology data file to read.
 * @param string $outDir       Directory the output file is written into.
 * @param bool   $asText       When truthy, each ancode's 'pos_id' and every entry of
 *                             its 'grammem_ids' are replaced by the 'name' of the
 *                             matching part of speech / grammem, and the file name
 *                             gets the "_txt" suffix.
 *
 * @throws Exception If an ancode refers to an unknown pos_id or grammem_id,
 *                   or if the output file cannot be written.
 */
function extract_gramtab($graminfoFile, $outDir, $asText)
{
    $factory = new phpMorphy_Storage_Factory();
    $graminfo = phpMorphy_GramInfo::create($factory->open(PHPMORPHY_STORAGE_FILE, $graminfoFile, false), false);
    $poses = $graminfo->readAllPartOfSpeech();
    $grammems = $graminfo->readAllGrammems();
    $ancodes = $graminfo->readAllAncodes();
    if ($asText) {
        // Rewrite the ancodes in place: numeric ids become human-readable names.
        foreach ($ancodes as &$ancode) {
            $pos_id = $ancode['pos_id'];
            if (!isset($poses[$pos_id])) {
                throw new Exception("Unknown pos_id '{$pos_id}' found");
            }
            $ancode['pos_id'] = $poses[$pos_id]['name'];
            foreach ($ancode['grammem_ids'] as &$grammem_id) {
                if (!isset($grammems[$grammem_id])) {
                    throw new Exception("Unknown grammem_id '{$grammem_id}' found");
                }
                $grammem_id = $grammems[$grammem_id]['name'];
            }
            // Drop the by-reference loop variable so it no longer aliases the
            // last element of this ancode's 'grammem_ids' array.
            unset($grammem_id);
        }
        // Same for the outer loop: without this, $ancode would stay bound to the
        // last element of $ancodes while $result is built and serialized below.
        unset($ancode);
    }
    $result = array('poses' => $poses, 'grammems' => $grammems, 'ancodes' => $ancodes);
    $type = $asText ? '_txt' : '';
    $out_file = 'gramtab' . $type . '.' . strtolower($graminfo->getLocale()) . '.bin';
    $out_file = $outDir . '/' . $out_file;
    if (false === file_put_contents($out_file, serialize($result))) {
        throw new Exception("Can`t write '{$out_file}'");
    }
}
コード例 #2
0
#!/usr/bin/php
<?php 
// Command-line tool: reads every flexia record from a morphology data file and
// writes a serialized ancodes cache into the given output directory.
//
// Usage: <script> MORPH_DATA_FILE OUT_DIR

// Refuse to run when bit 2 of mbstring.func_overload is set (string functions
// overloaded by mbstring, as the message below states).
if (2 == (ini_get('mbstring.func_overload') & 2)) {
    die("don`t overload string functions in mbstring extension, see mbstring.func_overload option");
}
// Both positional arguments are required; otherwise print the usage line and stop.
if ($argc < 3) {
    echo "Usage " . $argv[0] . " MORPH_DATA_FILE OUT_DIR";
    exit;
}
// Project bootstrap, resolved relative to this script's directory.
require_once dirname(__FILE__) . '/../src/common.php';
$file = $argv[1];
$out_dir = $argv[2];
// Passed through to Map::compose() below.
$use_references = true;
try {
    $factory = new phpMorphy_Storage_Factory();
    $graminfo = phpMorphy_GramInfo::create($factory->open(PHPMORPHY_STORAGE_FILE, $file, false), false);
    // NOTE(review): Map is not defined in the visible part of this file; its
    // update()/getMap()/compose() semantics should be confirmed at its definition.
    $ancodes_map = new Map('ancodes');
    $flexias_map = new Map('affixes');
    // Number of flexia records visited.
    $i = 0;
    foreach ($graminfo->readAllFlexia() as $id => $flexia) {
        // Each record is registered under the offset stored in its header.
        $offset = $flexia['header']['offset'];
        // + $graminfo->getGramInfoHeaderSize();
        $ancodes_map->update($flexia, $offset);
        //$flexias_map->update($flexia, $offset);
        $i++;
    }
    // $flexias_map receives no update() call above (it is commented out), so the
    // "unique flexias" figure reflects a Map that was only constructed.
    echo "Total flexias = {$i}, unique ancodes = " . count($ancodes_map->getMap()) . ', unique flexias = ' . count($flexias_map->getMap()) . PHP_EOL;
    // Output path pattern: <out_dir>/<name>.<lowercased locale>.bin
    $out_file_format = $out_dir . '/%s.' . strtolower($graminfo->getLocale()) . '.bin';
    // The return value of file_put_contents() is not checked here.
    file_put_contents(sprintf($out_file_format, 'morph_data_ancodes_cache'), serialize($ancodes_map->compose($use_references)));
    //file_put_contents(sprintf($out_file_format, 'morph_data_flexias_cache'), serialize($flexias_map->compose($use_references)));
} catch (Exception $e) {
コード例 #3
0
ファイル: graminfo.php プロジェクト: antixrist/phpMorphy
 /**
  * Builds the 'info' property on first access.
  *
  * For 'info', a phpMorphy_GramInfo is created from $this->storage, stored in
  * $this->info, and the storage property is then removed from the object.
  *
  * @param string $propName Name of the property being read.
  *
  * @return mixed The value stored in $this->info.
  *
  * @throws phpMorphy_Exception For any property name other than 'info'.
  */
 function __get($propName)
 {
     // Guard: 'info' is the only name this accessor handles.
     if ($propName != 'info') {
         throw new phpMorphy_Exception("Unknown prop name '{$propName}'");
     }

     $gramInfo = phpMorphy_GramInfo::create($this->storage, false);
     $this->info = $gramInfo;

     // The storage handle is dropped from the object once 'info' has been built.
     unset($this->storage);

     return $this->info;
 }