Пример #1
0
/**
 * Rebuilds the lexem-definition map from the concept tables.
 *
 * Clears every existing LexemDefinitionMap row, then walks all concepts
 * returned by db_selectAllConcepts(). For each concept that has at least one
 * definition, every distinct word name of the concept is resolved to its
 * lexems (a lexem with model type 'T' and model number 1 is created when none
 * matches) and each of those lexems is mapped to each of the concept's
 * definitions. Progress is printed every 1000 concepts and once more at the
 * end.
 *
 * NOTE(review): this relies on the legacy mysql_* API, which was removed in
 * PHP 7. Migrating it requires changing the db_* helpers, which live outside
 * this block.
 *
 * @return void
 */
function createLexemDefinitionMap()
{
    LexemDefinitionMap::deleteAll();
    $dbResult = db_selectAllConcepts();
    print "Migrating " . mysql_num_rows($dbResult) . " concepts...\n";
    $seen = 0;
    while ($dbRow = mysql_fetch_assoc($dbResult)) {
        $concept = new Concept();
        $concept->populateFromDbRow($dbRow);
        $definitions = Definition::loadByConceptId($concept->id);
        if ($definitions) {
            // The words are only needed when there is something to map them
            // to, so load them lazily and skip the query for concepts without
            // definitions.
            $words = Word::loadByConceptId($concept->id);
            // Select distinct words (array keys de-duplicate the names)
            $distinctWords = array();
            foreach ($words as $word) {
                $distinctWords[$word->name] = 1;
            }
            // For every word, look up all the lexems. Then map each of those
            // lexems to every definition.
            foreach (array_keys($distinctWords) as $word) {
                // PHP silently casts numeric-looking string keys ("1984") to
                // integers; restore the string so the lexem lookup/creation
                // always receives the word as text.
                $word = (string) $word;
                $lexems = Lexem::loadByUnaccented($word);
                // Create lexem if necessary so that we don't lose any words
                // during the migration
                if (count($lexems) == 0) {
                    $lexem = Lexem::create($word, 'T', 1, '');
                    $lexem->save();
                    // The id must be set before the paradigm is regenerated
                    // and before the lexem is used in the mapping below.
                    $lexem->id = db_getLastInsertedId();
                    $lexems[] = $lexem;
                    $lexem->regenerateParadigm();
                }
                foreach ($lexems as $lexem) {
                    foreach ($definitions as $definition) {
                        // Only create the mapping when it does not exist yet,
                        // so a pair reached more than once is stored once.
                        $ldm = LexemDefinitionMap::load($lexem->id, $definition->id);
                        if (!$ldm) {
                            $ldm = LexemDefinitionMap::create($lexem->id, $definition->id);
                            $ldm->save();
                        }
                    }
                }
            }
        }
        $seen++;
        if ($seen % 1000 == 0) {
            print "Seen: {$seen};\n";
        }
    }
    // Release the result set now that every row has been consumed.
    mysql_free_result($dbResult);
    print "Seen: {$seen};\n";
}
Пример #2
0
 $secondAt = strpos($def->internalRep, '@', $firstAt + 1);
 assert($firstAt === 0);
 assert($secondAt !== false);
 $text = trim(substr($def->internalRep, $firstAt + 1, $secondAt - $firstAt - 1));
 $normText = text_removeAccents(text_unicodeToLower($text));
 if (text_contains($normText, '-') && str_replace('-', '', $normText) == $l->unaccented) {
     print "[{$l->unaccented}] [{$normText}]\n";
     $parts = split('-', $normText);
     foreach ($parts as $part) {
         $lexems = Lexem::searchWordlists($part, true);
         if (!count($lexems)) {
             print "Creez lexemul [{$part}]\n";
             $lexem = Lexem::create($part, 'T', '1', '');
             $lexem->comment = 'Creat pentru despărțirea în cuvinte a unui ' . 'alt lexem';
             $lexem->save();
             $lexem->id = db_getLastInsertedId();
             $lexem->regenerateParadigm();
             $lexems[] = $lexem;
         }
         // Now associate every lexem with every definition
         foreach ($defs as $defAssoc) {
             foreach ($lexems as $lexemAssoc) {
                 LexemDefinitionMap::associate($lexemAssoc->id, $defAssoc->id);
             }
         }
     }
     foreach ($defs as $fixDef) {
         $fixFirstAt = strpos($fixDef->internalRep, '@');
         $fixSecondAt = strpos($fixDef->internalRep, '@', $fixFirstAt + 1);
         assert($fixFirstAt === 0);
         assert($fixSecondAt !== false);
Пример #3
0
<?php

require_once "../../phplib/util.php";
assert_options(ASSERT_BAIL, 1);
debug_off();
$dbResult = mysql_query('select * from model_mappings');
$mms = ModelMapping::populateFromDbResult($dbResult);
foreach ($mms as $mm) {
    print "Creating {$mm->modelType}{$mm->slaveNumber} from " . "{$mm->modelType}{$mm->masterNumber}\n";
    $master = Model::loadByTypeNumber($mm->modelType, $mm->masterNumber);
    // Create the model
    $slave = Model::create($mm->modelType, $mm->slaveNumber, "Derivat din {$mm->modelType}{$mm->masterNumber}");
    $slave->save();
    $slave->id = db_getLastInsertedId();
    // Clone the model descriptions
    $mds = ModelDescription::loadByModelId($master->id);
    foreach ($mds as $md) {
        $md->id = 0;
        $md->modelId = $slave->id;
        $md->save();
    }
    // Clone the participle model
    if ($mm->modelType == 'V') {
        $pm = ParticipleModel::loadByVerbModel($mm->masterNumber);
        $clonePm = ParticipleModel::create($mm->slaveNumber, $pm->participleModel);
        $clonePm->save();
    }
    // Delete the mapping
    mysql_query("delete from model_mappings where model_type = " . " '{$mm->modelType}' and slave_no = '{$mm->slaveNumber}'");
    // Regenerate the lexems. In theory the paradigm won't change, but we want
    // to actually see it.
Пример #4
0
    $i++;
    $word = new Word();
    $word->populateFromDbRow($dbRow);
    $definitionId = $word->conceptId;
    // We changed the column name already
    print "{$i}/{$numRows} " . $word->name . "... ";
    if ($exponent->name == $word->name) {
        // We already have a concept created for this name
        $conceptId = $exponent->conceptId;
        migrateDeclensionModels($word->id, $exponent->id);
        $word->delete();
        print "deleted.\n";
    } else {
        $concept = Concept::create($word->name, '');
        $concept->save();
        $conceptId = db_getLastInsertedId();
        $conceptIdToWordsMap[$conceptId] = array($word->name => $word->id);
        $word->conceptId = $conceptId;
        $word->save();
        $exponent = $word;
        print "created concept.\n";
    }
    $definitionIdToConceptIdMap[$definitionId] = $conceptId;
    $cdm = ConceptDefinitionMap::create($conceptId, $definitionId);
    $cdm->save();
}
mysql_free_result($dbResult);
// Go through all the words having Priority > 0
$dbResult = mysql_query("select * from Word where Priority > 0");
$numRows = mysql_num_rows($dbResult);
$i = 0;
Пример #5
0
<?php

require_once '../../phplib/util.php';
assert_options(ASSERT_BAIL, 1);
debug_off();
// Create the Latin model type if it doesn't exist
$model = Model::loadByTypeNumber('I', '2');
if (!$model) {
    print "Creating model I2 for biology terms\n";
    $model = Model::create('I', '2', '', 'termeni biologici');
    $model->save();
    $model->id = db_getLastInsertedId();
    $md = ModelDescription::create($model->id, 84, 0, 0, 1, NO_ACCENT_SHIFT, '');
    $md->save();
}
$dbResult = mysql_query("select * from lexems where lexem_model_type = 'T' " . "order by lexem_neaccentuat");
$seen = 0;
$removed = 0;
$biologyTerms = array('plantă', 'pom', 'arbore', 'arbust', 'bot', 'zool', 'mamifer', 'animal');
while (($dbRow = mysql_fetch_assoc($dbResult)) != null) {
    $l = Lexem::createFromDbRow($dbRow);
    $seen++;
    $defs = Definition::loadByLexemId($l->id);
    $matchingLexicon = false;
    $biology = false;
    $appears = false;
    foreach ($defs as $def) {
        if (str_replace('î', 'â', $def->lexicon) == str_replace('î', 'â', $l->unaccented)) {
            $matchingLexicon = true;
        }
        $rep = text_unicodeToLower($def->internalRep);
Пример #6
0
                } else {
                    $d->status = ST_PENDING;
                }
            }
        }
        if ($d->lexicon) {
            $lexems = Lexem::loadByUnaccented($d->lexicon);
            if (!count($lexems)) {
                $lexem = Lexem::create($d->lexicon, 'T', '1', '');
                $lexem->save();
                $lexem->id = db_getLastInsertedId();
                $lexem->regenerateParadigm();
                $lexems[] = $lexem;
            }
            $d->save();
            $d->id = db_getLastInsertedId();
            foreach ($lexems as $l) {
                $ldm = LexemDefinitionMap::create($l->id, $d->id);
                $ldm->save();
            }
        } else {
            print "Skipping [{$d->internalRep}]\n";
        }
    }
    if (++$linesSeen % 1000 == 0) {
        print "{$linesSeen} lines seen.\n";
    }
}
print "Skipped {$skipped} existing definitions\n";
/***************************************************************************/
function parseArguments()
Пример #7
0
     $parts = split(' ', $l->form);
     print text_padRight($l->form, 30);
     foreach ($parts as $part) {
         $part = trim($part);
         if (!$part || in_array($part, $stopWords)) {
             // Skip common words
             continue;
         }
         print '[';
         $baseForms = Lexem::searchWordlists($part, true);
         if (!count($baseForms)) {
             $baseForm = Lexem::create($part, 'T', '1', '');
             $baseForm->comment = "Provine din despărțirea lexemului [{$l->form}]";
             $baseForm->noAccent = true;
             $baseForm->save();
             $baseForm->id = db_getLastInsertedId();
             $baseForm->regenerateParadigm();
             $baseForms[] = $baseForm;
         }
         // Associate every definition with every lexem
         foreach ($baseForms as $baseForm) {
             print $baseForm->form . ' ';
             foreach ($defs as $def) {
                 LexemDefinitionMap::associate($baseForm->id, $def->id);
             }
         }
         print ']';
     }
     print "\n";
 }
 $l->delete();