/**
 * Rebuilds the lexem-definition map from scratch.
 *
 * Deletes every existing LexemDefinitionMap record, then walks all the
 * concepts returned by db_selectAllConcepts(). For each concept that has
 * definitions, every distinct word name of that concept is looked up with
 * Lexem::loadByUnaccented(); if no lexem is found, one is created via
 * Lexem::create($word, 'T', 1, '') so that no word is lost during the
 * migration. Every resulting lexem is then mapped to every definition of the
 * concept.
 *
 * Progress is printed after every 1000 concepts and once more at the end.
 * Takes no arguments and returns nothing.
 */
function createLexemDefinitionMap() {
  LexemDefinitionMap::deleteAll();

  $dbResult = db_selectAllConcepts();
  print "Migrating " . mysql_num_rows($dbResult) . " concepts...\n";

  $seen = 0;
  while ($dbRow = mysql_fetch_assoc($dbResult)) {
    $concept = new Concept();
    $concept->populateFromDbRow($dbRow);

    $definitions = Definition::loadByConceptId($concept->id);
    if ($definitions) {
      // The words are only needed when there is something to map them to, so
      // they are loaded only for concepts that have definitions.
      $words = Word::loadByConceptId($concept->id);

      // Select distinct words (the names are used as array keys)
      $distinctWords = array();
      foreach ($words as $word) {
        $distinctWords[$word->name] = 1;
      }

      // For every word, look up all the lexems. Then map each of those lexems
      // to every definition.
      foreach ($distinctWords as $word => $ignored) {
        $lexems = Lexem::loadByUnaccented($word);

        // Create lexem if necessary so that we don't lose any words during the
        // migration
        if (count($lexems) === 0) {
          $lexem = Lexem::create($word, 'T', 1, '');
          $lexem->save();
          // save() does not set the id; fetch it from the last insert.
          $lexem->id = db_getLastInsertedId();
          $lexems[] = $lexem;
          $lexem->regenerateParadigm();
        }

        foreach ($lexems as $lexem) {
          foreach ($definitions as $definition) {
            // Only create the mapping when it does not exist yet.
            // NOTE(review): presumably the same (lexem, definition) pair can
            // be reached more than once during the migration -- confirm.
            if (!LexemDefinitionMap::load($lexem->id, $definition->id)) {
              $ldm = LexemDefinitionMap::create($lexem->id, $definition->id);
              $ldm->save();
            }
          }
        }
      }
    }

    $seen++;
    if ($seen % 1000 == 0) {
      print "Seen: {$seen};\n";
    }
  }

  // Release the result set now that every row has been consumed.
  mysql_free_result($dbResult);

  print "Seen: {$seen};\n";
}
$secondAt = strpos($def->internalRep, '@', $firstAt + 1); assert($firstAt === 0); assert($secondAt !== false); $text = trim(substr($def->internalRep, $firstAt + 1, $secondAt - $firstAt - 1)); $normText = text_removeAccents(text_unicodeToLower($text)); if (text_contains($normText, '-') && str_replace('-', '', $normText) == $l->unaccented) { print "[{$l->unaccented}] [{$normText}]\n"; $parts = split('-', $normText); foreach ($parts as $part) { $lexems = Lexem::searchWordlists($part, true); if (!count($lexems)) { print "Creez lexemul [{$part}]\n"; $lexem = Lexem::create($part, 'T', '1', ''); $lexem->comment = 'Creat pentru despărțirea în cuvinte a unui ' . 'alt lexem'; $lexem->save(); $lexem->id = db_getLastInsertedId(); $lexem->regenerateParadigm(); $lexems[] = $lexem; } // Now associate every lexem with every definition foreach ($defs as $defAssoc) { foreach ($lexems as $lexemAssoc) { LexemDefinitionMap::associate($lexemAssoc->id, $defAssoc->id); } } } foreach ($defs as $fixDef) { $fixFirstAt = strpos($fixDef->internalRep, '@'); $fixSecondAt = strpos($fixDef->internalRep, '@', $fixFirstAt + 1); assert($fixFirstAt === 0); assert($fixSecondAt !== false);
<?php require_once "../../phplib/util.php"; assert_options(ASSERT_BAIL, 1); debug_off(); $dbResult = mysql_query('select * from model_mappings'); $mms = ModelMapping::populateFromDbResult($dbResult); foreach ($mms as $mm) { print "Creating {$mm->modelType}{$mm->slaveNumber} from " . "{$mm->modelType}{$mm->masterNumber}\n"; $master = Model::loadByTypeNumber($mm->modelType, $mm->masterNumber); // Create the model $slave = Model::create($mm->modelType, $mm->slaveNumber, "Derivat din {$mm->modelType}{$mm->masterNumber}"); $slave->save(); $slave->id = db_getLastInsertedId(); // Clone the model descriptions $mds = ModelDescription::loadByModelId($master->id); foreach ($mds as $md) { $md->id = 0; $md->modelId = $slave->id; $md->save(); } // Clone the participle model if ($mm->modelType == 'V') { $pm = ParticipleModel::loadByVerbModel($mm->masterNumber); $clonePm = ParticipleModel::create($mm->slaveNumber, $pm->participleModel); $clonePm->save(); } // Delete the mapping mysql_query("delete from model_mappings where model_type = " . " '{$mm->modelType}' and slave_no = '{$mm->slaveNumber}'"); // Regenerate the lexems. In theory the paradigm won't change, but we want // to actually see it.
$i++; $word = new Word(); $word->populateFromDbRow($dbRow); $definitionId = $word->conceptId; // We changed the column name already print "{$i}/{$numRows} " . $word->name . "... "; if ($exponent->name == $word->name) { // We already have a concept created for this name $conceptId = $exponent->conceptId; migrateDeclensionModels($word->id, $exponent->id); $word->delete(); print "deleted.\n"; } else { $concept = Concept::create($word->name, ''); $concept->save(); $conceptId = db_getLastInsertedId(); $conceptIdToWordsMap[$conceptId] = array($word->name => $word->id); $word->conceptId = $conceptId; $word->save(); $exponent = $word; print "created concept.\n"; } $definitionIdToConceptIdMap[$definitionId] = $conceptId; $cdm = ConceptDefinitionMap::create($conceptId, $definitionId); $cdm->save(); } mysql_free_result($dbResult); // Go through all the words having Priority > 0 $dbResult = mysql_query("select * from Word where Priority > 0"); $numRows = mysql_num_rows($dbResult); $i = 0;
<?php require_once '../../phplib/util.php'; assert_options(ASSERT_BAIL, 1); debug_off(); // Create the Latin model type if it doesn't exist $model = Model::loadByTypeNumber('I', '2'); if (!$model) { print "Creating model I2 for biology terms\n"; $model = Model::create('I', '2', '', 'termeni biologici'); $model->save(); $model->id = db_getLastInsertedId(); $md = ModelDescription::create($model->id, 84, 0, 0, 1, NO_ACCENT_SHIFT, ''); $md->save(); } $dbResult = mysql_query("select * from lexems where lexem_model_type = 'T' " . "order by lexem_neaccentuat"); $seen = 0; $removed = 0; $biologyTerms = array('plantă', 'pom', 'arbore', 'arbust', 'bot', 'zool', 'mamifer', 'animal'); while (($dbRow = mysql_fetch_assoc($dbResult)) != null) { $l = Lexem::createFromDbRow($dbRow); $seen++; $defs = Definition::loadByLexemId($l->id); $matchingLexicon = false; $biology = false; $appears = false; foreach ($defs as $def) { if (str_replace('î', 'â', $def->lexicon) == str_replace('î', 'â', $l->unaccented)) { $matchingLexicon = true; } $rep = text_unicodeToLower($def->internalRep);
} else { $d->status = ST_PENDING; } } } if ($d->lexicon) { $lexems = Lexem::loadByUnaccented($d->lexicon); if (!count($lexems)) { $lexem = Lexem::create($d->lexicon, 'T', '1', ''); $lexem->save(); $lexem->id = db_getLastInsertedId(); $lexem->regenerateParadigm(); $lexems[] = $lexem; } $d->save(); $d->id = db_getLastInsertedId(); foreach ($lexems as $l) { $ldm = LexemDefinitionMap::create($l->id, $d->id); $ldm->save(); } } else { print "Skipping [{$d->internalRep}]\n"; } } if (++$linesSeen % 1000 == 0) { print "{$linesSeen} lines seen.\n"; } } print "Skipped {$skipped} existing definitions\n"; /***************************************************************************/ function parseArguments()
$parts = split(' ', $l->form); print text_padRight($l->form, 30); foreach ($parts as $part) { $part = trim($part); if (!$part || in_array($part, $stopWords)) { // Skip common words continue; } print '['; $baseForms = Lexem::searchWordlists($part, true); if (!count($baseForms)) { $baseForm = Lexem::create($part, 'T', '1', ''); $baseForm->comment = "Provine din despărțirea lexemului [{$l->form}]"; $baseForm->noAccent = true; $baseForm->save(); $baseForm->id = db_getLastInsertedId(); $baseForm->regenerateParadigm(); $baseForms[] = $baseForm; } // Associate every definition with every lexem foreach ($baseForms as $baseForm) { print $baseForm->form . ' '; foreach ($defs as $def) { LexemDefinitionMap::associate($baseForm->id, $def->id); } } print ']'; } print "\n"; } $l->delete();