/**
 * Rebuilds the lexem-definition map from the concept tables.
 *
 * All existing LexemDefinitionMap entries are deleted first. Then, for every
 * concept row returned by db_selectAllConcepts() that has definitions, each
 * distinct word name of the concept is looked up as a lexem and every lexem
 * found is mapped to every definition of the concept. When no lexem exists
 * for a word, one is created (model 'T', number 1) so that the word is not
 * lost. Progress is printed every 1000 concepts and once more at the end.
 */
function createLexemDefinitionMap() {
  LexemDefinitionMap::deleteAll();

  $conceptRows = db_selectAllConcepts();
  print "Migrating " . mysql_num_rows($conceptRows) . " concepts...\n";

  $seen = 0;
  while ($row = mysql_fetch_assoc($conceptRows)) {
    $concept = new Concept();
    $concept->populateFromDbRow($row);
    $conceptWords = Word::loadByConceptId($concept->id);
    $conceptDefs = Definition::loadByConceptId($concept->id);

    if ($conceptDefs) {
      // Collapse duplicate word names by using them as array keys.
      $uniqueNames = array();
      foreach ($conceptWords as $conceptWord) {
        $uniqueNames[$conceptWord->name] = 1;
      }

      foreach (array_keys($uniqueNames) as $name) {
        $matchingLexems = Lexem::loadByUnaccented($name);

        // No lexem yet for this word: create one so the word survives the
        // migration.
        if (!count($matchingLexems)) {
          $created = Lexem::create($name, 'T', 1, '');
          $created->save();
          $created->id = db_getLastInsertedId();
          $matchingLexems[] = $created;
          $created->regenerateParadigm();
        }

        // Map each lexem to each definition, skipping pairs already mapped.
        foreach ($matchingLexems as $matchingLexem) {
          foreach ($conceptDefs as $conceptDef) {
            if (LexemDefinitionMap::load($matchingLexem->id, $conceptDef->id)) {
              continue;
            }
            $mapping = LexemDefinitionMap::create($matchingLexem->id, $conceptDef->id);
            $mapping->save();
          }
        }
      }
    }

    if (++$seen % 1000 == 0) {
      print "Seen: {$seen};\n";
    }
  }

  print "Seen: {$seen};\n";
}
continue; } $firstAt = strpos($def->internalRep, '@'); $secondAt = strpos($def->internalRep, '@', $firstAt + 1); assert($firstAt === 0); assert($secondAt !== false); $text = trim(substr($def->internalRep, $firstAt + 1, $secondAt - $firstAt - 1)); $normText = text_removeAccents(text_unicodeToLower($text)); if (text_contains($normText, '-') && str_replace('-', '', $normText) == $l->unaccented) { print "[{$l->unaccented}] [{$normText}]\n"; $parts = split('-', $normText); foreach ($parts as $part) { $lexems = Lexem::searchWordlists($part, true); if (!count($lexems)) { print "Creez lexemul [{$part}]\n"; $lexem = Lexem::create($part, 'T', '1', ''); $lexem->comment = 'Creat pentru despărțirea în cuvinte a unui ' . 'alt lexem'; $lexem->save(); $lexem->id = db_getLastInsertedId(); $lexem->regenerateParadigm(); $lexems[] = $lexem; } // Now associate every lexem with every definition foreach ($defs as $defAssoc) { foreach ($lexems as $lexemAssoc) { LexemDefinitionMap::associate($lexemAssoc->id, $defAssoc->id); } } } foreach ($defs as $fixDef) { $fixFirstAt = strpos($fixDef->internalRep, '@');
$definition->abbrevReview = count($ambiguousMatches) ? ABBREV_AMBIGUOUS : ABBREV_REVIEW_COMPLETE; $definition->save(); log_userLog("Added definition {$definition->id} ({$definition->lexicon})"); $ldms = array(); foreach ($lexemNames as $lexemName) { $lexemName = addslashes(AdminStringUtil::formatLexem($lexemName)); if ($lexemName) { $matches = Lexem::loadByExtendedName($lexemName); if (count($matches) >= 1) { foreach ($matches as $match) { LexemDefinitionMap::associate($match->id, $definition->id); log_userLog("Associating with lexem {$match->id} ({$match->form})"); } } else { // Create a new lexem. $lexem = Lexem::create($lexemName, 'T', '1', ''); $lexem->save(); $lexem->regenerateParadigm(); LexemDefinitionMap::associate($lexem->id, $definition->id); log_userLog("Created lexem {$lexem->id} ({$lexem->form})"); } } } FlashMessage::add('Definiția a fost trimisă. Un moderator o va examina în scurt timp. Vă mulțumim!', 'info'); util_redirect('contribuie'); } } else { smarty_assign('sourceId', session_getDefaultContribSourceId()); } smarty_assign('contribSources', Model::factory('Source')->where('canContribute', true)->order_by_asc('displayOrder')->find_many()); smarty_assign('page_title', 'Contribuie cu definiții');
smarty_assign('selectedModelNumber', $modelNumber); LocVersion::changeDatabase($locVersion); if ($modelNumber == -1) { $modelsToDisplay = FlexModel::loadByType($modelType); } else { $modelsToDisplay = array(Model::factory('FlexModel')->where('modelType', $modelType)->where('number', $modelNumber)->find_one()); } $lexems = array(); $paradigms = array(); foreach ($modelsToDisplay as $m) { // Load by canonical model, so if $modelType is V, look for a lexem with type V or VT. $l = Model::factory('Lexem')->select('Lexem.*')->join('ModelType', 'modelType = code')->where('canonical', $modelType)->where('modelNumber', $m->number)->where('form', $m->exponent)->limit(1)->find_one(); if ($l) { $paradigm = getExistingForms($l->id, $locVersion); } else { $l = Lexem::create($m->exponent, $modelType, $m->number, ''); $l->isLoc = true; $paradigm = getNewForms($l, $locVersion); } $lexems[] = $l; $paradigms[] = $paradigm; } smarty_assign('modelsToDisplay', $modelsToDisplay); smarty_assign('lexems', $lexems); smarty_assign('paradigms', $paradigms); } else { smarty_assign('selectedLocVersion', $locVersions[0]->name); // LocVersion::changeDatabase($locVersion); } $modelTypes = ModelType::loadCanonical(); $models = FlexModel::loadByType($modelType ? $modelType : $modelTypes[0]->code);
require_once "../../phplib/util.php"; ini_set('memory_limit', '512M'); ini_set('max_execution_time', '3600'); util_assertModerator(PRIV_LOC); util_assertNotMirror(); DebugInfo::disable(); $modelType = util_getRequestParameter('modelType'); $modelNumber = util_getRequestParameter('modelNumber'); $previewButton = util_getRequestParameter('previewButton'); $confirmButton = util_getRequestParameter('confirmButton'); $modelType = ModelType::canonicalize($modelType); $inflections = Model::factory('Inflection')->where('modelType', $modelType)->order_by_asc('rank')->find_many(); // Load the original data $model = Model::factory('FlexModel')->where('modelType', $modelType)->where('number', $modelNumber)->find_one(); $exponent = $model->exponent; $lexem = Lexem::create($exponent, $modelType, $modelNumber, ''); $ifs = $lexem->generateParadigm(); $mdMap = ModelDescription::getByModelIdMapByInflectionIdVariantApplOrder($model->id); $forms = array(); foreach ($inflections as $infl) { $forms[$infl->id] = array(); } foreach ($ifs as $if) { $forms[$if->inflectionId][] = array('form' => $if->form, 'isLoc' => $mdMap[$if->inflectionId][$if->variant][0]->isLoc, 'recommended' => $mdMap[$if->inflectionId][$if->variant][0]->recommended); } $participleNumber = $modelType == 'V' ? ParticipleModel::loadByVerbModel($modelNumber)->adjectiveModel : ''; if ($previewButton || $confirmButton) { // Load the new forms and exponent; $newModelNumber = util_getRequestParameter('newModelNumber'); $newExponent = util_getRequestParameter('newExponent'); $newDescription = util_getRequestParameter('newDescription');
/**
 * Creates, saves and returns a clone of the given lexem.
 *
 * The clone has the same form, comment and noAccent flag as the original, but
 * is created with model type 'T', model number 1 and an empty restriction.
 * Its description is the original description prefixed with "CLONĂ" (or just
 * "CLONĂ" when the original has none). The clone is associated with every
 * definition the original is mapped to, and its paradigm is regenerated.
 *
 * @param object $lexem The lexem to clone.
 * @return object The saved clone.
 */
function _cloneLexem($lexem) {
  $copy = Lexem::create($lexem->form, 'T', 1, '');
  $copy->comment = $lexem->comment;
  if ($lexem->description) {
    $copy->description = "CLONĂ {$lexem->description}";
  } else {
    $copy->description = "CLONĂ";
  }
  $copy->noAccent = $lexem->noAccent;
  $copy->save();

  // Give the clone the same definition list as the original.
  foreach (LexemDefinitionMap::get_all_by_lexemId($lexem->id) as $mapping) {
    LexemDefinitionMap::associate($copy->id, $mapping->definitionId);
  }

  $copy->regenerateParadigm();
  return $copy;
}
// Resolve each submitted lexem name to existing lexems. Every distinct match is
// collected together with a (not yet saved) LexemDefinitionMap for the
// definition; an unknown name is reported as an error.
foreach ($lexemNames as $lexemName) {
  $lexemName = trim($lexemName);
  if ($lexemName) {
    $matches = Lexem::loadByExtendedName($lexemName);
    if (count($matches) >= 1) {
      foreach ($matches as $match) {
        if (!in_array($match->id, $lexemIds)) {
          $lexemIds[] = $match->id;
          $lexems[] = $match;
          $ldms[] = LexemDefinitionMap::create($match->id, $definitionId);
        }
      }
    } else {
      $hasErrors = true;
      FlashMessage::add("Lexemul <i>" . htmlentities($lexemName) . "</i> nu există. Folosiți lista de sugestii pentru a-l corecta.");
      // Keep a placeholder lexem in the list under the name that was typed.
      $lexems[] = Lexem::create($lexemName, 0, '', '');
      // We won't be needing $ldms since there is an error.
    }
  }
}
// The brace below closes a branch that was opened before this excerpt.
} else {
  // Load the lexems currently associated with the definition.
  $lexems = Model::factory('Lexem')->select('Lexem.*')->join('LexemDefinitionMap', 'Lexem.id = lexemId')->where('definitionId', $definitionId)->find_many();
}

if ($commentContents) {
  if (!$comment) {
    $comment = Model::factory('comment')->create();
    // Fixed: this used to assign to the undefined variable $commend, so the
    // status of a newly created comment was never set.
    $comment->status = ST_ACTIVE;
    $comment->definitionId = $definitionId;
  }
  $newContents = AdminStringUtil::internalizeDefinition($commentContents, $sourceId);
  if ($newContents != $comment->contents) {
/**
 * Creates or updates the lexems for this lexem's long infinitive forms.
 *
 * For every inflected form of this lexem whose inflection is the one returned
 * by Inflection::loadLongInfinitive(), a target model is chosen: F113 when the
 * unaccented form ends in "are", F107 otherwise. An existing lexem with the
 * same unaccented form is reused when its model type is 'T', or when it is 'F'
 * with the target model number; it is then switched to the target model.
 * Otherwise a new lexem is created and associated with the same definitions
 * as this lexem. In both cases the paradigm of the resulting lexem is
 * regenerated. Informational flash messages describe what was done.
 */
public function regenerateLongInfinitive() {
  $longInfinitive = Inflection::loadLongInfinitive();
  $infinitiveForms = Model::factory('InflectedForm')
    ->where('lexemId', $this->id)
    ->where('inflectionId', $longInfinitive->id)
    ->find_many();
  $f107 = Model::factory('FlexModel')->where('modelType', 'F')->where('number', '107')->find_one();
  $f113 = Model::factory('FlexModel')->where('modelType', 'F')->where('number', '113')->find_one();

  foreach ($infinitiveForms as $form) {
    if (StringUtil::endsWith($form->formNoAccent, 'are')) {
      $target = $f113;
    } else {
      $target = $f107;
    }

    // Reuse an existing lexem only if it is of type T, or of type F with the
    // target model number. When several qualify, the last one wins.
    $infinitive = null;
    foreach (Lexem::get_all_by_formNoAccent($form->formNoAccent) as $candidate) {
      if ($candidate->modelType == 'T'
          || ($candidate->modelType == 'F' && $candidate->modelNumber == $target->number)) {
        $infinitive = $candidate;
      } elseif ($this->isLoc && !$candidate->isLoc) {
        FlashMessage::add("Lexemul {$candidate->formNoAccent} ({$candidate->modelType}{$candidate->modelNumber}), care nu este în LOC, nu a fost modificat.", 'info');
      }
    }

    if ($infinitive) {
      // Switch the existing lexem to the target model.
      $infinitive->modelType = 'F';
      $infinitive->modelNumber = $target->number;
      $infinitive->restriction = '';
      if ($this->isLoc && !$infinitive->isLoc) {
        $infinitive->isLoc = $this->isLoc;
        FlashMessage::add("Lexemul {$infinitive->formNoAccent}, care nu era în LOC, a fost inclus automat în LOC.", 'info');
      }
      $infinitive->noAccent = false;
      $infinitive->save();
    } else {
      $infinitive = Lexem::create($form->form, 'F', $target->number, '');
      $infinitive->isLoc = $this->isLoc;
      $infinitive->save();

      // Also associate the new lexem with the same definitions as $this.
      foreach (LexemDefinitionMap::get_all_by_lexemId($this->id) as $mapping) {
        LexemDefinitionMap::associate($infinitive->id, $mapping->definitionId);
      }
      FlashMessage::add("Am creat automat lexemul {$infinitive->formNoAccent} (F{$infinitive->modelNumber}) și l-am asociat cu toate definițiile verbului.", 'info');
    }

    $infinitive->regenerateParadigm();
  }
}
/**
 * Parses a word field into a list of lexems.
 *
 * The field is processed recursively, trying these cases in order:
 *   1. A slash outside any brackets/parentheses splits the field in two; both
 *      halves are parsed and the results are merged.
 *   2. A trailing "[...]" group is split off (from the last '['), the rest is
 *      parsed, and the group is passed to appendExtra() for the last result.
 *   3. A trailing "(...)" group is handled the same way; in addition, if
 *      parseModel() finds a model type and number in it, they (and the
 *      restriction) override those of the last result, and if the group
 *      contains one of a fixed list of part-of-speech markers, every result
 *      is set to model I1 with no restriction.
 *   4. A comma-separated list is split and each part is parsed.
 *   5. Otherwise a single lexem is created: hyphens are removed (the
 *      hyphenated original is kept as extra info), an apostrophe is inserted
 *      at the index of the first lowercase character, and the word is
 *      lowercased.
 *
 * @param string $word      The raw word field.
 * @param mixed  $modelType Model type passed to Lexem::create().
 * @param mixed  $modelNo   Model number passed to Lexem::create().
 * @param mixed  $restr     Restriction passed to Lexem::create().
 * @return array List of lexem objects (not saved by this function).
 */
function parseWordField($word, $modelType, $modelNo, $restr) {
  $word = trim($word);

  // Look for a slash not included in brackets
  $len = mb_strlen($word);
  $parCount = 0;
  $i = 0;
  $found = false;
  while ($i < $len && !$found) {
    $c = text_getCharAt($word, $i);
    if ($c == '[' || $c == '(') {
      $parCount++;
    } else {
      if ($c == ']' || $c == ')') {
        $parCount--;
      }
    }
    if ($c == '/' && !$parCount) {
      $found = true;
    } else {
      $i++;
    }
  }

  if ($found) {
    // $i is the index of the slash; parse both sides independently.
    $r1 = parseWordField(mb_substr($word, 0, $i), $modelType, $modelNo, $restr);
    $r2 = parseWordField(mb_substr($word, $i + 1), $modelType, $modelNo, $restr);
    return array_merge($r1, $r2);
  }

  if (text_endsWith($word, ']')) {
    $pos = mb_strrpos($word, '[');
    assert($pos !== false);
    $extra = mb_substr($word, $pos);
    $results = parseWordField(mb_substr($word, 0, $pos), $modelType, $modelNo, $restr);
    assert(count($results));
    appendExtra($results[count($results) - 1], $extra);
    return $results;
  }

  if (text_endsWith($word, ')')) {
    $pos = mb_strrpos($word, '(');
    assert($pos !== false);
    $extra = mb_substr($word, $pos);
    $results = parseWordField(mb_substr($word, 0, $pos), $modelType, $modelNo, $restr);
    assert(count($results));

    // See if $extra contains a model number. If so, use it on the last model.
    // Separate locals are used so the function parameters are not overwritten.
    list($extraModelType, $extraModelNo, $extraRestr) = parseModel($extra);
    if ($extraModelType && $extraModelNo) {
      $last = $results[count($results) - 1];
      $last->modelType = $extraModelType;
      $last->modelNumber = $extraModelNo;
      $last->restriction = $extraRestr;
    }
    appendExtra($results[count($results) - 1], $extra);

    // If $extra dictates a part of speech, apply it to all the lexems.
    // NOTE(review): the second literal below contains an embedded line break,
    // reproduced exactly as found; confirm whether 's.f. în expr.' on a single
    // line was intended.
    if (text_contains($extra, 's.f.inv.') ||
        text_contains($extra, 's.f. 
în expr.') ||
        text_contains($extra, 's.m.inv.') ||
        text_contains($extra, 's.n.inv.') ||
        text_contains($extra, 'adj.inv.') ||
        text_contains($extra, 'adv.') ||
        text_contains($extra, 'conj.') ||
        text_contains($extra, 'prep.') ||
        text_contains($extra, 'interj.')) {
      foreach ($results as $l) {
        $l->modelType = 'I';
        $l->modelNumber = '1';
        $l->restriction = '';
      }
    }
    return $results;
  }

  // explode() replaces split(), which is deprecated since PHP 5.3 and removed
  // in PHP 7.0; for a literal comma separator the result is the same.
  $parts = explode(',', $word);
  if (count($parts) >= 2) {
    $results = array();
    foreach ($parts as $part) {
      $results = array_merge($results, parseWordField($part, $modelType, $modelNo, $restr));
    }
    return $results;
  }

  // Base case: a single word. Keep the hyphenated spelling as extra info.
  $extra = text_contains($word, '-') ? $word : '';
  $word = str_replace('-', '', $word);

  // Insert an apostrophe at the index of the first lowercase character.
  $len = mb_strlen($word);
  $found = false;
  for ($i = 0; $i < $len && !$found; $i++) {
    $c = text_getCharAt($word, $i);
    if (text_isLowercase($c)) {
      $found = true;
      $word = text_insert($word, "'", $i);
    }
  }
  $word = text_unicodeToLower($word);

  $l = Lexem::create($word, $modelType, $modelNo, $restr);
  appendExtra($l, $extra);
  $l->isLoc = true;
  return array($l);
}
$d->lexicon = text_internalizeWordName($parts[0]); $d->status = ST_PENDING; } else { if (count($parts) == 2 && text_endsWith($parts[1], '/')) { // Use the first part only, because the second one is the pronunciation $d->lexicon = text_internalizeWordName($parts[0]); //print "Using [{$d->lexicon}] for " . mb_substr($d->internalRep, 0, 50) . "\n"; } else { $d->status = ST_PENDING; } } } if ($d->lexicon) { $lexems = Lexem::loadByUnaccented($d->lexicon); if (!count($lexems)) { $lexem = Lexem::create($d->lexicon, 'T', '1', ''); $lexem->save(); $lexem->id = db_getLastInsertedId(); $lexem->regenerateParadigm(); $lexems[] = $lexem; } $d->save(); $d->id = db_getLastInsertedId(); foreach ($lexems as $l) { $ldm = LexemDefinitionMap::create($l->id, $d->id); $ldm->save(); } } else { print "Skipping [{$d->internalRep}]\n"; } }
$l = Lexem::createFromDbRow($dbRow); $seen++; $defs = Definition::loadByLexemId($l->id); if (count($defs)) { $parts = split(' ', $l->form); print text_padRight($l->form, 30); foreach ($parts as $part) { $part = trim($part); if (!$part || in_array($part, $stopWords)) { // Skip common words continue; } print '['; $baseForms = Lexem::searchWordlists($part, true); if (!count($baseForms)) { $baseForm = Lexem::create($part, 'T', '1', ''); $baseForm->comment = "Provine din despărțirea lexemului [{$l->form}]"; $baseForm->noAccent = true; $baseForm->save(); $baseForm->id = db_getLastInsertedId(); $baseForm->regenerateParadigm(); $baseForms[] = $baseForm; } // Associate every definition with every lexem foreach ($baseForms as $baseForm) { print $baseForm->form . ' '; foreach ($defs as $def) { LexemDefinitionMap::associate($baseForm->id, $def->id); } } print ']';