Beispiel #1
/**
 * Migration routine: links lexems to definitions, concept by concept.
 *
 * Iterates over the result of db_selectAllConcepts(). For each row it loads the
 * words and definitions by concept id, reduces the words to distinct names,
 * resolves every name to lexems with Lexem::loadByUnaccented() (creating a
 * 'T' / 1 lexem when there are none) and loads or creates a LexemDefinitionMap
 * for every (lexem, definition) pair. Progress is printed to standard output.
 *
 * NOTE(review): this listing looks truncated -- block braces are missing and
 * $seen is never incremented in the visible lines; confirm against the full file.
 */
function createLexemDefinitionMap()
    $dbResult = db_selectAllConcepts();
    print "Migrating " . mysql_num_rows($dbResult) . " concepts...\n";
    $seen = 0;
    while ($dbRow = mysql_fetch_assoc($dbResult)) {
        // NOTE(review): $concept is not populated from $dbRow in the visible
        // code, so $concept->id is presumably unset here -- verify.
        $concept = new Concept();
        $words = Word::loadByConceptId($concept->id);
        $definitions = Definition::loadByConceptId($concept->id);
        if ($definitions) {
            // Select distinct words
            // (the word name is used as the array key, so duplicates collapse)
            $distinctWords = array();
            foreach ($words as $word) {
                $distinctWords[$word->name] = 1;
            // For every word, look up all the lexems. Then map each of those lexems
            // to every definition.
            foreach ($distinctWords as $word => $ignored) {
                $lexems = Lexem::loadByUnaccented($word);
                // Create lexem if necessary so that we don't lose any words during the
                // migration
                if (count($lexems) == 0) {
                    $lexem = Lexem::create($word, 'T', 1, '');
                    // The id is taken from the last database insert.
                    $lexem->id = db_getLastInsertedId();
                    $lexems[] = $lexem;
                foreach ($lexems as $lexem) {
                    foreach ($definitions as $definition) {
                        // Only create the mapping when it does not exist yet.
                        $ldm = LexemDefinitionMap::load($lexem->id, $definition->id);
                        if (!$ldm) {
                            $ldm = LexemDefinitionMap::create($lexem->id, $definition->id);
        // Progress report every 1000 concepts.
        if ($seen % 1000 == 0) {
            print "Seen: {$seen};\n";
    // Final count.
    print "Seen: {$seen};\n";
Beispiel #2
 // Extract the text enclosed by the first two '@' characters of the
 // definition's internal representation. The asserts require the
 // representation to start with '@' and to contain a second '@'.
 $firstAt = strpos($def->internalRep, '@');
 $secondAt = strpos($def->internalRep, '@', $firstAt + 1);
 assert($firstAt === 0);
 assert($secondAt !== false);
 $text = trim(substr($def->internalRep, $firstAt + 1, $secondAt - $firstAt - 1));
 $normText = text_removeAccents(text_unicodeToLower($text));
 // Act only when the normalized text is hyphenated and, with the hyphens
 // removed, equals the lexem's unaccented form.
 if (text_contains($normText, '-') && str_replace('-', '', $normText) == $l->unaccented) {
     print "[{$l->unaccented}] [{$normText}]\n";
     // explode() replaces split(), which was deprecated in PHP 5.3 and removed
     // in PHP 7; for the literal '-' delimiter the result is the same.
     $parts = explode('-', $normText);
     foreach ($parts as $part) {
         $lexems = Lexem::searchWordlists($part, true);
         // No lexem found for this part: create a 'T' / '1' lexem for it.
         if (!count($lexems)) {
             print "Creez lexemul [{$part}]\n";
             $lexem = Lexem::create($part, 'T', '1', '');
             $lexem->comment = 'Creat pentru despărțirea în cuvinte a unui ' . 'alt lexem';
             $lexem->id = db_getLastInsertedId();
             $lexems[] = $lexem;
         // Now associate every lexem with every definition
         foreach ($defs as $defAssoc) {
             foreach ($lexems as $lexemAssoc) {
                 LexemDefinitionMap::associate($lexemAssoc->id, $defAssoc->id);
     // NOTE(review): the listing is cut off inside this loop; the rest of the
     // per-definition fix-up is not visible here.
     foreach ($defs as $fixDef) {
         $fixFirstAt = strpos($fixDef->internalRep, '@');
        // Mark the abbreviation review as ambiguous when any ambiguous matches
        // were collected, otherwise as complete.
        $definition->abbrevReview = count($ambiguousMatches) ? ABBREV_AMBIGUOUS : ABBREV_REVIEW_COMPLETE;
        log_userLog("Added definition {$definition->id} ({$definition->lexicon})");
        $ldms = array();
        // Associate the new definition with each submitted lexem name.
        foreach ($lexemNames as $lexemName) {
            $lexemName = addslashes(AdminStringUtil::formatLexem($lexemName));
            if ($lexemName) {
                $matches = Lexem::loadByExtendedName($lexemName);
                if (count($matches) >= 1) {
                    // Existing lexems: associate every match with the definition.
                    foreach ($matches as $match) {
                        LexemDefinitionMap::associate($match->id, $definition->id);
                        log_userLog("Associating with lexem {$match->id} ({$match->form})");
                } else {
                    // Create a new lexem.
                    $lexem = Lexem::create($lexemName, 'T', '1', '');
                    LexemDefinitionMap::associate($lexem->id, $definition->id);
                    log_userLog("Created lexem {$lexem->id} ({$lexem->form})");
        // Confirmation message shown to the contributor.
        FlashMessage::add('Definiția a fost trimisă. Un moderator o va examina în scurt timp. Vă mulțumim!', 'info');
} else {
    // Other branch (its condition is outside this listing): use the session's
    // default contribution source.
    smarty_assign('sourceId', session_getDefaultContribSourceId());
// Template data: sources that accept contributions, ordered by displayOrder.
smarty_assign('contribSources', Model::factory('Source')->where('canContribute', true)->order_by_asc('displayOrder')->find_many());
smarty_assign('page_title', 'Contribuie cu definiții');
    smarty_assign('selectedModelNumber', $modelNumber);
    // A model number of -1 selects every model of the given type; any other
    // value selects the single matching model.
    if ($modelNumber == -1) {
        $modelsToDisplay = FlexModel::loadByType($modelType);
    } else {
        $modelsToDisplay = array(Model::factory('FlexModel')->where('modelType', $modelType)->where('number', $modelNumber)->find_one());
    // $lexems and $paradigms are filled in parallel, one entry per model.
    $lexems = array();
    $paradigms = array();
    foreach ($modelsToDisplay as $m) {
        // Load by canonical model, so if $modelType is V, look for a lexem with type V or VT.
        $l = Model::factory('Lexem')->select('Lexem.*')->join('ModelType', 'modelType = code')->where('canonical', $modelType)->where('modelNumber', $m->number)->where('form', $m->exponent)->limit(1)->find_one();
        if ($l) {
            // A lexem for the model's exponent exists: use its existing forms.
            $paradigm = getExistingForms($l->id, $locVersion);
        } else {
            // No such lexem: build one from the model's exponent, flag it as
            // isLoc and get its forms from getNewForms().
            $l = Lexem::create($m->exponent, $modelType, $m->number, '');
            $l->isLoc = true;
            $paradigm = getNewForms($l, $locVersion);
        $lexems[] = $l;
        $paradigms[] = $paradigm;
    smarty_assign('modelsToDisplay', $modelsToDisplay);
    smarty_assign('lexems', $lexems);
    smarty_assign('paradigms', $paradigms);
} else {
    // Other branch (its condition is outside this listing): preselect the
    // first entry of $locVersions.
    smarty_assign('selectedLocVersion', $locVersions[0]->name);
    // LocVersion::changeDatabase($locVersion);
// Fall back to the first canonical model type when $modelType is empty.
$modelTypes = ModelType::loadCanonical();
$models = FlexModel::loadByType($modelType ? $modelType : $modelTypes[0]->code);
// Script prologue: loads the shared library and raises the memory and
// execution-time limits for this request.
require_once "../../phplib/util.php";
ini_set('memory_limit', '512M');
ini_set('max_execution_time', '3600');
// Request parameters.
$modelType = util_getRequestParameter('modelType');
$modelNumber = util_getRequestParameter('modelNumber');
$previewButton = util_getRequestParameter('previewButton');
$confirmButton = util_getRequestParameter('confirmButton');
$modelType = ModelType::canonicalize($modelType);
$inflections = Model::factory('Inflection')->where('modelType', $modelType)->order_by_asc('rank')->find_many();
// Load the original data
$model = Model::factory('FlexModel')->where('modelType', $modelType)->where('number', $modelNumber)->find_one();
$exponent = $model->exponent;
// Build a lexem from the model's exponent and generate its paradigm.
$lexem = Lexem::create($exponent, $modelType, $modelNumber, '');
$ifs = $lexem->generateParadigm();
// Indexed below as [inflectionId][variant][0].
$mdMap = ModelDescription::getByModelIdMapByInflectionIdVariantApplOrder($model->id);
// $forms maps inflection id => list of form records; every inflection gets
// an (initially empty) entry.
$forms = array();
foreach ($inflections as $infl) {
    $forms[$infl->id] = array();
foreach ($ifs as $if) {
    // isLoc and recommended are read from the first model description of
    // the form's inflection and variant.
    $forms[$if->inflectionId][] = array('form' => $if->form, 'isLoc' => $mdMap[$if->inflectionId][$if->variant][0]->isLoc, 'recommended' => $mdMap[$if->inflectionId][$if->variant][0]->recommended);
// Only model type 'V' has a participle model; other types get ''.
$participleNumber = $modelType == 'V' ? ParticipleModel::loadByVerbModel($modelNumber)->adjectiveModel : '';
if ($previewButton || $confirmButton) {
    // Load the new forms and exponent;
    $newModelNumber = util_getRequestParameter('newModelNumber');
    $newExponent = util_getRequestParameter('newExponent');
    $newDescription = util_getRequestParameter('newDescription');
/**
 * Creates a clone of the given lexem and associates it with the same definitions.
 *
 * The clone is created with the source form but with model type 'T' and model
 * number 1. The comment and noAccent fields are copied, and the description
 * is prefixed with "CLONĂ".
 *
 * NOTE(review): no explicit save call is visible in this listing; whether
 * Lexem::create() persists the clone (so that $clone->id is set before the
 * associations are made) should be confirmed.
 *
 * @param object $lexem the lexem to clone
 * @return object the newly created clone
 */
function _cloneLexem($lexem)
    $clone = Lexem::create($lexem->form, 'T', 1, '');
    $clone->comment = $lexem->comment;
    $clone->description = $lexem->description ? "CLONĂ {$lexem->description}" : "CLONĂ";
    $clone->noAccent = $lexem->noAccent;
    // Clone the definition list
    $ldms = LexemDefinitionMap::get_all_by_lexemId($lexem->id);
    foreach ($ldms as $ldm) {
        LexemDefinitionMap::associate($clone->id, $ldm->definitionId);
    return $clone;
    // Resolve every submitted lexem name to existing lexems, collecting the
    // lexems, their ids and the corresponding LexemDefinitionMap objects.
    foreach ($lexemNames as $lexemName) {
        $lexemName = trim($lexemName);
        if ($lexemName) {
            $matches = Lexem::loadByExtendedName($lexemName);
            if (count($matches) >= 1) {
                foreach ($matches as $match) {
                    // Skip lexems already collected for this definition.
                    if (!in_array($match->id, $lexemIds)) {
                        $lexemIds[] = $match->id;
                        $lexems[] = $match;
                        $ldms[] = LexemDefinitionMap::create($match->id, $definitionId);
            } else {
                // Unknown lexem name: flag an error and report it to the user.
                $hasErrors = true;
                FlashMessage::add("Lexemul <i>" . htmlentities($lexemName) . "</i> nu există. Folosiți lista de sugestii pentru a-l corecta.");
                // A lexem object is still added for the unknown name
                // (presumably so the form can redisplay it -- confirm).
                $lexems[] = Lexem::create($lexemName, 0, '', '');
                // We won't be needing $ldms since there is an error.
} else {
    // Other branch (its condition is outside this listing): load the lexems
    // currently mapped to the definition.
    // NOTE(review): the join condition ' = lexemId' has an empty left-hand
    // side; it looks like a column name was lost -- verify against the original.
    $lexems = Model::factory('Lexem')->select('Lexem.*')->join('LexemDefinitionMap', ' = lexemId')->where('definitionId', $definitionId)->find_many();
// When comment contents were submitted, create the comment record on first
// use and compare the internalized contents with what is already stored.
if ($commentContents) {
    if (!$comment) {
        $comment = Model::factory('comment')->create();
        // Fixed: this line previously assigned to the undefined variable
        // $commend, so the status was never set on the new comment.
        $comment->status = ST_ACTIVE;
        $comment->definitionId = $definitionId;
    $newContents = AdminStringUtil::internalizeDefinition($commentContents, $sourceId);
    if ($newContents != $comment->contents) {
Beispiel #8
 /**
  * Creates or updates the lexems for this lexem's long infinitive forms.
  *
  * For every inflected form of this lexem whose inflection is the one returned
  * by Inflection::loadLongInfinitive(), picks model F113 when the unaccented
  * form ends in 'are' and F107 otherwise. An existing lexem with a matching
  * form is reused and switched to that model; if none qualifies, a new lexem
  * is created and associated with the same definitions as $this. Messages for
  * the user are queued through FlashMessage.
  *
  * NOTE(review): this listing looks truncated -- closing braces are missing
  * and no save call is visible; confirm against the full file.
  */
 public function regenerateLongInfinitive()
     $infl = Inflection::loadLongInfinitive();
     $ifs = Model::factory('InflectedForm')->where('lexemId', $this->id)->where('inflectionId', $infl->id)->find_many();
     $f107 = Model::factory('FlexModel')->where('modelType', 'F')->where('number', '107')->find_one();
     $f113 = Model::factory('FlexModel')->where('modelType', 'F')->where('number', '113')->find_one();
     foreach ($ifs as $if) {
         $model = StringUtil::endsWith($if->formNoAccent, 'are') ? $f113 : $f107;
         // Load an existing lexem only if it has one of the models F113, F107 or T1. Otherwise create a new lexem.
         $lexems = Lexem::get_all_by_formNoAccent($if->formNoAccent);
         $lexem = null;
         foreach ($lexems as $l) {
             // NOTE(review): && binds tighter than ||, so this accepts any lexem
             // of type 'T' (whatever its model number) or a type 'F' lexem whose
             // number equals the chosen model's -- confirm this matches the
             // "T1" wording in the comment above.
             if ($l->modelType == 'T' || $l->modelType == 'F' && $l->modelNumber == $model->number) {
                 $lexem = $l;
             } else {
                 // A non-matching lexem is left alone; the user is told when it
                 // is outside LOC while $this is in LOC.
                 if ($this->isLoc && !$l->isLoc) {
                     FlashMessage::add("Lexemul {$l->formNoAccent} ({$l->modelType}{$l->modelNumber}), care nu este în LOC, nu a fost modificat.", 'info');
         if ($lexem) {
             // Reuse the existing lexem: switch it to the chosen F model and
             // clear its restriction.
             $lexem->modelType = 'F';
             $lexem->modelNumber = $model->number;
             $lexem->restriction = '';
             // Propagate the LOC flag from $this when the lexem lacks it.
             if ($this->isLoc && !$lexem->isLoc) {
                 $lexem->isLoc = $this->isLoc;
                 FlashMessage::add("Lexemul {$lexem->formNoAccent}, care nu era în LOC, a fost inclus automat în LOC.", 'info');
             $lexem->noAccent = false;
         } else {
             // No reusable lexem: create one from the accented form.
             $lexem = Lexem::create($if->form, 'F', $model->number, '');
             $lexem->isLoc = $this->isLoc;
             // Also associate the new lexem with the same definitions as $this.
             $ldms = LexemDefinitionMap::get_all_by_lexemId($this->id);
             foreach ($ldms as $ldm) {
                 LexemDefinitionMap::associate($lexem->id, $ldm->definitionId);
             FlashMessage::add("Am creat automat lexemul {$lexem->formNoAccent} (F{$lexem->modelNumber}) și l-am asociat cu toate definițiile verbului.", 'info');
Beispiel #9
/**
 * Recursively parses a word field into an array of lexems.
 *
 * The field is split, in this order, on: a slash that is not enclosed in
 * brackets or parentheses; a trailing "[...]" section; a trailing "(...)"
 * section (which may supply a model via parseModel() or a part of speech
 * that forces model I1 on all results); and commas. A plain word has its
 * hyphens removed, gets an apostrophe inserted at the position of its first
 * lowercase character, is lowercased and turned into a single lexem flagged
 * as isLoc.
 *
 * NOTE(review): this listing looks truncated -- closing braces are missing
 * and the bracket-counting loop has lost its counter updates; confirm against
 * the full file.
 *
 * @param string $word      the raw word field
 * @param string $modelType model type given to created lexems
 * @param string $modelNo   model number given to created lexems
 * @param string $restr     restriction given to created lexems
 * @return array the lexems produced from the field
 */
function parseWordField($word, $modelType, $modelNo, $restr)
    $word = trim($word);
    // Look for a slash not included in brackets
    $len = mb_strlen($word);
    $parCount = 0;
    $i = 0;
    $found = false;
    while ($i < $len && !$found) {
        $c = text_getCharAt($word, $i);
        if ($c == '[' || $c == '(') {
        } else {
            if ($c == ']' || $c == ')') {
        if ($c == '/' && !$parCount) {
            $found = true;
        } else {
    // Slash found: parse both sides separately and concatenate the results.
    if ($found) {
        $r1 = parseWordField(mb_substr($word, 0, $i), $modelType, $modelNo, $restr);
        $r2 = parseWordField(mb_substr($word, $i + 1), $modelType, $modelNo, $restr);
        return array_merge($r1, $r2);
    // Trailing "[...]": parse what precedes it and attach the bracketed text
    // to the last resulting lexem.
    if (text_endsWith($word, ']')) {
        $pos = mb_strrpos($word, '[');
        assert($pos !== false);
        $extra = mb_substr($word, $pos);
        $results = parseWordField(mb_substr($word, 0, $pos), $modelType, $modelNo, $restr);
        appendExtra($results[count($results) - 1], $extra);
        return $results;
    // Trailing "(...)": handled like brackets, but the text may also carry
    // model information.
    if (text_endsWith($word, ')')) {
        $pos = mb_strrpos($word, '(');
        assert($pos !== false);
        $extra = mb_substr($word, $pos);
        $results = parseWordField(mb_substr($word, 0, $pos), $modelType, $modelNo, $restr);
        // See if $extra contains a model number. If so, use it on the last model.
        list($modelType, $modelNo, $restr) = parseModel($extra);
        if ($modelType && $modelNo) {
            $results[count($results) - 1]->modelType = $modelType;
            $results[count($results) - 1]->modelNumber = $modelNo;
            $results[count($results) - 1]->restriction = $restr;
        appendExtra($results[count($results) - 1], $extra);
        // If $extra dictates a part of speech, apply it to all the lexems
        if (text_contains($extra, 's.f.inv.') || text_contains($extra, 's.f. în expr.') || text_contains($extra, 's.m.inv.') || text_contains($extra, 's.n.inv.') || text_contains($extra, 'adj.inv.') || text_contains($extra, 'adv.') || text_contains($extra, 'conj.') || text_contains($extra, 'prep.') || text_contains($extra, 'interj.')) {
            foreach ($results as $l) {
                $l->modelType = 'I';
                $l->modelNumber = '1';
                $l->restriction = '';
        return $results;
    // Comma-separated list: parse each part on its own.
    // explode() replaces split(), which was deprecated in PHP 5.3 and removed
    // in PHP 7; for the literal ',' delimiter the result is the same.
    $parts = explode(',', $word);
    if (count($parts) >= 2) {
        $results = array();
        foreach ($parts as $part) {
            $results = array_merge($results, parseWordField($part, $modelType, $modelNo, $restr));
        return $results;
    // Plain word. A hyphenated word keeps its original spelling as extra text.
    $extra = text_contains($word, '-') ? $word : '';
    $word = str_replace('-', '', $word);
    $len = mb_strlen($word);
    $found = false;
    // Insert an apostrophe at the index of the first lowercase character
    // (presumably the accent marker -- confirm against text_insert()).
    for ($i = 0; $i < $len && !$found; $i++) {
        $c = text_getCharAt($word, $i);
        if (text_isLowercase($c)) {
            $found = true;
            $word = text_insert($word, "'", $i);
    $word = text_unicodeToLower($word);
    $l = Lexem::create($word, $modelType, $modelNo, $restr);
    appendExtra($l, $extra);
    $l->isLoc = true;
    return array($l);
Beispiel #10
             // Take the lexicon from the first part and mark the definition
             // as pending.
             $d->lexicon = text_internalizeWordName($parts[0]);
             $d->status = ST_PENDING;
         } else {
             if (count($parts) == 2 && text_endsWith($parts[1], '/')) {
                 // Use the first part only, because the second one is the pronunciation
                 $d->lexicon = text_internalizeWordName($parts[0]);
                 //print "Using [{$d->lexicon}] for " . mb_substr($d->internalRep, 0, 50) . "\n";
             } else {
                 // No lexicon is set in this case; the definition is only
                 // marked as pending.
                 $d->status = ST_PENDING;
     if ($d->lexicon) {
         $lexems = Lexem::loadByUnaccented($d->lexicon);
         // No lexem has this unaccented form: create a 'T' / '1' lexem.
         if (!count($lexems)) {
             $lexem = Lexem::create($d->lexicon, 'T', '1', '');
             $lexem->id = db_getLastInsertedId();
             $lexems[] = $lexem;
         // NOTE(review): no insert of $d is visible before this line in the
         // listing; the id presumably comes from a save that was dropped --
         // confirm.
         $d->id = db_getLastInsertedId();
         // Map the definition to every lexem found or created above.
         foreach ($lexems as $l) {
             $ldm = LexemDefinitionMap::create($l->id, $d->id);
     } else {
         // Definitions without a lexicon are skipped and reported.
         print "Skipping [{$d->internalRep}]\n";
Beispiel #11
 // For a lexem that has definitions, split its form into words and associate
 // every definition with the lexems found (or created) for each word.
 // Progress is printed as "form [base forms] [base forms] ...".
 $l = Lexem::createFromDbRow($dbRow);
 $defs = Definition::loadByLexemId($l->id);
 if (count($defs)) {
     // explode() replaces split(), which was deprecated in PHP 5.3 and removed
     // in PHP 7; for the literal ' ' delimiter the result is the same.
     $parts = explode(' ', $l->form);
     print text_padRight($l->form, 30);
     foreach ($parts as $part) {
         $part = trim($part);
         if (!$part || in_array($part, $stopWords)) {
             // Skip common words
         print '[';
         $baseForms = Lexem::searchWordlists($part, true);
         // No lexem found for this word: create a 'T' / '1' lexem that
         // records where it came from.
         if (!count($baseForms)) {
             $baseForm = Lexem::create($part, 'T', '1', '');
             $baseForm->comment = "Provine din despărțirea lexemului [{$l->form}]";
             $baseForm->noAccent = true;
             $baseForm->id = db_getLastInsertedId();
             $baseForms[] = $baseForm;
         // Associate every definition with every lexem
         foreach ($baseForms as $baseForm) {
             print $baseForm->form . ' ';
             foreach ($defs as $def) {
                 LexemDefinitionMap::associate($baseForm->id, $def->id);
         print ']';