/**
 * Rebuilds the lexem <-> definition map from the concept data.
 *
 * Calls LexemDefinitionMap::deleteAll() first, then iterates over every row
 * returned by db_selectAllConcepts(). For each concept that has definitions,
 * the concept's distinct word names are looked up via
 * Lexem::loadByUnaccented(); when no lexem matches, a new one is created and
 * saved so that no word is lost during the migration. Every resulting lexem is
 * then linked to every definition of the concept, skipping pairs for which
 * LexemDefinitionMap::load() already returns a mapping.
 *
 * Prints the number of concepts up front, a progress line after every 1000
 * concepts, and a final total.
 */
function createLexemDefinitionMap() {
  LexemDefinitionMap::deleteAll();

  $conceptRows = db_selectAllConcepts();
  $conceptCount = mysql_num_rows($conceptRows);
  print "Migrating " . $conceptCount . " concepts...\n";

  $seen = 0;
  while ($conceptRow = mysql_fetch_assoc($conceptRows)) {
    $concept = new Concept();
    $concept->populateFromDbRow($conceptRow);

    $conceptWords = Word::loadByConceptId($concept->id);
    $conceptDefinitions = Definition::loadByConceptId($concept->id);

    if ($conceptDefinitions) {
      // De-duplicate the word names by using them as array keys.
      $nameSet = array();
      foreach ($conceptWords as $conceptWord) {
        $nameSet[$conceptWord->name] = 1;
      }

      // Resolve each distinct name to its lexems, then link every one of
      // those lexems to every definition of the concept.
      foreach (array_keys($nameSet) as $wordName) {
        $matchingLexems = Lexem::loadByUnaccented($wordName);

        // No lexem yet for this name: create one so the word survives the
        // migration.
        if (!count($matchingLexems)) {
          $newLexem = Lexem::create($wordName, 'T', 1, '');
          $newLexem->save();
          $newLexem->id = db_getLastInsertedId();
          $matchingLexems[] = $newLexem;
          $newLexem->regenerateParadigm();
        }

        foreach ($matchingLexems as $matchingLexem) {
          foreach ($conceptDefinitions as $conceptDefinition) {
            $existingLink = LexemDefinitionMap::load($matchingLexem->id, $conceptDefinition->id);
            if ($existingLink) {
              // Pair is already mapped; nothing to insert.
              continue;
            }
            $newLink = LexemDefinitionMap::create($matchingLexem->id, $conceptDefinition->id);
            $newLink->save();
          }
        }
      }
    }

    // Progress report every 1000 concepts.
    $seen++;
    if ($seen % 1000 == 0) {
      print "Seen: {$seen};\n";
    }
  }

  print "Seen: {$seen};\n";
}
$dbResult = mysql_query($query); $fixed = 0; $prevUnaccented = ''; while (($dbRow = mysql_fetch_assoc($dbResult)) != null) { $l = Lexem::createFromDbRow($dbRow); if ($prevUnaccented != $l->unaccented) { $defs = loadDefinitions($l->id); if (count($defs)) { $isPart = isParticiple($l); $isAdj = isAdjective($defs) || $isPart; $isMf = isMfNoun($defs); $isN = isNNoun($defs); $isM = isMNoun($defs) && !$isMf; $femForm = loadFeminineForm($l->id); $isF = countDefinitionsByLexicon($femForm); $homonyms = Lexem::loadByUnaccented($l->unaccented); // Very often, the participle also acts os homonym. This is normal. // Report other cases of N because we don't have mapping tables for N. if (count($homonyms) == 1 && $isN && $isPart) { $isN = false; } // Report cases which (1) contain a N lexem outside of the above case, OR // (2) Do not seem to generate all the M and F forms. if (!$isAdj && !$isMf && !($isM && $isF) || $isN) { //printLexem($homonyms, $isAdj, $isMf, $isM, $isN, $isF); } else { $fixed += replaceLexems($homonyms, $isAdj, $isMf, $isM, $isN, $isF, $femForm); } } } $prevUnaccented = $l->unaccented;
assert(count($parts) >= 2); if ($parts[count($parts) - 1] == 'II.') { $d->lexicon = text_internalizeWordName($parts[0]); $d->status = ST_PENDING; } else { if (count($parts) == 2 && text_endsWith($parts[1], '/')) { // Use the first part only, because the second one is the pronunciation $d->lexicon = text_internalizeWordName($parts[0]); //print "Using [{$d->lexicon}] for " . mb_substr($d->internalRep, 0, 50) . "\n"; } else { $d->status = ST_PENDING; } } } if ($d->lexicon) { $lexems = Lexem::loadByUnaccented($d->lexicon); if (!count($lexems)) { $lexem = Lexem::create($d->lexicon, 'T', '1', ''); $lexem->save(); $lexem->id = db_getLastInsertedId(); $lexem->regenerateParadigm(); $lexems[] = $lexem; } $d->save(); $d->id = db_getLastInsertedId(); foreach ($lexems as $l) { $ldm = LexemDefinitionMap::create($l->id, $d->id); $ldm->save(); } } else { print "Skipping [{$d->internalRep}]\n";