Ejemplo n.º 1
0
/**
 * Rebuilds the lexem-to-definition map from the concept data.
 *
 * Wipes every existing LexemDefinitionMap entry, then walks all concepts.
 * For each concept that has definitions, every distinct word name attached
 * to the concept is resolved to its lexems (a lexem is created on the fly
 * when none exists), and each of those lexems is linked to each of the
 * concept's definitions unless the link is already present.
 *
 * Progress is printed every 1000 concepts and once more at the end.
 *
 * @return void
 */
function createLexemDefinitionMap()
{
    LexemDefinitionMap::deleteAll();

    $conceptRows = db_selectAllConcepts();
    $conceptCount = mysql_num_rows($conceptRows);
    print "Migrating " . $conceptCount . " concepts...\n";

    $seen = 0;
    while ($row = mysql_fetch_assoc($conceptRows)) {
        $concept = new Concept();
        $concept->populateFromDbRow($row);

        $conceptWords = Word::loadByConceptId($concept->id);
        $conceptDefs = Definition::loadByConceptId($concept->id);

        if ($conceptDefs) {
            // Collapse duplicate word names by using them as array keys.
            $nameSet = array();
            foreach ($conceptWords as $conceptWord) {
                $nameSet[$conceptWord->name] = 1;
            }

            // Resolve each distinct name to its lexems, then link every
            // lexem to every definition of this concept.
            foreach (array_keys($nameSet) as $name) {
                $matches = Lexem::loadByUnaccented($name);

                // No lexem yet: create one so that the word is not lost
                // during the migration.
                if (count($matches) === 0) {
                    $created = Lexem::create($name, 'T', 1, '');
                    $created->save();
                    $created->id = db_getLastInsertedId();
                    $matches[] = $created;
                    $created->regenerateParadigm();
                }

                foreach ($matches as $match) {
                    foreach ($conceptDefs as $def) {
                        // Skip pairs that are already mapped.
                        if (LexemDefinitionMap::load($match->id, $def->id)) {
                            continue;
                        }
                        $link = LexemDefinitionMap::create($match->id, $def->id);
                        $link->save();
                    }
                }
            }
        }

        ++$seen;
        if (0 === $seen % 1000) {
            print "Seen: {$seen};\n";
        }
    }

    print "Seen: {$seen};\n";
}
Ejemplo n.º 2
0
// Walks the lexems returned by $query (built before this excerpt) and, for
// each new unaccented form that has definitions, classifies it and either
// leaves it alone or hands its homonyms to replaceLexems().
// NOTE: the closing brace of the while loop lies beyond this excerpt.
$dbResult = mysql_query($query);
// Running total of the values returned by replaceLexems().
$fixed = 0;
// Unaccented form of the previous row; used to process only the first row in
// each run of consecutive rows that share the same unaccented form.
// NOTE(review): this presumably relies on $query ordering rows by unaccented
// form -- confirm against the query.
$prevUnaccented = '';
while (($dbRow = mysql_fetch_assoc($dbResult)) != null) {
    $l = Lexem::createFromDbRow($dbRow);
    if ($prevUnaccented != $l->unaccented) {
        $defs = loadDefinitions($l->id);
        if (count($defs)) {
            $isPart = isParticiple($l);
            // A participle is always treated as an adjective as well.
            $isAdj = isAdjective($defs) || $isPart;
            $isMf = isMfNoun($defs);
            $isN = isNNoun($defs);
            // Masculine only counts when the word is not already an MF noun.
            $isM = isMNoun($defs) && !$isMf;
            $femForm = loadFeminineForm($l->id);
            // Holds the return value of countDefinitionsByLexicon(); it is
            // used as a truthy flag below and passed on to replaceLexems().
            $isF = countDefinitionsByLexicon($femForm);
            $homonyms = Lexem::loadByUnaccented($l->unaccented);
            // Very often, the participle also acts as a homonym. This is normal.
            // Report other cases of N because we don't have mapping tables for N.
            if (count($homonyms) == 1 && $isN && $isPart) {
                $isN = false;
            }
            // Report cases which (1) contain a N lexem outside of the above case, OR
            // (2) Do not seem to generate all the M and F forms.
            // Since && binds tighter than ||, the condition reads as
            // (!$isAdj && !$isMf && !($isM && $isF)) || $isN.
            // The reporting call is commented out, so this branch currently
            // does nothing and such lexems are simply skipped.
            if (!$isAdj && !$isMf && !($isM && $isF) || $isN) {
                //printLexem($homonyms, $isAdj, $isMf, $isM, $isN, $isF);
            } else {
                $fixed += replaceLexems($homonyms, $isAdj, $isMf, $isM, $isN, $isF, $femForm);
            }
        }
    }
    // Remember this form so that following rows with the same form are skipped.
    $prevUnaccented = $l->unaccented;
Ejemplo n.º 3
0
     // Excerpt: $parts and $d are set up before this point, and the enclosing
     // block opened there is closed by the lone brace further down.
     // Pick the lexicon of $d from $parts, or mark $d as pending.
     assert(count($parts) >= 2);
     if ($parts[count($parts) - 1] == 'II.') {
         // Last part is 'II.': take the lexicon from the first part, but
         // still flag the definition as pending.
         $d->lexicon = text_internalizeWordName($parts[0]);
         $d->status = ST_PENDING;
     } else {
         if (count($parts) == 2 && text_endsWith($parts[1], '/')) {
             // Use the first part only, because the second one is the pronunciation
             $d->lexicon = text_internalizeWordName($parts[0]);
             //print "Using [{$d->lexicon}] for " . mb_substr($d->internalRep, 0, 50) . "\n";
         } else {
             // Any other shape: no lexicon is assigned here, only the
             // pending status.
             $d->status = ST_PENDING;
         }
     }
 }
 // Save the definition and link it to its lexems only when a lexicon is set;
 // otherwise report it as skipped.
 if ($d->lexicon) {
     $lexems = Lexem::loadByUnaccented($d->lexicon);
     // No lexem exists for this lexicon yet: create one and build its paradigm.
     if (!count($lexems)) {
         $lexem = Lexem::create($d->lexicon, 'T', '1', '');
         $lexem->save();
         $lexem->id = db_getLastInsertedId();
         $lexem->regenerateParadigm();
         $lexems[] = $lexem;
     }
     // The definition is saved first so that its id is available for the
     // lexem-definition links created below.
     $d->save();
     $d->id = db_getLastInsertedId();
     foreach ($lexems as $l) {
         $ldm = LexemDefinitionMap::create($l->id, $d->id);
         $ldm->save();
     }
 } else {
     print "Skipping [{$d->internalRep}]\n";