Ejemplo n.º 1
0
// Script setup: configure assertions, run the lexem selection query and
// initialise the counters used by the processing loop that follows.

// Terminate the script as soon as any assert() fails.
assert_options(ASSERT_BAIL, 1);
// Project helper defined elsewhere; presumably turns debug output off -- TODO confirm.
debug_off();

// Select every row of `lexems` whose lexem_id is NOT among the LexemIds that
// LexemDefinitionMap links to a Definition with Status = 0 and a SourceId
// other than 6 or 8.
$dbResult = mysql_query(
    "select * from lexems where lexem_id not in "
    . "(select LexemId from LexemDefinitionMap, Definition "
    . "where DefinitionId = Definition.Id "
    . "and SourceId not in (6, 8) and Status = 0)"
);
if ($dbResult === false) {
    // mysql_query() returns false on failure. Without this check the fetch
    // loop below would just see no rows and the script would silently do
    // nothing, hiding the real problem.
    die("Query failed: " . mysql_error() . "\n");
}

// $seen is incremented for each lexem that has at least one definition;
// $split is incremented each time a lexem is merged into others and deleted.
$seen = 0;
$split = 0;
while (($dbRow = mysql_fetch_assoc($dbResult)) != null) {
    $l = Lexem::createFromDbRow($dbRow);
    $defs = Definition::loadByLexemId($l->id);
    if (!count($defs)) {
        continue;
    }
    $seen++;
    // Remove the -ul accent where possible.
    if (text_endsWith($l->form, 'ul')) {
        $form = substr($l->form, 0, strlen($l->form) - 2);
        $otherLexems = Lexem::loadByForm($form);
        if (count($otherLexems)) {
            print "REMOVING -UL FROM: {$l->form}\n";
            foreach ($otherLexems as $otherLexem) {
                foreach ($defs as $def) {
                    LexemDefinitionMap::associate($otherLexem->id, $def->id);
                }
            }
            $l->delete();
            $split++;
            continue;
        }
    }
    // Split the word in two, if it leads to two existing lexems, and if both
    // have at least three letters.
    if (mb_strlen($l->unaccented) >= 8 && (text_endsWith($l->form, 'lui') || text_endsWith($l->form, 'ei') || text_endsWith($l->form, 'ii') || text_endsWith($l->form, 'elor') || text_endsWith($l->form, 'ilor') || text_endsWith($l->form, 'asă') || text_endsWith($l->form, 'scă'))) {