// Maintenance script: walks a set of lexems and tries to merge or split
// those whose form looks like an inflected or compound variant of lexems
// that already exist.

// Make a failed assert() terminate the script instead of continuing.
assert_options(ASSERT_BAIL, 1);
// NOTE(review): project helper; presumably silences debug output -- confirm.
debug_off();

// Select every lexem whose id is NOT mapped (via LexemDefinitionMap) to a
// definition with Status = 0 coming from a source other than 6 or 8.
// NOTE(review): the meaning of source ids 6 and 8 and of Status = 0 is not
// visible here -- confirm against the schema.
$dbResult = mysql_query("select * from lexems where lexem_id not in " .
                        "(select LexemId from LexemDefinitionMap, Definition " .
                        "where DefinitionId = Definition.Id " .
                        "and SourceId not in (6, 8) and Status = 0)");
// Number of lexems examined that have at least one definition.
$seen = 0;
// Number of lexems handled so far; incremented below each time a lexem is
// deleted after its definitions were re-associated.
$split = 0;

// mysql_fetch_assoc() returns false once the rows are exhausted; the loose
// comparison with null ends the loop in that case.
while (($dbRow = mysql_fetch_assoc($dbResult)) != null) {
  $l = Lexem::createFromDbRow($dbRow);
  $defs = Definition::loadByLexemId($l->id);
  // Lexems without any definitions are ignored (and not counted in $seen).
  if (!count($defs)) {
    continue;
  }
  $seen++;

  // Remove a trailing "ul" from the form where possible, i.e. when the
  // shortened form matches at least one existing lexem.
  if (text_endsWith($l->form, 'ul')) {
    // Drop the last two bytes ("ul" is plain ASCII, so this is exactly the
    // suffix that was just matched).
    $form = substr($l->form, 0, strlen($l->form) - 2);
    $otherLexems = Lexem::loadByForm($form);
    if (count($otherLexems)) {
      print "REMOVING -UL FROM: {$l->form}\n";
      // Attach every definition of this lexem to each lexem that has the
      // shortened form, then delete this lexem.
      foreach ($otherLexems as $otherLexem) {
        foreach ($defs as $def) {
          LexemDefinitionMap::associate($otherLexem->id, $def->id);
        }
      }
      $l->delete();
      $split++;
      continue;
    }
  }

  // Split the word in two, if it leads to two existing lexems, and if both
  // have at least three letters.
  // Only attempted when the unaccented form has at least 8 characters and
  // the form ends in one of the suffixes listed below.
  if (mb_strlen($l->unaccented) >= 8 &&
      (text_endsWith($l->form, 'lui') ||
       text_endsWith($l->form, 'ei') ||
       text_endsWith($l->form, 'ii') ||
       text_endsWith($l->form, 'elor') ||
       text_endsWith($l->form, 'ilor') ||
       text_endsWith($l->form, 'asă') ||
       text_endsWith($l->form, 'scă'))) {