/**
 * Copies definition associations from verbs onto unassociated lexems.
 *
 * For every lexem returned by Lexem::loadUnassociated(), looks up the
 * wordlist entries that share its unaccented form. Each entry whose
 * inflection id is 50 or 52 (presumably the long infinitive and participle
 * inflections, going by the function name -- TODO confirm) identifies a
 * verb; every definition mapped to that verb is then also mapped to the
 * unassociated lexem, unless that mapping already exists.
 *
 * Prints one line per lexem/verb pairing and a final summary line.
 */
function associateLongInfinitivesAndParticiples() {
  $unassociated = Lexem::loadUnassociated();
  $matchCount = 0;

  foreach ($unassociated as $lexem) {
    $hasMatch = false;

    foreach (WordList::loadByUnaccented($lexem->unaccented) as $entry) {
      // Only the two verb inflections of interest qualify.
      if ($entry->inflectionId != 50 && $entry->inflectionId != 52) {
        continue;
      }

      $verb = Lexem::load($entry->lexemId);
      print "{$lexem->unaccented} :: {$verb->unaccented}\n";
      $hasMatch = true;

      foreach (LexemDefinitionMap::loadByLexemId($verb->id) as $verbMapping) {
        // Skip definitions that are already linked to this lexem.
        if (LexemDefinitionMap::load($lexem->id, $verbMapping->definitionId)) {
          continue;
        }
        LexemDefinitionMap::create($lexem->id, $verbMapping->definitionId)->save();
      }
    }

    // A lexem counts once no matter how many verb forms it matched.
    if ($hasMatch) {
      $matchCount++;
    }
  }

  print "Matched {$matchCount} of " . count($unassociated) . " total lexems.\n";
}
<?php
/*
 * Report script: flags lexems whose unaccented form also occurs as a
 * plural inflected form of a different lexem.
 *
 * - Selects lexems with model type "T", or with model type MQ/FQ/NQ and a
 *   restriction containing "P", ordered by lexem_neaccentuat.
 * - For each one, loads the wordlist entries sharing its unaccented form and
 *   keeps those whose inflection id is listed in $PLURAL_INFLECTIONS and
 *   whose lexem id differs from the current lexem.
 * - When at least one such entry exists, prints the lexem followed by every
 *   matching lexem (prefixed with " * "), each with its sources and its
 *   admin edit URL, and increments the counter reported on the last line.
 *
 * NOTE(review): DRY_RUN is defined but never read in the visible code, and
 * $SOURCES is not referenced directly here -- presumably it is consumed by
 * getSourcesForLexem(); confirm against the rest of the file.
 */
require_once '../../phplib/util.php'; $PLURAL_INFLECTIONS = array(3, 11, 19, 27, 35); $SOURCES = loadSources(); define('DRY_RUN', true); $dbResult = mysql_query('select * from lexems where (lexem_model_type = "T") or (lexem_model_type in ("MQ", "FQ", "NQ") and lexem_restriction like "%P%")' . 'order by lexem_neaccentuat'); $found = 0; while ($row = mysql_fetch_assoc($dbResult)) { $lexem = Lexem::createFromDbRow($row); $wordLists = WordList::loadByUnaccented($lexem->unaccented); $matchingWordLists = array(); foreach ($wordLists as $wl) { if (in_array($wl->inflectionId, $PLURAL_INFLECTIONS) && $wl->lexemId != $lexem->id) { $matchingWordLists[] = $wl; } } if (count($matchingWordLists)) { $sources = getSourcesForLexem($lexem); print "{$lexem->unaccented} {$lexem->modelType}{$lexem->modelNumber}{$lexem->restriction} {$sources} " . "http://dexonline.ro/admin/lexemEdit.php?lexemId={$lexem->id}\n"; foreach ($matchingWordLists as $wl) { $match = Lexem::load($wl->lexemId); $sources = getSourcesForLexem($match); print " * {$match->unaccented} {$match->modelType}{$match->modelNumber}{$match->restriction} {$sources}" . "http://dexonline.ro/admin/lexemEdit.php?lexemId={$match->id}\n"; } $found++; } } print "{$found} lexeme semnalate.\n"; /**************************************************************/ function loadSources()
/**
 * Returns the unaccented text of one specific inflected form of a lexem.
 *
 * Queries the wordlist table for the row belonging to the given lexem with
 * wl_analyse = 33 and wl_variant = 0 (presumably the primary feminine form,
 * going by the function name -- TODO confirm the meaning of code 33).
 *
 * @param int|string $lexemId Id of the lexem; coerced to an integer before
 *                            it is placed in the query.
 * @return string The form's unaccented text, or '' when no such row exists.
 */
function loadFeminineForm($lexemId) {
  // Coerce to int so the value cannot change the shape of the SQL statement
  // (and an empty id no longer produces a malformed query).
  $lexemId = (int) $lexemId;

  $query = "select * from wordlist where wl_lexem = {$lexemId} " .
    "and wl_analyse = 33 and wl_variant = 0";
  $dbRow = db_fetchSingleRow(mysql_query($query));

  // Decide "not found" on the row itself instead of relying on
  // WordList::createFromDbRow() returning a falsy value for a missing row.
  if (!$dbRow) {
    return '';
  }

  $wl = WordList::createFromDbRow($dbRow);
  if (!$wl) {
    return '';
  }
  return $wl->unaccented;
}
<?php
/*
 * Comparison script: prints the forms found through the second connection
 * that are absent from the wordlist table read through the first one.
 *
 * - Raises max_execution_time and memory_limit for the run.
 * - Reads every row of wordlist and records each form as a key of $formMap.
 * - Builds $inflIdDescrMap via buildInflectionMap(), then calls db_init()
 *   with 'flexonline' as its last argument (presumably the name of the
 *   second database -- TODO confirm) and derives $rcInflectionMap via
 *   matchInflections().
 * - Runs a wordlist/lexems join, converts each row with buildRWordList(),
 *   and prints every form that is not a key of $formMap, together with its
 *   lexem and the description looked up through the two inflection maps.
 */
require_once "../../phplib/util.php"; ini_set('max_execution_time', '3600'); ini_set("memory_limit", "512000000"); print "Running first wordlist query...\n"; $dbResult = mysql_query("select * from wordlist"); print "Building form map...\n"; $formMap = array(); while ($dbRow = mysql_fetch_assoc($dbResult)) { $wl = WordList::createFromDbRow($dbRow); $formMap[$wl->form] = true; } // Hash table of id->description $inflIdDescrMap = buildInflectionMap(); db_init(pref_getDbHost(), pref_getDbUser(), pref_getDbPassword(), 'flexonline'); // Hash table of Radu's inflection id -> Cata's inflection id. $rcInflectionMap = matchInflections($inflIdDescrMap); print "Running second wordlist query...\n"; $query = 'select wl_form, lexem_forma, wl_analyse ' . 'from wordlist, lexems ' . 'where wl_lexem = lexem_id '; $dbResult = mysql_query($query); while ($dbRow = mysql_fetch_assoc($dbResult)) { list($form, $lexem, $inflId) = buildRWordList($dbRow); if (!array_key_exists($form, $formMap)) { print "Form: {$form} (Lexem: {$lexem} / " . $inflIdDescrMap[$rcInflectionMap[$inflId]] . ")\n"; } } /****************************************************************************/ function buildInflectionMap() { $m = array();
$models = Model::loadAll(); $tempModel = Model::loadTemporary(); foreach ($models as $model) { if ($model->modelType != 'MF' && $model->modelType != 'VT' && $model->id != $tempModel->id && $model->id == 130) { print "Testing model " . $model->getName() . " (id = " . $model->id . ")\n"; $lexems = Lexem::loadByModelId($model->id); $transfMap = array(); foreach ($lexems as $lexem) { if (array_key_exists($lexem->id, $knownBadLexems)) { print " Skipping known bad lexem " . $lexem->id . " (" . $lexem->unaccented . ")\n"; continue; } if (!$lexem->isLoc) { continue; } $wls = WordList::loadByLexemId($lexem->id); $ignore = false; for ($i = 0; $i < count($wls) && !$ignore; $i++) { $ignore = ord($wls[$i]->form) == 0 || text_contains($wls[$i]->form, "'"); } if ($ignore) { print " Ignoring lexem " . $lexem->id . " (" . $lexem->unaccented . "): paradigm contains accents or null characters\n"; } if (!$ignore) { if (!count($transfMap)) { // Create model_descriptions by comparing the first lexem to its // existing wordlists. print " Using lexem '" . $lexem->unaccented . "' as exponent\n"; foreach ($wls as $wl) { $transforms = text_extractTransforms($lexem->unaccented, $wl->form); $transfMap[$wl->inflectionId] = $transforms;
/**
 * Forwards to WordList::get_sharing_info_of_list() with the same two
 * arguments and returns its result unchanged.
 *
 * NOTE(review): the enclosing class is not visible here. If this method is
 * itself declared on WordList, the call below resolves to this same method
 * and recurses without end -- confirm it lives on a separate facade class.
 *
 * @param mixed $user_id      Passed through as the first argument.
 * @param mixed $word_list_id Passed through as the second argument.
 * @return mixed Whatever WordList::get_sharing_info_of_list() returns.
 */
static function get_sharing_info_of_list($user_id, $word_list_id) { return WordList::get_sharing_info_of_list($user_id, $word_list_id); }
//print "Examining {$def->internalRep}\n"; for ($i = 0; $i < $len; $i++) { $c = text_getCharAt($rep, $i); if (!text_isUnicodeLetter($prevC) && $c == '-' && $i <= MAX_LEN) { $j = $i + 1; while (text_isUnicodeLetter(text_getCharAt($rep, $j))) { $j++; } $chunk = mb_substr($rep, $i, $j - $i); if ($chunk != '-') { $suffix = mb_substr($chunk, 1); //print "{$def->id} [{$def->lexicon}] $i [$chunk]\n"; if ($lexem) { $matchingForm = null; foreach ($inflections as $inflId) { $wls = WordList::loadByLexemIdInflectionId($lexem->id, $inflId); foreach ($wls as $wl) { if (matchesWithAccent($wl->form, $suffix)) { $matchingForm = $wl->form; //print "Matching [{$wl->form}] to [$chunk]\n"; } } } if ($matchingForm) { $matchingFormImpl = str_replace($GLOBALS['text_explicitAccent'], $GLOBALS['text_accented'], $matchingForm); // Convert to uppercase when the suffix itself is uppercase if ($suffix == text_unicodeToUpper($suffix)) { $matchingFormImpl = text_unicodeToUpper($matchingFormImpl); } $newRep .= $matchingFormImpl; } else {