Exemple #1
0
/**
 * Attaches unassociated lexems to the definitions of the verbs they are
 * inflected forms of.
 *
 * For every lexem returned by Lexem::loadUnassociated(), the word list entries
 * sharing its unaccented form are inspected. Each entry whose inflection id is
 * 50 or 52 identifies a verb (NOTE(review): presumably the long infinitive and
 * participle inflections, going by this function's name -- confirm). Every
 * definition mapped to that verb is then also mapped to the lexem, unless such
 * a mapping already exists.
 *
 * Prints one "lexem :: verb" line per matching entry and a summary line at the
 * end. A lexem is counted once no matter how many entries matched it.
 */
function associateLongInfinitivesAndParticiples()
{
    $unassociated = Lexem::loadUnassociated();
    $matchedCount = 0;

    foreach ($unassociated as $candidate) {
        $hasMatch = false;

        foreach (WordList::loadByUnaccented($candidate->unaccented) as $entry) {
            // Only inflections 50 and 52 are of interest here.
            if ($entry->inflectionId != 50 && $entry->inflectionId != 52) {
                continue;
            }

            $verb = Lexem::load($entry->lexemId);
            print "{$candidate->unaccented} :: {$verb->unaccented}\n";
            $hasMatch = true;

            // Copy the verb's definition mappings over to the candidate lexem.
            foreach (LexemDefinitionMap::loadByLexemId($verb->id) as $verbMapping) {
                $definitionId = $verbMapping->definitionId;
                if (LexemDefinitionMap::load($candidate->id, $definitionId)) {
                    // Already mapped; nothing to do.
                    continue;
                }
                LexemDefinitionMap::create($candidate->id, $definitionId)->save();
            }
        }

        if ($hasMatch) {
            $matchedCount += 1;
        }
    }

    $total = count($unassociated);
    print "Matched {$matchedCount} of " . $total . " total lexems.\n";
}
<?php

require_once '../../phplib/util.php';
// Report script: for a selected set of lexems, lists every *other* lexem that
// has a word list entry with the same unaccented form under one of the
// inflection ids below.

// NOTE(review): presumably the ids of the plural inflections, going by the
// variable name -- confirm against the inflection table.
$PLURAL_INFLECTIONS = array(3, 11, 19, 27, 35);
// NOTE(review): not read in the visible part of this script; presumably
// consumed by getSourcesForLexem(), which is defined outside this view.
$SOURCES = loadSources();
// Not referenced in the visible part of this script.
define('DRY_RUN', true);

// Lexems with model type "T", plus MQ/FQ/NQ lexems whose restriction contains
// "P", ordered by their unaccented form.
$query = 'select * from lexems '
    . 'where (lexem_model_type = "T") '
    . 'or (lexem_model_type in ("MQ", "FQ", "NQ") and lexem_restriction like "%P%") '
    . 'order by lexem_neaccentuat';
$dbResult = mysql_query($query);
if (!$dbResult) {
    // mysql_query() returns false on failure. Without this check the loop
    // below would only emit warnings and the script would end with a
    // misleading "0 lexeme semnalate." line.
    print "Query failed: " . mysql_error() . "\n";
    exit(1);
}

$found = 0;
while ($row = mysql_fetch_assoc($dbResult)) {
    $lexem = Lexem::createFromDbRow($row);
    $wordLists = WordList::loadByUnaccented($lexem->unaccented);

    // Keep only the listed inflections that belong to a different lexem.
    $matchingWordLists = array();
    foreach ($wordLists as $wl) {
        if (in_array($wl->inflectionId, $PLURAL_INFLECTIONS) && $wl->lexemId != $lexem->id) {
            $matchingWordLists[] = $wl;
        }
    }

    if (count($matchingWordLists)) {
        // Header line for the lexem itself, followed by one indented line per
        // colliding lexem; each line ends with the lexem's admin edit link.
        $sources = getSourcesForLexem($lexem);
        print "{$lexem->unaccented} {$lexem->modelType}{$lexem->modelNumber}{$lexem->restriction} {$sources} " . "http://dexonline.ro/admin/lexemEdit.php?lexemId={$lexem->id}\n";
        foreach ($matchingWordLists as $wl) {
            $match = Lexem::load($wl->lexemId);
            $sources = getSourcesForLexem($match);
            print "    * {$match->unaccented} {$match->modelType}{$match->modelNumber}{$match->restriction} {$sources}" . "http://dexonline.ro/admin/lexemEdit.php?lexemId={$match->id}\n";
        }
        $found++;
    }
}
print "{$found} lexeme semnalate.\n";
/**************************************************************/
function loadSources()
Exemple #3
0
/**
 * Returns the unaccented form stored in the wordlist table for the given lexem
 * under inflection 33, variant 0.
 *
 * NOTE(review): inflection 33 is presumably the feminine form, going by this
 * function's name -- confirm against the inflection table.
 *
 * @param int|string $lexemId Id of the lexem; coerced to an integer.
 * @return string The unaccented form, or '' when no matching row yields a
 *                WordList object.
 */
function loadFeminineForm($lexemId)
{
    // The id is interpolated unquoted into the SQL below, so force it to an
    // integer: an empty or non-numeric value would otherwise produce malformed
    // (or injected) SQL.
    $lexemId = (int) $lexemId;
    $query = "select * from wordlist where wl_lexem = {$lexemId} " . "and wl_analyse = 33 and wl_variant = 0";
    $dbRow = db_fetchSingleRow(mysql_query($query));
    $wl = WordList::createFromDbRow($dbRow);

    return $wl ? $wl->unaccented : '';
}
<?php

require_once "../../phplib/util.php";
// Report script: prints every form found by the second wordlist query (run
// after db_init() is called for 'flexonline') that is absent from the
// wordlist loaded by the first query.

// The whole wordlist is held in memory, so raise the time and memory limits.
ini_set('max_execution_time', '3600');
ini_set("memory_limit", "512000000");

print "Running first wordlist query...\n";
$dbResult = mysql_query("select * from wordlist");
if (!$dbResult) {
    // mysql_query() returns false on failure; stop here rather than build an
    // empty form map and then report every form as missing.
    print "First wordlist query failed: " . mysql_error() . "\n";
    exit(1);
}

print "Building form map...\n";
// Set of known forms: form => true.
$formMap = array();
while ($dbRow = mysql_fetch_assoc($dbResult)) {
    $wl = WordList::createFromDbRow($dbRow);
    $formMap[$wl->form] = true;
}

// Hash table of id->description
$inflIdDescrMap = buildInflectionMap();

// NOTE(review): presumably re-points the connection at the 'flexonline'
// database, so the query below reads from there -- confirm in db_init().
db_init(pref_getDbHost(), pref_getDbUser(), pref_getDbPassword(), 'flexonline');

// Hash table of Radu's inflection id -> Cata's inflection id.
$rcInflectionMap = matchInflections($inflIdDescrMap);

print "Running second wordlist query...\n";
$query = 'select wl_form, lexem_forma, wl_analyse ' . 'from wordlist, lexems ' . 'where wl_lexem = lexem_id ';
$dbResult = mysql_query($query);
if (!$dbResult) {
    // Same reasoning as above: a failed query must not look like "no
    // differences found".
    print "Second wordlist query failed: " . mysql_error() . "\n";
    exit(1);
}

while ($dbRow = mysql_fetch_assoc($dbResult)) {
    list($form, $lexem, $inflId) = buildRWordList($dbRow);
    if (!array_key_exists($form, $formMap)) {
        // The inflection id is translated through $rcInflectionMap before its
        // description is looked up.
        print "Form: {$form} (Lexem: {$lexem} / " . $inflIdDescrMap[$rcInflectionMap[$inflId]] . ")\n";
    }
}
/****************************************************************************/
function buildInflectionMap()
{
    $m = array();
$models = Model::loadAll();
$tempModel = Model::loadTemporary();
foreach ($models as $model) {
    if ($model->modelType != 'MF' && $model->modelType != 'VT' && $model->id != $tempModel->id && $model->id == 130) {
        print "Testing model " . $model->getName() . " (id = " . $model->id . ")\n";
        $lexems = Lexem::loadByModelId($model->id);
        $transfMap = array();
        foreach ($lexems as $lexem) {
            if (array_key_exists($lexem->id, $knownBadLexems)) {
                print "  Skipping known bad lexem " . $lexem->id . " (" . $lexem->unaccented . ")\n";
                continue;
            }
            if (!$lexem->isLoc) {
                continue;
            }
            $wls = WordList::loadByLexemId($lexem->id);
            $ignore = false;
            for ($i = 0; $i < count($wls) && !$ignore; $i++) {
                $ignore = ord($wls[$i]->form) == 0 || text_contains($wls[$i]->form, "'");
            }
            if ($ignore) {
                print "  Ignoring lexem " . $lexem->id . " (" . $lexem->unaccented . "): paradigm contains accents or null characters\n";
            }
            if (!$ignore) {
                if (!count($transfMap)) {
                    // Create model_descriptions by comparing the first lexem to its
                    // existing wordlists.
                    print "  Using lexem '" . $lexem->unaccented . "' as exponent\n";
                    foreach ($wls as $wl) {
                        $transforms = text_extractTransforms($lexem->unaccented, $wl->form);
                        $transfMap[$wl->inflectionId] = $transforms;
 /**
  * Delegates to WordList::get_sharing_info_of_list() and hands its result
  * back unchanged.
  *
  * @param mixed $user_id      Forwarded as the first argument.
  * @param mixed $word_list_id Forwarded as the second argument.
  * @return mixed Whatever WordList::get_sharing_info_of_list() returns.
  */
 static function get_sharing_info_of_list($user_id, $word_list_id)
 {
     $sharing_info = WordList::get_sharing_info_of_list($user_id, $word_list_id);

     return $sharing_info;
 }
Exemple #7
0
 //print "Examining {$def->internalRep}\n";
 for ($i = 0; $i < $len; $i++) {
     $c = text_getCharAt($rep, $i);
     if (!text_isUnicodeLetter($prevC) && $c == '-' && $i <= MAX_LEN) {
         $j = $i + 1;
         while (text_isUnicodeLetter(text_getCharAt($rep, $j))) {
             $j++;
         }
         $chunk = mb_substr($rep, $i, $j - $i);
         if ($chunk != '-') {
             $suffix = mb_substr($chunk, 1);
             //print "{$def->id} [{$def->lexicon}] $i [$chunk]\n";
             if ($lexem) {
                 $matchingForm = null;
                 foreach ($inflections as $inflId) {
                     $wls = WordList::loadByLexemIdInflectionId($lexem->id, $inflId);
                     foreach ($wls as $wl) {
                         if (matchesWithAccent($wl->form, $suffix)) {
                             $matchingForm = $wl->form;
                             //print "Matching [{$wl->form}] to [$chunk]\n";
                         }
                     }
                 }
                 if ($matchingForm) {
                     $matchingFormImpl = str_replace($GLOBALS['text_explicitAccent'], $GLOBALS['text_accented'], $matchingForm);
                     // Convert to uppercase when the suffix itself is uppercase
                     if ($suffix == text_unicodeToUpper($suffix)) {
                         $matchingFormImpl = text_unicodeToUpper($matchingFormImpl);
                     }
                     $newRep .= $matchingFormImpl;
                 } else {