Esempio n. 1
0
<?php

require_once '../../phplib/util.php';
assert_options(ASSERT_BAIL, 1);
debug_off();
$dbResult = mysql_query("select * from lexems where lexem_extra != ''");
$seen = 0;
$removed = 0;
while (($dbRow = mysql_fetch_assoc($dbResult)) != null) {
    $l = Lexem::createFromDbRow($dbRow);
    $seen++;
    $extra = $l->extra;
    if (text_startsWith($extra, '[') && text_endsWith($extra, ']')) {
        $extra = mb_substr($extra, 1, mb_strlen($extra) - 2);
    }
    if (text_startsWith($extra, '(') && text_endsWith($extra, ')')) {
        $extra = mb_substr($extra, 1, mb_strlen($extra) - 2);
    }
    // Sometimes the extra is just the model number
    $found = $extra == $l->modelType . $l->modelNumber . $l->restriction;
    // Sometimes the extra refers to a homonym's model
    if (!$found) {
        $homonyms = $l->loadHomonyms();
        foreach ($homonyms as $h) {
            $found |= $extra == $h->modelType . $h->modelNumber . $h->restriction;
        }
    }
    if (!$found) {
        $defs = Definition::loadByLexemId($l->id);
        foreach ($defs as $def) {
            $found |= text_contains($def->internalRep, $extra);
Esempio n. 2
0
     $otherLexems = Lexem::loadByForm($form);
     if (count($otherLexems)) {
         print "REMOVING -UL FROM: {$l->form}\n";
         foreach ($otherLexems as $otherLexem) {
             foreach ($defs as $def) {
                 LexemDefinitionMap::associate($otherLexem->id, $def->id);
             }
         }
         $l->delete();
         $split++;
         continue;
     }
 }
 // Split the word in two, if it leads to two existing lexems, and if both
 // have at least three letters.
 if (mb_strlen($l->unaccented) >= 8 && (text_endsWith($l->form, 'lui') || text_endsWith($l->form, 'ei') || text_endsWith($l->form, 'ii') || text_endsWith($l->form, 'elor') || text_endsWith($l->form, 'ilor') || text_endsWith($l->form, 'asă') || text_endsWith($l->form, 'scă'))) {
     $len = mb_strlen($l->unaccented);
     for ($splitPoint = 3; $splitPoint <= $len - 3; $splitPoint++) {
         $word1 = mb_substr($l->unaccented, 0, $splitPoint);
         $word2 = mb_substr($l->unaccented, $splitPoint);
         $l1 = getNouns($word1);
         $l2 = getNouns($word2);
         if (count($l1) && count($l2)) {
             print "[{$l->form}] [{$word1}] [{$word2}]\n";
             $all = array_merge($l1, $l2);
             foreach ($defs as $def) {
                 foreach ($all as $newLexem) {
                     LexemDefinitionMap::associate($newLexem->id, $def->id);
                 }
                 // Also fix the definition if it is missing a hyphen.
                 $firstAt = strpos($def->internalRep, '@');
Esempio n. 3
0
require_once '../phplib/ads/adsModule.php';
require_once '../phplib/ads/diverta/divertaAdsModule.php';
// Parse the command line. The only supported option is -s <start-id>.
$opts = getopt('s:');
// The start id is interpolated into the SQL condition below, so insist on a
// single, digits-only value. getopt() may return false on failure, and a
// repeated -s yields an array instead of a string; both are rejected here.
if (!is_array($opts) || count($opts) != 1 || !isset($opts['s']) || !is_string($opts['s']) || !ctype_digit($opts['s'])) {
    print "Usage: fixDivertaBooks -s <start-id>\n";
    exit;
}
// Resolve some ambiguities automatically. Each key is the form that is
// alphabetically first among the candidates; the value is the form to return.
$PREFERRED_FORMS = array('a' => 'a', 'al' => 'al', 'carte' => 'carte', 'cartea' => 'cartea', 'clasa' => 'clasa', 'fara' => 'fără', 'i' => 'i', 'ii' => 'ii', 'in' => 'în', 'la' => 'la', 'mai' => 'mai', 'mare' => 'mare', 'marea' => 'marea', 'povesti' => 'povești', 'print' => 'prinț', 'printul' => 'prințul', 's' => 's', 'sa' => 'să', 'si' => 'și', 'teste' => 'teste', 'ti' => 'ți', 'timp' => 'timp', 'top' => 'top');
// Load every book whose id is at least the requested start id, in id order.
$books = db_find(new DivertaBook(), "id >= {$opts['s']} order by id");
foreach ($books as $book) {
    print "Loaded: {$book->id} [{$book->title}]    [{$book->url}]\n";
    $origTitle = $book->title;
    // Preliminary stuff
    $book->title = trim($book->title);
    if (text_endsWith($book->title, ', ***')) {
        $book->title = substr($book->title, 0, -5);
    }
    switch ($book->sku) {
        case 'YDA00965':
            $book->title = 'Dicționar vizual spaniol-român';
            break;
        case 'YHG00310':
            $book->title = '77 de rețete celebre și poveștile lor';
            break;
        case 'YHU02030':
            $book->title = 'Zen aici și acum';
            break;
        case 'YCV00945':
            $book->title = 'Bărbatul manipulator';
            break;
Esempio n. 4
0
<?php

require_once '../phplib/util.php';
$INFL_LONG_INF = 50;
$INFL_PART = 52;
$LEXEM_EDIT_URL = 'http://dexonline.ro/admin/lexemEdit.php?lexemId=';
$dbResult = db_execute("select * from Lexem where modelType in ('V', 'VT') and isLoc order by formNoAccent");
while (!$dbResult->EOF) {
    $verb = new Lexem();
    $verb->set($dbResult->fields);
    $dbResult->MoveNext();
    $ifs = db_find(new InflectedForm(), "lexemId = {$verb->id} and inflectionId = {$INFL_LONG_INF}");
    assert(count($ifs) <= 1);
    if (count($ifs) == 1) {
        $longInfForm = $ifs[0]->formNoAccent;
        $longInfModelNumber = text_endsWith($longInfForm, 'are') ? '113' : '107';
        $lexems = db_find(new Lexem(), "formNoAccent = '{$longInfForm}' and modelType = 'F' and modelNumber = '{$longInfModelNumber}'");
        if (count($lexems) != 1) {
            print "I {$longInfForm} are " . count($lexems) . " lexeme corespunzătoare\n";
        }
        foreach ($lexems as $longInf) {
            if (!$longInf->isLoc) {
                print "I {$longInf->formNoAccent} nu este în LOC {$LEXEM_EDIT_URL}{$longInf->id}\n";
                $longInf->isLoc = 1;
                $longInf->save();
            }
        }
    }
    if ($verb->modelType == 'VT') {
        $ifs = db_find(new InflectedForm(), "lexemId = {$verb->id} and inflectionId = {$INFL_PART}");
        $pm = ParticipleModel::loadByVerbModel($verb->modelNumber);
Esempio n. 5
0
/**
 * Reduces a string to a simplified form.
 *
 * The input is trimmed and lower-cased via text_unicodeToLower(), then every
 * occurrence of the characters @ $ ^ ( ) % . and the digits 0-9 is removed.
 * Finally a single trailing hyphen, if present, is dropped.
 *
 * @param string $s The text to simplify.
 * @return string The simplified text.
 */
function simplifyText($s)
{
    $lowered = text_unicodeToLower(trim($s));
    $stripped = preg_replace("/[@\$^0-9()%.]/", "", $lowered);

    if (!text_endsWith($stripped, '-')) {
        return $stripped;
    }

    // The hyphen is a single byte, so byte-wise truncation is safe here.
    $keep = strlen($stripped) - 1;
    return substr($stripped, 0, $keep);
}
Esempio n. 6
0
<?php

// Regular expression (used with MySQL's RLIKE) matching the note at the end of
// a definition that says it was taken from "Dicționar enciclopedic".
define('NOTE', ' ?[(]?Notă: Definiția este preluată din Dicționar enciclopedic vol\\. .*, Editura Enciclopedică, ....[)]\\.?$');
$query = "select * from Definition where InternalRep rlike '" . NOTE . "'";
$dbResult = mysql_query($query);
$count = 0;
while ($dbRow = mysql_fetch_assoc($dbResult)) {
    $def = Definition::createFromDbRow($dbRow);
    $pos = strpos($def->internalRep, 'Notă:');
    if ($pos === false) {
        // Not expected for rows selected by NOTE, but without this guard
        // substr(..., 0, false) would replace the whole definition with ''.
        continue;
    }
    // Keep only the text that precedes the note.
    $rep = substr($def->internalRep, 0, $pos);
    // The note may have been wrapped in parentheses; drop the opening one.
    if (text_endsWith($rep, '(')) {
        $rep = substr($rep, 0, -1);
    }
    $rep = trim($rep);
    $def->internalRep = $rep;
    // Regenerate the HTML on the definition being saved. The original code
    // assigned to an undefined $definition variable, so htmlRep was never
    // refreshed.
    $def->htmlRep = text_htmlize($def->internalRep);
    $def->sourceId = 25;
    $def->save();
    $count++;
}
print "{$count} definitions modified.\n";
Esempio n. 7
0
/**
 * Tells whether $form ends with $suffix, comparing accents only when both
 * sides carry one.
 *
 * An apostrophe (') is treated as the explicit accent marker. The suffix is
 * lower-cased first; its explicit-accent variant is obtained by mapping
 * $GLOBALS['text_accented'] to $GLOBALS['text_explicitAccent'].
 *
 *  - both accented:      the form is converted to the implicit notation and
 *                        compared with the (lower-cased) suffix;
 *  - only form accented: the apostrophes are stripped from the form;
 *  - only suffix accented: the apostrophes are stripped from the suffix;
 *  - neither accented:   plain suffix comparison.
 *
 * @param string $form   The word form to test.
 * @param string $suffix The suffix to look for.
 * @return bool Result of text_endsWith() on the normalized pair.
 */
function matchesWithAccent($form, $suffix)
{
    $suffix = text_unicodeToLower($suffix);
    $explicitSuffix = str_replace($GLOBALS['text_accented'], $GLOBALS['text_explicitAccent'], $suffix);

    $formAccented = strstr($form, "'") !== false;
    $suffixAccented = strstr($explicitSuffix, "'") !== false;

    if ($formAccented && $suffixAccented) {
        $implicitForm = str_replace($GLOBALS['text_explicitAccent'], $GLOBALS['text_accented'], $form);
        return text_endsWith($implicitForm, $suffix);
    }

    if ($formAccented) {
        // Suffix carries no accent: ignore the one in the form.
        $bareForm = str_replace("'", "", $form);
        return text_endsWith($bareForm, $suffix);
    }

    if ($suffixAccented) {
        // Form carries no accent: ignore the one in the suffix.
        $bareSuffix = str_replace("'", "", $explicitSuffix);
        return text_endsWith($form, $bareSuffix);
    }

    // No accents on either side.
    return text_endsWith($form, $suffix);
}
Esempio n. 8
0
/**
 * Returns a list containing the next token and the new stream position.
 * If we reach the end of the file, the token is empty.
 * Tokens are opening tags (without the attributes), closing tags or text.
 *
 * Reads from the globals $data (the input buffer) and $dataLen (its length).
 * Leading whitespace is skipped. Comments ("<!-- ... -->") are read up to the
 * closing "-->" and, like any other tag, cut at the first whitespace. A tag or
 * comment that is still open when the input ends is returned as if it had been
 * closed there, instead of reading past the end of the buffer.
 *
 * @param int $pos Offset in $data at which to start scanning.
 * @return array Two elements: the token string and the offset just past it.
 **/
function getNextToken($pos)
{
    global $data;
    global $dataLen;
    while ($pos < $dataLen && ctype_space($data[$pos])) {
        $pos++;
    }
    if ($pos >= $dataLen) {
        return array('', $dataLen);
    }
    $result = '';
    // If we hit a '<' sign, parse tag.
    if ($data[$pos] == '<') {
        do {
            $result .= $data[$pos];
            // A comment ends at '-->'; any other tag ends at the first '>'.
            $done = text_startsWith($result, '<!--') ? text_endsWith($result, '-->') : $data[$pos] == '>';
            $pos++;
            // Stop at end of input too, so an unterminated tag cannot loop forever.
        } while (!$done && $pos < $dataLen);
        // Strip the attributes
        $resultLen = strlen($result);
        $tagEnd = 1;
        while ($tagEnd < $resultLen && !ctype_space($result[$tagEnd]) && $result[$tagEnd] != '>') {
            $tagEnd++;
        }
        $result = substr($result, 0, $tagEnd) . '>';
        return array($result, $pos);
    }
    // Parse text to the next '<' sign or EOF.
    while ($pos < $dataLen && $data[$pos] != '<') {
        $result .= $data[$pos];
        $pos++;
    }
    return array(trim($result), $pos);
}
Esempio n. 9
0
 $d->lexicon = text_extractLexicon($d);
 $d->status = ST_ACTIVE;
 if (array_key_exists($d->lexicon, $existing)) {
     $skipped++;
 } else {
     // Handle some special cases where the lexicon needs to be adjusted.
     if (preg_match("/^@[^@,]+ /", $d->internalRep)) {
         $pos = strpos($d->internalRep, '@', 1);
         $text = substr($d->internalRep, 1, $pos - 1);
         $parts = split(' ', $text);
         assert(count($parts) >= 2);
         if ($parts[count($parts) - 1] == 'II.') {
             $d->lexicon = text_internalizeWordName($parts[0]);
             $d->status = ST_PENDING;
         } else {
             if (count($parts) == 2 && text_endsWith($parts[1], '/')) {
                 // Use the first part only, because the second one is the pronunciation
                 $d->lexicon = text_internalizeWordName($parts[0]);
                 //print "Using [{$d->lexicon}] for " . mb_substr($d->internalRep, 0, 50) . "\n";
             } else {
                 $d->status = ST_PENDING;
             }
         }
     }
     if ($d->lexicon) {
         $lexems = Lexem::loadByUnaccented($d->lexicon);
         if (!count($lexems)) {
             $lexem = Lexem::create($d->lexicon, 'T', '1', '');
             $lexem->save();
             $lexem->id = db_getLastInsertedId();
             $lexem->regenerateParadigm();