Пример #1
0
 /**
  * Computes a weighted edit distance between two strings.
  *
  * Both inputs are passed through StringUtil::unicodeToLatin() and lowercased
  * before comparison. The distance is computed with a dynamic-programming
  * matrix whose step costs come from the class's static properties
  * (COST_DEL, COST_INS, COST_TRANSPOSE, DIST_OTHER, INFTY) and from
  * self::letterDistance().
  *
  * @param string $s1 First string.
  * @param string $s2 Second string.
  * @return mixed The value of the bottom-right cell of the cost matrix.
  */
 public static function dist($s1, $s2)
 {
     $left = mb_strtolower(StringUtil::unicodeToLatin($s1));
     $right = mb_strtolower(StringUtil::unicodeToLatin($s2));
     $rows = mb_strlen($left);
     $cols = mb_strlen($right);

     // Pre-split both strings so each character is fetched only once.
     $leftChars = [];
     for ($k = 0; $k < $rows; $k++) {
         $leftChars[] = StringUtil::getCharAt($left, $k);
     }
     $rightChars = [];
     for ($k = 0; $k < $cols; $k++) {
         $rightChars[] = StringUtil::getCharAt($right, $k);
     }

     // Seed the first column with multiples of DIST_OTHER and the first row
     // with multiples of COST_DEL.
     $grid = [];
     for ($r = 0; $r <= $rows; $r++) {
         $grid[$r][0] = $r * self::$DIST_OTHER;
     }
     for ($c = 0; $c <= $cols; $c++) {
         $grid[0][$c] = $c * self::$COST_DEL;
     }

     // Fill the remaining cells. $r and $c are matrix coordinates, so the
     // characters being compared are $leftChars[$r - 1] and $rightChars[$c - 1].
     for ($r = 1; $r <= $rows; $r++) {
         for ($c = 1; $c <= $cols; $c++) {
             // Delete
             $best = $grid[$r][$c - 1] + self::$COST_DEL;

             // Insert: never cheaper than COST_INS, and priced at INFTY for the
             // first character of the first string.
             if ($r == 1) {
                 $insertCost = self::$INFTY;
             } else {
                 $insertCost = max(self::$COST_INS, self::letterDistance($leftChars[$r - 1], $leftChars[$r - 2]));
             }
             $best = min($best, $grid[$r - 1][$c] + $insertCost);

             // Modify (also covers equal characters, per the original note that
             // the distance between identical letters is 0).
             $best = min($best, $grid[$r - 1][$c - 1] + self::letterDistance($leftChars[$r - 1], $rightChars[$c - 1]));

             // Transpose: the two most recent characters are swapped.
             if ($r > 1 && $c > 1 && $leftChars[$r - 1] == $rightChars[$c - 2] && $leftChars[$r - 2] == $rightChars[$c - 1]) {
                 $best = min($best, $grid[$r - 2][$c - 2] + self::$COST_TRANSPOSE);
             }

             $grid[$r][$c] = $best;
         }
     }

     return $grid[$rows][$cols];
 }
Пример #2
0
 /**
  * Builds the left/right context of the character at $offset in $this->text,
  * looks it up via Diacritics::entryExists() and appends the outcome to
  * $this->resultText and $this->hiddenText.
  *
  * Each context holds self::$paddingNumber entries. Characters are taken
  * moving away from $offset; once the text boundary or a separator
  * (self::isSeparator()) is reached, the remaining slots are filled with
  * self::$paddingChar. The left context is appended nearest-character-first,
  * so it is in reverse text order.
  *
  * @param int $offset Position in $this->text of the character to examine.
  */
 function leftAndRightPadding($offset)
 {
     crawlerLog("INSIDE " . __FILE__ . ' - ' . __CLASS__ . '::' . __FUNCTION__ . '() - ' . 'line ' . __LINE__);
     $middle = StringUtil::getCharAt($this->text, $offset);
     $before = '';
     $after = '';
     $leftPos = $offset - 1;
     $rightPos = $offset + 1;
     $leftBlocked = false;
     $rightBlocked = false;

     for ($step = 0; $step < self::$paddingNumber; $step++) {
         // Left side: pad when past the start of the text or once a separator was hit.
         $padLeft = $leftPos < 0;
         if (!$padLeft) {
             if (!$leftBlocked) {
                 $leftCh = StringUtil::getCharAt($this->text, $leftPos);
                 $leftBlocked = self::isSeparator($leftCh);
             }
             $padLeft = $leftBlocked;
         }
         if ($padLeft) {
             $before .= self::$paddingChar;
         } else {
             $before .= $leftCh;
             $leftPos--;
         }

         // Right side: pad when past textEndOffset or once a separator was hit.
         $padRight = $rightPos > $this->textEndOffset;
         if (!$padRight) {
             if (!$rightBlocked) {
                 $rightCh = StringUtil::getCharAt($this->text, $rightPos);
                 $rightBlocked = self::isSeparator($rightCh);
             }
             $padRight = $rightBlocked;
         }
         if ($padRight) {
             $after .= self::$paddingChar;
         } else {
             $after .= $rightCh;
             $rightPos++;
         }
     }

     crawlerLog("IN TEXT " . $before . '|' . $middle . '|' . $after);
     $tableObj = Diacritics::entryExists($before, $middle, $after);

     if ($tableObj == null) {
         // No entry: copy the text through, including the character at $offset.
         $chunk = mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset + 1);
         $this->resultText .= $chunk;
         $this->hiddenText .= $chunk;
     } else {
         crawlerLog("Entry Exists");
         $forms = $this->getAllCharForms($tableObj, $middle);
         // Copy the text up to (not including) $offset, then the replacement.
         $chunk = mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset);
         $this->resultText .= $chunk;
         $this->hiddenText .= $chunk;
         $this->resultText .= $forms;
         // A single-character result is stored as-is; anything longer is
         // represented in the hidden text by an @@N@@ placeholder.
         if (mb_strlen($forms) == 1) {
             $this->hiddenText .= $forms;
         } else {
             $this->hiddenText .= "@@" . ($this->selectCount - 1) . "@@";
         }
     }

     $this->lastOffset = $this->currOffset;
 }
    // Collect the user choices
    $choices = array();
    foreach ($_REQUEST as $name => $value) {
        if (StringUtil::startsWith($name, 'radio_')) {
            $choices[substr($name, 6)] = $value;
        }
    }
    // Collect the positions of ambiguous abbreviations
    $matches = array();
    AdminStringUtil::markAbbreviations($def->internalRep, $def->sourceId, $matches);
    usort($matches, 'positionCmp');
    $s = $def->internalRep;
    foreach ($matches as $i => $m) {
        if ($choices[count($choices) - 1 - $i] == 'abbrev') {
            $orig = substr($s, $m['position'], $m['length']);
            $replacement = StringUtil::isUppercase(StringUtil::getCharAt($orig, 0)) ? AdminStringUtil::capitalize($m['abbrev']) : $m['abbrev'];
            $s = substr_replace($s, "#{$replacement}#", $m['position'], $m['length']);
        }
    }
    $def->internalRep = $s;
    $def->htmlRep = AdminStringUtil::htmlize($def->internalRep, $def->sourceId);
    $def->abbrevReview = ABBREV_REVIEW_COMPLETE;
    $def->save();
}
$MARKER = 'DEADBEEF';
// any string that won't occur naturally in a definition
$def = null;
$ids = db_getArray(sprintf('select id from Definition where status != %d and abbrevReview = %d', ST_DELETED, ABBREV_AMBIGUOUS));
if (count($ids)) {
    $defId = $ids[array_rand($ids, 1)];
    $def = Definition::get_by_id($defId);
Пример #4
0
                        $lm->regenerateParadigm();
                    }
                }
            }
        }
    }
    util_redirect("placeAccents.php");
}
// Pick ten random lexems whose form contains no apostrophe and which are not
// flagged noAccent, then prepare per-lexem display data for the template.
$lexems = Model::factory('Lexem')->raw_query("select * from Lexem where form not rlike '\\'' and not noAccent order by rand() limit 10")->find_many();
$chars = [];
$searchResults = [];
foreach ($lexems as $lexemRow) {
    // Split the uppercased form into characters; whitespace becomes &nbsp;.
    $upperForm = mb_strtoupper($lexemRow->form);
    $total = mb_strlen($upperForm);
    $cells = [];
    $pos = 0;
    while ($pos < $total) {
        $ch = StringUtil::getCharAt($upperForm, $pos);
        if (ctype_space($ch)) {
            $cells[] = '&nbsp;';
        } else {
            $cells[] = $ch;
        }
        $pos++;
    }
    $chars[$lexemRow->id] = $cells;

    // Definitions attached to this lexem, mapped to search results.
    $lexemDefinitions = Definition::loadByLexemId($lexemRow->id);
    $searchResults[$lexemRow->id] = SearchResult::mapDefinitionArray($lexemDefinitions);
}

RecentLink::createOrUpdate('Plasare accente');

// Hand everything to the template and render the admin page.
SmartyWrap::assign('sectionTitle', 'Plasare accente');
SmartyWrap::assign('lexems', $lexems);
SmartyWrap::assign('chars', $chars);
SmartyWrap::assign('searchResults', $searchResults);
SmartyWrap::assign("allStatuses", util_getAllStatuses());
SmartyWrap::assign('recentLinks', RecentLink::loadForUser());
SmartyWrap::displayAdminPage('admin/placeAccents.ihtml');
Пример #5
0
 /**
  * Builds the left/right context of the character at $offset in $this->file
  * and passes it to Diacritics::save2Db().
  *
  * Each context holds self::$paddingNumber entries. Characters are taken
  * moving away from $offset; once the file boundary or a separator
  * (self::isSeparator()) is reached, the remaining slots are filled with
  * self::$paddingChar. The left context is appended nearest-character-first,
  * so it is in reverse text order.
  *
  * @param int $offset Position in $this->file of the character to examine.
  */
 function leftAndRightPadding($offset)
 {
     Applog::log("INSIDE " . __FILE__ . ' - ' . __CLASS__ . '::' . __FUNCTION__ . '() - ' . 'line ' . __LINE__, 4);
     $middle = StringUtil::getCharAt($this->file, $offset);
     $before = '';
     $after = '';
     $leftPos = $offset - 1;
     $rightPos = $offset + 1;
     $leftBlocked = false;
     $rightBlocked = false;

     for ($step = 0; $step < self::$paddingNumber; $step++) {
         // Left side: pad when past the start of the file or once a separator was hit.
         $padLeft = $leftPos < 0;
         if (!$padLeft) {
             if (!$leftBlocked) {
                 $leftCh = StringUtil::getCharAt($this->file, $leftPos);
                 $leftBlocked = self::isSeparator($leftCh);
             }
             $padLeft = $leftBlocked;
         }
         if ($padLeft) {
             $before .= self::$paddingChar;
         } else {
             $before .= $leftCh;
             $leftPos--;
         }

         // Right side: pad when past fileEndOffset or once a separator was hit.
         $padRight = $rightPos > $this->fileEndOffset;
         if (!$padRight) {
             if (!$rightBlocked) {
                 $rightCh = StringUtil::getCharAt($this->file, $rightPos);
                 $rightBlocked = self::isSeparator($rightCh);
             }
             $padRight = $rightBlocked;
         }
         if ($padRight) {
             $after .= self::$paddingChar;
         } else {
             $after .= $rightCh;
             $rightPos++;
         }
     }

     Diacritics::save2Db($before, $middle, $after);
 }
Пример #6
0
 /**
  * Inserts an apostrophe (accent mark) into $s at the $pos-th vowel counted
  * from the end of the string.
  *
  * If $vowel is given and the character found is not $vowel, the insertion
  * point is moved one character back or forward when the neighbour there is
  * $vowel (the original comment attributes this to diphthongs). If neither
  * neighbour matches, the position is left as found.
  *
  * @param string $s     String to accentuate.
  * @param int    $pos   Which vowel to accent, counting from the end.
  * @param string $vowel Expected vowel at that position; falsy to skip the check.
  * @return string $s with the apostrophe inserted, or $s unchanged when the
  *                string has fewer than $pos vowels.
  */
 static function placeAccent($s, $pos, $vowel)
 {
     // Walk backwards, counting down $pos at every vowel.
     $idx = mb_strlen($s);
     while ($idx && $pos) {
         $idx--;
         if (self::isVowel(StringUtil::getCharAt($s, $idx))) {
             $pos--;
         }
     }

     // Ran out of characters before reaching the requested vowel.
     if ($pos) {
         return $s;
     }

     if ($vowel && StringUtil::getCharAt($s, $idx) != $vowel) {
         if ($idx > 0 && StringUtil::getCharAt($s, $idx - 1) == $vowel) {
             $idx--;
         } elseif ($idx < mb_strlen($s) - 1 && StringUtil::getCharAt($s, $idx + 1) == $vowel) {
             $idx++;
         }
         // Otherwise the expected vowel is not adjacent; keep the position.
     }

     return self::insert($s, "'", $idx);
 }
Пример #7
0
/**
 * Validates a lexem before saving, reporting every problem through
 * FlashMessage::add().
 *
 * Checks performed: non-empty form, accent presence versus the noAccent flag,
 * restriction letters of each lexem model, inflected form generation,
 * and the variant relationships (both the lexem's own variantOfId and the
 * lexems listed in $variantIds), plus the moderator-only structStatus change.
 *
 * @param object $lexem      The lexem being edited.
 * @param object $original   The lexem before editing; only its structStatus is read.
 * @param array  $variantIds IDs of lexems to be marked as variants of $lexem.
 * @param mixed  $meanings   Meanings, passed as-is to goodForVariantJson().
 * @return bool True when FlashMessage::getMessage() == null.
 */
function validate($lexem, $original, $variantIds, $meanings)
{
    if (!$lexem->form) {
        FlashMessage::add('Forma nu poate fi vidă.');
    }

    // Multiple accents are allowed (e.g. for lexems like hárcea-párcea), so
    // only presence/absence is compared against the noAccent flag.
    $numAccents = mb_substr_count($lexem->form, "'");
    if ($numAccents && $lexem->noAccent) {
        FlashMessage::add('Ați indicat că lexemul nu necesită accent, dar forma conține un accent.');
    } elseif (!$numAccents && !$lexem->noAccent) {
        FlashMessage::add('Adăugați un accent sau debifați câmpul "Necesită accent".');
    }

    foreach ($lexem->getLexemModels() as $lm) {
        $sawS = false;
        $sawP = false;
        $restrictionLength = mb_strlen($lm->restriction);
        for ($i = 0; $i < $restrictionLength; $i++) {
            $c = StringUtil::getCharAt($lm->restriction, $i);
            // switch compares loosely, like the == chain it replaces.
            switch ($c) {
                case 'T':
                case 'U':
                case 'I':
                    if (!in_array($lm->modelType, ['V', 'VT'])) {
                        FlashMessage::add("Restricția <b>{$c}</b> se aplică numai verbelor");
                    }
                    break;

                case 'S':
                    if (in_array($lm->modelType, ['I', 'T'])) {
                        FlashMessage::add("Restricția <b>S</b> nu se aplică modelului {$lm->modelType}");
                    }
                    $sawS = true;
                    break;

                case 'P':
                    if (in_array($lm->modelType, ['I', 'T'])) {
                        FlashMessage::add("Restricția <b>P</b> nu se aplică modelului {$lm->modelType}");
                    }
                    $sawP = true;
                    break;

                default:
                    FlashMessage::add("Restricția <b>{$c}</b> este incorectă.");
            }
        }
        if ($sawS && $sawP) {
            FlashMessage::add("Restricțiile <b>S</b> și <b>P</b> nu pot coexista.");
        }

        // A non-array result is treated as the ID of the inflection that failed.
        $ifs = $lm->generateInflectedForms();
        if (!is_array($ifs)) {
            $infl = Inflection::get_by_id($ifs);
            FlashMessage::add(sprintf("Nu pot genera flexiunea '%s' conform modelului %s%s", htmlentities($infl->description), $lm->modelType, $lm->modelNumber));
        }
    }

    // Constraints that apply when this lexem is itself a variant.
    $variantOf = Lexem::get_by_id($lexem->variantOfId);
    if ($variantOf) {
        if (!goodForVariantJson($meanings)) {
            FlashMessage::add("Acest lexem este o variantă a lui {$variantOf} și nu poate avea el însuși sensuri. " . "Este permis doar un sens, fără conținut, pentru indicarea surselor și a registrelor de folosire.");
        }
        if (!empty($variantIds)) {
            FlashMessage::add("Acest lexem este o variantă a lui {$variantOf} și nu poate avea el însuși variante.");
        }
        if ($variantOf->id == $lexem->id) {
            FlashMessage::add("Lexemul nu poate fi variantă a lui însuși.");
        }
    }

    // Constraints on every lexem proposed as a variant of this one.
    foreach ($variantIds as $variantId) {
        $variant = Lexem::get_by_id($variantId);
        if ($variant->id == $lexem->id) {
            FlashMessage::add('Lexemul nu poate fi variantă a lui însuși.');
        }
        if ($variant->variantOfId && $variant->variantOfId != $lexem->id) {
            $other = Lexem::get_by_id($variant->variantOfId);
            FlashMessage::add("\"{$variant}\" este deja marcat ca variantă a lui \"{$other}\".");
        }
        $ownVariantCount = Model::factory('Lexem')->where('variantOfId', $variant->id)->count();
        if ($ownVariantCount) {
            FlashMessage::add("\"{$variant}\" are deja propriile lui variante.");
        }
        $variantMeanings = Model::factory('Meaning')->where('lexemId', $variant->id)->find_many();
        if (!goodForVariant($variantMeanings)) {
            FlashMessage::add("\"{$variant}\" are deja propriile lui sensuri.");
        }
    }

    // Only moderators may move structStatus to DONE.
    $becomesDone = $lexem->structStatus == Lexem::STRUCT_STATUS_DONE && $original->structStatus != Lexem::STRUCT_STATUS_DONE;
    if ($becomesDone && !util_isModerator(PRIV_EDIT)) {
        FlashMessage::add("Doar moderatorii pot marca structurarea drept terminată. Vă rugăm să folosiți valoarea „așteaptă moderarea”.");
    }

    return FlashMessage::getMessage() == null;
}
Пример #8
0
/**
 * Checks a restriction string against a model type.
 *
 * Each character of $restriction must be one of T, U, I, S or P. T/U/I are
 * accepted only for model types V and VT; S and P are rejected for model
 * types I and T; S and P may not both be present.
 *
 * @param string $modelType   Model type code.
 * @param string $restriction Restriction letters.
 * @return string|null The first error message found, or null when valid.
 */
function validateRestriction($modelType, $restriction)
{
    $sawS = false;
    $sawP = false;
    $count = mb_strlen($restriction);

    for ($idx = 0; $idx < $count; $idx++) {
        $char = StringUtil::getCharAt($restriction, $idx);
        // switch compares loosely, like the == chain it replaces.
        switch ($char) {
            case 'T':
            case 'U':
            case 'I':
                if ($modelType != 'V' && $modelType != 'VT') {
                    return "Restricția <b>{$char}</b> se aplică numai verbelor";
                }
                break;

            case 'S':
                if ($modelType == 'I' || $modelType == 'T') {
                    return "Restricția S nu se aplică modelului {$modelType}";
                }
                $sawS = true;
                break;

            case 'P':
                if ($modelType == 'I' || $modelType == 'T') {
                    return "Restricția P nu se aplică modelului {$modelType}";
                }
                $sawP = true;
                break;

            default:
                return "Restricția <b>{$char}</b> este incorectă.";
        }
    }

    return ($sawS && $sawP) ? "Restricțiile <b>S</b> și <b>P</b> nu pot coexista." : null;
}
Пример #9
0
/**
 * Splits a model string into its leading run of uppercase characters (as
 * judged by ctype_upper()) and the remainder.
 *
 * @param string $s Model string.
 * @return array Two-element list: [leading uppercase prefix, rest of the string].
 */
function parseModel($s)
{
    $total = mb_strlen($s);
    // Advance to the first character that is not uppercase.
    for ($split = 0; $split < $total; $split++) {
        if (!ctype_upper(StringUtil::getCharAt($s, $split))) {
            break;
        }
    }
    $prefix = mb_substr($s, 0, $split);
    $rest = mb_substr($s, $split);
    return [$prefix, $rest];
}
 /**
  * Wraps known abbreviations in $s between hash signs, using the
  * abbreviation list of the given source.
  *
  * Unambiguous abbreviations are replaced in $s by "#abbrev#" (capitalized
  * when the matched text starts with an uppercase letter). Ambiguous ones are
  * not replaced; when $ambiguousMatches is not null they are appended to it
  * as ['abbrev' => ..., 'position' => ..., 'length' => ...], at most one per
  * position. Positions and lengths are byte-based (PREG_OFFSET_CAPTURE,
  * strlen, substr_replace).
  *
  * @param string     $s                Text to process.
  * @param mixed      $sourceId         Key into the loaded abbreviation table.
  * @param array|null $ambiguousMatches Output list of ambiguous matches, or null to skip collection.
  * @return string The processed text; $s unchanged when the source has no abbreviations.
  */
 static function markAbbreviations($s, $sourceId, &$ambiguousMatches = null)
 {
     $abbrevs = self::loadAbbreviations();
     // NOTE(review): presumably flags positions that already lie between hash signs — confirm in constructHashMap().
     $hashMap = self::constructHashMap($s);
     if (!array_key_exists($sourceId, $abbrevs)) {
         return $s;
     }

     // Do not report two ambiguities at the same position, for example M. and m.
     $reportedPositions = [];

     foreach ($abbrevs[$sourceId] as $from => $tuple) {
         // Always match in Unicode mode; case-sensitively only when the
         // pattern contains uppercase letters.
         $flags = $tuple['hasCaps'] ? 'u' : 'ui';
         $found = [];
         preg_match_all("/{$tuple['regexp']}/{$flags}", $s, $found, PREG_OFFSET_CAPTURE);
         if (!count($found[1])) {
             continue;
         }

         // Go through the matches last-to-first, so a replacement never shifts
         // the offsets of the matches still to be processed.
         foreach (array_reverse($found[1]) as $hit) {
             [$orig, $position] = $hit;
             $origLength = strlen($orig);

             // Don't touch anything that is already flagged in the hash map.
             if ($hashMap[$position]) {
                 continue;
             }

             if ($tuple['ambiguous']) {
                 if ($ambiguousMatches !== null && !array_key_exists($position, $reportedPositions)) {
                     $ambiguousMatches[] = ['abbrev' => $from, 'position' => $position, 'length' => $origLength];
                     $reportedPositions[$position] = true;
                 }
                 continue;
             }

             if (StringUtil::isUppercase(StringUtil::getCharAt($orig, 0))) {
                 $replacement = self::capitalize($from);
             } else {
                 $replacement = $from;
             }
             $s = substr_replace($s, "#{$replacement}#", $position, $origLength);
             // Keep the hash map aligned with the edited string: the replaced
             // span (replacement plus its two hash signs) is now flagged.
             array_splice($hashMap, $position, $origLength, array_fill(0, 2 + strlen($replacement), true));
         }
     }

     return $s;
 }