/**
 * Computes a weighted edit distance between two strings.
 *
 * Both arguments are passed through StringUtil::unicodeToLatin() and
 * mb_strtolower() before comparison. The distance is then filled in with a
 * dynamic-programming cost matrix (a customized Levenshtein with an extra
 * transposition step), using the class-level cost settings and
 * self::letterDistance() for per-letter costs.
 *
 * @param string $s1 First string to compare.
 * @param string $s2 Second string to compare.
 * @return mixed The value stored in the last cell of the cost matrix.
 */
public static function dist($s1, $s2) {
  $left = mb_strtolower(StringUtil::unicodeToLatin($s1));
  $right = mb_strtolower(StringUtil::unicodeToLatin($s2));
  $leftLen = mb_strlen($left);
  $rightLen = mb_strlen($right);

  // Extract every character once, so the main loop never calls getCharAt().
  $leftChars = array();
  for ($k = 0; $k < $leftLen; $k++) {
    $leftChars[$k] = StringUtil::getCharAt($left, $k);
  }
  $rightChars = array();
  for ($k = 0; $k < $rightLen; $k++) {
    $rightChars[$k] = StringUtil::getCharAt($right, $k);
  }

  // Seed column 0 and row 0 of the matrix. Cell [r][c] is the cost of
  // matching the first r characters of $left with the first c of $right.
  $cost = array();
  for ($row = 0; $row <= $leftLen; $row++) {
    $cost[$row][0] = $row * self::$DIST_OTHER;
  }
  for ($col = 0; $col <= $rightLen; $col++) {
    $cost[0][$col] = $col * self::$COST_DEL;
  }

  // Fill the remaining cells; $row and $col are matrix (1-based) indices,
  // so the characters under consideration sit at $row - 1 and $col - 1.
  for ($row = 1; $row <= $leftLen; $row++) {
    for ($col = 1; $col <= $rightLen; $col++) {
      $cur = $leftChars[$row - 1];
      $other = $rightChars[$col - 1];

      // "Delete" step: come from the same row, previous column.
      $best = $cost[$row][$col - 1] + self::$COST_DEL;

      // "Insert" step: come from the previous row, same column. It is priced
      // by the distance to the preceding letter of $left, but never below
      // COST_INS; on the first row of letters it is effectively forbidden.
      if ($row == 1) {
        $costInsert = self::$INFTY;
      } else {
        $costInsert = max(self::$COST_INS, self::letterDistance($cur, $leftChars[$row - 2]));
      }
      $best = min($best, $cost[$row - 1][$col] + $costInsert);

      // "Modify" step: diagonal move. Per the original author's note, this
      // also covers equal letters, where letterDistance() yields 0.
      $best = min($best, $cost[$row - 1][$col - 1] + self::letterDistance($cur, $other));

      // "Transpose" step: the last two letters of each prefix are swapped.
      if ($row > 1 && $col > 1 && $cur == $rightChars[$col - 2] && $leftChars[$row - 2] == $other) {
        $best = min($best, $cost[$row - 2][$col - 2] + self::$COST_TRANSPOSE);
      }

      $cost[$row][$col] = $best;
    }
  }

  return $cost[$leftLen][$rightLen];
}
/**
 * Builds the left and right context around the character at $offset in
 * $this->text, looks the (before, middle, after) triple up through
 * Diacritics::entryExists(), and appends the corresponding piece of text to
 * $this->resultText and $this->hiddenText.
 *
 * Each context is exactly self::$paddingNumber entries long. The left context
 * is collected walking away from $offset (nearest character first). Once a
 * separator or the edge of the text is reached on a side, the remainder of
 * that side is filled with self::$paddingChar.
 *
 * @param int $offset Position in $this->text of the character being examined.
 */
function leftAndRightPadding($offset) {
  crawlerLog("INSIDE " . __FILE__ . ' - ' . __CLASS__ . '::' . __FUNCTION__ . '() - ' . 'line ' . __LINE__);

  $middle = StringUtil::getCharAt($this->text, $offset);
  $left = '';
  $right = '';
  $leftPos = $offset - 1;
  $rightPos = $offset + 1;
  $leftBlocked = false;   // becomes true once a separator is met on the left
  $rightBlocked = false;  // becomes true once a separator is met on the right

  for ($step = 0; $step < self::$paddingNumber; $step++) {
    // Left side: only inspect a new character while still inside the text
    // and not yet stopped by a separator.
    if ($leftPos >= 0 && !$leftBlocked) {
      $leftChar = StringUtil::getCharAt($this->text, $leftPos);
      $leftBlocked = self::isSeparator($leftChar);
    }
    if ($leftPos < 0 || $leftBlocked) {
      $left .= self::$paddingChar;
    } else {
      $left .= $leftChar;
      $leftPos--;
    }

    // Right side: same procedure, bounded by textEndOffset.
    if ($rightPos <= $this->textEndOffset && !$rightBlocked) {
      $rightChar = StringUtil::getCharAt($this->text, $rightPos);
      $rightBlocked = self::isSeparator($rightChar);
    }
    if ($rightPos > $this->textEndOffset || $rightBlocked) {
      $right .= self::$paddingChar;
    } else {
      $right .= $rightChar;
      $rightPos++;
    }
  }

  crawlerLog("IN TEXT " . $left . '|' . $middle . '|' . $right);

  $tableObj = Diacritics::entryExists($left, $middle, $right);
  if ($tableObj == null) {
    // No known entry: copy the pending text through, including the character at $offset.
    $chunk = mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset + 1);
    $this->resultText .= $chunk;
    $this->hiddenText .= $chunk;
  } else {
    crawlerLog("Entry Exists");
    $forms = $this->getAllCharForms($tableObj, $middle);

    // Copy the pending text up to (not including) $offset, then the forms.
    $chunk = mb_substr($this->text, $this->lastOffset, $offset - $this->lastOffset);
    $this->resultText .= $chunk . $forms;

    // The hidden text gets the character itself when there is a single form,
    // otherwise a numbered @@n@@ placeholder.
    $hiddenPiece = (mb_strlen($forms) == 1)
      ? $forms
      : "@@" . ($this->selectCount - 1) . "@@";
    $this->hiddenText .= $chunk . $hiddenPiece;
  }

  $this->lastOffset = $this->currOffset;
}
// Collect the user choices $choices = array(); foreach ($_REQUEST as $name => $value) { if (StringUtil::startsWith($name, 'radio_')) { $choices[substr($name, 6)] = $value; } } // Collect the positions of ambiguous abbreviations $matches = array(); AdminStringUtil::markAbbreviations($def->internalRep, $def->sourceId, $matches); usort($matches, 'positionCmp'); $s = $def->internalRep; foreach ($matches as $i => $m) { if ($choices[count($choices) - 1 - $i] == 'abbrev') { $orig = substr($s, $m['position'], $m['length']); $replacement = StringUtil::isUppercase(StringUtil::getCharAt($orig, 0)) ? AdminStringUtil::capitalize($m['abbrev']) : $m['abbrev']; $s = substr_replace($s, "#{$replacement}#", $m['position'], $m['length']); } } $def->internalRep = $s; $def->htmlRep = AdminStringUtil::htmlize($def->internalRep, $def->sourceId); $def->abbrevReview = ABBREV_REVIEW_COMPLETE; $def->save(); } $MARKER = 'DEADBEEF'; // any string that won't occur naturally in a definition $def = null; $ids = db_getArray(sprintf('select id from Definition where status != %d and abbrevReview = %d', ST_DELETED, ABBREV_AMBIGUOUS)); if (count($ids)) { $defId = $ids[array_rand($ids, 1)]; $def = Definition::get_by_id($defId);
$lm->regenerateParadigm(); } } } } } util_redirect("placeAccents.php"); } $chars = array(); $searchResults = array(); $lexems = Model::factory('Lexem')->raw_query("select * from Lexem where form not rlike '\\'' and not noAccent order by rand() limit 10")->find_many(); foreach ($lexems as $l) { $charArray = array(); $form = mb_strtoupper($l->form); $len = mb_strlen($form); for ($i = 0; $i < $len; $i++) { $c = StringUtil::getCharAt($form, $i); $charArray[] = ctype_space($c) ? ' ' : $c; } $chars[$l->id] = $charArray; $definitions = Definition::loadByLexemId($l->id); $searchResults[$l->id] = SearchResult::mapDefinitionArray($definitions); } RecentLink::createOrUpdate('Plasare accente'); SmartyWrap::assign('sectionTitle', 'Plasare accente'); SmartyWrap::assign('lexems', $lexems); SmartyWrap::assign('chars', $chars); SmartyWrap::assign('searchResults', $searchResults); SmartyWrap::assign("allStatuses", util_getAllStatuses()); SmartyWrap::assign('recentLinks', RecentLink::loadForUser()); SmartyWrap::displayAdminPage('admin/placeAccents.ihtml');
/**
 * Builds the left and right context around the character at $offset in
 * $this->file and hands the (before, middle, after) triple to
 * Diacritics::save2Db().
 *
 * Each context is exactly self::$paddingNumber entries long. The left context
 * is collected walking away from $offset (nearest character first). Once a
 * separator or the edge of the content is reached on a side, the remainder of
 * that side is filled with self::$paddingChar.
 *
 * @param int $offset Position in $this->file of the character being examined.
 */
function leftAndRightPadding($offset) {
  Applog::log("INSIDE " . __FILE__ . ' - ' . __CLASS__ . '::' . __FUNCTION__ . '() - ' . 'line ' . __LINE__, 4);

  $middle = StringUtil::getCharAt($this->file, $offset);
  $left = '';
  $right = '';
  $leftPos = $offset - 1;
  $rightPos = $offset + 1;
  $leftBlocked = false;   // becomes true once a separator is met on the left
  $rightBlocked = false;  // becomes true once a separator is met on the right

  for ($step = 0; $step < self::$paddingNumber; $step++) {
    // Left side: only inspect a new character while still inside the content
    // and not yet stopped by a separator.
    if ($leftPos >= 0 && !$leftBlocked) {
      $leftChar = StringUtil::getCharAt($this->file, $leftPos);
      $leftBlocked = self::isSeparator($leftChar);
    }
    if ($leftPos < 0 || $leftBlocked) {
      $left .= self::$paddingChar;
    } else {
      $left .= $leftChar;
      $leftPos--;
    }

    // Right side: same procedure, bounded by fileEndOffset.
    if ($rightPos <= $this->fileEndOffset && !$rightBlocked) {
      $rightChar = StringUtil::getCharAt($this->file, $rightPos);
      $rightBlocked = self::isSeparator($rightChar);
    }
    if ($rightPos > $this->fileEndOffset || $rightBlocked) {
      $right .= self::$paddingChar;
    } else {
      $right .= $rightChar;
      $rightPos++;
    }
  }

  Diacritics::save2Db($left, $middle, $right);
}
/**
 * Inserts an accent mark (') into $s at the $pos-th vowel counted from the
 * end of the string.
 *
 * When $vowel is given and the character found at the computed position is
 * not that vowel, the immediately preceding character is tried first, then
 * the immediately following one (the original author attributes this to
 * diphthongs). If neither matches, the computed position is used as is.
 *
 * @param string $s     String to place the accent in.
 * @param int    $pos   How many vowels to count back from the end.
 * @param string $vowel Vowel expected at the accent position; a falsy value
 *                      skips the adjustment.
 * @return string $s with the accent inserted, or $s unchanged when it holds
 *                fewer than $pos vowels.
 */
static function placeAccent($s, $pos, $vowel) {
  $index = mb_strlen($s);
  $remaining = $pos;

  // Walk backwards, ticking off one vowel at a time.
  while ($index && $remaining) {
    $index--;
    if (self::isVowel(StringUtil::getCharAt($s, $index))) {
      $remaining--;
    }
  }

  // Start of string reached before enough vowels were seen: nothing to do.
  if ($remaining) {
    return $s;
  }

  if ($vowel && StringUtil::getCharAt($s, $index) != $vowel) {
    if ($index > 0 && StringUtil::getCharAt($s, $index - 1) == $vowel) {
      $index--;
    } elseif ($index < mb_strlen($s) - 1 && StringUtil::getCharAt($s, $index + 1) == $vowel) {
      $index++;
    }
    // Otherwise the requested vowel is not adjacent; keep the computed index.
  }

  return self::insert($s, "'", $index);
}
/**
 * Validates a lexem before saving, reporting every problem found through
 * FlashMessage::add().
 *
 * Checks performed, in order: non-empty form; agreement between the accent
 * marks in the form and the noAccent flag; the restriction letters and the
 * inflected-form generation of every lexem model; the variant relationships
 * (both "is a variant of" and "has variants"); and permission to mark the
 * structuring as done.
 *
 * @param object $lexem      Lexem being validated.
 * @param object $original   Earlier state of the lexem; only its structStatus is read.
 * @param array  $variantIds IDs of the lexems that are to be variants of $lexem.
 * @param mixed  $meanings   Meanings of the lexem, handed to goodForVariantJson().
 * @return bool Result of FlashMessage::getMessage() == null.
 */
function validate($lexem, $original, $variantIds, $meanings) {
  if (!$lexem->form) {
    FlashMessage::add('Forma nu poate fi vidă.');
  }

  // Note: multiple accents are allowed, for lexems like hárcea-párcea.
  $numAccents = mb_substr_count($lexem->form, "'");
  if ($numAccents && $lexem->noAccent) {
    FlashMessage::add('Ați indicat că lexemul nu necesită accent, dar forma conține un accent.');
  } elseif (!$numAccents && !$lexem->noAccent) {
    FlashMessage::add('Adăugați un accent sau debifați câmpul "Necesită accent".');
  }

  foreach ($lexem->getLexemModels() as $lm) {
    $seenS = false;
    $seenP = false;

    // Check the restriction string one letter at a time.
    $restrictionLength = mb_strlen($lm->restriction);
    for ($idx = 0; $idx < $restrictionLength; $idx++) {
      $c = StringUtil::getCharAt($lm->restriction, $idx);

      if ($c == 'T' || $c == 'U' || $c == 'I') {
        if (!($lm->modelType == 'V' || $lm->modelType == 'VT')) {
          FlashMessage::add("Restricția <b>{$c}</b> se aplică numai verbelor");
        }
      } elseif ($c == 'S') {
        if ($lm->modelType == 'I' || $lm->modelType == 'T') {
          FlashMessage::add("Restricția <b>S</b> nu se aplică modelului {$lm->modelType}");
        }
        $seenS = true;
      } elseif ($c == 'P') {
        if ($lm->modelType == 'I' || $lm->modelType == 'T') {
          FlashMessage::add("Restricția <b>P</b> nu se aplică modelului {$lm->modelType}");
        }
        $seenP = true;
      } else {
        FlashMessage::add("Restricția <b>{$c}</b> este incorectă.");
      }
    }

    if ($seenS && $seenP) {
      FlashMessage::add("Restricțiile <b>S</b> și <b>P</b> nu pot coexista.");
    }

    // A non-array result is treated as the ID of the inflection that failed.
    $ifs = $lm->generateInflectedForms();
    if (!is_array($ifs)) {
      $infl = Inflection::get_by_id($ifs);
      FlashMessage::add(sprintf("Nu pot genera flexiunea '%s' conform modelului %s%s",
                                htmlentities($infl->description),
                                $lm->modelType,
                                $lm->modelNumber));
    }
  }

  // Checks that apply when this lexem is itself a variant of another one.
  $variantOf = Lexem::get_by_id($lexem->variantOfId);
  if ($variantOf) {
    if (!goodForVariantJson($meanings)) {
      FlashMessage::add("Acest lexem este o variantă a lui {$variantOf} și nu poate avea el însuși sensuri. " .
                        "Este permis doar un sens, fără conținut, pentru indicarea surselor și a registrelor de folosire.");
    }
    if (!empty($variantIds)) {
      FlashMessage::add("Acest lexem este o variantă a lui {$variantOf} și nu poate avea el însuși variante.");
    }
    if ($variantOf->id == $lexem->id) {
      FlashMessage::add("Lexemul nu poate fi variantă a lui însuși.");
    }
  }

  // Checks on each lexem that is to become a variant of this one.
  foreach ($variantIds as $variantId) {
    $variant = Lexem::get_by_id($variantId);

    if ($variant->id == $lexem->id) {
      FlashMessage::add('Lexemul nu poate fi variantă a lui însuși.');
    }

    if ($variant->variantOfId && $variant->variantOfId != $lexem->id) {
      $other = Lexem::get_by_id($variant->variantOfId);
      FlashMessage::add("\"{$variant}\" este deja marcat ca variantă a lui \"{$other}\".");
    }

    $ownVariants = Model::factory('Lexem')->where('variantOfId', $variant->id)->count();
    if ($ownVariants) {
      FlashMessage::add("\"{$variant}\" are deja propriile lui variante.");
    }

    $ownMeanings = Model::factory('Meaning')->where('lexemId', $variant->id)->find_many();
    if (!goodForVariant($ownMeanings)) {
      FlashMessage::add("\"{$variant}\" are deja propriile lui sensuri.");
    }
  }

  // Switching structStatus to "done" requires util_isModerator(PRIV_EDIT).
  if ($lexem->structStatus == Lexem::STRUCT_STATUS_DONE
      && $original->structStatus != Lexem::STRUCT_STATUS_DONE
      && !util_isModerator(PRIV_EDIT)) {
    FlashMessage::add("Doar moderatorii pot marca structurarea drept terminată. Vă rugăm să folosiți valoarea „așteaptă moderarea”.");
  }

  return FlashMessage::getMessage() == null;
}
/**
 * Checks a restriction string against a model type, one letter at a time.
 *
 * Rules enforced:
 *  - T, U and I are accepted only for model types V and VT;
 *  - S and P are rejected for model types I and T;
 *  - S and P may not both appear;
 *  - any other letter is rejected.
 *
 * @param string $modelType   Model type code.
 * @param string $restriction Restriction letters to check.
 * @return string|null The message for the first problem found, or null when
 *                     the restriction is acceptable.
 */
function validateRestriction($modelType, $restriction) {
  $isVerbModel = ($modelType == 'V' || $modelType == 'VT');
  $rejectsNumber = ($modelType == 'I' || $modelType == 'T');
  $seenS = false;
  $seenP = false;

  $length = mb_strlen($restriction);
  for ($pos = 0; $pos < $length; $pos++) {
    $char = StringUtil::getCharAt($restriction, $pos);

    switch ($char) {
      case 'T':
      case 'U':
      case 'I':
        if (!$isVerbModel) {
          return "Restricția <b>{$char}</b> se aplică numai verbelor";
        }
        break;

      case 'S':
        if ($rejectsNumber) {
          return "Restricția S nu se aplică modelului {$modelType}";
        }
        $seenS = true;
        break;

      case 'P':
        if ($rejectsNumber) {
          return "Restricția P nu se aplică modelului {$modelType}";
        }
        $seenP = true;
        break;

      default:
        return "Restricția <b>{$char}</b> este incorectă.";
    }
  }

  if ($seenS && $seenP) {
    return "Restricțiile <b>S</b> și <b>P</b> nu pot coexista.";
  }
  return null;
}
/**
 * Splits a string into its leading run of uppercase letters (as recognised
 * by ctype_upper()) and the remainder.
 *
 * @param string $s String to split.
 * @return array Two-element array: [uppercase prefix, rest of the string].
 */
function parseModel($s) {
  $total = mb_strlen($s);

  // Advance $split to the first character that is not an uppercase letter.
  for ($split = 0; $split < $total; $split++) {
    if (!ctype_upper(StringUtil::getCharAt($s, $split))) {
      break;
    }
  }

  $prefix = mb_substr($s, 0, $split);
  $rest = mb_substr($s, $split);
  return [$prefix, $rest];
}
/**
 * Wraps the abbreviations found in $s in hash signs (#...#), using the
 * abbreviation list registered for $sourceId.
 *
 * Unambiguous abbreviations are replaced in place. Ambiguous ones are left
 * untouched and, when $ambiguousMatches is not null, reported through it.
 * Matches at positions flagged in the hash map (text already between hash
 * signs) are skipped.
 *
 * @param string     $s                 Text to process.
 * @param mixed      $sourceId          Key into the loaded abbreviation table.
 * @param array|null $ambiguousMatches  When not null, receives one
 *        array('abbrev', 'position', 'length') entry per ambiguous match;
 *        position comes from PREG_OFFSET_CAPTURE and length from strlen().
 * @return string The text with unambiguous abbreviations marked; $s unchanged
 *                when there are no abbreviations for $sourceId.
 */
static function markAbbreviations($s, $sourceId, &$ambiguousMatches = null) {
  $abbrevs = self::loadAbbreviations();
  $hashMap = self::constructHashMap($s);
  if (!array_key_exists($sourceId, $abbrevs)) {
    return $s;
  }

  // Report at most one ambiguity per position (e.g. M. and m. both matching).
  $positionsUsed = array();

  foreach ($abbrevs[$sourceId] as $from => $tuple) {
    // Case-sensitive when the pattern contains uppercase, case-insensitive
    // otherwise. The /u modifier (Unicode) is always present.
    $modifier = $tuple['hasCaps'] ? "" : "i";
    $matches = array();
    preg_match_all("/{$tuple['regexp']}/u{$modifier}", $s, $matches, PREG_OFFSET_CAPTURE);

    if (!count($matches[1])) {
      continue;
    }

    // Go through the matches last-to-first, so that a replacement does not
    // shift the offsets of the matches still to be handled.
    foreach (array_reverse($matches[1]) as $match) {
      list($orig, $position) = $match;

      if ($hashMap[$position]) {
        // Already between hash signs: leave it alone.
        continue;
      }

      if ($tuple['ambiguous']) {
        if ($ambiguousMatches !== null && !array_key_exists($position, $positionsUsed)) {
          $ambiguousMatches[] = array('abbrev' => $from, 'position' => $position, 'length' => strlen($orig));
          $positionsUsed[$position] = true;
        }
        continue;
      }

      // Keep the capitalization of the matched text.
      $startsUppercase = StringUtil::isUppercase(StringUtil::getCharAt($orig, 0));
      $replacement = $startsUppercase ? self::capitalize($from) : $from;

      $origLength = strlen($orig);
      $s = substr_replace($s, "#{$replacement}#", $position, $origLength);

      // Keep the hash map aligned with the new text: the replaced span
      // (replacement plus its two hash signs) is now entirely "inside hashes".
      $insideHashes = array_fill(0, 2 + strlen($replacement), true);
      array_splice($hashMap, $position, $origLength, $insideHashes);
    }
  }

  return $s;
}