/**
 * Checks whether the translation's length is plausible compared to the original.
 *
 * The check is skipped (passes) for locales listed in `$this->length_exclude_languages`,
 * for originals starting with `number_format_`, and for originals containing
 * `_abbreviation` or `_initial`.
 *
 * @param string $original    The source string.
 * @param string $translation The translated string.
 * @param object $locale      Locale object; only its `slug` property is read.
 * @return bool|string True when the check passes, otherwise the warning message.
 */
public function warning_length( $original, $translation, $locale ) {
	// Excluded locales and number-format placeholders are never length-checked.
	$locale_is_excluded = in_array( $locale->slug, $this->length_exclude_languages );
	if ( $locale_is_excluded || gp_startswith( $original, 'number_format_' ) ) {
		return true;
	}

	$source_length      = gp_strlen( $original );
	$translation_length = gp_strlen( $translation );

	// The translation length must lie strictly between the two scaled bounds.
	$long_enough = $this->length_lower_bound * $source_length < $translation_length;
	if ( $long_enough && $translation_length < $this->length_upper_bound * $source_length ) {
		return true;
	}

	// Out of bounds, but abbreviations and initials are expected to differ a lot.
	if ( gp_in( '_abbreviation', $original ) || gp_in( '_initial', $original ) ) {
		return true;
	}

	return __( 'Lengths of source and translation differ too much.', 'glotpress' );
}
/**
 * Adds glossary markup to the originals of a set of translations.
 *
 * For every translation entry the escaped singular/plural originals are stored in
 * `singular_glossary_markup` / `plural_glossary_markup`. Each occurrence of a glossary
 * term (optionally followed by "s" or "es") is then wrapped in a
 * `<span class="glossary-word">` whose `data-translations` attribute carries the
 * JSON-encoded glossary data (translation, part of speech, comment) for that term.
 *
 * @param array  $translations Translation entries; each is an object with `singular` and `plural` properties.
 * @param object $glossary     Glossary whose `id` is used to look up the glossary entries.
 * @return array The translations; markup properties are only added when the glossary has entries.
 */
function map_glossary_entries_to_translations_originals( $translations, $glossary ) {
	$glossary_entries = GP::$glossary_entry->by_glossary_id( $glossary->id );
	if ( empty( $glossary_entries ) ) {
		return $translations;
	}

	// Create array of glossary terms, longest first.
	$glossary_entries_terms = array();
	foreach ( $glossary_entries as $key => $value ) {
		$glossary_entries_terms[ $key ] = $value->term;
	}

	// The comparator has to return an integer (<0, 0, >0). Returning a boolean gives
	// an unreliable order and is deprecated on PHP 8.
	uasort(
		$glossary_entries_terms,
		function ( $a, $b ) {
			return gp_strlen( $b ) - gp_strlen( $a );
		}
	);

	foreach ( $translations as $key => $t ) {
		// Save our current singular/plural strings before attempting any markup change.
		// Also escape now, since we're going to add some html.
		$translations[ $key ]->singular_glossary_markup = esc_translation( $t->singular );
		$translations[ $key ]->plural_glossary_markup   = esc_translation( $t->plural );

		// Search for glossary terms in our strings.
		$matching_entries = array();
		foreach ( $glossary_entries_terms as $i => $term ) {
			$glossary_entry = $glossary_entries[ $i ];
			if ( gp_stripos( $t->singular . ' ' . $t->plural, $term ) !== false ) {
				$matching_entries[ $term ][] = array(
					'translation' => $glossary_entry->translation,
					'pos'         => $glossary_entry->part_of_speech,
					'comment'     => $glossary_entry->comment,
				);
			}
		}

		// Replace terms in strings with markup.
		foreach ( $matching_entries as $term => $glossary_data ) {
			$open_tag = '<span class="glossary-word" data-translations="' . htmlspecialchars( gp_json_encode( $glossary_data ), ENT_QUOTES, 'UTF-8' ) . '">';

			// A callback is used instead of a replacement string so that backslashes and
			// "$n" sequences inside the JSON data are emitted literally rather than being
			// interpreted as escapes or back-references.
			$wrap_match = function ( $matches ) use ( $open_tag ) {
				return $open_tag . $matches[1] . '</span>';
			};

			// Match the term with an optional plural suffix ("s" or "es"). The negative
			// lookahead skips text that already sits inside a previously inserted span.
			$pattern = '/\\b(' . preg_quote( (string) $term, '/' ) . '(?:es|s)?)(?![^<]*<\\/span>)\\b/iu';

			$marked_up = preg_replace_callback( $pattern, $wrap_match, $translations[ $key ]->singular_glossary_markup );
			if ( null !== $marked_up ) {
				// null signals a PCRE error (e.g. invalid UTF-8); keep the escaped string then.
				$translations[ $key ]->singular_glossary_markup = $marked_up;
			}

			if ( $t->plural ) {
				$marked_up = preg_replace_callback( $pattern, $wrap_match, $translations[ $key ]->plural_glossary_markup );
				if ( null !== $marked_up ) {
					$translations[ $key ]->plural_glossary_markup = $marked_up;
				}
			}
		}
	}

	return $translations;
}
/**
 * Finds the string in $other_strings that is most similar to $input.
 *
 * Candidates whose length differs from the input's by more than the
 * `gp_original_import_max_length_diff` ratio (default 0.5) are skipped. The best
 * remaining candidate is returned only when its similarity exceeds the
 * `gp_original_import_min_similarity_diff` threshold (default 0.8).
 *
 * @param string $input         The string to find a match for.
 * @param array  $other_strings Candidate strings.
 * @return string|null The closest candidate, or null when none is close enough.
 */
function closest_original( $input, $other_strings ) {
	if ( empty( $other_strings ) ) {
		return null;
	}

	$input_length = gp_strlen( $input );
	if ( $input_length < 1 ) {
		// The relative length difference below divides by the input length, so an
		// empty input cannot be compared (and would raise a division by zero).
		return null;
	}

	// Loop-invariant, so it is resolved once instead of once per candidate.
	$max_length_diff = apply_filters( 'gp_original_import_max_length_diff', 0.5 );

	$closest            = null;
	$closest_similarity = 0;

	foreach ( $other_strings as $compared_string ) {
		$compared_string_length = gp_strlen( $compared_string );

		// Skip candidates whose length is too different to be a plausible match.
		if ( abs( ( $input_length - $compared_string_length ) / $input_length ) > $max_length_diff ) {
			continue;
		}

		$similarity = gp_string_similarity( $input, $compared_string );
		if ( $similarity > $closest_similarity ) {
			$closest            = $compared_string;
			$closest_similarity = $similarity;
		}
	}

	if ( null === $closest ) {
		return null;
	}

	$min_score    = apply_filters( 'gp_original_import_min_similarity_diff', 0.8 );
	$close_enough = $closest_similarity > $min_score;

	do_action( 'post_string_similiary_test', $input, $closest, $closest_similarity, $close_enough );

	return $close_enough ? $closest : null;
}
/**
 * Finds the string in $other_strings that is most similar to $input.
 *
 * Candidates whose length differs too much from the input's are skipped, and the
 * best remaining candidate is returned only when its similarity exceeds the
 * minimum score. Both limits are filterable.
 *
 * @param string $input         The string to find a match for.
 * @param array  $other_strings Candidate strings.
 * @return string|null The closest candidate, or null when none is close enough.
 */
public function closest_original( $input, $other_strings ) {
	if ( empty( $other_strings ) ) {
		return null;
	}

	$input_length = gp_strlen( $input );
	if ( $input_length < 1 ) {
		// The relative length difference below divides by the input length, so an
		// empty input cannot be compared (and would raise a division by zero).
		return null;
	}

	/**
	 * Filter the maximum length difference allowed when comparing originals for a close match when importing.
	 *
	 * @since 1.0.0
	 *
	 * @param float $max_length_diff The times compared string length can differ from the input string.
	 */
	$max_length_diff = apply_filters( 'gp_original_import_max_length_diff', 0.5 );

	$closest            = null;
	$closest_similarity = 0;

	foreach ( $other_strings as $compared_string ) {
		$compared_string_length = gp_strlen( $compared_string );

		// Skip candidates whose length is too different to be a plausible match.
		if ( abs( ( $input_length - $compared_string_length ) / $input_length ) > $max_length_diff ) {
			continue;
		}

		$similarity = gp_string_similarity( $input, $compared_string );
		if ( $similarity > $closest_similarity ) {
			$closest            = $compared_string;
			$closest_similarity = $similarity;
		}
	}

	if ( null === $closest ) {
		return null;
	}

	/**
	 * Filter the minimum allowed similarity to be considered as a close match.
	 *
	 * @since 1.0.0
	 *
	 * @param float $similarity Minimum allowed similarity.
	 */
	$min_score = apply_filters( 'gp_original_import_min_similarity_diff', 0.8 );

	$close_enough = $closest_similarity > $min_score;

	/**
	 * Fires after the closest match and its similarity have been determined.
	 *
	 * @since 1.0.0
	 *
	 * @param string $input              The original string to match against.
	 * @param string $closest            Closest matching string.
	 * @param float  $closest_similarity The similarity between strings that was calculated.
	 * @param bool   $close_enough       Whether the closest was determined to be a close enough match.
	 */
	do_action( 'gp_post_string_similiary_test', $input, $closest, $closest_similarity, $close_enough );

	return $close_enough ? $closest : null;
}
/**
 * Scores how similar two strings are, based on their edit distance.
 *
 * The score is 1.0 for identical strings and shrinks as the distance grows relative
 * to the length of the shorter string; it can drop below zero when the distance is
 * larger than that length.
 *
 * @param string $str1 First string.
 * @param string $str2 Second string.
 * @return float Similarity score.
 */
function gp_string_similarity( $str1, $str2 ) {
	$length1 = gp_strlen( $str1 );
	$length2 = gp_strlen( $str2 );

	$len = min( $length1, $length2 );

	if ( $len < 1 ) {
		// The formula divides by the shorter length, so empty strings need explicit
		// handling: two empty strings are identical, otherwise nothing is shared.
		return ( $length1 < 1 && $length2 < 1 ) ? 1.0 : 0.0;
	}

	if ( $len > 5000 ) {
		// Arbitrary limit on character length for speed purpose: the distance is
		// taken to be the shorter length instead of being computed.
		$distance = $len;
	} else {
		$distance = gp_levenshtein( $str1, $str2, $length1, $length2 );
	}

	return 1 - ( $distance * 0.9 / $len );
}