/**
 * Transliterates accented / foreign characters of a string to ASCII.
 *
 * This is a thin wrapper that forwards to Transliterate::to_ascii()
 * (see https://github.com/ivantcholakov/transliterate).
 *
 * @param string $string   Input string
 * @param string $language Language identifier; when it casts to an empty
 *                         string, config_item('language') is used instead
 * @return string
 */
function convert_accented_characters($string, $language = null)
{
    $requested = (string) $language;

    // Only consult the configuration when the caller gave no language.
    return Transliterate::to_ascii(
        $string,
        $requested !== '' ? $requested : config_item('language')
    );
}
/**
 * Builds a URL-friendly title from an arbitrary string.
 *
 * Processing order: HTML tags are stripped, the text is optionally
 * transliterated to ASCII, HTML entities and disallowed characters are
 * removed, whitespace runs become the separator, repeated separators are
 * collapsed, the result is optionally lower-cased and finally trimmed of
 * leading/trailing separators and whitespace.
 *
 * @param string $str                    Input string
 * @param string $separator              Word separator; the aliases 'dash' and
 *                                       'underscore' map to '-' and '_'
 * @param bool   $lowercase              Whether to lower-case the result
 * @param bool   $transliterate_to_ascii Whether to run Transliterate::to_ascii() first
 * @param string $language               Language identifier for transliteration; when it
 *                                       casts to an empty string, config_item('language')
 *                                       is used
 * @return string
 */
function url_title($str, $separator = '-', $lowercase = FALSE, $transliterate_to_ascii = TRUE, $language = NULL)
{
    $language = (string) $language;

    if ($language === '') {
        $language = config_item('language');
    }

    $str = strip_tags($str);

    if ($transliterate_to_ascii) {
        $str = Transliterate::to_ascii($str, $language);
    }

    if ($separator === 'dash') {
        $separator = '-';
    } elseif ($separator === 'underscore') {
        $separator = '_';
    }

    // The patterns below are delimited with '#', so the delimiter has to be
    // passed to preg_quote(); otherwise a separator containing '#' can
    // terminate the pattern early on PHP versions that do not quote '#'.
    $q_separator = preg_quote($separator, '#');

    // With UTF-8 aware PCRE any Unicode letter is kept; without it only
    // ASCII letters are kept (matched case-insensitively).
    if (PCRE_UTF8_INSTALLED) {
        $allowed_chars = '\\p{L}0-9 _-';
        $modifiers = 'u';
    } else {
        $allowed_chars = 'a-z0-9 _-';
        $modifiers = 'i';
    }

    // Order matters: entities are dropped before the character filter, and
    // whitespace is turned into separators before separators are collapsed.
    $trans = array(
        '&.+?;' => '',
        '[^' . $allowed_chars . ']' => '',
        '\\s+' => $separator,
        '(' . $q_separator . ')+' => $separator,
    );

    foreach ($trans as $pattern => $replacement) {
        $str = preg_replace('#' . $pattern . '#' . $modifiers, $replacement, $str);
    }

    if ($lowercase) {
        $str = UTF8::strtolower($str);
    }

    return trim(trim($str, $separator));
}
/**
 * Looks up words in tl_search_index that are within a Levenshtein distance
 * of each given chunk.
 *
 * Every chunk is passed through \Transliterate::trans() and compared with
 * the word_transliterated column of all rows whose transliterated length is
 * within $maxDist of the chunk's length.
 *
 * @param array $arrChunks    Search terms to look up
 * @param int   $maxDist      Maximum Levenshtein distance that still counts as a match
 * @param array $exactresults Receives (by reference, appended) the transliterated
 *                            words with distance 0
 * @param array $moreresults  Receives (by reference) near matches grouped by distance:
 *                            $moreresults[$distance][] = array('trans' => ..., 'org' => ...);
 *                            sorted by ascending distance on return
 * @return void
 */
public static function searchIndex($arrChunks, $maxDist, &$exactresults, &$moreresults)
{
    // Near matches whose transliterated form is this short or shorter are dropped.
    $minResLen = 2;

    // The bounds below are concatenated into the SQL string, so make sure
    // nothing but integer literals can end up there.
    $maxDistance = (int) $maxDist;

    foreach ($arrChunks as $chunk) {
        $transChunk = \Transliterate::trans($chunk);
        $wordlength = strlen($transChunk);
        $minLen = max(0, $wordlength - $maxDistance);
        $maxLen = $wordlength + $maxDistance;

        $db = \Database::getInstance();
        $res = $db->query('SELECT DISTINCT word_transliterated, word FROM tl_search_index WHERE length(word_transliterated) BETWEEN ' . $minLen . ' AND ' . $maxLen);

        // Rows are read positionally: [0] is used as word_transliterated,
        // [1] as word, following the column order of the SELECT.
        while ($word = $res->fetchRow()) {
            $candidate = $word[0];
            $lev = levenshtein($transChunk, $candidate);

            // PHP versions before 8.0 return -1 from levenshtein() when an
            // argument is longer than 255 characters; that is not a match.
            if ($lev < 0 || $lev > $maxDistance) {
                continue;
            }

            if ($lev === 0) {
                $exactresults[] = $candidate;
                continue;
            }

            // Result too short to be a meaningful near match.
            if (strlen($candidate) <= $minResLen) {
                continue;
            }

            // Result has a different type (prevents matches like XX = 01).
            if (is_numeric($candidate) && !is_numeric($transChunk)) {
                continue;
            }

            $moreresults[$lev][] = array('trans' => $candidate, 'org' => $word[1]);
        }
    }

    // Sort near matches by lowest Levenshtein distance. The guard covers
    // callers that pass an uninitialised variable when nothing matched.
    if (is_array($moreresults)) {
        ksort($moreresults);
    }
}