/**
 * Build the "clean" comparison form of a string.
 *
 * Steps, in order:
 *  1. fold/normalize the text through utf8_case_fold_nfkc();
 *  2. substitute characters using the table returned by the
 *     includes/utf/data/confusables data file;
 *  3. delete control characters;
 *  4. trim both ends.
 *
 * @param string $text Input string (presumably has to be valid UTF-8 -- confirm against utf8_case_fold_nfkc())
 * @return string Cleaned version of the input
 */
function utf8_clean_string($text)
{
    global $phpbb_root_path, $phpEx;

    // Substitution table kept across calls; the data file is included
    // again only while this cache is still empty.
    static $confusables = array();

    if (!$confusables) {
        $tablePath = $phpbb_root_path . 'includes/utf/data/confusables.' . $phpEx;
        $confusables = (include $tablePath);
    }

    $folded = utf8_case_fold_nfkc($text);
    $mapped = strtr($folded, $confusables);

    // Remove runs of the bytes 0x00-0x1F / 0x7F and of the two-byte
    // sequences C2 80 .. C2 9F (the "other control characters").
    $stripped = preg_replace('#(?:[\\x00-\\x1F\\x7F]+|(?:\\xC2[\\x80-\\x9F])+)#', '', $mapped);

    // A plain trim() is enough here: every other space character is expected
    // to have been turned into a normal ASCII space by the steps above.
    return trim($stripped);
}
/**
 * Generate the "clean" version of a string.
 *
 * "Clean" means the text has been case-folded and normalized by
 * utf8_case_fold_nfkc(), and that homographs of a character have been
 * replaced by one specific character (preferably the ASCII one, when an
 * ASCII equivalent exists) using the confusables data table. Control
 * characters are removed, runs of spaces are collapsed to a single space,
 * and the result is trimmed.
 *
 * Be aware that if you change anything in this function, or in the functions
 * it relies on, you need to rebuild/update the username_clean column in the
 * users table and every other column that stores a clean string; otherwise
 * that functionality will break.
 *
 * @param string $text An unclean string, maybe user input (has to be valid UTF-8!)
 * @return string Cleaned up version of the input string
 */
function utf8_clean_string($text)
{
    // Substitution table kept across calls; the data file is included
    // again only while this cache is still empty.
    static $confusables = array();

    if (!$confusables) {
        $tablePath = IP_ROOT_PATH . 'includes/utf/data/confusables.' . PHP_EXT;
        $confusables = include($tablePath);
    }

    $folded = utf8_case_fold_nfkc($text);
    $mapped = strtr($folded, $confusables);

    // Remove runs of the bytes 0x00-0x1F / 0x7F and of the two-byte
    // sequences C2 80 .. C2 9F (the "other control characters").
    $stripped = preg_replace('#(?:[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+)#', '', $mapped);

    // Reduce any run of two or more spaces to a single space.
    $collapsed = preg_replace('# {2,}#', ' ', $stripped);

    // A plain trim() is enough here: every other space character is expected
    // to have been turned into a normal ASCII space by the steps above.
    return trim($collapsed);
}