Exemplo n.º 1
0
 /**
  * Build the "clean" form of a string, used to compare strings that should be
  * treated as equal even though they are written differently.
  *
  * Steps, in order:
  *  1. utf8_case_fold_nfkc() (case folding plus NFKC normalization, going by its name),
  *  2. replacement of confusable characters using the map loaded from
  *     includes/utf/data/confusables,
  *  3. removal of control characters,
  *  4. collapsing of every run of ASCII spaces into a single space,
  *  5. trimming of leading/trailing whitespace.
  *
  * NOTE: step 4 changes the result for inputs containing consecutive spaces.
  * Any stored values produced by an earlier version of this function (for
  * example a "clean username" column) have to be rebuilt after this change.
  *
  * @param string $text An unclean string, maybe user input (has to be valid UTF-8)
  * @return string Cleaned up version of the input string
  */
 function utf8_clean_string($text)
 {
     global $phpbb_root_path, $phpEx;
     // The replacement map is kept in a static so the data file is only
     // included on the first call.
     static $homographs = array();
     if (empty($homographs)) {
         $homographs = (include $phpbb_root_path . 'includes/utf/data/confusables.' . $phpEx);
     }
     $text = utf8_case_fold_nfkc($text);
     $text = strtr($text, $homographs);
     // Other control characters: bytes 0x00-0x1F and 0x7F, plus the two-byte
     // sequences 0xC2 0x80 - 0xC2 0x9F (U+0080-U+009F encoded as UTF-8)
     $text = preg_replace('#(?:[\\x00-\\x1F\\x7F]+|(?:\\xC2[\\x80-\\x9F])+)#', '', $text);
     // Reduce multiple spaces to a single one; without this "a  b" and "a b"
     // would produce different clean strings although they look the same.
     $text = preg_replace('# {2,}#', ' ', $text);
     // we can use trim here as all the other space characters should have been turned
     // into normal ASCII spaces by now
     return trim($text);
 }
Exemplo n.º 2
0
/**
* Generates the "clean" version of a string.
*
* "Clean" means a case-insensitive form (case folding) that has been normalized
* by utf8_case_fold_nfkc() (NFKC, going by the helper's name). In addition,
* homographs of a character are mapped onto one specific character through the
* table loaded from includes/utf/data/confusables, control characters are
* removed, runs of spaces are collapsed and the result is trimmed.
*
* Be aware that if you change anything in this function, or in the functions it
* uses, you need to rebuild/update the username_clean column in the users table
* and every other column that stores a clean string; otherwise lookups based on
* clean strings will break.
*
* @param	string	$text	An unclean string, maybe user input (has to be valid UTF-8!)
* @return	string			Cleaned up version of the input string
*/
function utf8_clean_string($text)
{
	// Replacement map, loaded on first use and then kept for later calls
	static $confusables = array();

	if (!$confusables)
	{
		$confusables = include IP_ROOT_PATH . 'includes/utf/data/confusables.' . PHP_EXT;
	}

	// Fold case / normalize, then map look-alike characters
	$folded = utf8_case_fold_nfkc($text);
	$mapped = strtr($folded, $confusables);

	// Drop control characters: bytes 0x00-0x1F and 0x7F, plus the two-byte
	// sequences 0xC2 0x80 - 0xC2 0x9F
	$without_controls = preg_replace('#(?:[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+)#', '', $mapped);

	// Two or more consecutive spaces become exactly one
	$single_spaced = preg_replace('# {2,}#', ' ', $without_controls);

	// Plain trim() is enough here: every other space character should already
	// have been turned into a normal ASCII space by the steps above
	return trim($single_spaced);
}