/**
 * Checks UTF8::transliterate_to_ascii() against sample phrases written in
 * Greek, Cyrillic, Georgian and Chinese script.
 *
 * @return void
 */
public function utf8_to_ascii_test()
{
    // Expected ASCII output => UTF-8 phrase fed to the transliterator
    $samples = array(
        "Te glossa mou edosan ellenike"     => "Τη γλώσσα μου έδωσαν ελληνική",
        "Na bierieghu pustynnykh voln"      => "На берегу пустынных волн",
        "vepxis tqaosani shot`a rust`aveli" => "ვეპხის ტყაოსანი შოთა რუსთაველი",
        "WoNengTunXiaBoLiErBuShangShenTi"   => "我能吞下玻璃而不伤身体",
    );

    foreach ($samples as $expected => $phrase) {
        $this->assert_equal($expected, UTF8::transliterate_to_ascii($phrase));
    }
}
/**
 * Replaces special/accented UTF-8 characters by ASCII-7 'equivalents'.
 *
 * A local table of superscripts, subscripts, vulgar fractions, symbols and
 * a few umlauted letters is applied first; the result is then handed to
 * UTF8::transliterate_to_ascii() for the remaining characters.
 *
 * @param   string   $str   string to transliterate
 * @param   integer  $case  -1 lowercase only, +1 uppercase only, 0 both cases;
 *                          only forwarded to UTF8::transliterate_to_ascii(),
 *                          the local table is applied regardless of its value
 * @return  string
 */
public static function transliterate_to_ascii($str, $case = 0)
{
    // Built once per request and reused on subsequent calls
    static $UTF8_SPECIAL_CHARS = NULL;

    if ($UTF8_SPECIAL_CHARS === null) {
        $UTF8_SPECIAL_CHARS = array(
            // Superscript / subscript digits
            '⁰' => '0', '₀' => '0', '¹' => '1', 'ˡ' => 'l', '₁' => '1',
            '²' => '2', '₂' => '2', '³' => '3', '₃' => '3', '⁴' => '4',
            '₄' => '4', '⁵' => '5', '₅' => '5', '⁶' => '6', '₆' => '6',
            '⁷' => '7', '₇' => '7', '⁸' => '8', '₈' => '8', '⁹' => '9',
            '₉' => '9',
            // Vulgar fractions
            '¼' => '1/4', '½' => '1/2', '¾' => '3/4', '⅓' => '1/3',
            '⅔' => '2/3', '⅕' => '1/5', '⅖' => '2/5', '⅗' => '3/5',
            '⅘' => '4/5', '⅙' => '1/6', '⅚' => '5/6', '⅛' => '1/8',
            '⅜' => '3/8', '⅝' => '5/8', '⅞' => '7/8', '⅟' => '1/',
            // Superscript / subscript operators and parentheses
            '⁺' => '+', '₊' => '+', '⁻' => '-', '₋' => '-', '⁼' => '=',
            '₌' => '=', '⁽' => '(', '₍' => '(', '⁾' => ')', '₎' => ')',
            // Letter-like symbols (note: plain ASCII '@' and '$' are rewritten too)
            'ª' => 'a', '@' => 'a', '€' => 'e', 'ⁿ' => 'n', '°' => 'o',
            'º' => 'o', '¤' => 'o', 'ˣ' => 'x', 'ʸ' => 'y', '$' => 'S',
            '©' => '(c)', '℠' => 'SM', '℡' => 'TEL', '™' => 'TM',
            // Umlauts and a-ring expanded to two letters.
            // 'Ü' previously mapped to 'eE', a typo that broke the
            // 'ü' => 'ue' / 'Ä' => 'Ae' / 'Ö' => 'Oe' pattern.
            'ä' => 'ae', 'Ä' => 'Ae', 'ö' => 'oe', 'Ö' => 'Oe',
            'ü' => 'ue', 'Ü' => 'Ue', 'å' => 'aa', 'Å' => 'Aa',
        );
    }

    $str = str_replace(array_keys($UTF8_SPECIAL_CHARS), array_values($UTF8_SPECIAL_CHARS), $str);

    // NOTE(review): if this method is itself declared in class UTF8, the call
    // below recurses without end; presumably the parent implementation is the
    // intended target - confirm against the enclosing class declaration.
    return UTF8::transliterate_to_ascii($str, $case);
}
/**
 * Normalises a value into a URL slug.
 *
 * The value is transliterated, lowercased, every character other than
 * a-z, 0-9, dash and slash is turned into a dash, runs of dashes are
 * collapsed and dashes at either end are removed.
 *
 * We could validate it by setting a rule, but for the most part, who cares?
 *
 * @param   mixed  $value
 * @return  mixed
 */
public static function slug($value)
{
    $lowered = strtolower(UTF8::transliterate_to_ascii($value));

    // Anything outside a-z, 0-9, "-" and "/" becomes a dash
    $dashed = preg_replace('/[^a-z0-9-\\/]/', '-', $lowered);

    // Two or more consecutive dashes shrink to one
    $collapsed = preg_replace('/-{2,}/', '-', $dashed);

    // No leading or trailing dashes
    return trim($collapsed, '-');
}
/**
 * Converts a phrase into a URL-safe title.
 *
 * @param   string   $title       phrase to convert
 * @param   string   $separator   word separator
 * @param   boolean  $ascii_only  transliterate to ASCII and keep only a-z/0-9?
 * @return  string
 * @uses    UTF8::transliterate_to_ascii
 * @uses    UTF8::strtolower
 */
public static function title($title, $separator = '-', $ascii_only = FALSE)
{
    $quoted = preg_quote($separator);

    if ($ascii_only === TRUE) {
        // ASCII mode: keep only the separator, a-z, 0-9 and whitespace
        $subject = strtolower(UTF8::transliterate_to_ascii($title));
        $strip   = '![^' . $quoted . 'a-z0-9\\s]+!';
    } else {
        // Unicode mode: keep the separator, any letter, any number and whitespace
        $subject = UTF8::strtolower($title);
        $strip   = '![^' . $quoted . '\\pL\\pN\\s]+!u';
    }

    $cleaned = preg_replace($strip, '', $subject);

    // Runs of separators and/or whitespace become one separator
    $joined = preg_replace('![' . $quoted . '\\s]+!u', $separator, $cleaned);

    return trim($joined, $separator);
}
/**
 * Generates a safe name for a file.
 *
 * The base name is stripped of separators/punctuation, transliterated and
 * reduced to [-a-zA-Z0-9_]; if nothing is left, the sha1 hash of the
 * original basename is used. The directory part and the extension are kept
 * as given. A falsy $filename is returned unchanged.
 *
 * @param   string   $filename       filename, optionally with a directory part
 * @param   boolean  $remove_spaces  replace runs of Unicode separators with "_"
 *                                   (only when exactly TRUE)
 * @param   integer  $max_length     max length of name plus ".extension", NULL for no limit
 * @return  string   safe filename
 * @return  boolean  FALSE if the extension contains non-alphanumeric characters
 *                   or the name cannot be fitted into $max_length
 */
public static function safe_name($filename, $remove_spaces = TRUE, $max_length = NULL)
{
    if (!$filename) {
        // Nothing to work with, hand the input back untouched
        return $filename;
    }

    $parts = pathinfo($filename);
    $name  = trim($parts['filename']);

    if ($remove_spaces === TRUE) {
        // Spaces and other Unicode separators become underscores
        $name = preg_replace('/[\\pZ]+/uD', '_', $name);
    }

    // Drop "other punctuation" characters, then re-trim
    $name = trim(preg_replace('/[\\p{Po}]+/uD', '', $name));

    $extension = Arr::get($parts, 'extension', '');

    // Length the extension occupies in the final name, including its dot
    $dotted_length = strlen($extension);
    if ($dotted_length) {
        $dotted_length++;
    }

    $limited = ($max_length !== NULL);

    if ($limited and $dotted_length > $max_length) {
        // The extension alone is already too long
        return FALSE;
    }

    if ($dotted_length and preg_match('/[^a-zA-Z0-9]/', $extension)) {
        // Extension is invalid
        return FALSE;
    }

    $name = UTF8::transliterate_to_ascii($name);

    // Only dashes, underscores and ASCII alphanumerics survive
    $name = preg_replace('/[^-a-zA-Z0-9_]+/', '', $name);

    if ($name == '') {
        // Fall back to a hash of the original basename
        $name = sha1($parts['basename']);
    }

    if ($limited and strlen($name) + $dotted_length > $max_length) {
        $name = substr($name, 0, $max_length - $dotted_length);

        if (!strlen($name)) {
            // No room left for the name itself
            return FALSE;
        }
    }

    $directory = ($parts['dirname'] !== '.') ? $parts['dirname'] . DIRECTORY_SEPARATOR : '';
    $suffix    = $dotted_length ? '.' . $extension : '';

    return $directory . $name . $suffix;
}
/**
 * Converts a value into a slug that is valid for a URL.
 *
 * $this->_default() is consulted first; when it signals (second element of
 * its result is truthy) the value it supplied is returned without further
 * processing.
 *
 * @param   Jam_Validated  $model
 * @param   mixed          $value
 * @param   boolean        $is_changed  not used by this method
 * @return  mixed
 * @uses    UTF8::transliterate_to_ascii
 * @credits Kohana-Team
 */
public function set(Jam_Validated $model, $value, $is_changed)
{
    list($value, $return) = $this->_default($model, $value);

    if ($return) {
        return $value;
    }

    if ($this->ascii_only === TRUE) {
        // Transliterate value to ASCII
        $value = UTF8::transliterate_to_ascii($value);
    }

    $separator = $this->separator;
    $hierarchy = $this->hierarchy_separator;

    // Characters that survive the clean-up besides a-z, 0-9 and whitespace
    $keep = preg_quote($separator);
    if ($hierarchy) {
        $keep .= preg_quote($hierarchy);
    }

    $value = preg_replace('![^' . $keep . 'a-z0-9\\s]+!', '', strtolower($value));

    if ($hierarchy) {
        // No whitespace on either side of a hierarchy separator
        $value = preg_replace('/\\s*([' . preg_quote($hierarchy, '/') . '])\\s*/', '$1', $value);
    }

    // Runs of separators and/or whitespace become one separator
    $value = preg_replace('![' . preg_quote($separator) . '\\s]+!u', $separator, $value);
    $value = trim($value, $separator);

    if ($hierarchy) {
        // Runs of hierarchy separators become one, none at the edges
        $value = preg_replace('![' . preg_quote($hierarchy) . ']+!u', $hierarchy, $value);
        $value = trim($value, $hierarchy);

        // Trimming the hierarchy separator may have exposed plain separators
        $value = trim($value, $separator);
    }

    return $value;
}
/**
 * Tests UTF8::transliterate_to_ascii
 *
 * Each data set supplies the input string, the case flag and the exact
 * string the transliteration must produce.
 *
 * @test
 * @dataProvider provider_transliterate_to_ascii
 */
public function test_transliterate_to_ascii($input, $case, $expected)
{
    $actual = UTF8::transliterate_to_ascii($input, $case);

    $this->assertSame($expected, $actual);
}
/**
 * Convert a phrase to a URL-safe title.
 *
 *     echo URL::title('My Blog Post'); // "my-blog-post"
 *
 * In ASCII-only mode every run of disallowed characters is replaced by the
 * separator; otherwise such runs are removed.
 *
 * @param   string   $title       Phrase to convert
 * @param   string   $separator   Word separator (any single character)
 * @param   boolean  $ascii_only  Transliterate to ASCII?
 * @return  string
 * @uses    UTF8::transliterate_to_ascii
 */
public static function title($title, $separator = '-', $ascii_only = FALSE)
{
    $quoted = preg_quote($separator);

    if ($ascii_only === TRUE) {
        // Transliterate, lowercase, then swap anything that is not the
        // separator, a-z, 0-9 or whitespace for the separator
        $ascii = strtolower(UTF8::transliterate_to_ascii($title));
        $title = preg_replace('![^' . $quoted . 'a-z0-9\\s]+!', $separator, $ascii);
    } else {
        // Lowercase, then drop anything that is not the separator, a letter,
        // a number or whitespace
        $lowered = UTF8::strtolower($title);
        $title   = preg_replace('![^' . $quoted . '\\pL\\pN\\s]+!u', '', $lowered);
    }

    // Runs of separators and/or whitespace become one separator
    $title = preg_replace('![' . $quoted . '\\s]+!u', $separator, $title);

    // No separators at the beginning or end
    return trim($title, $separator);
}
/**
 * Convert a phrase to a URL-safe slug.
 *
 *     echo self::to_slug('Мой блог пост'); // expected: "moi-blog-post"
 *
 * The text is passed through UTF8::ru_translit() and
 * UTF8::transliterate_to_ascii(), lowercased, and reduced to a-z, 0-9,
 * dots and the separator.
 *
 * @param   string  $text       Phrase to convert
 * @param   string  $separator  Word separator (any single character)
 * @return  string
 * @uses    UTF8::ru_translit
 * @uses    UTF8::transliterate_to_ascii
 */
public static function to_slug($text, $separator = '-')
{
    $quoted = preg_quote($separator);

    $ascii   = UTF8::transliterate_to_ascii(UTF8::ru_translit($text));
    $lowered = strtolower($ascii);

    // Drop everything except the separator, a-z, 0-9, dots and whitespace
    $cleaned = preg_replace('![^' . $quoted . 'a-z0-9.\\s]+!', '', $lowered);

    // Runs of separators and/or whitespace become one separator
    $joined = preg_replace('![' . $quoted . '\\s]+!u', $separator, $cleaned);

    // No separators at the beginning or end
    return trim($joined, $separator);
}
/**
 * Benchmark subject: runs UTF8::transliterate_to_ascii() on the given string.
 *
 * @param   string  $subject  text to transliterate
 * @return  string  result of the transliteration
 */
public function bench_utf8($subject)
{
    $transliterated = UTF8::transliterate_to_ascii($subject);

    return $transliterated;
}
/**
 * Makes a filename safe for writing on the filesystem.
 *
 * The name is transliterated and lowercased; every character other than
 * a-z, 0-9, dash and dot is replaced by the separator, and repeated
 * dashes/separators are collapsed into a single separator.
 *
 * @param   string  $filename
 * @param   string  $separator  replacement for disallowed characters
 * @return  string
 */
public static function sanitize($filename, $separator = '-')
{
    // Transliterate strange chars
    $filename = strtolower(UTF8::transliterate_to_ascii($filename));

    // Replace everything outside a-z, 0-9, "-" and "." with the separator.
    // Whitespace is covered here as well, so no separate whitespace pass
    // is needed afterwards.
    $filename = preg_replace('/[^a-z0-9-\\.]/', $separator, $filename);

    // Strip multiple dashes
    $filename = preg_replace('/-{2,}/', $separator, $filename);

    if ($separator !== '' and $separator !== '-') {
        // The dash pass above never sees runs of a custom separator
        // (e.g. "a__b" produced from "a  b"), so collapse those too.
        $run = '/(?:' . preg_quote($separator, '/') . '){2,}/';
        $filename = preg_replace($run, $separator, $filename);
    }

    return $filename;
}