/**
 * Remove diacritical marks from a UTF-8 string using the best facility available.
 *
 * Strategies are tried in this order, and the first one available wins:
 *  1. intl transliterator ('Any-Latin; Latin-ASCII'),
 *  2. intl normalizer: decompose, drop non-spacing marks, recompose,
 *  3. glibc iconv with ASCII//TRANSLIT,
 *  4. an incomplete HTML-entity based fallback (limited to latin1).
 *
 * Note that the strategies are not strictly equivalent: (1) and (3) produce ASCII,
 * while (2) only removes combining marks.
 *
 * @param string $in UTF-8 input text
 *
 * @return string|false|null the text without diacritics; the underlying functions
 *                           may report failure with false or null
 */
function strip_diacriticals($in)
{
    if (function_exists('transliterator_transliterate')) {
        // PHP 5.4 + intl
        return transliterator_transliterate('Any-Latin; Latin-ASCII', $in);
    }

    if (function_exists('normalizer_normalize')) {
        // PHP 5.3 + intl
        return normalizer_normalize(
            preg_replace('/\\p{Mn}+/u', '', normalizer_normalize($in, Normalizer::FORM_D)),
            Normalizer::FORM_C
        );
    }

    if (function_exists('iconv') && ICONV_IMPL == 'glibc') {
        return iconv('UTF-8', 'ASCII//TRANSLIT', $in);
    }

    // Incomplete version (limited to latin1): turn the text into HTML entities,
    // then keep only the base letter(s) of each accented-letter entity.
    // The letter class is [A-Za-z]; the former [A-za-z] range also matched the
    // punctuation that sits between 'Z' and 'a' in ASCII.
    $patterns = array(
        '~&([A-Za-z])(?:grave|acute|circ|tilde|uml|ring|cedil|slash|caron);~' => '\\1',
        '~&([A-Za-z]{2})lig;~' => '\\1',
        // Any other entity is dropped entirely.
        '~&[^;]+;~' => '',
    );
    $out = htmlentities($in, ENT_NOQUOTES, 'UTF-8');
    $out = preg_replace(array_keys($patterns), array_values($patterns), $out);

    return $out;
}
/**
 * Compose every value of the record to NFC and strip two stray C1 control
 * characters (U+009C and U+0098) from it.
 *
 * @param array $book map of field name => string value
 *
 * @return array the same map with cleaned values
 */
private static function cleanChars($book)
{
    // Removed one after the other, in this order.
    $strayControls = array('/\\x{009c}/u', '/\\x{0098}/u');

    foreach ($book as $field => $raw) {
        $cleaned = normalizer_normalize($raw, \Normalizer::FORM_C);
        foreach ($strayControls as $controlPattern) {
            $cleaned = preg_replace($controlPattern, '', $cleaned);
        }
        $book[$field] = $cleaned;
    }

    return $book;
}
/**
 * Sanitise a user supplied string: apply the FILTER_SANITIZE_STRING filter
 * (quotes left unencoded), require valid UTF-8, normalise the text and remove
 * every character of the Unicode "Other" category (\p{C}).
 *
 * @param string $string raw input
 *
 * @return string the cleaned string
 *
 * @throws \InvalidArgumentException when the filtered input is not valid UTF-8
 */
public static function clean($string)
{
    $filtered = filter_var($string, FILTER_SANITIZE_STRING, FILTER_FLAG_NO_ENCODE_QUOTES);

    // Ensure bytestream is valid
    $isUtf8 = mb_check_encoding($filtered, 'UTF-8');
    if (!$isUtf8) {
        throw new \InvalidArgumentException('Invalid unicode input.');
    }

    // Clean and normalise unicode
    $normalised = normalizer_normalize(iconv('UTF-8', 'UTF-8//IGNORE', $filtered));

    // Strip control characters
    return preg_replace('~\\p{C}+~u', '', $normalised);
}
/**
 * Normalizes the encoding of a string (UTF8 NFD to NFC).
 *
 * On HFS+ filesystem (OS X) filenames are stored in UTF8 NFD while all other filesystems are
 * using UTF8 NFC. NFC is more common in general.
 *
 * When the intl normalizer is not available, only the six German umlauts are
 * recomposed. The decomposed and composed forms are written as explicit byte
 * escapes because they look identical in an editor and any tool that
 * normalizes the source file would silently turn the table into a no-op.
 *
 * @param string $string Input string
 *
 * @return string
 */
public static function normalizeEncoding($string)
{
    static $normalizer;

    if (null === $normalizer) {
        if (function_exists('normalizer_normalize')) {
            $normalizer = function ($string) {
                $composed = normalizer_normalize($string, Normalizer::FORM_C);

                // normalizer_normalize() does not return a string on failure
                // (e.g. invalid UTF-8); hand back the input unchanged then.
                return is_string($composed) ? $composed : $string;
            };
        } else {
            $normalizer = function ($string) {
                return str_replace(
                    // Base letter followed by U+0308 COMBINING DIAERESIS (NFD) ...
                    ["A\xCC\x88", "a\xCC\x88", "O\xCC\x88", "o\xCC\x88", "U\xCC\x88", "u\xCC\x88"],
                    // ... to the precomposed letters U+00C4, U+00E4, U+00D6, U+00F6, U+00DC, U+00FC (NFC).
                    ["\xC3\x84", "\xC3\xA4", "\xC3\x96", "\xC3\xB6", "\xC3\x9C", "\xC3\xBC"],
                    $string
                );
            };
        }
    }

    return $normalizer($string);
}
/**
 * Enhanced 'remove_accents': when the intl normalizer functions are available,
 * the text is first composed to NFC (only if it is not already in that form),
 * then handed to the global remove_accents().
 *
 * @since 1.0.0
 *
 * @see remove_accents()
 *
 * @param string $string Text that might have accent characters
 * @return string Filtered string with replaced "nice" characters.
 */
public function remove_accents($string)
{
    $canCompose = function_exists('normalizer_normalize');

    if ($canCompose && !normalizer_is_normalized($string, Normalizer::FORM_C)) {
        $string = normalizer_normalize($string, Normalizer::FORM_C);
    }

    // Global function of the same name, not this method.
    return remove_accents($string);
}
/**
 * Encode a relative descriptor string.
 *
 * The string is normalized when the intl normalizer is available, passed
 * through Z_Unicode::convertCharStr2UTF8(), stripped of spaces, and every pair
 * of hexadecimal digits in the result is replaced by Z_Unicode::hex2char().
 *
 * @param string $str descriptor text
 *
 * @return string the encoded string
 */
public static function encodeRelativeDescriptorString($str)
{
    if (function_exists('normalizer_normalize')) {
        $str = normalizer_normalize($str);
    }

    $converted = Z_Unicode::convertCharStr2UTF8($str);
    $withoutSpaces = str_replace(" ", "", $converted);

    // convertNumbers2Char($str, 'hex')
    $hexPairToChar = function ($matches) {
        return Z_Unicode::hex2char($matches[0]);
    };

    return preg_replace_callback("/([A-Fa-f0-9]{2})/", $hexPairToChar, $withoutSpaces);
}
/**
 * Remove all unwanted characters: the text is decomposed (NFD) and every
 * Unicode mark (\pM) is dropped, leaving the base characters.
 *
 * @param string $text
 *
 * @return string
 */
public function normalize($text)
{
    $decomposed = normalizer_normalize($text, \Normalizer::FORM_D);
    $withoutMarks = preg_replace('/\\pM*/u', '', $decomposed);

    return $withoutMarks;
}
/**
 * Tests that "normalizer_normalize" exists and works
 *
 * The probe composes a decomposed "A with ring above" and expects the
 * precomposed character. Both forms are spelled as byte escapes: they render
 * identically, so literal characters could be unified by an editor or a
 * source normalization step, which would make the probe meaningless.
 * The result is computed once and cached for the rest of the request.
 *
 * @return bool
 */
public static function hasNormalizerSupport()
{
    static $ret = null;

    if (null === $ret) {
        $form_c = "\xC3\x85";  // 'LATIN CAPITAL LETTER A WITH RING ABOVE' (U+00C5)
        $form_d = "A\xCC\x8A"; // A followed by 'COMBINING RING ABOVE' (U+030A)

        $ret = function_exists('normalizer_normalize')
            && $form_c === normalizer_normalize($form_d);
    }

    return $ret;
}
/**
 * Reduce a text to a simplified form (the name suggests it is meant for
 * search indexing / matching).
 *
 * Steps, in order: compose the text to NFC when the intl normalizer is
 * available (otherwise apply a small substitution table), replace a fixed
 * list of special letters with plain replacements, drop Unicode marks,
 * collapse runs of whitespace to their first character and trim the result.
 *
 * NOTE(review): several keys in the two tables below look identical to other
 * keys or to their replacement (e.g. "ä" appears twice). They presumably
 * differ in composed vs. decomposed form in the real file — confirm the
 * literal bytes before editing them.
 *
 * @param string $text the text to simplify
 * @param string|false $lang locale for the (currently disabled) word
 *                           tokenizer; false selects the system language
 *
 * @return string the simplified text
 */
function searchable($text, $lang = false) {
    global $cfg;
    if (function_exists('normalizer_normalize')) {
        // Normalize text input to composed form (NFC); the marks that are
        // left over are removed further down
        $text = normalizer_normalize($text, Normalizer::FORM_C);
    } else {
        // As a lightweight compatibility layer, use a lightweight C
        // normalizer with diacritic removal, thanks
        // http://ahinea.com/en/tech/accented-translate.html
        $tr = array("ä" => "a", "ñ" => "n", "ö" => "o", "ü" => "u", "ÿ" => "y");
        $text = strtr($text, $tr);
    }
    // Decompose compatible versions of characters (ä => ae)
    $tr = array("ß" => "ss", "Æ" => "AE", "æ" => "ae", "IJ" => "IJ", "ij" => "ij", "Œ" => "OE", "œ" => "oe", "Ð" => "D", "Đ" => "D", "ð" => "d", "đ" => "d", "Ħ" => "H", "ħ" => "h", "ı" => "i", "ĸ" => "k", "Ŀ" => "L", "Ł" => "L", "ŀ" => "l", "ł" => "l", "Ŋ" => "N", "ʼn" => "n", "ŋ" => "n", "Ø" => "O", "ø" => "o", "ſ" => "s", "Þ" => "T", "Ŧ" => "T", "þ" => "t", "ŧ" => "t", "ä" => "ae", "ö" => "oe", "ü" => "ue", "Ä" => "AE", "Ö" => "OE", "Ü" => "UE");
    $text = strtr($text, $tr);
    // Drop separated diacritics
    $text = preg_replace('/\\p{M}/u', '', $text);
    // Drop extraneous whitespace (keep the first whitespace character of a run)
    $text = preg_replace('/(\\s)\\s+/u', '$1', $text);
    // Drop leading and trailing whitespace
    $text = trim($text);
    // The leading "false &&" disables this branch: the tokenizer is never used
    if (false && class_exists('IntlBreakIterator')) {
        // Split by word boundaries
        if ($tokenizer = IntlBreakIterator::createWordInstance($lang ?: ($cfg ? $cfg->getSystemLanguage() : 'en_US'))) {
            $tokenizer->setText($text);
            $tokens = array();
            foreach ($tokenizer as $token) {
                $tokens[] = $token;
            }
            $text = implode(' ', $tokens);
        }
    } else {
        // Approximate word boundaries from Unicode chart at
        // http://www.unicode.org/reports/tr29/#Word_Boundaries
        // Punt for now
    }
    return $text;
}
/**
 * Normalise a UTF-8 string to FORM_C, avoiding the pitfalls in PHP's
 * normalizer_normalize function.
 *
 * The empty string is returned as is, and so is any string when the intl
 * normalizer is not installed. When normalisation fails the original string
 * is returned and a developer debugging message is emitted.
 *
 * @param string $string the input string.
 * @return string the normalised string.
 */
protected static function safe_normalize($string)
{
    if ($string === '') {
        return '';
    }

    if (!function_exists('normalizer_normalize')) {
        return $string;
    }

    $normalised = normalizer_normalize($string, Normalizer::FORM_C);

    // Failure is reported as false by current PHP versions and as null by
    // old ones; checking only for null let false leak out to callers.
    if (!is_string($normalised)) {
        // An error occurred in normalizer_normalize, but we have no idea what.
        debugging('Failed to normalise string: ' . $string, DEBUG_DEVELOPER);
        return $string; // Return the original string, since it is the best we have.
    }

    return $normalised;
}
/**
 * Normalize the string to Unicode normalization form KC.
 *
 * @see \X501\StringPrep\StringPrep::prepare()
 * @param string $string UTF-8 encoded string
 * @return string
 */
public function apply($string)
{
    $form = \Normalizer::NFKC;

    return normalizer_normalize($string, $form);
}
/**
 * Strip Unicode marks from a value after decomposing it (NFD).
 *
 * @param string|null $value the value to simplify
 *
 * @return string|null NULL when no value is set, the stripped text otherwise
 */
protected static function normalizeValue($value)
{
    if (!isset($value)) {
        return NULL;
    }

    $decomposed = normalizer_normalize($value, Normalizer::FORM_D);

    return preg_replace('/(\p{M})/ui', '', $decomposed);
}
/**
 * Decompose a word (NFD), spell the ae/oe ligatures out in lower case and
 * remove the given character sequence from it.
 *
 * @param string $w     the word to simplify
 * @param string $strip text to remove after decomposition; the default is a
 *                      single combining character
 *
 * @return string the simplified word
 */
function fewer_specials($w, $strip = "̄")
{
    $ligatures = array("æ", "œ", "Æ", "Œ");
    // Upper case ligatures are deliberately mapped to lower case digraphs too.
    $digraphs = array("ae", "oe", "ae", "oe");

    $decomposed = normalizer_normalize($w, Normalizer::FORM_D);
    $spelledOut = str_replace($ligatures, $digraphs, $decomposed);

    return str_replace($strip, "", $spelledOut);
}
/**
 * Normalize the given UTF-8 string to the requested Unicode normalization
 * form (compatibility decomposition, NFKD, unless told otherwise).
 *
 * @see http://stackoverflow.com/a/7934397/99923
 * @param string $string to normalize
 * @param int $form to normalize as
 * @return string
 */
public static function normalize($string, $form = Normalizer::FORM_KD)
{
    $normalized = normalizer_normalize($string, $form);

    return $normalized;
}
/**
 * Prepare a keyword according to two global switches.
 *
 * With $normalize_keywords enabled (and the intl normalizer installed) the
 * keyword is Unicode-normalized; with $keywords_remove_diacritics enabled it
 * is additionally passed through remove_accents().
 *
 * @param string $keyword the keyword to prepare
 *
 * @return string the prepared keyword
 */
function normalize_keyword($keyword)
{
    global $normalize_keywords, $keywords_remove_diacritics;

    // Normalize the text if function available
    $shouldNormalize = $normalize_keywords && function_exists('normalizer_normalize');
    if ($shouldNormalize) {
        $keyword = normalizer_normalize($keyword);
    }

    return $keywords_remove_diacritics ? remove_accents($keyword) : $keyword;
}
/**
 * Remove all unwanted characters and unset the optional data, if it is too long.
 *
 * Each value is decomposed (NFD) and stripped of Unicode marks; an entry
 * whose stripped value is longer, in bytes, than the limit reported by
 * getAdditionalDataMaxLength() for its key is removed from the result.
 *
 * @param array $additionalData
 *
 * @return array
 */
public static function normalize(array $additionalData)
{
    foreach ($additionalData as $key => $value) {
        $decomposed = normalizer_normalize($value, \Normalizer::FORM_D);
        $stripped = preg_replace('/\\pM*/u', '', $decomposed);
        $additionalData[$key] = $stripped;

        $tooLong = strlen($additionalData[$key]) > self::getAdditionalDataMaxLength($key);
        if ($tooLong) {
            unset($additionalData[$key]);
        }
    }

    return $additionalData;
}
/**
 * Bring the note into Unicode normalization form C (canonical composition).
 *
 * @param string The original note
 *
 * @return string The normalized note
 */
public function normalizeNFC($note)
{
    $composed = normalizer_normalize($note, Normalizer::FORM_C);

    return $composed;
}
/**
 * Build the JOIN clause that ties the posts table to the multilingual search
 * table, plus the taxonomy joins needed for semantic domain browsing.
 *
 * The query this is going for will hopefully end up looking something like
 * this example:
 *   SELECT id, language_code, relevance, post_title
 *   FROM wp_posts p
 *   JOIN (
 *       SELECT post_id, language_code, MAX(relevance) AS relevance, search_strings
 *       FROM sil_multilingual_search
 *       WHERE search_strings like '%sleeping%'
 *       GROUP BY post_id, language_code
 *       ORDER BY relevance DESC
 *   ) sil_multilingual_search ON sil_multilingual_search.post_id = p.id
 *   ORDER BY relevance DESC, post_title;
 *
 * Every request-derived value is escaped with esc_sql() before it is placed
 * into the SQL text, and request parameters are only read when present.
 *
 * @param string $join the JOIN clause built so far
 *
 * @return string the JOIN clause to use
 */
function sil_dictionary_custom_join($join)
{
    global $wp_query, $wpdb;

    $search_table_name = SEARCHTABLE;

    mb_internal_encoding("UTF-8");

    if (!empty($wp_query->query_vars['s'])) {
        // search string gets trimmed and normalized to NFC
        $rawsearch = trim($wp_query->query_vars['s']);
        if (class_exists("Normalizer", false)) {
            $search = normalizer_normalize($rawsearch, Normalizer::FORM_C);
        } else {
            $search = $rawsearch;
        }

        // Language code: request parameter first, query var as fallback.
        if (isset($_GET['key'])) {
            $key = $_GET['key'];
        } elseif (isset($wp_query->query_vars['langcode'])) {
            $key = $wp_query->query_vars['langcode'];
        } else {
            $key = '';
        }
        // $key ends up inside quoted SQL literals below.
        $key = esc_sql($key);

        $partialsearch = isset($_GET['partialsearch'])
            ? $_GET['partialsearch']
            : get_option("include_partial_words");

        if (strlen($search) == 0 && isset($_GET['tax']) && $_GET['tax'] > 1) {
            $partialsearch = 1;
        }

        $subquery_where = "";
        if (strlen(trim($key)) > 0) {
            $subquery_where .= " WHERE " . $search_table_name . ".language_code = '{$key}' ";
        }
        $subquery_where .= empty($subquery_where) ? " WHERE " : " AND ";

        if (isset($wp_query->query_vars['letter'])) {
            $letter = trim($wp_query->query_vars['letter']);
            $noletters = isset($wp_query->query_vars['noletters'])
                ? trim($wp_query->query_vars['noletters'])
                : '';

            // by default we use collate utf8_bin and à, ä, etc. are handled as different letters
            $collate = "COLLATE 'UTF8_BIN'";
            if (get_option('IncludeCharactersWithDiacritics') == 1) {
                $collate = "";
            }

            // The letter matches as typed, upper-cased, or with a leading hyphen.
            // mb_strtoupper() is used because the letter is UTF-8 text; the
            // byte-wise strtoupper() does not upper-case non-ASCII letters.
            $subquery_where .= "(" . $search_table_name . ".search_strings LIKE '" . esc_sql($letter) . "%' " . $collate
                . " OR " . $search_table_name . ".search_strings LIKE '" . esc_sql(mb_strtoupper($letter)) . "%' " . $collate
                . " OR " . $search_table_name . ".search_strings LIKE '" . esc_sql("-" . $letter) . "%' " . $collate . ") "
                . " AND relevance >= 95 AND language_code = '{$key}' ";

            // Comma separated prefixes that must NOT start the search string.
            $arrNoLetters = explode(",", $noletters);
            foreach ($arrNoLetters as $noLetter) {
                if (strlen($noLetter) > 0) {
                    $subquery_where .= " AND " . $search_table_name . ".search_strings NOT LIKE '" . esc_sql($noLetter) . "%' " . $collate
                        . " AND " . $search_table_name . ".search_strings NOT LIKE '" . esc_sql(mb_strtoupper($noLetter)) . "%' " . $collate;
                }
            }
        } else {
            if (is_CJK($search) || mb_strlen($search) > 3 || $partialsearch == 1) {
                $subquery_where .= $search_table_name . ".search_strings LIKE '%" . esc_sql($search) . "%'";
            } else {
                if (mb_strlen($search) > 1) {
                    // Whole word match for short search strings.
                    // NOTE(review): the search text is escaped for SQL only, so
                    // regular expression metacharacters in it stay active.
                    $subquery_where .= $search_table_name . ".search_strings REGEXP '[[:<:]]" . esc_sql($search) . "[[:>:]]'";
                }
                // NOTE(review): for a search string of 0 or 1 characters no
                // condition is appended, which leaves a dangling " WHERE " or
                // " AND " in the subquery — confirm the intended behaviour.
            }
        }

        $subquery = " (SELECT post_id, language_code, MAX(relevance) AS relevance, search_strings, sortorder "
            . "FROM " . $search_table_name . $subquery_where
            . " GROUP BY post_id, language_code, search_strings "
            . " ORDER BY relevance DESC) ";

        $join = " JOIN " . $subquery . $search_table_name . " ON {$wpdb->posts}.ID = " . $search_table_name . ".post_id ";
    }

    $tax = 0;
    if (isset($_GET['tax'])) {
        $tax = $_GET['tax'];
    }

    $hasSemDomain = isset($wp_query->query_vars['semdomain'])
        && strlen($wp_query->query_vars['semdomain']) > 0;

    if ($tax > 1 || $hasSemDomain) {
        $join .= " LEFT JOIN {$wpdb->term_relationships} ON {$wpdb->posts}.ID = {$wpdb->term_relationships}.object_id ";
        $join .= " INNER JOIN {$wpdb->term_taxonomy} ON {$wpdb->term_relationships}.term_taxonomy_id = {$wpdb->term_taxonomy}.term_id ";
        if (get_option("useSemDomainNumbers") == 1) {
            $join .= " INNER JOIN {$wpdb->terms} ON {$wpdb->term_relationships}.term_taxonomy_id = {$wpdb->terms}.term_id ";
        }
    }

    return $join;
}
/**
 * Reduce a word to letters plus a configurable set of extra characters.
 *
 * The word is decomposed (NFD), transliterated with the ICU rule
 * 'Any-Latin; Latin-ASCII', and then every character that is neither a
 * Unicode letter nor listed in $extras is removed.
 *
 * @param string $w      the word to reduce
 * @param string $extras additional characters to keep, written as the inside
 *                       of a regular expression character class
 *
 * @return string the reduced word
 */
function no_specials2($w, $extras = "1-9/; ,\\n")
{
    $decomposed = normalizer_normalize($w, Normalizer::FORM_D);

    # TODO: use more on this function
    $ascii = transliterator_transliterate('Any-Latin; Latin-ASCII', $decomposed);

    // Negated class: anything outside \p{L} and the extras is deleted.
    $unwanted = "#[^\\p{L}" . $extras . "]#ui";

    return preg_replace($unwanted, "", $ascii);
}
/**
 * Convert a UTF-8 string to normal form KD, compatibility decomposition.
 * This may cause irreversible information loss, use judiciously.
 *
 * The backend is picked from the NORMALIZE_INTL and NORMALIZE_ICU constants;
 * without either, strings containing no byte in the 0x80-0xff range are
 * returned untouched and everything else goes through UtfNormal::NFKD().
 *
 * @param $string String: a valid UTF-8 string. Input is not validated.
 * @return string a UTF-8 string in normal form KD
 */
static function toNFKD($string)
{
    if (NORMALIZE_INTL) {
        return normalizer_normalize($string, Normalizer::FORM_KD);
    }

    if (NORMALIZE_ICU) {
        return utf8_normalize($string, UNORM_NFKD);
    }

    // Pure ASCII needs no work.
    $hasHighBytes = preg_match('/[\\x80-\\xff]/', $string);

    return $hasHighBytes ? UtfNormal::NFKD($string) : $string;
}
$zeilennrs[] = $linenr++; } $text2 = implode("<br>", $zeilennrs); $y = $pdf->getY(); $pdf->writeHTMLCell(10, '', 12, $y, $text2, 0, 0, 0, true, '', true); $pdf->writeHTMLCell(170, '', 24, '', $text, 0, 1, 0, true, '', true); $pdf->Ln(4); } } if ($aenderungsantrag->aenderung_begruendung_html) { $begruendung = $aenderungsantrag->aenderung_begruendung; } else { $begruendung = HtmlBBcodeUtils::bbcode2html($aenderungsantrag->aenderung_begruendung); } if (function_exists("normalizer_normalize")) { $begruendung = normalizer_normalize($begruendung); } $html = ' </div>'; if (trim($begruendung) != "") { $html .= ' <h3 style="margin-top: 0;">Begründung</h3> <div class="textholder consolidated"> ' . $begruendung . ' </div>'; } $html .= '</div>'; $pdf->SetFont("helvetica", "", 10); $pdf->writeHTML($html, true, false, true, false, ''); $unterstuetzerInnen = $aenderungsantrag->getUnterstuetzerInnen(); if (count($unterstuetzerInnen) > 0) {
/**
 * Approximate the removal of accents through HTML entities.
 *
 * The text is converted to HTML entities and each entity made of one or two
 * letters followed by a known suffix (acute, grave, lig, uml, ...) is replaced
 * by those leading letters. The intl based path is switched off by the
 * always-false "1 == 2" term, so the entity approach is the one that runs.
 *
 * @param string $string UTF-8 text
 *
 * @return string the text with the matched entities reduced to letters
 */
function fake_normalize($string)
{
    $useIntl = version_compare(PHP_VERSION, '5.0.0', '>=')
        && function_exists('normalizer_normalize')
        && 1 == 2;

    if (!$useIntl) {
        $entities = htmlentities($string, ENT_NOQUOTES, 'UTF-8');

        return preg_replace(
            '~&([a-z]{1,2})(acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml|mp);~i',
            '$1',
            $entities
        );
    }

    return normalizer_is_normalized($string) ? $string : normalizer_normalize($string);
}
?> "><?php printf(__('Search results for "%s"', ZEE_LANG), $searchquery); ?> </h2> <p><?php if (function_exists(sil_dictionary_custom_message)) { sil_dictionary_custom_message(); } ?> </p> <?php if (have_posts()) { //search string are normalized to NFC if (class_exists("Normalizer", $autoload = false)) { $query = normalizer_normalize(stripslashes($_GET['s']), Normalizer::FORM_C); } else { $query = $_GET['s']; } //echo $wp_query->found_posts . " "; //echo getstring("search-results-for-s", "'" . $query . "'"); ?> <div id="searchresults"> <?php while (have_posts()) { the_post(); ?> <div id="post-<?php the_ID(); ?> " <?php
/**
 * Create a map of file names used in zip archive.
 *
 * The central directory of the archive is read directly from the file and,
 * for every entry, the stored name is mapped to a cleaned UTF-8 name in
 * $this->namelookup. Names that are not plain ASCII and are not flagged as
 * UTF-8 are recovered by trying, in turn: the unicode path extra field, the
 * encoding given to open(), the name as is for known zippers, and finally the
 * legacy charset of the current language.
 *
 * The file handle is closed on every path that leaves the method after it
 * has been opened, including the empty/unreadable size case.
 *
 * @return void
 */
protected function init_namelookup()
{
    if ($this->emptyziphack) {
        $this->namelookup = array();
        return;
    }
    if (!isset($this->za)) {
        return;
    }
    if (isset($this->namelookup)) {
        return;
    }

    $this->namelookup = array();

    if ($this->mode != file_archive::OPEN) {
        // No need to tweak existing names when creating zip file because there are none yet!
        return;
    }

    if (!file_exists($this->archivepathname)) {
        return;
    }

    if (!($fp = fopen($this->archivepathname, 'rb'))) {
        return;
    }
    if (!($filesize = filesize($this->archivepathname))) {
        // Do not leak the handle opened just above.
        fclose($fp);
        return;
    }

    $centralend = self::zip_get_central_end($fp, $filesize);

    if ($centralend === false or $centralend['disk'] !== 0 or $centralend['disk_start'] !== 0 or $centralend['offset'] === 0xffffffff) {
        // Single disk archives only and no support for ZIP64, sorry.
        fclose($fp);
        return;
    }

    fseek($fp, $centralend['offset']);
    $data = fread($fp, $centralend['size']);
    $pos = 0;
    $files = array();
    for ($i = 0; $i < $centralend['entries']; $i++) {
        $file = self::zip_parse_file_header($data, $centralend, $pos);
        if ($file === false) {
            // Wrong header, sorry.
            fclose($fp);
            return;
        }
        $files[] = $file;
    }
    fclose($fp);

    foreach ($files as $file) {
        $name = $file['name'];
        if (preg_match('/^[a-zA-Z0-9_\\-\\.]*$/', $file['name'])) {
            // No need to fix ASCII.
            $name = fix_utf8($name);
        } else {
            // Bit 11 of the general purpose flags is tested; names of entries
            // that have it set skip the recovery attempts below.
            if (!($file['general'] & pow(2, 11))) {
                // First look for unicode name alternatives.
                $found = false;
                foreach ($file['extra'] as $extra) {
                    if ($extra['id'] === 0x7075) {
                        $data = unpack('cversion/Vcrc', substr($extra['data'], 0, 5));
                        // The extra field only applies when its CRC matches the stored name.
                        if ($data['crc'] === crc32($name)) {
                            $found = true;
                            $name = substr($extra['data'], 5);
                        }
                    }
                }
                if (!$found and !empty($this->encoding) and $this->encoding !== 'utf-8') {
                    // Try the encoding from open(); accept it only if the
                    // conversion round-trips to the stored name.
                    $newname = @textlib::convert($name, $this->encoding, 'utf-8');
                    $original = textlib::convert($newname, 'utf-8', $this->encoding);
                    if ($original === $name) {
                        $found = true;
                        $name = $newname;
                    }
                }
                if (!$found and $file['version'] === 0x315) {
                    // This looks like OS X build in zipper.
                    $newname = fix_utf8($name);
                    if ($newname === $name) {
                        $found = true;
                        $name = $newname;
                    }
                }
                if (!$found and $file['version'] === 0) {
                    // This looks like our old borked Moodle 2.2 file.
                    $newname = fix_utf8($name);
                    if ($newname === $name) {
                        $found = true;
                        $name = $newname;
                    }
                }
                if (!$found and $encoding = get_string('oldcharset', 'langconfig')) {
                    // Last attempt - try the dos/unix encoding from current language.
                    $windows = true;
                    foreach ($file['extra'] as $extra) {
                        // In Windows archivers do not usually set any extras with the exception of NTFS flag in WinZip/WinRar.
                        $windows = false;
                        if ($extra['id'] === 0xa) {
                            $windows = true;
                            break;
                        }
                    }

                    if ($windows === true) {
                        // Map the language charset to a DOS code page.
                        switch (strtoupper($encoding)) {
                            case 'ISO-8859-1':
                                $encoding = 'CP850';
                                break;
                            case 'ISO-8859-2':
                                $encoding = 'CP852';
                                break;
                            case 'ISO-8859-4':
                                $encoding = 'CP775';
                                break;
                            case 'ISO-8859-5':
                                $encoding = 'CP866';
                                break;
                            case 'ISO-8859-6':
                                $encoding = 'CP720';
                                break;
                            case 'ISO-8859-7':
                                $encoding = 'CP737';
                                break;
                            case 'ISO-8859-8':
                                $encoding = 'CP862';
                                break;
                            case 'UTF-8':
                                if ($winchar = get_string('localewincharset', 'langconfig')) {
                                    // Most probably works only for zh_cn,
                                    // if there are more problems we could add zipcharset to langconfig files.
                                    $encoding = $winchar;
                                }
                                break;
                        }
                    }
                    $newname = @textlib::convert($name, $encoding, 'utf-8');
                    $original = textlib::convert($newname, 'utf-8', $encoding);
                    if ($original === $name) {
                        $name = $newname;
                    }
                }
            }
        }

        $name = str_replace('\\', '/', $name); // no MS \ separators
        $name = clean_param($name, PARAM_PATH); // only safe chars
        $name = ltrim($name, '/'); // no leading slash

        if (function_exists('normalizer_normalize')) {
            $name = normalizer_normalize($name, Normalizer::FORM_C);
        }

        $this->namelookup[$file['name']] = $name;
    }
}
/**
 * Normalize the UTF-8 input string.
 * Modes other than NONE and CUSTOM require the php intl module.
 * Please edit this function to implement your custom normalization method.
 * @param $str (string) UTF-8 string to normalize.
 * @param $mode (string) Normalization type: NONE=None; C=Normalization Form C (NFC) - Canonical Decomposition followed by Canonical Composition; D=Normalization Form D (NFD) - Canonical Decomposition; KC=Normalization Form KC (NFKC) - Compatibility Decomposition, followed by Canonical Composition; KD=Normalization Form KD (NFKD) - Compatibility Decomposition; CUSTOM=Custom normalization using user defined function 'user_utf8_custom_normalizer'.
 * @return normalized string using the specified algorithm; the input string for NONE, for unknown modes, and for CUSTOM when no user function is defined.
 */
function F_utf8_normalizer($str, $mode = 'NONE')
{
    // Modes are compared loosely and in this order.
    if ($mode == 'CUSTOM') {
        if (!function_exists('user_utf8_custom_normalizer')) {
            return $str;
        }
        return call_user_func('user_utf8_custom_normalizer', $str);
    }

    if ($mode == 'C') {
        // Normalization Form C (NFC) - Canonical Decomposition followed by Canonical Composition
        return normalizer_normalize($str, Normalizer::FORM_C);
    }

    if ($mode == 'D') {
        // Normalization Form D (NFD) - Canonical Decomposition
        return normalizer_normalize($str, Normalizer::FORM_D);
    }

    if ($mode == 'KC') {
        // Normalization Form KC (NFKC) - Compatibility Decomposition, followed by Canonical Composition
        return normalizer_normalize($str, Normalizer::FORM_KC);
    }

    if ($mode == 'KD') {
        // Normalization Form KD (NFKD) - Compatibility Decomposition
        return normalizer_normalize($str, Normalizer::FORM_KD);
    }

    // 'NONE' and anything unrecognised
    return $str;
}
/**
 * Normalize a unicode string
 *
 * When normalization fails (null or false result) a warning is written to
 * the log and the value is returned as it was passed in.
 *
 * @param string $value a not normalized string
 * @return bool|string
 */
public static function normalizeUnicode($value)
{
    $normalizedValue = normalizer_normalize($value);

    $succeeded = !($normalizedValue === null || $normalizedValue === false);
    if ($succeeded) {
        return $normalizedValue;
    }

    \OC_Log::write('core', 'normalizing failed for "' . $value . '"', \OC_Log::WARN);

    return $value;
}
/**
 * Normalize a string, using the intl normalizer functions when they are
 * available and the bundled WPT_Normalizer class otherwise. Text that is
 * already normalized is returned untouched.
 *
 * @param string Text to normalize
 *
 * @return string Normalized text.
 */
function wpt_normalize($string)
{
    $haveIntl = version_compare(PHP_VERSION, '5.0.0', '>=') && function_exists('normalizer_normalize');

    if (!$haveIntl) {
        $fallback = new WPT_Normalizer();

        return $fallback->isNormalized($string) ? $string : $fallback->normalize($string);
    }

    return normalizer_is_normalized($string) ? $string : normalizer_normalize($string);
}
/**
 * Check the libicu version
 *
 * Determines which Unicode normalizer is available (utf8_normalize, the intl
 * extension, or neither), runs a probe character through each available one,
 * and shows a message naming the normalizer in use; an extra warning is shown
 * when a probe does not come back in the expected composed form.
 */
protected function envCheckLibicu()
{
    $hasUtf8 = function_exists('utf8_normalize');
    $hasIntl = function_exists('normalizer_normalize');

    /**
     * This needs to be updated something that the latest libicu
     * will properly normalize. This normalization was found at
     * http://www.unicode.org/versions/Unicode5.2.0/#Character_Additions
     * Note that we use the hex representation to create the code
     * points in order to avoid any Unicode-destroying during transit.
     */
    $not_normal_c = $this->unicodeChar("FA6C");
    $normal_c = $this->unicodeChar("242EE");

    $useNormalizer = 'php';
    $needsUpdate = false;

    /**
     * We're going to prefer the pecl extension here unless
     * utf8_normalize is more up to date.
     */
    if ($hasUtf8) {
        $useNormalizer = 'utf8';
        $utf8Result = utf8_normalize($not_normal_c, UtfNormal::UNORM_NFC);
        $needsUpdate = $needsUpdate || ($utf8Result !== $normal_c);
    }
    if ($hasIntl) {
        $useNormalizer = 'intl';
        $intlResult = normalizer_normalize($not_normal_c, Normalizer::FORM_C);
        $needsUpdate = $needsUpdate || ($intlResult !== $normal_c);
    }

    // Uses messages 'config-unicode-using-php', 'config-unicode-using-utf8', 'config-unicode-using-intl'
    if ($useNormalizer === 'php') {
        $this->showMessage('config-unicode-pure-php-warning');
        return;
    }

    $this->showMessage('config-unicode-using-' . $useNormalizer);
    if ($needsUpdate) {
        $this->showMessage('config-unicode-update-warning');
    }
}
/**
 * Match a string against a pattern in which an unescaped asterisk stands for
 * any run of characters and "\*" stands for a literal asterisk.
 *
 * The pattern is turned into an anchored, UTF-8 aware regular expression;
 * both sides are composed to NFC when the intl normalizer is available and
 * the string is trimmed before matching.
 *
 * @param string $string the text to test
 * @param string $pattern the wildcard pattern
 * @param bool $ignorecase whether to match case-insensitively
 *
 * @return int|false the preg_match() result: 1 on match, 0 otherwise, false on error
 */
public static function compare_string_with_wildcard($string, $pattern, $ignorecase)
{
    // Break the pattern on non-escaped asterisks and escape regexp special
    // characters in each piece (turning "\*" back into a plain asterisk first).
    $quotedpieces = array();
    foreach (preg_split('/(?<!\\\\)\\*/', $pattern) as $piece) {
        $quotedpieces[] = preg_quote(str_replace('\\*', '*', $piece));
    }

    // Put it back together to make the regexp; the match is made insensitive
    // if requested.
    $modifiers = $ignorecase ? 'ui' : 'u';
    $regexp = '|^' . implode('.*', $quotedpieces) . '$|' . $modifiers;

    if (function_exists('normalizer_normalize')) {
        $regexp = normalizer_normalize($regexp, Normalizer::FORM_C);
        $string = normalizer_normalize($string, Normalizer::FORM_C);
    }

    return preg_match($regexp, trim($string));
}