Example #1
1
/**
 * Strip diacritical marks from a UTF-8 string using the best facility available.
 *
 * Strategies, tried in this order (the first one available is used):
 *  1. intl transliterator with the rule 'Any-Latin; Latin-ASCII';
 *  2. intl normalizer: decompose to NFD, drop non-spacing marks (\p{Mn}),
 *     recompose to NFC;
 *  3. glibc iconv with //TRANSLIT;
 *  4. an HTML-entity based fallback: accented letters and ligatures that have a
 *     named entity are reduced to their base letter(s); every other entity
 *     produced by htmlentities() is removed.
 *
 * @param string $in UTF-8 encoded input
 *
 * @return string|false|null The converted string; a failure value of the
 *                           underlying intl/iconv/PCRE call is passed through
 *                           unchanged
 */
function strip_diacriticals($in)
{
    if (function_exists('transliterator_transliterate')) {
        // PHP 5.4 + intl
        return transliterator_transliterate('Any-Latin; Latin-ASCII', $in);
    }
    if (function_exists('normalizer_normalize')) {
        // PHP 5.3 + intl
        return normalizer_normalize(preg_replace('/\\p{Mn}+/u', '', normalizer_normalize($in, Normalizer::FORM_D)), Normalizer::FORM_C);
    }
    if (function_exists('iconv') && ICONV_IMPL == 'glibc') {
        return iconv('UTF-8', 'ASCII//TRANSLIT', $in);
    }
    // Incomplete version (limited to Latin-1).
    // The letter class is [A-Za-z]; the former [A-za-z] range also matched the
    // punctuation located between 'Z' and 'a' in ASCII.
    $patterns = array(
        '~&([A-Za-z])(?:grave|acute|circ|tilde|uml|ring|cedil|slash|caron);~' => '\\1',
        '~&([A-Za-z]{2})lig;~' => '\\1',
        '~&[^;]+;~' => '',
    );
    $out = htmlentities($in, ENT_NOQUOTES, 'UTF-8');
    $out = preg_replace(array_keys($patterns), array_values($patterns), $out);
    return $out;
}
 /**
  * Normalise every value of $book to NFC and remove the characters U+009C and
  * U+0098 from it.
  *
  * @param array $book Values to clean; keys are preserved
  *
  * @return array The same keys with cleaned values
  */
 private static function cleanChars($book)
 {
     $unwanted = array('/\\x{009c}/u', '/\\x{0098}/u');
     foreach ($book as $field => $raw) {
         $clean = normalizer_normalize($raw, \Normalizer::FORM_C);
         // Patterns are applied one after the other, in the listed order.
         foreach ($unwanted as $pattern) {
             $clean = preg_replace($pattern, '', $clean);
         }
         $book[$field] = $clean;
     }
     return $book;
 }
Example #3
0
 /**
  * Sanitise a string: run it through FILTER_SANITIZE_STRING (quotes left
  * unencoded), require valid UTF-8, normalise it and remove every character of
  * the Unicode "Other" category (\p{C}).
  *
  * @param string $string Raw input
  *
  * @return string The cleaned string
  *
  * @throws \InvalidArgumentException When the filtered input is not valid UTF-8
  */
 public static function clean($string)
 {
     // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 — confirm
     // the supported PHP versions before relying on it.
     $string = filter_var($string, FILTER_SANITIZE_STRING, FILTER_FLAG_NO_ENCODE_QUOTES);

     // Refuse anything that is not a valid UTF-8 byte stream.
     if (!mb_check_encoding($string, 'UTF-8')) {
         throw new \InvalidArgumentException('Invalid unicode input.');
     }

     // Drop any remaining invalid sequences, then normalise (default form).
     $normalised = normalizer_normalize(iconv('UTF-8', 'UTF-8//IGNORE', $string));

     // Strip control characters.
     return preg_replace('~\\p{C}+~u', '', $normalised);
 }
Example #4
0
 /**
  * Normalizes the encoding of a string (UTF-8 NFD to NFC).
  *
  * On HFS+ filesystems (OS X) filenames are stored in UTF-8 NFD while all other
  * filesystems use UTF-8 NFC. NFC is more common in general.
  *
  * With the intl extension the whole string is converted to NFC; if that
  * conversion fails the input is returned unchanged. Without intl only the
  * decomposed German umlauts are recomposed.
  *
  * @param string $string Input string
  *
  * @return string
  */
 public static function normalizeEncoding($string)
 {
     static $normalizer;
     if (null === $normalizer) {
         if (function_exists('normalizer_normalize')) {
             $normalizer = function ($string) {
                 $normalized = normalizer_normalize($string, Normalizer::FORM_C);

                 // normalizer_normalize() returns false on failure; keep the
                 // documented string return type by falling back to the input.
                 return false === $normalized ? $string : $normalized;
             };
         } else {
             $normalizer = function ($string) {
                 // The pairs are written as byte escapes because the decomposed
                 // and precomposed forms look identical in an editor and do not
                 // survive a tool that normalizes the source file.
                 return str_replace(
                     [
                         "A\xCC\x88", // A + COMBINING DIAERESIS (U+0308)
                         "a\xCC\x88",
                         "O\xCC\x88",
                         "o\xCC\x88",
                         "U\xCC\x88",
                         "u\xCC\x88",
                     ],
                     [
                         "\xC3\x84", // U+00C4
                         "\xC3\xA4", // U+00E4
                         "\xC3\x96", // U+00D6
                         "\xC3\xB6", // U+00F6
                         "\xC3\x9C", // U+00DC
                         "\xC3\xBC", // U+00FC
                     ],
                     $string
                 );
             };
         }
     }
     return $normalizer($string);
 }
 /**
  * Enhanced 'remove_accents'. When the PHP Normalizer functions are available
  * the text is first brought to NFC, then handed to the global remove_accents().
  *
  * @since 1.0.0
  *
  * @see remove_accents()
  *
  * @param string $string Text that might have accent characters
  * @return string Result of the global remove_accents() call
  */
 public function remove_accents($string)
 {
     // Only normalise when intl is present and the text is not already NFC.
     $needsComposition = function_exists('normalizer_normalize')
         && !normalizer_is_normalized($string, Normalizer::FORM_C);

     if ($needsComposition) {
         $string = normalizer_normalize($string, Normalizer::FORM_C);
     }

     return remove_accents($string);
 }
Example #6
0
 /**
  * Encode a relative descriptor string.
  *
  * The string is normalised when intl is available, passed through
  * Z_Unicode::convertCharStr2UTF8(), stripped of spaces, and every pair of hex
  * digits in the result is replaced by Z_Unicode::hex2char() of that pair.
  *
  * @param string $str Input string
  *
  * @return string Encoded string
  */
 public static function encodeRelativeDescriptorString($str)
 {
     $normalized = function_exists('normalizer_normalize') ? normalizer_normalize($str) : $str;

     $hexPairs = str_replace(" ", "", Z_Unicode::convertCharStr2UTF8($normalized));

     // convertNumbers2Char($str, 'hex')
     $decodePair = function ($pair) {
         return Z_Unicode::hex2char($pair[0]);
     };

     return preg_replace_callback("/([A-Fa-f0-9]{2})/", $decodePair, $hexPairs);
 }
 /**
  * Remove all combining marks from a text.
  *
  * The text is decomposed (NFD) so that accents become separate mark
  * characters, which are then deleted.
  *
  * @param string $text
  *
  * @return string
  */
 public function normalize($text)
 {
     $decomposed = normalizer_normalize($text, \Normalizer::FORM_D);

     return preg_replace('/\\pM*/u', '', $decomposed);
 }
Example #8
0
 /**
  * Tests that "normalizer_normalize" exists and works.
  *
  * A decomposed sample is normalized and compared with its precomposed form.
  * The result is computed once and cached for later calls.
  *
  * @return bool
  */
 public static function hasNormalizerSupport()
 {
     static $ret = null;
     if (null === $ret) {
         // Both samples are written as byte escapes: as raw literals they look
         // identical and the probe is silently broken when the source file is
         // normalized by an editor or a tool.
         // 'LATIN CAPITAL LETTER A WITH RING ABOVE' (U+00C5)
         $form_c = "\xC3\x85";
         // 'A' followed by 'COMBINING RING ABOVE' (U+030A)
         $form_d = "A\xCC\x8A";
         $ret = function_exists('normalizer_normalize') && $form_c === normalizer_normalize($form_d);
     }
     return $ret;
 }
Example #9
0
 /**
  * Reduce a text to a simplified form for search matching.
  *
  * Steps: normalize to NFC (or, without intl, apply a small accent table),
  * replace a table of special letters and ligatures by ASCII letters, delete
  * combining marks, collapse runs of whitespace and trim the result.
  *
  * NOTE(review): the word-boundary tokenizer branch is disabled by the literal
  * "false &&" in its condition, so $lang and $cfg currently have no effect.
  *
  * @param string $text Text to simplify
  * @param string|bool $lang Locale for the (currently disabled) tokenizer
  *
  * @return string Simplified text
  */
 function searchable($text, $lang = false)
 {
     global $cfg;
     if (function_exists('normalizer_normalize')) {
         // Normalize text input :: remove diacritics and such
         // NOTE(review): FORM_C composes characters; the marks are actually
         // removed further down by the \p{M} replacement.
         $text = normalizer_normalize($text, Normalizer::FORM_C);
     } else {
         // As a lightweight compatiblity, use a lightweight C
         // normalizer with diacritic removal, thanks
         // http://ahinea.com/en/tech/accented-translate.html
         $tr = array("ä" => "a", "ñ" => "n", "ö" => "o", "ü" => "u", "ÿ" => "y");
         $text = strtr($text, $tr);
     }
     // Decompose compatible versions of characters (ä => ae)
     // NOTE(review): some keys look identical to their values (e.g. "IJ") —
     // verify the original bytes of this table before editing it.
     $tr = array("ß" => "ss", "Æ" => "AE", "æ" => "ae", "IJ" => "IJ", "ij" => "ij", "Œ" => "OE", "œ" => "oe", "Ð" => "D", "Đ" => "D", "ð" => "d", "đ" => "d", "Ħ" => "H", "ħ" => "h", "ı" => "i", "ĸ" => "k", "Ŀ" => "L", "Ł" => "L", "ŀ" => "l", "ł" => "l", "Ŋ" => "N", "ʼn" => "n", "ŋ" => "n", "Ø" => "O", "ø" => "o", "ſ" => "s", "Þ" => "T", "Ŧ" => "T", "þ" => "t", "ŧ" => "t", "ä" => "ae", "ö" => "oe", "ü" => "ue", "Ä" => "AE", "Ö" => "OE", "Ü" => "UE");
     $text = strtr($text, $tr);
     // Drop separated diacritics
     $text = preg_replace('/\\p{M}/u', '', $text);
     // Drop extraneous whitespace
     // (a run of whitespace is replaced by its first character)
     $text = preg_replace('/(\\s)\\s+/u', '$1', $text);
     // Drop leading and trailing whitespace
     $text = trim($text);
     // The leading "false &&" keeps this branch from ever running.
     if (false && class_exists('IntlBreakIterator')) {
         // Split by word boundaries
         if ($tokenizer = IntlBreakIterator::createWordInstance($lang ?: ($cfg ? $cfg->getSystemLanguage() : 'en_US'))) {
             $tokenizer->setText($text);
             $tokens = array();
             foreach ($tokenizer as $token) {
                 $tokens[] = $token;
             }
             $text = implode(' ', $tokens);
         }
     } else {
         // Approximate word boundaries from Unicode chart at
         // http://www.unicode.org/reports/tr29/#Word_Boundaries
         // Punt for now
     }
     return $text;
 }
Example #10
0
    /**
     * Normalise a UTF-8 string to FORM_C, avoiding the pitfalls in PHP's
     * normalizer_normalize function.
     *
     * The input is returned unchanged when it is empty, when the intl functions
     * are missing, or when normalisation fails.
     *
     * @param string $string the input string.
     * @return string the normalised string.
     */
    protected static function safe_normalize($string) {
        if ($string === '') {
            return '';
        }

        if (!function_exists('normalizer_normalize')) {
            return $string;
        }

        $normalised = normalizer_normalize($string, Normalizer::FORM_C);
        // Failure is reported as false (and as null by some older PHP versions);
        // both must be caught, otherwise false is handed back to the caller.
        if ($normalised === null || $normalised === false) {
            // An error occurred in normalizer_normalize, but we have no idea what.
            debugging('Failed to normalise string: ' . $string, DEBUG_DEVELOPER);
            return $string; // Return the original string, since it is the best we have.
        }

        return $normalised;
    }
Example #11
0
 /**
  * Normalize the string to Unicode form NFKC.
  *
  * @see \X501\StringPrep\StringPrep::prepare()
  * @param string $string UTF-8 encoded string
  * @return string
  */
 public function apply($string)
 {
     $form = \Normalizer::NFKC;

     return normalizer_normalize($string, $form);
 }
 /**
  * Strip combining marks from a value.
  *
  * @param string|null $value Value to normalize
  *
  * @return string|null NULL when no value is set, otherwise the value
  *   decomposed to NFD with all mark characters removed
  */
 protected static function normalizeValue($value) {
     if (!isset($value)) {
         return NULL;
     }

     $decomposed = normalizer_normalize($value, Normalizer::FORM_D);

     return preg_replace('/(\p{M})/ui', '', $decomposed);
 }
Example #13
0
/**
 * Simplify a word: decompose it (NFD), spell the ae/oe ligatures out in
 * lower case and delete every occurrence of $strip.
 *
 * @param string $w     Word to simplify
 * @param string $strip Substring to delete after decomposition
 *
 * @return string Simplified word
 */
function fewer_specials($w, $strip = "̄")
{
    $decomposed = normalizer_normalize($w, Normalizer::FORM_D);

    // Upper-case ligatures map to lower-case letters as well.
    $ligatures = array("æ" => "ae", "œ" => "oe", "Æ" => "ae", "Œ" => "oe");
    foreach ($ligatures as $ligature => $ascii) {
        $decomposed = str_replace($ligature, $ascii, $decomposed);
    }

    return str_replace($strip, "", $decomposed);
}
Example #14
0
 /**
  * Normalize the given UTF-8 string.
  *
  * Thin wrapper around normalizer_normalize() that defaults to form KD.
  *
  * @see http://stackoverflow.com/a/7934397/99923
  * @param string $string to normalize
  * @param int $form to normalize as
  * @return string
  */
 public static function normalize($string, $form = Normalizer::FORM_KD)
 {
     $normalized = normalizer_normalize($string, $form);

     return $normalized;
 }
/**
 * Prepare a keyword according to two global switches.
 *
 * $normalize_keywords turns on Unicode normalization (only when the intl
 * function exists); $keywords_remove_diacritics passes the result through
 * remove_accents().
 *
 * @param string $keyword Keyword to prepare
 *
 * @return string Prepared keyword
 */
function normalize_keyword($keyword)
{
    global $normalize_keywords, $keywords_remove_diacritics;

    // Normalize the text if requested and if the function is available.
    $canNormalize = $normalize_keywords && function_exists('normalizer_normalize');
    $keyword = $canNormalize ? normalizer_normalize($keyword) : $keyword;

    return $keywords_remove_diacritics ? remove_accents($keyword) : $keyword;
}
 /**
  * Strip combining marks from every entry and leave out the entries whose
  * cleaned value is longer than the maximum allowed for their key.
  *
  * @param array $additionalData
  *
  * @return array
  */
 public static function normalize(array $additionalData)
 {
     $normalized = array();

     foreach ($additionalData as $field => $raw) {
         $stripped = preg_replace('/\\pM*/u', '', normalizer_normalize($raw, \Normalizer::FORM_D));

         // Length is measured in bytes.
         if (strlen($stripped) > self::getAdditionalDataMaxLength($field)) {
             continue;
         }

         $normalized[$field] = $stripped;
     }

     return $normalized;
 }
Example #17
0
 /**
  * Bring the note into Unicode normalization form C.
  *
  * @param  string  The original note
  *
  * @return string The normalized note
  */
 public function normalizeNFC($note)
 {
     $form = Normalizer::FORM_C;

     return normalizer_normalize($note, $form);
 }
/**
 * Build the JOIN clause that attaches the multilingual search table (and, when
 * a taxonomy filter is active, the term tables) to the posts query.
 *
 * The search term, language key, letter filter and taxonomy flag are read from
 * the global $wp_query and from $_GET. Every request value that ends up inside
 * the SQL text is escaped with esc_sql().
 *
 * @param string $join JOIN clause built so far
 *
 * @return string JOIN clause to use
 */
function sil_dictionary_custom_join($join)
{
    global $wp_query, $wpdb;
    $search_table_name = SEARCHTABLE;
    /*
     * The query I'm going for will hopefully end up looking something like this
     * example:
     * SELECT id, language_code, relevance, post_title
     * FROM wp_posts p
     * JOIN (
     *	SELECT post_id, language_code, MAX(relevance) AS relevance, search_strings
     *	FROM sil_multilingual_search
     *	WHERE search_strings like '%sleeping%'
     *	GROUP BY post_id, language_code
     *	ORDER BY relevance DESC
     *	) sil_multilingual_search ON sil_multilingual_search.post_id = p.id
     * ORDER BY relevance DESC, post_title;
     */
    mb_internal_encoding("UTF-8");
    $query_vars = $wp_query->query_vars;
    // Optional request parameter: read it once with a default so that a missing
    // value does not raise an "undefined index" notice.
    $tax = isset($_GET['tax']) ? $_GET['tax'] : 0;
    if (!empty($query_vars['s'])) {
        //search string gets trimmed and normalized to NFC
        $search = trim($query_vars['s']);
        if (class_exists("Normalizer", false)) {
            $search = normalizer_normalize($search, Normalizer::FORM_C);
        }
        // Language key: request parameter first, query var as fallback.
        if (isset($_GET['key'])) {
            $key = $_GET['key'];
        } elseif (isset($query_vars['langcode'])) {
            $key = $query_vars['langcode'];
        } else {
            $key = '';
        }
        // The key comes straight from the request and is embedded in SQL below.
        $key_sql = esc_sql($key);
        $partialsearch = isset($_GET['partialsearch']) ? $_GET['partialsearch'] : get_option("include_partial_words");
        if (strlen($search) == 0 && $tax > 1) {
            $partialsearch = 1;
        }
        $column = $search_table_name . ".search_strings";
        $subquery_where = "";
        if (strlen(trim($key)) > 0) {
            $subquery_where .= " WHERE " . $search_table_name . ".language_code = '{$key_sql}' ";
        }
        $subquery_where .= empty($subquery_where) ? " WHERE " : " AND ";
        if (isset($query_vars['letter'])) {
            $letter = trim($query_vars['letter']);
            $noletters = isset($query_vars['noletters']) ? trim($query_vars['noletters']) : '';
            //by default we use collate utf8_bin and à, ä, etc. are handled as different letters
            $collate = "COLLATE 'UTF8_BIN'";
            if (get_option('IncludeCharactersWithDiacritics') == 1) {
                $collate = "";
            }
            // mb_strtoupper() is used because the letters are UTF-8 text; the
            // byte-wise strtoupper() does not upper-case non-ASCII letters.
            $subquery_where .= "(" . $column . " LIKE '" . esc_sql($letter) . "%' " . $collate
                . " OR " . $column . " LIKE '" . esc_sql(mb_strtoupper($letter)) . "%' " . $collate
                . " OR " . $column . " LIKE '" . esc_sql("-" . $letter) . "%' " . $collate . ") "
                . " AND relevance >= 95 AND language_code = '{$key_sql}' ";
            // Letters (comma separated) whose entries must be excluded.
            foreach (explode(",", $noletters) as $noLetter) {
                if (strlen($noLetter) > 0) {
                    $subquery_where .= " AND " . $column . " NOT LIKE '" . esc_sql($noLetter) . "%' " . $collate
                        . " AND " . $column . " NOT LIKE '" . esc_sql(mb_strtoupper($noLetter)) . "%' " . $collate;
                }
            }
        } else {
            if (is_CJK($search) || mb_strlen($search) > 3 || $partialsearch == 1) {
                $subquery_where .= $column . " LIKE '%" . esc_sql($search) . "%'";
            } else {
                if (mb_strlen($search) > 1) {
                    $subquery_where .= $column . " REGEXP '[[:<:]]" . esc_sql($search) . "[[:>:]]'";
                }
                // NOTE(review): a one-character, non-CJK, non-partial search adds
                // no condition here, which leaves a dangling WHERE/AND in the
                // subquery — confirm the intended behaviour for that case.
            }
        }
        $subquery = " (SELECT post_id, language_code, MAX(relevance) AS relevance, search_strings, sortorder " . "FROM " . $search_table_name . $subquery_where . " GROUP BY post_id, language_code, search_strings " . " ORDER BY relevance DESC) ";
        $join = " JOIN " . $subquery . $search_table_name . " ON {$wpdb->posts}.ID = " . $search_table_name . ".post_id ";
    }
    $semdomain = isset($query_vars['semdomain']) ? $query_vars['semdomain'] : '';
    if ($tax > 1 || strlen($semdomain) > 0) {
        $join .= " LEFT JOIN {$wpdb->term_relationships} ON {$wpdb->posts}.ID = {$wpdb->term_relationships}.object_id ";
        $join .= " INNER JOIN {$wpdb->term_taxonomy} ON {$wpdb->term_relationships}.term_taxonomy_id = {$wpdb->term_taxonomy}.term_id ";
        if (get_option("useSemDomainNumbers") == 1) {
            $join .= " INNER JOIN {$wpdb->terms} ON {$wpdb->term_relationships}.term_taxonomy_id = {$wpdb->terms}.term_id ";
        }
    }
    return $join;
}
Example #19
0
/**
 * Reduce a word to plain letters plus a configurable set of extra characters.
 *
 * The word is decomposed (NFD), transliterated with 'Any-Latin; Latin-ASCII',
 * and every character that is neither a letter nor listed in $extras is removed.
 *
 * @param string $w      Word to reduce
 * @param string $extras Characters to keep, written as the inside of a regex
 *                       character class
 *
 * @return string Reduced word
 */
function no_specials2($w, $extras = "1-9/; ,\\n")
{
    $decomposed = normalizer_normalize($w, Normalizer::FORM_D);

    # TODO: use more on this function
    $ascii = transliterator_transliterate('Any-Latin; Latin-ASCII', $decomposed);

    // Negated class: anything outside letters and $extras is deleted.
    $disallowed = "#[^\\p{L}{$extras}]#ui";

    return preg_replace($disallowed, "", $ascii);
}
Example #20
0
 /**
  * Convert a UTF-8 string to normal form KD, compatibility decomposition.
  * This may cause irreversible information loss, use judiciously.
  *
  * The backend is chosen by the NORMALIZE_INTL and NORMALIZE_ICU constants;
  * without either, UtfNormal::NFKD() is used, and strings without any byte in
  * the range 0x80-0xFF are returned as they are.
  *
  * @param $string String: a valid UTF-8 string. Input is not validated.
  * @return string a UTF-8 string in normal form KD
  */
 static function toNFKD($string)
 {
     if (NORMALIZE_INTL) {
         return normalizer_normalize($string, Normalizer::FORM_KD);
     }

     if (NORMALIZE_ICU) {
         return utf8_normalize($string, UNORM_NFKD);
     }

     // Fast return for pure ASCII strings.
     $hasNonAscii = preg_match('/[\\x80-\\xff]/', $string);

     return $hasNonAscii ? UtfNormal::NFKD($string) : $string;
 }
            $zeilennrs[] = $linenr++;
        }
        $text2 = implode("<br>", $zeilennrs);
        $y = $pdf->getY();
        $pdf->writeHTMLCell(10, '', 12, $y, $text2, 0, 0, 0, true, '', true);
        $pdf->writeHTMLCell(170, '', 24, '', $text, 0, 1, 0, true, '', true);
        $pdf->Ln(4);
    }
}
// Pick the justification ("Begruendung") text of the amendment.
// NOTE(review): aenderung_begruendung_html presumably flags that the stored text
// is already HTML; otherwise it is converted from BBCode — confirm.
if ($aenderungsantrag->aenderung_begruendung_html) {
    $begruendung = $aenderungsantrag->aenderung_begruendung;
} else {
    $begruendung = HtmlBBcodeUtils::bbcode2html($aenderungsantrag->aenderung_begruendung);
}
// Normalize the text when the intl extension is available.
if (function_exists("normalizer_normalize")) {
    $begruendung = normalizer_normalize($begruendung);
}
// The markup starts with a closing </div>; NOTE(review): it presumably pairs
// with an element opened elsewhere — confirm.
$html = '
	</div>';
// The justification section is only emitted when there is text to show.
if (trim($begruendung) != "") {
    $html .= '
	<h3 style="margin-top: 0;">Begründung</h3>
	<div class="textholder consolidated">
		' . $begruendung . '
	</div>';
}
$html .= '</div>';
// Write the assembled HTML to the PDF in 10pt Helvetica.
$pdf->SetFont("helvetica", "", 10);
$pdf->writeHTML($html, true, false, true, false, '');
// Supporters of the amendment.
$unterstuetzerInnen = $aenderungsantrag->getUnterstuetzerInnen();
if (count($unterstuetzerInnen) > 0) {
Example #22
0
/**
 * Approximate normalization by replacing accented letters with their base
 * letters through their HTML entity names.
 *
 * The intl based branch is switched off by the trailing "1 == 2" in its
 * condition, so the entity based replacement is what actually runs.
 *
 * @param string $string UTF-8 text
 *
 * @return string Text with the matched entities reduced to their first letter(s)
 */
function fake_normalize($string)
{
    $intlEnabled = version_compare(PHP_VERSION, '5.0.0', '>=')
        && function_exists('normalizer_normalize')
        && 1 == 2;

    if (!$intlEnabled) {
        $encoded = htmlentities($string, ENT_NOQUOTES, 'UTF-8');

        return preg_replace('~&([a-z]{1,2})(acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml|mp);~i', '$1', $encoded);
    }

    return normalizer_is_normalized($string) ? $string : normalizer_normalize($string);
}
?>
"><?php 
printf(__('Search results for "%s"', ZEE_LANG), $searchquery);
?>
</h2>
		<p><?php 
// function_exists() expects the function name as a string; the bare word was
// read as an undefined constant, which is an Error in PHP 8.
if (function_exists('sil_dictionary_custom_message')) {
    sil_dictionary_custom_message();
}
?>
</p>
		<?php 
if (have_posts()) {
    //search string are normalized to NFC
    if (class_exists("Normalizer", $autoload = false)) {
        $query = normalizer_normalize(stripslashes($_GET['s']), Normalizer::FORM_C);
    } else {
        $query = $_GET['s'];
    }
    //echo $wp_query->found_posts . " ";
    //echo getstring("search-results-for-s", "'" . $query . "'");
    ?>
		<div id="searchresults">
			<?php 
    while (have_posts()) {
        the_post();
        ?>
				<div id="post-<?php 
        the_ID();
        ?>
" <?php 
 /**
  * Create a map of file names used in zip archive.
  *
  * Reads the central directory of the archive directly from disk and fills
  * $this->namelookup with "name as stored in the archive" => "cleaned UTF-8
  * name". For non-ASCII names without the UTF-8 flag several encodings are
  * tried in turn. The method returns early, leaving the map empty or unset,
  * when the archive is not opened for reading, cannot be read, spans several
  * disks or uses ZIP64.
  *
  * @return void
  */
 protected function init_namelookup()
 {
     if ($this->emptyziphack) {
         $this->namelookup = array();
         return;
     }
     if (!isset($this->za)) {
         return;
     }
     if (isset($this->namelookup)) {
         return;
     }
     $this->namelookup = array();
     if ($this->mode != file_archive::OPEN) {
         // No need to tweak existing names when creating zip file because there are none yet!
         return;
     }
     if (!file_exists($this->archivepathname)) {
         return;
     }
     if (!($fp = fopen($this->archivepathname, 'rb'))) {
         return;
     }
     if (!($filesize = filesize($this->archivepathname))) {
         // The handle is already open at this point; close it before bailing out.
         fclose($fp);
         return;
     }
     $centralend = self::zip_get_central_end($fp, $filesize);
     if ($centralend === false or $centralend['disk'] !== 0 or $centralend['disk_start'] !== 0 or $centralend['offset'] === 0xffffffff) {
         // Single disk archives only and no support for ZIP64, sorry.
         fclose($fp);
         return;
     }
     // Load the whole central directory and parse one header per entry.
     fseek($fp, $centralend['offset']);
     $data = fread($fp, $centralend['size']);
     $pos = 0;
     $files = array();
     for ($i = 0; $i < $centralend['entries']; $i++) {
         $file = self::zip_parse_file_header($data, $centralend, $pos);
         if ($file === false) {
             // Wrong header, sorry.
             fclose($fp);
             return;
         }
         $files[] = $file;
     }
     fclose($fp);
     foreach ($files as $file) {
         $name = $file['name'];
         if (preg_match('/^[a-zA-Z0-9_\\-\\.]*$/', $file['name'])) {
             // No need to fix ASCII.
             // NOTE(review): fix_utf8() is still applied in this branch — confirm intent.
             $name = fix_utf8($name);
         } else {
             // Bit 11 of the general purpose flags is tested here; the conversions
             // below only run when it is not set.
             if (!($file['general'] & pow(2, 11))) {
                 // First look for unicode name alternatives.
                 $found = false;
                 foreach ($file['extra'] as $extra) {
                     if ($extra['id'] === 0x7075) {
                         // The extra field starts with a version byte and a CRC32
                         // that must match the stored name.
                         $data = unpack('cversion/Vcrc', substr($extra['data'], 0, 5));
                         if ($data['crc'] === crc32($name)) {
                             $found = true;
                             $name = substr($extra['data'], 5);
                         }
                     }
                 }
                 if (!$found and !empty($this->encoding) and $this->encoding !== 'utf-8') {
                     // Try the encoding from open().
                     // The converted name is accepted only if it converts back to the original.
                     $newname = @textlib::convert($name, $this->encoding, 'utf-8');
                     $original = textlib::convert($newname, 'utf-8', $this->encoding);
                     if ($original === $name) {
                         $found = true;
                         $name = $newname;
                     }
                 }
                 if (!$found and $file['version'] === 0x315) {
                     // This looks like OS X build in zipper.
                     $newname = fix_utf8($name);
                     if ($newname === $name) {
                         $found = true;
                         $name = $newname;
                     }
                 }
                 if (!$found and $file['version'] === 0) {
                     // This looks like our old borked Moodle 2.2 file.
                     $newname = fix_utf8($name);
                     if ($newname === $name) {
                         $found = true;
                         $name = $newname;
                     }
                 }
                 if (!$found and $encoding = get_string('oldcharset', 'langconfig')) {
                     // Last attempt - try the dos/unix encoding from current language.
                     $windows = true;
                     foreach ($file['extra'] as $extra) {
                         // In Windows archivers do not usually set any extras with the exception of NTFS flag in WinZip/WinRar.
                         $windows = false;
                         if ($extra['id'] === 0xa) {
                             $windows = true;
                             break;
                         }
                     }
                     if ($windows === true) {
                         // Map the language charset to the code page used instead.
                         switch (strtoupper($encoding)) {
                             case 'ISO-8859-1':
                                 $encoding = 'CP850';
                                 break;
                             case 'ISO-8859-2':
                                 $encoding = 'CP852';
                                 break;
                             case 'ISO-8859-4':
                                 $encoding = 'CP775';
                                 break;
                             case 'ISO-8859-5':
                                 $encoding = 'CP866';
                                 break;
                             case 'ISO-8859-6':
                                 $encoding = 'CP720';
                                 break;
                             case 'ISO-8859-7':
                                 $encoding = 'CP737';
                                 break;
                             case 'ISO-8859-8':
                                 $encoding = 'CP862';
                                 break;
                             case 'UTF-8':
                                 if ($winchar = get_string('localewincharset', 'langconfig')) {
                                     // Most probably works only for zh_cn,
                                     // if there are more problems we could add zipcharset to langconfig files.
                                     $encoding = $winchar;
                                 }
                                 break;
                         }
                     }
                     $newname = @textlib::convert($name, $encoding, 'utf-8');
                     $original = textlib::convert($newname, 'utf-8', $encoding);
                     if ($original === $name) {
                         $name = $newname;
                     }
                 }
             }
         }
         $name = str_replace('\\', '/', $name);
         // no MS \ separators
         $name = clean_param($name, PARAM_PATH);
         // only safe chars
         $name = ltrim($name, '/');
         // no leading slash
         if (function_exists('normalizer_normalize')) {
             $name = normalizer_normalize($name, Normalizer::FORM_C);
         }
         $this->namelookup[$file['name']] = $name;
     }
 }
Example #25
0
/**
 * Normalize the UTF-8 input string.
 * Every mode except 'NONE' and 'CUSTOM' calls normalizer_normalize() and so
 * needs the intl extension.
 * Please edit this function to implement your custom normalization method.
 * @param $str (string) UTF-8 string to normalize.
 * @param $mode (string) Normalization type: NONE=None; C=Normalization Form C (NFC); D=Normalization Form D (NFD); KC=Normalization Form KC (NFKC); KD=Normalization Form KD (NFKD); CUSTOM=Custom normalization using the user defined function 'user_utf8_custom_normalizer' (the string is returned unchanged when that function does not exist). Any other value behaves like NONE.
 * @return normalized string using the specified algorithm.
 */
function F_utf8_normalizer($str, $mode = 'NONE')
{
    switch ($mode) {
        case 'CUSTOM':
            return function_exists('user_utf8_custom_normalizer')
                ? call_user_func('user_utf8_custom_normalizer', $str)
                : $str;
        case 'C':
            // Canonical Decomposition followed by Canonical Composition
            $form = Normalizer::FORM_C;
            break;
        case 'D':
            // Canonical Decomposition
            $form = Normalizer::FORM_D;
            break;
        case 'KC':
            // Compatibility Decomposition, followed by Canonical Composition
            $form = Normalizer::FORM_KC;
            break;
        case 'KD':
            // Compatibility Decomposition
            $form = Normalizer::FORM_KD;
            break;
        default:
            // 'NONE' and anything unrecognised
            return $str;
    }

    return normalizer_normalize($str, $form);
}
Example #26
0
 /**
  * Normalize a unicode string.
  *
  * When normalization fails a warning is logged and the value is returned
  * as it was passed in.
  *
  * @param string $value a not normalized string
  * @return bool|string
  */
 public static function normalizeUnicode($value)
 {
     $normalizedValue = normalizer_normalize($value);
     $failed = $normalizedValue === null || $normalizedValue === false;

     if (!$failed) {
         return $normalizedValue;
     }

     \OC_Log::write('core', 'normalizing failed for "' . $value . '"', \OC_Log::WARN);

     return $value;
 }
Example #27
0
/**
 * Normalize a string, using the intl functions when they are available and the
 * WPT_Normalizer class otherwise. A string that is already normalized is
 * returned untouched.
 *
 * @param string Text to normalize
 *
 * @return string Normalized text.
 */
function wpt_normalize($string)
{
    $hasIntl = version_compare(PHP_VERSION, '5.0.0', '>=') && function_exists('normalizer_normalize');

    if ($hasIntl) {
        return normalizer_is_normalized($string) ? $string : normalizer_normalize($string);
    }

    $normalizer = new WPT_Normalizer();

    return $normalizer->isNormalized($string) ? $string : $normalizer->normalize($string);
}
Example #28
0
 /**
  * Check the libicu version.
  *
  * Reports which Unicode normalizer is in use ('utf8', 'intl', or the pure PHP
  * fallback) and warns when an available normalizer does not turn the test
  * character into its expected normalized form.
  */
 protected function envCheckLibicu()
 {
     $hasUtf8 = function_exists('utf8_normalize');
     $hasIntl = function_exists('normalizer_normalize');
     /**
      * This needs to be updated something that the latest libicu
      * will properly normalize.  This normalization was found at
      * http://www.unicode.org/versions/Unicode5.2.0/#Character_Additions
      * Note that we use the hex representation to create the code
      * points in order to avoid any Unicode-destroying during transit.
      */
     $not_normal_c = $this->unicodeChar("FA6C");
     $normal_c = $this->unicodeChar("242EE");
     $useNormalizer = 'php';
     $needsUpdate = false;
     /**
      * We're going to prefer the pecl extension here unless
      * utf8_normalize is more up to date.
      */
     if ($hasUtf8) {
         $useNormalizer = 'utf8';
         if (utf8_normalize($not_normal_c, UtfNormal::UNORM_NFC) !== $normal_c) {
             $needsUpdate = true;
         }
     }
     if ($hasIntl) {
         $useNormalizer = 'intl';
         if (normalizer_normalize($not_normal_c, Normalizer::FORM_C) !== $normal_c) {
             $needsUpdate = true;
         }
     }
     // Uses messages 'config-unicode-using-php', 'config-unicode-using-utf8', 'config-unicode-using-intl'
     if ($useNormalizer === 'php') {
         $this->showMessage('config-unicode-pure-php-warning');
         return;
     }
     $this->showMessage('config-unicode-using-' . $useNormalizer);
     if ($needsUpdate) {
         $this->showMessage('config-unicode-update-warning');
     }
 }
Example #29
0
 /**
  * Match a string against a pattern in which an unescaped asterisk stands for
  * any run of characters and "\*" stands for a literal asterisk.
  *
  * The string is trimmed before matching; when intl is available both sides
  * are normalized to NFC first.
  *
  * @param string $string     The string to test
  * @param string $pattern    The wildcard pattern
  * @param bool   $ignorecase Whether to match case-insensitively
  *
  * @return int|false Result of preg_match()
  */
 public static function compare_string_with_wildcard($string, $pattern, $ignorecase)
 {
     // Split on asterisks that are not preceded by a backslash, then quote each
     // literal piece (with "\*" turned back into "*").
     $literals = array();
     foreach (preg_split('/(?<!\\\\)\\*/', $pattern) as $piece) {
         $literals[] = preg_quote(str_replace('\\*', '*', $piece));
     }

     // Join the pieces with ".*" and anchor the expression at both ends.
     $modifiers = $ignorecase ? 'ui' : 'u';
     $regexp = '|^' . implode('.*', $literals) . '$|' . $modifiers;

     if (function_exists('normalizer_normalize')) {
         $regexp = normalizer_normalize($regexp, Normalizer::FORM_C);
         $string = normalizer_normalize($string, Normalizer::FORM_C);
     }

     return preg_match($regexp, trim($string));
 }