commandUrlCleanupCompat() static public method

static public commandUrlCleanupCompat ( $text, $charsetName )
Example #1
0
 /**
  * Applies a single named transformation command to $text in place.
  *
  * Built-in commands:
  *  - url_cleanup_iri, url_cleanup, url_cleanup_compat: delegated to the
  *    matching static eZCharTransform::commandUrlCleanup*() helper.
  *  - identifier_cleanup: lowercases the text, turns every character other
  *    than a-z, 0-9, underscore and space into a space, turns spaces into
  *    underscores, collapses runs of underscores and strips one leading and
  *    one trailing underscore.
  *  - search_cleanup: prepares text for searching (see inline comments).
  *
  * Any other command name is looked up in the 'Commands' variable of the
  * 'Extensions' group in transform.ini; the configured value is split on the
  * first ':' into an include path and a class name whose executeCommand()
  * is called with the text, the command name and the charset name.
  *
  * @param string $text        Text to transform; overwritten with the result.
  * @param array  $command     Command description; only its 'command' entry is read here.
  * @param string $charsetName Charset name, forwarded to the cleanup helpers.
  *
  * @return bool true when the command was handled, false when it is unknown
  *              or its extension include file does not exist.
  */
 function executeCommandCode(&$text, $command, $charsetName)
 {
     $commandName = $command['command'];

     if ($commandName == 'url_cleanup_iri') {
         $text = eZCharTransform::commandUrlCleanupIRI($text, $charsetName);
         return true;
     }

     if ($commandName == 'url_cleanup') {
         $text = eZCharTransform::commandUrlCleanup($text, $charsetName);
         return true;
     }

     if ($commandName == 'url_cleanup_compat') {
         $text = eZCharTransform::commandUrlCleanupCompat($text, $charsetName);
         return true;
     }

     if ($commandName == 'identifier_cleanup') {
         // The four patterns are applied one after another, in this order.
         $patterns = array("#[^a-z0-9_ ]#", "/ /", "/__+/", "/^_|_\$/");
         $replacements = array(" ", "_", "_", "");
         $text = preg_replace($patterns, $replacements, strtolower($text));
         return true;
     }

     if ($commandName == 'search_cleanup') {
         if (!in_array($charsetName, $this->nonCJKCharsets())) {
             // Split up Chinese / Japanese / Korean text so it can be searched:
             // every character in the ranges below gets a space in front of it,
             // and inside a run of such characters each one is also appended
             // directly after its predecessor (overlapping pairs).
             $toUnicode = eZTextCodec::instance(false, 'unicode');
             $codePoints = $toUnicode->convertString($text);
             $spaced = array();
             $previousWasCjk = false;
             foreach ($codePoints as $codePoint) {
                 // U+3001-U+30FE, U+3400-U+9F9F and U+AC00-U+D7A3
                 $isCjk = ($codePoint >= 12289 && $codePoint <= 12542)
                     || ($codePoint >= 13312 && $codePoint <= 40863)
                     || ($codePoint >= 44032 && $codePoint <= 55203);

                 if ($isCjk) {
                     if ($previousWasCjk) {
                         // Close the pair started by the previous character
                         $spaced[] = $codePoint;
                     }
                     $spaced[] = 32; // space
                     $spaced[] = $codePoint;
                 } else {
                     if ($previousWasCjk) {
                         $spaced[] = 32; // space ends the preceding run
                     }
                     $spaced[] = $codePoint;
                 }
                 $previousWasCjk = $isCjk;
             }
             if ($previousWasCjk) {
                 // The text ended inside a run: overwrite the dangling last
                 // character (it never got a partner) with a space.
                 $spaced[count($spaced) - 1] = 32;
             }
             // false selects the internal charset as conversion target
             $fromUnicode = eZTextCodec::instance('unicode', false);
             $text = $fromUnicode->convertString($spaced);
         }

         // Dots that sit inside words/numbers survive; repeated, leading,
         // trailing or whitespace-adjacent dots become a space, and so does a
         // '%' that follows a non-digit.
         $text = preg_replace(
             array("#(\\.){2,}#", "#^\\.#", "#\\s\\.#", "#\\.\\s#", "#\\.\$#", "#([^0-9])%#"),
             array(" ", " ", " ", " ", " ", "\$1 "),
             $text
         );

         // Wildcards are only kept when explicitly enabled in the settings
         $siteIni = eZINI::instance();
         if ($siteIni->variable('SearchSettings', 'EnableWildcard') != 'true') {
             $text = str_replace("*", " ", $text);
         }

         // Collapse whitespace runs, matching in UTF-8 mode when the internal
         // charset is utf-8.
         $whitespacePattern = eZTextCodec::internalCharset() == "utf-8" ? "#(\\s+)#u" : "#(\\s+)#";
         $text = preg_replace($whitespacePattern, " ", $text);
         return true;
     }

     // Not a built-in command: check the extension commands in transform.ini
     $transformIni = eZINI::instance('transform.ini');
     $extensionCommands = $transformIni->variable('Extensions', 'Commands');
     if (!isset($extensionCommands[$commandName])) {
         return false;
     }

     list($path, $className) = explode(':', $extensionCommands[$commandName], 2);
     if (!file_exists($path)) {
         eZDebug::writeError("Could not locate include file '{$path}' for transformation '" . $commandName . "'");
         return false;
     }

     include_once $path;
     $text = call_user_func_array(array($className, 'executeCommand'), array($text, $commandName, $charsetName));
     return true;
 }
<?php

// Cached transformation data
// Type: Group:urlalias_compat
// Charset: utf-8
// Character replacement table; applied to $text with strtr() after the array
$data = array('table' => array('' => ' ', ' ' => ' ', '״' => '"', '׳' => '\\"', 'ײ' => 'yy', 'ױ' => 'wy', 'װ' => 'ww', 'ת' => 't', 'ש' => 's', 'ר' => 'r', 'ק' => 'q', 'צ' => 'z', 'ץ' => 'z', 'פ' => 'p', 'ף' => 'p', 'ע' => '\\"', 'ס' => 's', 'נ' => 'n', 'ן' => 'n', 'מ' => 'm', 'ם' => 'm', 'ל' => 'l', 'כ' => 'k', 'ך' => 'k', 'י' => 'y', 'ט' => 't', 'ח' => 'h', 'ז' => 'z', 'ו' => 'o', 'ה' => 'h', 'ד' => 'd', 'ג' => 'g', 'ב' => 'b', 'א' => '\\"', '׃' => ':', '׀' => '|', 'ֿ' => '', '־' => '\\"', 'ֽ' => '', 'ּ' => '', 'ֻ' => 'u', 'ֺ' => '', 'ֹ' => 'o', 'ָ' => 'e', 'ַ' => 'a', 'ֶ' => 'e', 'ֵ' => 'e', 'ִ' => 'i', 'ֳ' => 'o', 'ֲ' => 'a', 'ֱ' => 'e', 'ְ' => '', '֯' => '' . "" . '', '֮' => '' . "" . '', '֭' => '' . "" . '', '֬' => '' . "" . '', '֫' => '' . "" . '', '֪' => '' . "" . '', '֩' => '' . "" . '', '֨' => '' . "" . '', '֧' => '' . "" . '', '֦' => '' . "" . '', '֥' => '' . "" . '', '֤' => '' . "" . '', '֣' => '' . "" . '', '֡' => '' . "" . '', '֠' => '' . "" . '', '֟' => '' . "" . '', '֞' => '' . "" . '', '֝' => '' . "" . '', '֜' => '' . "" . '', '֛' => '' . "" . '', '֚' => '' . "" . '', '֙' => '' . "" . '', '֘' => '' . "" . '', '֗' => '' . "" . '', '֖' => '' . "" . '', '֕' => '' . "" . '', '֔' => '' . "" . '', '֓' => '' . "" . '', '֒' => '' . "" . '', '֑' => '' . "" . 
'', 'Ԏ' => 'ԏ', 'Ԍ' => 'ԍ', 'Ԋ' => 'ԋ', 'Ԉ' => 'ԉ', 'Ԇ' => 'ԇ', 'Ԅ' => 'ԅ', 'Ԃ' => 'ԃ', 'Ԁ' => 'ԁ', 'ӹ' => 'y', 'Ӹ' => 'y', 'ӵ' => 'ch', 'Ӵ' => 'ch', 'ӳ' => 'u', 'Ӳ' => 'u', 'ӱ' => 'u', 'Ӱ' => 'u', 'ӯ' => 'u', 'Ӯ' => 'u', 'ӭ' => 'e', 'Ӭ' => 'e', 'ӫ' => 'ө', 'Ӫ' => 'ө', 'Ө' => 'ө', 'ӧ' => 'o', 'Ӧ' => 'o', 'ӥ' => 'i', 'Ӥ' => 'i', 'ӣ' => 'i', 'Ӣ' => 'i', 'Ӡ' => 'ӡ', 'ӟ' => 'z', 'Ӟ' => 'z', 'ӝ' => 'zh', 'Ӝ' => 'zh', 'ӛ' => 'ә', 'Ӛ' => 'ә', 'Ә' => 'ә', 'ӗ' => 'e', 'Ӗ' => 'e', 'Ӕ' => 'ӕ', 'ӓ' => 'a', 'Ӓ' => 'a', 'ӑ' => 'a', 'Ӑ' => 'a', 'Ӎ' => 'ӎ', 'Ӌ' => 'ӌ', 'Ӊ' => 'ӊ', 'Ӈ' => 'ӈ', 'Ӆ' => 'ӆ', 'Ӄ' => 'ӄ', 'ӂ' => 'zh', 'Ӂ' => 'zh', 'Ҿ' => 'ҿ', 'Ҽ' => 'ҽ', 'Һ' => 'һ', 'Ҹ' => 'ҹ', 'Ҷ' => 'ҷ', 'Ҵ' => 'ҵ', 'Ҳ' => 'ҳ', 'Ұ' => 'ұ', 'Ү' => 'ү', 'Ҭ' => 'ҭ', 'Ҫ' => 'ҫ', 'Ҩ' => 'ҩ', 'Ҧ' => 'ҧ', 'Ҥ' => 'ҥ', 'Ң' => 'ң', 'Ҡ' => 'ҡ', 'Ҟ' => 'ҟ', 'Ҝ' => 'ҝ', 'Қ' => 'қ', 'Ҙ' => 'ҙ', 'Җ' => 'җ', 'Ҕ' => 'ҕ', 'Ғ' => 'ғ', 'Ґ' => 'ґ', 'Ҏ' => 'ҏ', 'Ҍ' => 'ҍ', 'Ҋ' => 'ҋ', 'Ҁ' => 'ҁ', 'Ѿ' => 'ѿ', 'Ѽ' => 'ѽ', 'Ѻ' => 'ѻ', 'Ѹ' => 'ѹ', 'ѷ' => 'ѵ', 'Ѷ' => 'ѵ', 'Ѵ' => 'ѵ', 'Ѳ' => 'ѳ', 'Ѱ' => 'ѱ', 'Ѯ' => 'ѯ', 'Ѭ' => 'ѭ', 'Ѫ' => 'ѫ', 'Ѩ' => 'ѩ', 'Ѧ' => 'ѧ', 'Ѥ' => 'ѥ', 'Ѣ' => 'ѣ', 'Ѡ' => 'ѡ', 'џ' => 'dz', 'ў' => 'v', 'ѝ' => 'i', 'ќ' => 'kj', 'ћ' => 'c', 'њ' => 'nj', 'љ' => 'lj', 'ј' => 'j', 'ї' => 'yi', 'і' => 'i', 'ѕ' => 'z', 'є' => 'e', 'ѓ' => 'g', 'ђ' => 'dj', 'ё' => 'io', 'ѐ' => 'ie', 'я' => 'ya', 'ю' => 'yu', 'э' => 'e', 'ь' => '\\"', 'ы' => 'y', 'ъ' => '\\"', 'щ' => 'sch', 'ш' => 'sh', 'ч' => 'ch', 'ц' => 'c', 'х' => 'h', 'ф' => 'f', 'у' => 'u', 'т' => 't', 'с' => 's', 'р' => 'r', 'п' => 'p', 'о' => 'o', 'н' => 'n', 'м' => 'm', 'л' => 'l', 'к' => 'k', 'й' => 'j', 'и' => 'i', 'з' => 'z', 'ж' => 'zh', 'е' => 'e', 'д' => 'd', 'г' => 'g', 'в' => 'v', 'б' => 'b', 'а' => 'a', 'Я' => 'ya', 'Ю' => 'yu', 'Э' => 'e', 'Ь' => '\\"', 'Ы' => 'y', 'Ъ' => '\\"', 'Щ' => 'sch', 'Ш' => 'sh', 'Ч' => 'ch', 'Ц' => 'c', 'Х' => 'h', 'Ф' => 'f', 'У' => 'u', 'Т' => 't', 'С' => 's', 'Р' => 'r', 'П' => 'p', 'О' => 'o', 'Н' 
=> 'n', 'М' => 'm', 'Л' => 'l', 'К' => 'k', 'Й' => 'j', 'И' => 'i', 'З' => 'z', 'Ж' => 'zh', 'Е' => 'e', 'Д' => 'd', 'Г' => 'g', 'В' => 'v', 'Б' => 'b', 'А' => 'a', 'Џ' => 'dz', 'Ў' => 'v', 'Ѝ' => 'i', 'Ќ' => 'kj', 'Ћ' => 'ts', 'Њ' => 'nj', 'Љ' => 'lj', 'Ј' => 'j', 'Ї' => 'yi', 'І' => 'ii', 'Ѕ' => 'ds', 'Є' => 'ie', 'Ѓ' => 'g', 'Ђ' => 'd', 'Ё' => 'io', 'Ѐ' => 'ie', 'ϻ' => '', 'Ϻ' => '', 'Ϲ' => '', 'ϸ' => '', 'Ϸ' => '', '϶' => 'e', 'ϵ' => 'e', 'ϴ' => '', 'ϳ' => 'j', 'ϲ' => '', 'ϱ' => 'r', 'ϰ' => '', 'ϯ' => '', 'Ϯ' => '', 'ϭ' => '', 'Ϭ' => '', 'ϫ' => '', 'Ϫ' => '', 'ϩ' => '', 'Ϩ' => '', 'ϧ' => '', 'Ϧ' => '', 'ϥ' => '', 'Ϥ' => '', 'ϣ' => '', 'Ϣ' => '', 'ϡ' => '900', 'Ϡ' => 's', 'ϟ' => '90', 'Ϟ' => 'k', 'ϝ' => 'f', 'Ϝ' => 'f', 'ϛ' => 's', 'Ϛ' => '6', 'ϙ' => 'k', 'Ϙ' => 'k', 'ϗ' => '&', 'ϖ' => 'pi', 'ϕ' => 'phi', 'ϔ' => 'y', 'ϓ' => 'y', 'ϒ' => 'y', 'ϑ' => 'th', 'ϐ' => 'b', 'ώ' => 'ho', 'ύ' => 'hu', 'ό' => 'ho', 'ϋ' => 'y', 'ϊ' => 'i', 'ω' => 'o', 'ψ' => 'ps', 'χ' => 'ch', 'φ' => 'ph', 'υ' => 'y', 'τ' => 't', 'σ' => 's', 'ς' => 's', 'ρ' => 'r', 'π' => 'p', 'ο' => 'o', 'ξ' => 'x', 'ν' => 'n', 'μ' => 'm', 'λ' => 'l', 'κ' => 'k', 'ι' => 'i', 'θ' => 'th', 'η' => 'e', 'ζ' => 'z', 'ε' => 'e', 'δ' => 'd', 'γ' => 'g', 'β' => 'v', 'α' => 'a', 'ΰ' => 'y', 'ί' => 'hi', 'ή' => 'he', 'έ' => 'he', 'ά' => 'ha', 'Ϋ' => 'y', 'Ϊ' => 'i', 'Ω' => 'o', 'Ψ' => 'ps', 'Χ' => 'ch', 'Φ' => 'ph', 'Υ' => 'y', 'Τ' => 't', 'Σ' => 's', 'Ρ' => 'r', 'Π' => 'p', 'Ο' => 'o', 'Ξ' => 'x', 'Ν' => 'n', 'Μ' => 'm', 'Λ' => 'l', 'Κ' => 'k', 'Ι' => 'i', 'Θ' => 'th', 'Η' => 'e', 'Ζ' => 'z', 'Ε' => 'e', 'Δ' => 'd', 'Γ' => 'g', 'Β' => 'v', 'Α' => 'a', 'ΐ' => 'i', 'Ώ' => 'ho', 'Ύ' => 'hu', 'Ό' => 'ho', 'Ί' => 'hi', 'Ή' => 'he', 'Έ' => 'he', '·' => '·', 'Ά' => 'ha', '΅' => '¨', '΄' => ' ', ';' => '?', 'ͺ' => ' ', '͵' => '\\"', 'ʹ' => '\\"', '˝' => '"', 'ˏ' => '"', 'ˎ' => '"', 'ˋ' => '"', 'ˊ' => '"', 'ſ' => 's', 'ž' => 'z', 'Ž' => 'z', 'ż' => 'z', 'Ż' => 'z', 'ź' => 'z', 'Ź' => 'z', 'Ÿ' => 'y', 'ŷ' => 'y', 'Ŷ' => 'y', 
'ŵ' => 'w', 'Ŵ' => 'w', 'ų' => 'u', 'Ų' => 'u', 'ű' => 'u', 'Ű' => 'u', 'ů' => 'u', 'Ů' => 'u', 'ŭ' => 'u', 'Ŭ' => 'u', 'ū' => 'u', 'Ū' => 'u', 'ũ' => 'u', 'Ũ' => 'u', 'ŧ' => 't', 'Ŧ' => 't', 'ť' => 't', 'Ť' => 't', 'ţ' => 't', 'Ţ' => 't', 'š' => 's', 'Š' => 's', 'ş' => 's', 'Ş' => 's', 'ŝ' => 's', 'Ŝ' => 's', 'ś' => 's', 'Ś' => 's', 'ř' => 'r', 'Ř' => 'r', 'ŗ' => 'r', 'Ŗ' => 'r', 'ŕ' => 'r', 'Ŕ' => 'r', 'œ' => 'ae', 'Œ' => 'ae', 'ő' => 'o', 'Ő' => 'o', 'ŏ' => 'o', 'Ŏ' => 'o', 'ō' => 'o', 'Ō' => 'o', 'ŋ' => 'n', 'Ŋ' => 'n', 'ʼn' => '"n', 'ň' => 'n', 'Ň' => 'n', 'ņ' => 'n', 'Ņ' => 'n', 'ń' => 'n', 'Ń' => 'n', 'ł' => 'l', 'Ł' => 'l', 'ŀ' => 'l', 'Ŀ' => 'l', 'ľ' => 'l', 'Ľ' => 'l', 'ļ' => 'l', 'Ļ' => 'l', 'ĺ' => 'l', 'Ĺ' => 'l', 'ĸ' => 'k', 'ķ' => 'k', 'Ķ' => 'k', 'ĵ' => 'j', 'Ĵ' => 'j', 'ij' => 'ij', 'IJ' => 'ij', 'ı' => 'i', 'İ' => 'i', 'į' => 'i', 'Į' => 'i', 'ĭ' => 'i', 'Ĭ' => 'i', 'ī' => 'i', 'Ī' => 'i', 'ĩ' => 'i', 'Ĩ' => 'i', 'ħ' => 'h', 'Ħ' => 'h', 'ĥ' => 'h', 'Ĥ' => 'h', 'ģ' => 'g', 'Ģ' => 'g', 'ġ' => 'g', 'Ġ' => 'g', 'ğ' => 'g', 'Ğ' => 'g', 'ĝ' => 'g', 'Ĝ' => 'g', 'ě' => 'e', 'Ě' => 'e', 'ę' => 'e', 'Ę' => 'e', 'ė' => 'e', 'Ė' => 'e', 'ĕ' => 'e', 'Ĕ' => 'e', 'ē' => 'e', 'Ē' => 'e', 'đ' => 'd', 'Đ' => 'd', 'ď' => 'd', 'Ď' => 'd', 'č' => 'c', 'Č' => 'c', 'ċ' => 'c', 'Ċ' => 'c', 'ĉ' => 'c', 'Ĉ' => 'c', 'ć' => 'c', 'Ć' => 'c', 'ą' => 'a', 'Ą' => 'a', 'ă' => 'a', 'Ă' => 'a', 'ā' => 'a', 'Ā' => 'a', 'ÿ' => 'y', 'þ' => 'th', 'ý' => 'y', 'ü' => 'ue', 'û' => 'u', 'ú' => 'u', 'ù' => 'u', 'ø' => 'oe', '÷' => '/', 'ö' => 'oe', 'õ' => 'o', 'ô' => 'o', 'ó' => 'o', 'ò' => 'o', 'ñ' => 'n', 'ð' => 'd', 'ï' => 'i', 'î' => 'i', 'í' => 'i', 'ì' => 'i', 'ë' => 'e', 'ê' => 'e', 'é' => 'e', 'è' => 'e', 'ç' => 'c', 'æ' => 'ae', 'å' => 'aa', 'ä' => 'ae', 'ã' => 'a', 'â' => 'a', 'á' => 'a', 'à' => 'a', 'ß' => 'ss', 'Þ' => 'th', 'Ý' => 'y', 'Ü' => 'ue', 'Û' => 'u', 'Ú' => 'u', 'Ù' => 'u', 'Ø' => 'oe', '×' => '*', 'Ö' => 'oe', 'Õ' => 'o', 'Ô' => 'o', 'Ó' => 'o', 'Ò' => 'o', 'Ñ' => 'n', 
'Ð' => 'd', 'Ï' => 'i', 'Î' => 'i', 'Í' => 'i', 'Ì' => 'i', 'Ë' => 'e', 'Ê' => 'e', 'É' => 'e', 'È' => 'e', 'Ç' => 'c', 'Æ' => 'ae', 'Å' => 'aa', 'Ä' => 'ae', 'Ã' => 'a', 'Â' => 'a', 'Á' => 'a', 'À' => 'a', '¿' => '?', '¾' => '3/4', '½' => '1/2', '¼' => '1/4', '»' => '"', 'º' => '1', '¹' => '1', '´' => '"', '³' => '3', '²' => '2', '±' => '+-', '®' => '(r)', '­' => '', '«' => '"', 'ª' => 'a', '©' => '(c)', '¡' => '!', ' ' => ' ', 'Ÿ' => 'y', 'œ' => 'oe', 'Œ' => 'oe', '~' => '-', '`' => '"', 'Z' => 'z', 'Y' => 'y', 'X' => 'x', 'W' => 'w', 'V' => 'v', 'U' => 'u', 'T' => 't', 'S' => 's', 'R' => 'r', 'Q' => 'q', 'P' => 'p', 'O' => 'o', 'N' => 'n', 'M' => 'm', 'L' => 'l', 'K' => 'k', 'J' => 'j', 'I' => 'i', 'H' => 'h', 'G' => 'g', 'F' => 'f', 'E' => 'e', 'D' => 'd', 'C' => 'c', 'B' => 'b', 'A' => 'a', '\'' => '"'));
// Substitute every character that has an entry in the cached table, then run
// the compat URL cleanup on the substituted text (treated as utf-8).
$text = eZCharTransform::commandUrlCleanupCompat(strtr($text, $data['table']), 'utf-8');