Exemple #1
0
 /**
  * Build a regular expression fragment that matches the given text case-insensitively.
  *
  * When self::is_ascii() accepts the text, the quoted text is simply wrapped in an
  * inline "(?i:...)" group. Otherwise the fragment is assembled character by character
  * from the lower-case and upper-case forms of the text, spelling out both cases of
  * every letter explicitly:
  *   - a character whose two forms are identical is emitted as a quoted literal;
  *   - a character whose lower-case form passes self::is_ascii() becomes a class "[xX]";
  *   - any other character becomes an alternation "(x|X)" (note: a capturing group).
  *
  * Example (non ASCII): "123_слово_test" => "123_(с|С)(л|Л)(о|О)(в|В)(о|О)_[tT][eE][sS][tT]"
  * Example (only ASCII): "123_test" => "(?i:123_test)"
  *
  * @param  string|null $s          Text to convert; NULL is passed through unchanged.
  * @param  string|null $delimiter  Optional pattern delimiter, forwarded to preg_quote() so that it is escaped too.
  *                                 The / is the most commonly used delimiter.
  * @return string|bool|null        The fragment, NULL for NULL input, or FALSE when one of the
  *                                 PHPUTF8 case-conversion / split calls returns FALSE.
  */
 public static function preg_quote_case_insensitive($s, $delimiter = null)
 {
     if ($s === null) {
         return null;
     }

     // Fast path: the inline (?i:...) option is used for text accepted by is_ascii().
     if (self::is_ascii($s)) {
         return '(?i:' . preg_quote($s, $delimiter) . ')';
     }

     $lowered = PHPUTF8::lowercase($s);
     if (false === $lowered) {
         return false;
     }

     $raised = PHPUTF8::uppercase($s);
     if (false === $raised) {
         return false;
     }

     $lowerChars = PHPUTF8::str_split($lowered);
     if (false === $lowerChars) {
         return false;
     }

     $upperChars = PHPUTF8::str_split($raised);
     if (false === $upperChars) {
         return false;
     }

     // Walk both case variants in parallel; position N of one array is paired
     // with position N of the other.
     $pieces = array();
     foreach ($lowerChars as $pos => $lo) {
         $up = $upperChars[$pos];

         if ($lo === $up) {
             // No case distinction (digit, punctuation, ...): plain quoted literal.
             $pieces[] = preg_quote($lo, $delimiter);
             continue;
         }

         if (self::is_ascii($lo)) {
             // Both variants go into one character class.
             $pieces[] = '[' . preg_quote($lo . $up, $delimiter) . ']';
             continue;
         }

         // Remaining characters are written as an alternation of the two variants.
         $pieces[] = '(' . preg_quote($lo, $delimiter) . '|' . preg_quote($up, $delimiter) . ')';
     }

     return implode('', $pieces);
 }
Exemple #2
0
 /**
  * Turn one matched letter into a regular expression fragment that tolerates
  * repeated letters and "holes" between them.
  *
  * The fragment matches one or more occurrences of the letter (in either case),
  * followed by any run of holes and further occurrences of the same letter.
  * A hole is a single character that is neither a letter nor a digit and that is
  * not the start of the two-character sequence "/\".
  *
  * Four Cyrillic letters get hand-written templates that also accept a stand-in:
  * "@" for "а", "3" for "з", "6" for "б" and the sequence "/\" for "л".
  *
  * NOTE(review): the templates contain literal spaces and the result ends with CRLF,
  * so the fragment is presumably meant for a pattern compiled in extended (x) mode;
  * confirm at the call site.
  *
  * @param  array $m  Match array; only element 0 (the matched letter) is read.
  * @return string    The fragment, terminated by "\r\n".
  */
 private static function _make_regexp_callback(array $m)
 {
     $letter = $m[0];

     # Earlier definitions of a hole, kept for reference:
     #   '[\x00-\x20\-_\*\~\.\'"\^=`:]'
     #   '[\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]'
     # Current definition: non letter, non digit, non '/\'
     $holes = '(?!/\\\\)[^\\p{L}\\d]';

     # Letters with a hand-written template (they accept look-alike characters).
     $special = array(
         'а' => '[@аА]++           (?>[:holes:]|[@аА]+)*+',
         'з' => '[3зЗ]++           (?>[:holes:]|[3зЗ]+)*+',
         'б' => '[6бБ]++           (?>[:holes:]|[6бБ]+)*+',
         'л' => '(?>[лЛ]+|/\\\\)++ (?>[:holes:]|[лЛ]+|/\\\\)*+',
     );

     if (is_string($letter) && isset($special[$letter])) {
         $template = $special[$letter];
     } else {
         # In PCRE-7.2 the /i flag combined with /u does not work in a regular
         # expression for some reason (BUG?), so a character class holding the
         # letter in both cases is built instead.
         $class = '[' . preg_quote($letter . PHPUTF8::uppercase($letter), '~') . ']';
         $template = strtr('$0++ (?>[:holes:]|$0+)*+', array('$0' => $class));
     }

     return str_replace('[:holes:]', $holes, $template . "\r\n");
 }