Example #1
0
/**
 * Run a multibyte regex search in a given encoding and print every match.
 *
 * The subject and the pattern are converted from $in_enc to $test_enc, the
 * search is initialised with the given options, and one line is printed per
 * successful mb_ereg_search_pos() call. Each line shows the test encoding,
 * the value of mb_ereg_search_getpos() after the match, and the match
 * registers with their first element removed, joined by '-' and converted
 * back to $in_enc.
 *
 * Note: mb_regex_encoding() is changed for the whole process and is not
 * restored afterwards.
 *
 * @param string $test_enc Encoding used for the regex engine and the search.
 * @param string $str      Subject string, encoded in $in_enc.
 * @param string $look_for Pattern, encoded in $in_enc.
 * @param string $opt      Option string handed to mb_ereg_search_init().
 * @param string $in_enc   Encoding of the inputs and of the printed groups.
 * @return void
 */
function test_search($test_enc, $str, $look_for, $opt, $in_enc = 'EUC-JP')
{
    mb_regex_encoding($test_enc);

    $haystack = mb_convert_encoding($str, $test_enc, $in_enc);
    $needle = mb_convert_encoding($look_for, $test_enc, $in_enc);
    mb_ereg_search_init($haystack, $needle, $opt);

    for (;;) {
        // Stop as soon as the engine reports no further match.
        if (!mb_ereg_search_pos()) {
            return;
        }

        $groups = mb_ereg_search_getregs();
        // Discard the first register so only the remaining ones are printed.
        array_shift($groups);

        $joined = '';
        if (is_array($groups)) {
            $joined = implode('-', $groups);
        }

        $cursor = mb_ereg_search_getpos();
        printf("(%s) (%d) %s\n", $test_enc, $cursor, mb_convert_encoding($joined, $in_enc, $test_enc));
    }
}
Example #2
0
 /**
  * Regular expression split and return all parts.
  *
  * The mb_ereg_search_* functions report positions as byte offsets, so the
  * subject is measured and sliced with byte-based string functions. Slicing
  * at match boundaries keeps multibyte characters intact.
  *
  * @param string $pattern Pattern
  * @param string $subject Subject
  * @param string $option  Option string passed to mb_ereg_search_init()
  * @param int    $limit   Maximum number of parts; 1 returns the subject
  *                        unsplit, values below 1 mean "no limit"
  * @return string[] Array of split parts, array with original string otherwise
  * @throws MbRegexException When compilation error occurs
  * @link http://php.net/function.mb-split.php
  */
 public static function split($pattern, $subject, $option = '', $limit = -1)
 {
     static::setUp($pattern);
     $limit = (int) $limit;
     $position = 0;      // byte offset where the next search starts
     $lastPosition = 0;  // byte offset where the pending part starts
     $res = array();
     // Byte length: must use the same unit as mb_ereg_search_getpos().
     $subjectLen = \strlen($subject);
     // The subject and pattern never change, so initialise the search once.
     \mb_ereg_search_init($subject, $pattern, $option);
     while ($position < $subjectLen && $limit !== 1) {
         \mb_ereg_search_setpos($position);
         $matches = \mb_ereg_search_regs();
         if ($matches === false) {
             break;
         }
         $matchEnd = \mb_ereg_search_getpos();
         if ($matchEnd <= $position) {
             // Zero-width match at the search start: the engine did not move
             // forward, so step ahead manually instead of looping forever.
             // No part is emitted for such a match.
             ++$position;
             continue;
         }
         $matchStart = $matchEnd - \strlen($matches[0]);
         $res[] = (string) \substr($subject, $lastPosition, $matchStart - $lastPosition);
         $position = $matchEnd;
         $lastPosition = $matchEnd;
         --$limit;
     }
     // Whatever follows the last delimiter (possibly an empty string) is
     // always the final part.
     $res[] = (string) \substr($subject, $lastPosition);
     static::tearDown();
     return $res;
 }
Example #3
0
// --- mb_ereg_replace(): literal replacement ---
// Replace the word "four" with the value of $num; note that $num is an int
// passed where the replacement text goes.
$num = 4;
$str = "This string has four words.";
$str = mb_ereg_replace("four", $num, $str);
var_dump($str);
// --- mb_ereg_replace(): replacement referring to the match ---
// Wrap a URL-shaped match in an anchor tag; the replacement uses \0 twice to
// re-insert the matched text (presumably the whole match - confirm against
// the mb_ereg_replace() documentation).
$test = "http://test.com/test";
$test = mb_ereg_replace("[[:alpha:]]+://[^<>[:space:]]+[[:alnum:]/]", "<a href=\"\\0\">\\0</a>", $test);
var_dump($test);
// --- mb_ereg_search(): first match, strict comparison, and position ---
// Search a string containing non-ASCII letters for a run of word characters
// with the regex encoding set to UTF-8.
$str = "PrÜÝ" . "fung abc pÜ";
$reg = "\\w+";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str, $reg);
// The boolean returned by mb_ereg_search() is overwritten on the next line;
// only the registers fetched by mb_ereg_search_getregs() are kept in $r.
$r = mb_ereg_search();
$r = mb_ereg_search_getregs();
// Compare the fetched registers strictly (===) with the expected
// single-element array, then dump the current search position.
var_dump($r === array("PrÜÝ" . "fung"));
var_dump(mb_ereg_search_getpos());
// --- Same search again, compared loosely ---
$str = "PrÜÝ" . "fung abc pÜ";
$reg = "\\w+";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str, $reg);
// As above, the first assignment to $r is immediately replaced.
$r = mb_ereg_search();
$r = mb_ereg_search_getregs();
// Compare the fetched registers loosely (==) with the expected array.
var_dump($r == array("PrÜÝ" . "fung"));
// --- Same search a third time ---
$str = "PrÜÝ" . "fung abc pÜ";
$reg = "\\w+";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str, $reg);
$r = mb_ereg_search();
$r = mb_ereg_search_getregs();
// $r now holds the registers of the first match; the visible code does not
// inspect or print it any further.
 /**
  * Break a search text into tokens with a multibyte regex scan.
  *
  * The scan pattern is the alternation of four configured sub-patterns
  * (phrase, sbword, mbword, symbol) taken from $this->_regex_patterns.
  * Matching restarts at the byte offset where the previous match ended and
  * stops at the end of the text or at the first failed search.
  *
  * @access private
  * @param string $text 'UTF-8' encoded search text
  * @return array list of matched tokens in order of appearance
  */
 function _split_to_tokens($text)
 {
     // Alternatives, in priority order (per the original author's notes):
     //   1. double quoted phrase
     //   2. single byte word, including html entities and latin1 letters
     //   3. multi byte word
     //   4. symbol - !#$%&'()*+,-./:;<=>?@[\]~_`{|}~ and latin1 supplement symbol
     $alternatives = array(
         $this->_regex_patterns['phrase'],
         $this->_regex_patterns['sbword'],
         $this->_regex_patterns['mbword'],
         $this->_regex_patterns['symbol'],
     );
     $pattern = implode('|', $alternatives);

     mb_ereg_search_init($text, $pattern);

     $found = array();
     // The offset is compared against the byte length of the text.
     $byteLength = strlen($text);
     $offset = 0;
     while ($offset < $byteLength) {
         mb_ereg_search_setpos($offset);
         $match = mb_ereg_search_regs();
         if ($match === false) {
             // No further token can be matched.
             break;
         }
         // Tokens are stored exactly as matched; nothing is re-encoded here.
         $found[] = $match[0];
         // Resume from wherever the engine left its search cursor.
         $offset = mb_ereg_search_getpos();
     }

     return $found;
 }