/**
 * Collects every match of a multibyte regular expression in a subject string.
 *
 * @param string $pattern     mbregex pattern handed to mb_ereg_search_init().
 * @param string $subject     Text to scan.
 * @param array  $subpatterns Receives one entry per match, each entry being the
 *                            array returned by mb_ereg_search_regs(). Left
 *                            untouched when the search cannot be initialised.
 * @return bool False when mb_ereg_search_init() fails, true otherwise (also
 *              when there are zero matches).
 */
function mb_ereg_match_all($pattern, $subject, array &$subpatterns)
{
    $ready = mb_ereg_search_init($subject, $pattern);
    if (!$ready) {
        return false;
    }

    $found = array();
    for ($hit = mb_ereg_search_regs(); $hit; $hit = mb_ereg_search_regs()) {
        $found[] = $hit;
    }

    $subpatterns = $found;

    return true;
}
/**
 * Splits a file name into a base part and a trailing run of dot-separated
 * alphanumeric extensions, using the mbregex search functions.
 *
 * Side effect: sets the mbregex encoding to UTF-8.
 *
 * @param string $filename File name to split.
 * @return array Two elements: capture groups 1 and 2 of the pattern, or
 *               [$filename, ''] when $filename is falsy or the search cannot
 *               be initialised.
 */
public static function splitExtension($filename)
{
    mb_regex_encoding('UTF-8');
    $re = "(.*?)((\\.[[A-Z][a-z][0-9]]+)*)\$";

    // Guard clause: nothing to split.
    if (!$filename || !mb_ereg_search_init($filename, $re)) {
        return [$filename, ''];
    }

    $groups = mb_ereg_search_regs($re);

    // Drop the full match (index 0) and keep the two capture groups after it.
    return array_slice($groups, 1, 2);
}
/**
 * Translates a language string and fills in its {placeholder} tokens.
 *
 * Every "{name}" token found in the translated string is replaced by
 * $args['name']; when no such argument exists the token is replaced by
 * "name-missing".
 *
 * @param string $lang_string Key/string passed to $this->translate().
 * @param array  $args        Placeholder name => replacement value.
 * @return string The translated string with placeholders substituted.
 */
public function translateArgs($lang_string, $args)
{
    $lang_string = $this->translate($lang_string);

    // The search runs over the string as it was at init time; replacements
    // below work on $lang_string and do not disturb the iteration.
    if (mb_ereg_search_init($lang_string)) {
        while (false !== ($vars = mb_ereg_search_regs("{[^{]*}"))) {
            foreach ($vars as $curly_pattern) {
                // Strip the surrounding braces to get the placeholder name.
                $pattern = mb_substr($curly_pattern, 1, mb_strlen($curly_pattern) - 2);

                // Check the key itself: reading $args[$pattern] first would
                // raise an undefined-key warning for missing arguments.
                $value = isset($args[$pattern]) ? $args[$pattern] : $pattern . '-missing';

                // Literal replacement: the token must not be interpreted as a
                // regular expression, nor the value as a back-reference
                // template, which is what mb_ereg_replace() would do.
                $lang_string = str_replace($curly_pattern, $value, $lang_string);
            }
        }
    }

    return $lang_string;
}
/* Codeine
 * @author bergstein@trickyplan.com
 * @description
 * @package Codeine
 * @version 8.x
 */

/*
 * 'Match': runs $Call['Pattern'] against $Call['Value'] with mb_ereg() and
 * returns whatever mb_ereg() stored in its matches argument (null when
 * mb_ereg() did not write to it).
 */
setFn('Match', function ($Call) {
    $Pockets = null;

    // mb_ereg() accepts only (pattern, string, &matches). Options therefore
    // cannot be passed as a fourth argument; they are applied through the
    // mbregex default options and restored afterwards.
    $Previous = mb_regex_set_options();
    if (isset($Call['Regex Options'])) {
        mb_regex_set_options($Call['Regex Options']);
    }

    try {
        mb_ereg($Call['Pattern'], $Call['Value'], $Pockets);
    } finally {
        mb_regex_set_options($Previous);
    }

    return $Pockets;
});

/*
 * 'All': collects every match of $Call['Pattern'] in $Call['Value'].
 * The result is grouped by capture index: $Results[0] holds all full matches,
 * $Results[1] all first groups, and so on. Returns false when nothing matches.
 */
setFn('All', function ($Call) {
    mb_ereg_search_init($Call['Value'], $Call['Pattern'], $Call['Regex Options']);

    if (!mb_ereg_search()) {
        return false;
    }

    $Results = [];
    $Result = mb_ereg_search_getregs(); // get first result

    do {
        foreach ($Result as $IX => $Value) {
            $Results[$IX][] = $Value;
        }

        $Result = mb_ereg_search_regs(); // get next result
    } while ($Result);

    return $Results;
});
/**
 * Scans a directory tree for language strings.
 *
 * Every file whose extension is listed in self::$ALLOW_EXTENSIONS (and whose
 * base name does not contain "jsLangs") is checked against each expression in
 * self::$PARSE_REGEXPR. For every match, capture group 1 becomes a key in the
 * result and the "path:line" location is recorded under it.
 *
 * The directory is skipped when it is already listed in self::$PARSED_PATHS;
 * it is appended to that list in either case. The static accumulators
 * self::$FINDED_LANGS and self::$FINDED_JS_LANGS are emptied before returning.
 *
 * @param string $dir Directory to scan recursively.
 * @return array ['parsed_langs' => string => list of "path:line",
 *                'js_langs'     => string => string (matches found in .js files)]
 */
public function findLangs($dir = '')
{
    if (!in_array($dir, self::$PARSED_PATHS)) {
        $baseDir = new \RecursiveIteratorIterator(new \RecursiveDirectoryIterator($dir));

        foreach ($baseDir as $file) {
            if (!$file->isFile()) {
                continue;
            }

            if (!in_array($file->getExtension(), self::$ALLOW_EXTENSIONS)
                || strstr($file->getBasename(), 'jsLangs')
            ) {
                continue;
            }

            // Best effort: unreadable files are skipped instead of letting
            // `false` flow into implode()/foreach below.
            $content = @file($file->getPathname());
            if ($content === false) {
                continue;
            }

            // Cheap pre-check on the whole file before scanning line by line.
            $implode_content = implode(' ', $content);
            $lang_exist = false;
            foreach (self::$PARSE_REGEXPR as $regexpr) {
                if (preg_match('/' . $regexpr . '/', $implode_content)) {
                    $lang_exist = true;
                    break;
                }
            }

            if (!$lang_exist) {
                continue;
            }

            // Loop-invariant per file.
            mb_regex_encoding("UTF-8");
            $isJs = $file->getExtension() == 'js';
            $path = str_replace("\\", "/", $file->getPathname());

            foreach ($content as $line_number => $line) {
                foreach (self::$PARSE_REGEXPR as $regexpr) {
                    mb_ereg_search_init($line, $regexpr);

                    if (!mb_ereg_search()) {
                        continue;
                    }

                    $lang = mb_ereg_search_getregs(); // get first result

                    do {
                        // NOTE(review): mb_ereg_replace() takes no delimiters, so
                        // '!\s+!' only matches whitespace enclosed in literal "!".
                        // This looks like a preg-style pattern; left unchanged
                        // because fixing it would alter the generated keys.
                        $origin = mb_ereg_replace('!\\s+!', ' ', $lang[1]);

                        // empty() avoids an undefined-index notice on first sight.
                        if (empty(self::$FINDED_LANGS[$origin])) {
                            self::$FINDED_LANGS[$origin] = array();
                        }

                        if ($isJs) {
                            self::$FINDED_JS_LANGS[$origin] = $origin;
                        }

                        self::$FINDED_LANGS[$origin][] = $path . ':' . ($line_number + 1);

                        $lang = mb_ereg_search_regs(); // get next result
                    } while ($lang);
                }
            }
        }
    }

    self::$PARSED_PATHS[] = $dir;

    $data = array(
        'parsed_langs' => self::$FINDED_LANGS,
        'js_langs' => self::$FINDED_JS_LANGS,
    );

    self::$FINDED_LANGS = array();
    self::$FINDED_JS_LANGS = array();

    return $data;
}
/**
 * Breaks a text into a list of words.
 *
 * Separator characters are doubled first so that adjacent words each get
 * their own delimiter, then words are extracted with whichever regex engine
 * is flagged as available ($this->pcre_properties, $this->mbstring_regex, or
 * plain PCRE as the fallback). Words shorter than
 * $config['fulltext_mysql_min_word_len'] or longer than
 * $config['fulltext_mysql_max_word_len'] are dropped.
 *
 * @param string $text Input text.
 * @return array Re-indexed list of trimmed words.
 */
function split_message($text)
{
    global $config;

    // Common preparation shared by all three engines.
    $source = str_replace('\'\'', '\' \'', trim($text));

    if ($this->pcre_properties) {
        $doubled = preg_replace('#([^\p{L}\p{N}\'*])#u', '$1$1', $source);

        $found = array();
        preg_match_all('#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u', $doubled, $found);
        $words = $found[1];
    } else if ($this->mbstring_regex) {
        $doubled = mb_ereg_replace('([^\w\'*])', '\\1\\1', $source);

        mb_ereg_search_init($doubled, '(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)');
        $words = array();
        while (($hit = mb_ereg_search_regs())) {
            $words[] = $hit[1];
        }
    } else {
        $doubled = preg_replace('#([^\w\'*])#u', '$1$1', $source);

        $found = array();
        preg_match_all('#(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)#u', $doubled, $found);
        $words = $found[1];
    }

    // remove too short or too long words
    $kept = array();
    foreach (array_values($words) as $word) {
        $word = trim($word);

        if (utf8_strlen($word) < $config['fulltext_mysql_min_word_len'] || utf8_strlen($word) > $config['fulltext_mysql_max_word_len']) {
            continue;
        }

        $kept[] = $word;
    }

    return $kept;
}
<?php
// Stand-alone script: runs an mbregex search that starts at a fixed offset
// and dumps the result.
$subject = "foo bar bà€œz baz";
$pattern = '\\s+';
// Start offset handed to mb_ereg_search_setpos() below.
$position = 13;

mb_regex_encoding('utf-8');
mb_internal_encoding('utf-8');

// The pattern is prefixed with \G before being registered for the search.
// NOTE(review): presumably this anchors the match at the search start
// position, so var_dump() shows false unless whitespace begins exactly at
// $position — confirm against the expected output of this script.
mb_ereg_search_init($subject, '\\G' . $pattern, 'msi');
mb_ereg_search_setpos($position);
var_dump(mb_ereg_search_regs());
/**
 * Replaces %name% tokens in a string with values from a variable table.
 *
 * A token is substituted only when $variables[$lang][name] is set and the
 * name differs from the input string itself (guards against a string that
 * references itself).
 *
 * @param array  $variables Table indexed as $variables[$lang][$name]; taken
 *                          by reference but only read here.
 * @param string $lang      Language key selecting the sub-table.
 * @param string $str       String containing %name% tokens.
 * @return string $str with all resolvable tokens substituted.
 */
public function substitueVariables(&$variables, $lang, $str)
{
    $langstr = $str;

    // replace variables in string
    if (mb_ereg_search_init($langstr)) {
        while (false !== ($vars = mb_ereg_search_regs("%[^%]*%"))) {
            foreach ($vars as $curly_pattern) {
                // $curly_pattern contains %pattern% in replacement string
                $pattern = mb_substr($curly_pattern, 1, mb_strlen($curly_pattern) - 2);

                // avoid recursive loop; strict comparison so that numeric-looking
                // strings such as "10" and "1e1" are not treated as equal
                if ($pattern === (string) $str) {
                    continue;
                }

                if (isset($variables[$lang][$pattern])) {
                    // Literal replacement: the token must not be interpreted as
                    // a regular expression, nor the value as a back-reference
                    // template, which is what mb_ereg_replace() would do.
                    $langstr = str_replace($curly_pattern, $variables[$lang][$pattern], $langstr);
                }
            }
        }
    }

    return $langstr;
}
/**
 * Re-cases a string piece by piece.
 *
 * The string is cut into pieces with StructuredData::$SPLITTER_REGEX and each
 * piece is appended to the result in upper, lower, title or unchanged case
 * depending on the lookup tables StructuredData::$UPPERCASE_WORDS,
 * $NAME_WORDS and $LOWERCASE_WORDS and on the flags below.
 *
 * @param string $str     Input text.
 * @param bool   $isName  Treat the text as a personal name: all-uppercase
 *                        pieces longer than one character are lowercased
 *                        first (keeping the "MC"/"O'" shapes partly upper).
 * @param bool   $mustCap Whether the first piece must be capitalised; for
 *                        later pieces this is recomputed and is true only
 *                        after ':', '?' or '!' when $isName is false.
 * @return string The re-cased text ('' for falsy input).
 */
public static function capitalizeTitleCase($str, $isName = false, $mustCap = true)
{
    if (!$str) {
        return '';
    }

    $out = '';
    mb_ereg_search_init($str, StructuredData::$SPLITTER_REGEX);

    for ($hit = mb_ereg_search_regs(); $hit; $hit = mb_ereg_search_regs()) {
        $word = $hit[0];
        $upper = mb_convert_case($word, MB_CASE_UPPER);
        $lower = mb_convert_case($word, MB_CASE_LOWER);

        if ($isName && mb_strlen($word) > 1 && $word == $upper) {
            // turn all-uppercase names into all-lowercase
            $keepsPrefixShape = mb_strlen($word) > 3
                && (mb_substr($word, 0, 2) == 'MC' || mb_substr($word, 0, 2) == "O'");

            $word = $keepsPrefixShape
                ? mb_substr($word, 0, 1) . mb_substr($lower, 1, 1) . mb_substr($word, 2, 1) . mb_substr($lower, 3)
                : $lower;
        }

        if (isset(StructuredData::$UPPERCASE_WORDS[$upper]) || $word == $upper) {
            // upper -> upper
            $out .= $upper;
        } elseif (!$mustCap && isset(StructuredData::$NAME_WORDS[$lower])) {
            // name-word: keep as-is
            $out .= $word;
        } elseif (!$isName && !$mustCap && isset(StructuredData::$LOWERCASE_WORDS[$lower])) {
            // upper/mixed/lower -> lower
            $out .= $lower;
        } elseif ($word == $lower) {
            // lower -> mixed
            $out .= mb_convert_case($word, MB_CASE_TITLE);
        } else {
            // mixed -> mixed
            $out .= $word;
        }

        // Sentence-ending punctuation forces capitalisation of the next piece.
        $word = trim($word);
        $mustCap = !$isName && ($word == ':' || $word == '?' || $word == '!');
    }

    return $out;
}
<?php
// Minimal script: initialises an mbregex search with an empty subject, an
// empty pattern and an empty option string, then calls mb_split() with an
// empty pattern and subject, and finally requests the next search result.
// No return value is inspected or printed.
// NOTE(review): presumably a crash/regression reproducer for the interaction
// between mb_split() and the state left by mb_ereg_search_init() — confirm
// against the report this snippet came from.
mb_ereg_search_init("", "", "");
mb_split("", "");
mb_ereg_search_regs();
/**
 * Natural-order comparison that breaks ties between letters differing only
 * in case.
 *
 * While this is not a complete fix (e.g. Ä is not handled as expected),
 * this method does order testa before testA before testb before testB
 * (rather than testA testB testa testb).
 *
 * The two strings are walked character by character; at the first position
 * where the characters differ but are equal after lowercasing, the result is
 * the difference of their code points as computed by self::codepointDec().
 * When no such position exists the result of strnatcmp() is returned.
 *
 * Side effect: sets the mbregex encoding to UTF-8.
 *
 * @param string $a Left operand.
 * @param string $b Right operand.
 * @return int Negative, zero or positive, as for strnatcmp().
 */
public static function naturalOrderCompare($a, $b)
{
    // I guess this is always the case?
    $ENCODING = 'UTF-8';
    $len = min(mb_strlen($a, $ENCODING), mb_strlen($b, $ENCODING));
    mb_regex_encoding($ENCODING);

    // Characters of $a, limited to the length of the shorter string.
    $aa = array();
    $i = 0;
    if (mb_ereg_search_init($a, '.', 'suX') === TRUE) {
        while ($i < $len && ($res = mb_ereg_search_regs()) !== FALSE) {
            if (count($res) !== 1) {
                exit('WTF why is the count != 1');
            }
            $aa[] = $res[0];
            $i++;
        }
    }

    // Only positions that exist in $aa can be compared; without this bound
    // the loop read $aa[$i] past its end whenever $b had more characters.
    $comparable = count($aa);

    $i = 0;
    if (mb_ereg_search_init($b, '.', 'suX') === TRUE) {
        while ($i < $comparable && ($res = mb_ereg_search_regs()) !== FALSE) {
            if (count($res) !== 1) {
                exit('WTF why is the count != 1');
            }
            $aaa = $aa[$i];
            $bbb = $res[0];
            if ($bbb !== $aaa && mb_strtolower($aaa) === mb_strtolower($bbb)) {
                return self::codepointDec($bbb) - self::codepointDec($aaa);
            }
            $i++;
        }
    }

    return strnatcmp($a, $b);
}
/**
 * Adds the words of a text to the index held in $this->words.
 *
 * The text is tokenised with the pattern \w+ (mbregex when self::$ismb is
 * truthy, PCRE otherwise), every token is lowercased, and $weight is added to
 * $this->words[token][$fullid].
 *
 * @param mixed  $fullid Identifier the words belong to (used as array key).
 * @param string $text   Text to index; falsy values are ignored.
 * @param mixed  $weight Amount added per occurrence.
 * @return void
 */
private function addWords($fullid, $text, $weight)
{
    if (!$text) {
        return;
    }

    // Step 1: tokenise and lowercase.
    $tokens = array();
    if (self::$ismb) {
        mb_ereg_search_init($text, "\\w+");
        if (mb_ereg_search()) {
            $hit = mb_ereg_search_getregs();
            do {
                $tokens[] = mb_strtolower($hit[0], 'UTF-8');
                $hit = mb_ereg_search_regs();
            } while ($hit);
        }
    } else {
        preg_match_all("/\\w+/", $text, $found);
        foreach ($found[0] as $raw) {
            $tokens[] = strtolower($raw);
        }
    }

    // Step 2: accumulate the weight per word and id.
    foreach ($tokens as $word) {
        if (!isset($this->words[$word])) {
            $this->words[$word] = array($fullid => $weight);
            continue;
        }

        if (isset($this->words[$word][$fullid])) {
            $this->words[$word][$fullid] += $weight;
        } else {
            $this->words[$word][$fullid] = $weight;
        }
    }
}
/**
 * Regular expression split and return all parts.
 *
 * @param string $pattern Pattern
 * @param string $subject Subject
 * @param string $option  Option string passed to mb_ereg_search_init()
 * @param int    $limit   Maximum number of parts; -1 (default) means no limit,
 *                        1 returns the subject unsplit
 * @return string[] Array of split parts, array with original string otherwise
 * @throws MbRegexException When compilation error occurs
 * @link http://php.net/function.mb-split.php
 */
public static function split($pattern, $subject, $option = '', $limit = -1)
{
    static::setUp($pattern);

    $position = 0;
    $lastPosition = 0;
    $res = array();
    $subjectLen = \mb_strlen($subject);

    // With a limit of 1 nothing may be split off. The do-while below always
    // performs one split before looking at the limit, which made limit=1
    // behave like "no limit", so it is skipped entirely in that case.
    if ($limit !== 1) {
        // NOTE(review): mb_ereg_search_getpos()/setpos() are documented as byte
        // offsets while mb_strlen()/mb_substr() count characters; this only
        // lines up for single-byte text unless setUp() adjusts the internal
        // encoding — confirm.
        do {
            \mb_ereg_search_init($subject, $pattern, $option);
            \mb_ereg_search_setpos($position);
            $matches = \mb_ereg_search_regs();

            if ($matches === false) {
                break;
            }

            $position = \mb_ereg_search_getpos();
            // Part between the previous delimiter and the start of this one.
            $res[] = \mb_substr($subject, $lastPosition, $position - \mb_strlen($matches[0]) - $lastPosition);
            $lastPosition = $position;
        } while ($position < $subjectLen && --$limit !== 1);
    }

    // Remainder after the last delimiter (the whole subject if none matched).
    if ($lastPosition <= $subjectLen) {
        $res[] = \mb_substr($subject, $lastPosition);
    }

    static::tearDown();

    return $res;
}
/**
 * Collects every match of a multibyte regular expression, optionally starting
 * at a byte offset inside the subject.
 *
 * @param string $pattern mbregex pattern.
 * @param string $subject Text to scan.
 * @param array  $matches Receives one entry per match (the array returned by
 *                        mb_ereg_search_regs()). Left untouched when the search
 *                        cannot be initialised.
 * @param string $option  mbregex option string.
 * @param int    $offset  Byte offset at which to start searching; a negative
 *                        value counts from the end of $subject.
 * @return bool True when at least one match was found, false otherwise
 *              (including an offset outside the subject).
 */
function mb_ereg_match_all($pattern, $subject, array &$matches, $option = 'msr', $offset = 0)
{
    if (!mb_ereg_search_init($subject, $pattern, $option)) {
        return false;
    }

    $matches = array();

    // The start position has to be set AFTER mb_ereg_search_init(): init
    // rewinds the search position, so setting it beforehand had no effect.
    $start = (int) $offset;
    if ($start !== 0) {
        $length = strlen((string) $subject);
        if ($start < 0) {
            $start += $length;
        }
        if ($start < 0 || $start > $length || mb_ereg_search_setpos($start) === false) {
            return false;
        }
    }

    while ($r = mb_ereg_search_regs()) {
        $matches[] = $r;
    }

    return !empty($matches);
}
// Script exercising the mbregex search functions; every check is printed
// with var_dump().

// Case 1: iterate word matches, then jump ahead with mb_ereg_search_setpos().
$str = "PrÜÝ" . "fung abc pÜ";
$reg = "\\w+";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str, $reg);
$r = mb_ereg_search();
$r = mb_ereg_search_getregs(); // get first result
var_dump($r === array("PrÜÝ" . "fung"));
// NOTE(review): 15 is presumably the byte offset of the final word, counting
// Ü and Ý as two bytes each — holds only if this file is saved as UTF-8.
var_dump(mb_ereg_search_setpos(15));
$r = mb_ereg_search_regs(); // get next result
var_dump($r == array("pÜ"));

// Case 2: init without a pattern, then pass pattern and options to
// mb_ereg_search_regs() directly.
$str = "PrÜÝ" . "fung abc pÜ";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str);
$r = mb_ereg_search_regs("abc", "ms");
var_dump($r);

// Case 3: same as the first part of case 1, run again after case 2.
$str = "PrÜÝ" . "fung abc pÜ";
$reg = "\\w+";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str, $reg);
$r = mb_ereg_search();
$r = mb_ereg_search_getregs(); // get first result
var_dump($r === array("PrÜÝ" . "fung"));

// Case 4: mb_ereg() with three capture groups; dumps day, month, year and
// finally the full match.
$date = "1973-04-30";
mb_ereg("([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})", $date, $regs);
var_dump($regs[3]);
var_dump($regs[2]);
var_dump($regs[1]);
var_dump($regs[0]);
/**
 * Tries to match a pattern against $this->text at a given offset.
 *
 * The pattern is prefixed with \G before the search is initialised, and the
 * search position is set to $position (or $this->bytepos when none is given).
 *
 * @param string   $pattern  mbregex pattern (without the \G prefix).
 * @param int|null $position Offset passed to mb_ereg_search_setpos();
 *                           defaults to $this->bytepos.
 * @param string   $options  mbregex option string.
 * @return array|false The result of mb_ereg_search_regs(), or false when
 *                     $this->position has reached $this->length.
 */
protected function match($pattern, $position = null, $options = 'msi')
{
    /*{{{*/
    if ($this->position >= $this->length) {
        return false;
    }

    $start = (null === $position) ? $this->bytepos : $position;

    mb_ereg_search_init($this->text, '\\G' . $pattern, $options);
    mb_ereg_search_setpos($start);

    return mb_ereg_search_regs();
}
<?php
// homepage:
//$mb_str = "Алексей Федорович Карамазов был Алексей Федорович Карамазов был kyrillischer string string"; // = "Lorem ipsum dolor sit amet"

// Registers a Cyrillic subject string for the mbregex search functions;
// no pattern is given at this point.
mb_ereg_search_init("Алексей Федорович Карамазов был Алексей Федорович Карамазов был");

// The pattern is supplied here instead; the result of this single search
// call (array of matched parts, or false) is dumped.
$match = mb_ereg_search_regs("ов");
var_dump($match);
/**
 * Generates a slug name.
 *
 * Runs of word characters, "_" and "-" are kept and joined with "-"; leading
 * and trailing "-" / "_" are trimmed and the result is cut to $maxLength.
 *
 * @access public
 * @param string  $str       String to generate the slug from
 * @param string  $default   Default slug, used when nothing usable remains
 * @param integer $maxLength Maximum slug length, in bytes
 * @return string
 */
public static function slugName($str, $default = NULL, $maxLength = 128)
{
    $str = trim($str);

    if (!strlen($str)) {
        return $default;
    }

    if (__TYPECHO_MB_SUPPORTED__) {
        mb_regex_encoding(self::$charset);
        mb_ereg_search_init($str, "[\\w" . preg_quote('_-') . "]+");
        $result = mb_ereg_search();
        $return = '';

        if ($result) {
            $regs = mb_ereg_search_getregs();
            $pos = 0;
            do {
                $return .= ($pos > 0 ? '-' : '') . $regs[0];
                $pos++;
            } while ($regs = mb_ereg_search_regs());
        }

        $str = $return;
    } else {
        if ('UTF-8' == strtoupper(self::$charset)) {
            if (preg_match_all("/[\\w" . preg_quote('_-') . "]+/u", $str, $matches)) {
                $str = implode('-', $matches[0]);
            }
        } else {
            $str = str_replace(array("'", ":", "\\", "/", '"'), "", $str);
            $str = str_replace(array("+", ",", ' ', ',', ' ', ".", "?", "=", "&", "!", "<", ">", "(", ")", "[", "]", "{", "}"), "-", $str);
        }
    }

    $str = trim($str, '-_');
    $str = !strlen($str) ? $default : $str;

    if (__TYPECHO_MB_SUPPORTED__) {
        // Still limited to $maxLength bytes, but cut on a character boundary:
        // plain substr() could leave half of a multibyte character at the end.
        return mb_strcut($str, 0, $maxLength, self::$charset);
    }

    return substr($str, 0, $maxLength);
}
/**
 * Parses iCalendar text into $this->ical and returns it by reference.
 *
 * The input is split into lines, empty lines are dropped, and the lines
 * between BEGIN:VCALENDAR and END:VCALENDAR are walked. BEGIN:/END: lines
 * open and close groups (tracked in $group / $parentgroup, one level of
 * nesting); other lines are handed to $this->addItem() or, when they start
 * with a space or tab, to $this->concatItem().
 *
 * @param string $data Raw calendar text.
 * @return array|false $this->ical: the parsed structure, or false when no
 *                     BEGIN:VCALENDAR / END:VCALENDAR pair was found.
 */
public function &load($data)
{
    $this->ical = false;
    $regex_opt = 'mib';

    // Lines in the string
    $lines = mb_split('[\\r\\n]+', $data);

    // Delete empty ones
    $last = count($lines);
    for ($i = 0; $i < $last; $i++) {
        if (trim($lines[$i]) == "") {
            unset($lines[$i]);
        }
    }
    $lines = array_values($lines);

    // First and last items
    $first = 0;
    $last = count($lines) - 1;

    // Fast path: the calendar wrapper is on the very first and last line.
    // Otherwise scan for it; both stay null when it is not found.
    if (!(mb_ereg_match('^BEGIN:VCALENDAR', $lines[$first], $regex_opt) and mb_ereg_match('^END:VCALENDAR', $lines[$last], $regex_opt))) {
        $first = null;
        $last = null;
        // NOTE(review): $line is bound by reference here and never unset();
        // later assignments to $line write through that reference.
        foreach ($lines as $i => &$line) {
            if (mb_ereg_match('^BEGIN:VCALENDAR', $line, $regex_opt)) {
                $first = $i;
            }
            if (mb_ereg_match('^END:VCALENDAR', $line, $regex_opt)) {
                $last = $i;
                break;
            }
        }
    }

    // Processing
    if (!is_null($first) and !is_null($last)) {
        // Keep only the lines strictly between the wrapper lines (keys preserved).
        $lines = array_slice($lines, $first + 1, $last - $first - 1, true);
        $group = null;
        $parentgroup = null;
        $this->ical = [];
        // $addTo, $addToElement and $clave (below) are assigned but not read
        // anywhere in this method.
        $addTo = [];
        $addToElement = null;
        reset($lines);
        $line = true;

        while (true) {
            // NOTE(review): each() was removed in PHP 8.0. It also advances the
            // array cursor, so the current() call below reads the element AFTER
            // the one each() returned — the first line appears to be skipped.
            // Confirm whether that is intended.
            $line = each($lines);
            if ($line === false) {
                break;
            }
            $line = current($lines);

            // Skip extension properties ("X-...") and blank lines.
            if (substr($line, 0, 2) === 'X-' or trim($line) == '') {
                continue;
            }

            $clave = null;
            $pattern = '^(BEGIN|END)\\:(.+)$'; // (VALARM|VTODO|VJOURNAL|VEVENT|VFREEBUSY|VCALENDAR|DAYLIGHT|VTIMEZONE|STANDARD)
            mb_ereg_search_init($line);
            $regs = mb_ereg_search_regs($pattern, $regex_opt);

            if ($regs) {
                // $regs
                // 0 => BEGIN:VEVENT
                // 1 => BEGIN
                // 2 => VEVENT
                switch ($regs[1]) {
                    case 'BEGIN':
                        // Opening a group inside another one: the current
                        // group becomes the parent.
                        if (!is_null($group)) {
                            $parentgroup = $group;
                        }
                        $group = trim($regs[2]);

                        // Adding new values to groups
                        if (is_null($parentgroup)) {
                            if (!array_key_exists($group, $this->ical)) {
                                $this->ical[$group] = [null];
                            } else {
                                $this->ical[$group][] = null;
                            }
                        } else {
                            if (!array_key_exists($parentgroup, $this->ical)) {
                                $this->ical[$parentgroup] = [$group => [null]];
                            }
                            if (!array_key_exists($group, $this->ical[$parentgroup])) {
                                $this->ical[$parentgroup][$group] = [null];
                            } else {
                                $this->ical[$parentgroup][$group][] = null;
                            }
                        }
                        break;

                    case 'END':
                        // First END closes the inner group; an END seen while
                        // no group is open clears the parent as well.
                        if (is_null($group)) {
                            $parentgroup = null;
                        }
                        $group = null;
                        break;
                }
                continue;
            }

            // There are cases like "ATTENDEE" that may take several lines.
            // NOTE(review): strpos(':', $line) searches for $line inside ':'
            // (arguments look swapped), so this condition is true for almost
            // every line that does not start with whitespace. Swapping them
            // would stop unfolding for lines that contain ':', so verify the
            // intent before changing it.
            if (!in_array($line[0], [" ", "\t"]) and strpos(':', $line) === false) {
                $r = current($lines);
                $concatenar = next($lines);
                // Append following lines that begin with a space or tab,
                // dropping that first character.
                while ($concatenar and in_array($concatenar[0], [" ", "\t"])) {
                    $r .= substr($concatenar, 1);
                    $concatenar = next($lines);
                }
                // Step back onto the last line that was consumed.
                prev($lines);
                if ($r !== $line) {
                    $line = $r;
                }
            }

            if (!in_array($line[0], [" ", "\t"])) {
                $this->addItem($line, $group, $parentgroup);
            } else {
                $this->concatItem($line);
            }
        }
    }

    return $this->ical;
}
/**
 * Cuts a search text into tokens.
 *
 * The token pattern is the alternation of four sub-patterns taken from
 * $this->_regex_patterns, tried in this order:
 *   1. 'phrase' - double quoted phrase
 *   2. 'sbword' - single byte word (html entities and latin1 letters)
 *   3. 'mbword' - multi byte word
 *   4. 'symbol' - punctuation and latin1 supplement symbols
 *
 * @access private
 * @param string $text 'UTF-8' encoded search text
 * @return array List of matched tokens, in order of appearance
 */
function _split_to_tokens($text)
{
    $pattern = implode('|', array(
        $this->_regex_patterns['phrase'],
        $this->_regex_patterns['sbword'],
        $this->_regex_patterns['mbword'],
        $this->_regex_patterns['symbol'],
    ));

    mb_ereg_search_init($text, $pattern);

    $tokens = array();
    $end = strlen($text);
    $cursor = 0;

    // Walk the text match by match until the end is reached or nothing
    // further matches.
    while ($cursor < $end) {
        mb_ereg_search_setpos($cursor);

        $hit = mb_ereg_search_regs();
        if ($hit === false) {
            break;
        }

        $tokens[] = $hit[0];
        $cursor = mb_ereg_search_getpos();
    }

    return $tokens;
}
/**
 * Highlights search words inside an HTML fragment.
 *
 * The HTML is tokenised into words, non-word text, tags and comment/template
 * blocks. Consecutive words and inline tags are collected into a "part";
 * when any other token ends the part, the part is passed through
 * self::markIt() if its lowercased words match the search pattern, otherwise
 * it is copied unchanged. Two parallel implementations exist: mbregex (when
 * mb_ereg_search() is available) and PCRE.
 *
 * NOTE(review): $words are inserted into the pattern unescaped, so regex
 * metacharacters in a search word become part of the expression — confirm
 * callers sanitise them.
 *
 * @param string $html  HTML to process.
 * @param array  $words Search words.
 * @return string The HTML with matching parts wrapped by self::markIt();
 *                $html unchanged when $words is empty.
 */
public static function mark($html, $words = array())
{
    if (!$words || count($words) <= 0) {
        return $html;
    }

    $ismb = function_exists('mb_ereg_search');
    // Tags that do not end the current part.
    $inlineTags = 'b|i|em|strong|tt|big|small|strike|u|span|a';
    // Tags whose content is copied verbatim until the closing tag.
    $ignoreTags = 'script|style|textarea';

    // Build an alternation of the lowercased words; words of three or more
    // characters also match any word that merely starts with them.
    $pattern = '';
    foreach ($words as $word) {
        if ($pattern != '') {
            $pattern .= '|';
        }
        $pattern .= $ismb ? mb_strtolower($word, 'UTF-8') : strtolower($word);
        if ($ismb && mb_strlen($word, 'UTF-8') >= 3 || !$ismb && strlen($word) >= 3) {
            $pattern .= '\\w*';
        }
    }

    $newhtml = '';
    $currPart = '';  // raw HTML of the part being collected
    $currWord = '';  // lowercased words of that part, concatenated
    $ignore = false; // false, or the closing tag ('/x') that ends ignore mode

    if ($ismb) {
        mb_regex_encoding('UTF-8');
        // Groups: 1 = word, 2 = non-word text, 3 = tag, 4 = comment/template block.
        // NOTE(review): group 3 captures an optional "/" plus a single
        // character of the tag name, yet it is compared against multi-letter
        // names below — verify whether a "+" is missing after [a-zA-Z-].
        mb_ereg_search_init($html, '(\\w+)|([^<\\w]+)|<(\\/?[a-zA-Z-])[^>]*>|(<!--.*?-->|\\(%.*?%\\)|\\{%.*?%\\})');
        if (mb_ereg_search()) {
            $match = mb_ereg_search_getregs();
            do {
                // Anything other than a word or an inline tag ends the part.
                if ($ignore || $match[2] || $match[4] || $match[3] && !mb_ereg_match("\\/?({$inlineTags})", mb_strtolower($match[3], 'UTF-8'))) {
                    // Flush the collected part, highlighted when it matches.
                    if ($currWord && mb_ereg_match($pattern, $currWord)) {
                        $newhtml .= self::markIt($currPart);
                    } else {
                        $newhtml .= $currPart;
                    }
                    $currPart = $currWord = '';

                    if ($ignore) {
                        // Leave ignore mode at the matching closing tag.
                        if ($ignore && $match[3] && $match[3] == $ignore) {
                            $ignore = false;
                        }
                    } else {
                        // Enter ignore mode on a non-self-closing ignored tag.
                        if ($match[3] && mb_ereg_match("{$ignoreTags}", mb_strtolower($match[3], 'UTF-8')) && substr($match[0], -2) != '/>') {
                            $ignore = '/' . $match[3];
                        }
                    }
                    $newhtml .= $match[0];
                } else {
                    if ($match[1]) {
                        $currWord .= mb_strtolower($match[1], 'UTF-8');
                    }
                    $currPart .= $match[0];
                }
                $match = mb_ereg_search_regs();
            } while ($match);

            // Flush whatever is left after the last token.
            if ($currWord && mb_ereg_match($pattern, $currWord)) {
                $newhtml .= self::markIt($currPart);
            } else {
                $newhtml .= $currPart;
            }
        }
    } else {
        // PCRE variant of the same state machine.
        // NOTE(review): with PREG_SET_ORDER trailing unmatched groups may be
        // absent from $match, so $match[2..4] can be undefined here — confirm.
        // NOTE(review): unlike the mbregex branch, the checks here are anchored
        // with ^...$ and the tokeniser uses the /i flag.
        if (preg_match_all('/(\\w+)|([^<\\w]+)|<(\\/?[a-zA-Z-])[^>]*>|(<!--.*?-->|\\(%.*?%\\)|\\{%.*?%\\})/i', $html, $matches, PREG_SET_ORDER)) {
            foreach ($matches as $match) {
                if ($ignore || $match[2] || $match[4] || $match[3] && !preg_match("/^\\/?({$inlineTags})\$/i", $match[3])) {
                    if ($currWord && preg_match("/^{$pattern}\$/", $currWord)) {
                        $newhtml .= self::markIt($currPart);
                    } else {
                        $newhtml .= $currPart;
                    }
                    $currPart = $currWord = '';

                    if ($ignore) {
                        if ($ignore && $match[3] && $match[3] == $ignore) {
                            $ignore = false;
                        }
                    } else {
                        if ($match[3] && preg_match("/^{$ignoreTags}\$/i", $match[3]) && substr($match[0], -2) != '/>') {
                            $ignore = '/' . $match[3];
                        }
                    }
                    $newhtml .= $match[0];
                } else {
                    if ($match[1]) {
                        $currWord .= strtolower($match[1]);
                    }
                    $currPart .= $match[0];
                }
            }

            // Flush whatever is left after the last token.
            if ($currWord && preg_match("/^{$pattern}\$/", $currWord)) {
                $newhtml .= self::markIt($currPart);
            } else {
                $newhtml .= $currPart;
            }
        }
    }

    return $newhtml;
}