/**
 * Parses a textual working-hours description into $this->schedule.
 *
 * The text is lower-cased, a parenthesised remark is removed, and, when the
 * text contains a Cyrillic letter directly followed by a colon, the
 * "label:" prefixes (at the start and after ", ") are stripped. The rest is
 * split on commas: pieces containing "обед" have their second
 * space-separated token passed to parseLunch(); every other piece goes to
 * parseDayInterval(). Finally, each schedule entry that directly carries a
 * 'from' key is wrapped into a one-element list.
 *
 * If any \Exception is thrown, $this->schedule is replaced by the string
 * 'error parsing'.
 *
 * @param string $value Raw schedule text.
 *
 * @return void
 */
public function setBusyHours($value)
{
    $schedule = mb_strtolower($value);
    try {
        // Compare against false explicitly: a "(" at offset 0 yields 0,
        // which the previous truthiness test treated as "not found".
        if (mb_strpos($schedule, '(') !== false) {
            $schedule = trim(mb_ereg_replace('\\(.*\\)', '', $schedule));
        }

        mb_ereg_search_init($schedule);
        if (mb_ereg_search('[а-я]\\:')) {
            $schedule = mb_ereg_replace('^[\\w-]+\\:', '', $schedule);
            $schedule = mb_ereg_replace(', [\\w-]+\\:', ',', $schedule);
        }
        $schedule = trim($schedule);

        foreach (explode(',', $schedule) as $dayInterval) {
            $dayInterval = trim($dayInterval);
            if (mb_strpos($dayInterval, 'обед') === false) {
                $this->parseDayInterval($dayInterval);
            } else {
                // Only the second token is used; the first one is discarded.
                list(, $time) = explode(' ', $dayInterval);
                $this->parseLunch($time);
            }
        }

        foreach ($this->schedule as &$workingTime) {
            if (isset($workingTime['from'])) {
                $workingTime = [$workingTime];
            }
        }
        // Break the reference so later writes to $workingTime cannot alter
        // the last schedule entry.
        unset($workingTime);
    } catch (\Exception $e) {
        $this->schedule = 'error parsing';
    }
}
/**
 * Title-cases a string.
 *
 * With mbstring loaded the string is converted with MB_CASE_TITLE and every
 * "digit followed by an upper-case letter" position is revisited: the letter
 * is lowered again when the character at the same offset of the input
 * matched \p{Ll} (so "1st" does not become "1St"). Without mbstring the
 * string goes through $this->lowerize() and ucwords().
 *
 * @param string $str A string.
 *
 * @return string
 */
protected function formatString($str)
{
    if (!extension_loaded('mbstring')) {
        $str = ucwords($this->lowerize($str));
    } else {
        $titled = mb_convert_case($str, MB_CASE_TITLE, 'UTF-8');

        // The regex encoding is switched to UTF-8 only for this scan and
        // restored afterwards.
        $previousEncoding = mb_regex_encoding();
        mb_regex_encoding('UTF-8');

        // A numeral immediately followed by an upper-case letter.
        mb_ereg_search_init($titled, '[0-9]\\p{Lu}');
        for ($hit = mb_ereg_search_pos(); $hit; $hit = mb_ereg_search_pos()) {
            $letterAt = $hit[0] + 1;
            // Undo the upper-casing only where the input had a lower-case letter.
            if (mb_ereg_match('\\p{Ll}', $str[$letterAt])) {
                $titled[$letterAt] = mb_strtolower($titled[$letterAt]);
            }
        }

        mb_regex_encoding($previousEncoding);
        $str = $titled;
    }

    return str_replace('- ', '-', str_replace('-', '- ', $str));
}
/**
 * Splits a file name into a base part and its trailing extension chain.
 *
 * The pattern captures a lazy prefix (group 1) and, anchored at the end, a
 * run of zero or more ".alnum" segments (group 2). Sets the mbstring regex
 * encoding to UTF-8 as a side effect.
 *
 * @param string $filename File name to split.
 *
 * @return array Two elements: groups 1 and 2 of the match, or
 *               [$filename, ''] when $filename is falsy or the search
 *               could not be initialised.
 */
public static function splitExtension($filename)
{
    mb_regex_encoding('UTF-8');
    $re = "(.*?)((\\.[[A-Z][a-z][0-9]]+)*)\$";

    if (!$filename || !mb_ereg_search_init($filename, $re)) {
        return [$filename, ''];
    }

    $groups = mb_ereg_search_regs($re);

    return array_slice($groups, 1, 2);
}
/**
 * Collects every match of $pattern in $subject.
 *
 * @param string $pattern     mbstring regular expression.
 * @param string $subject     Text to search.
 * @param array  $subpatterns Receives one register array per match, in
 *                            order; left untouched when initialisation fails.
 *
 * @return bool false when the search could not be initialised, true otherwise
 *              (also when nothing matched).
 */
function mb_ereg_match_all($pattern, $subject, array &$subpatterns)
{
    if (!mb_ereg_search_init($subject, $pattern)) {
        return false;
    }

    $subpatterns = array();
    for ($hit = mb_ereg_search_regs(); $hit; $hit = mb_ereg_search_regs()) {
        $subpatterns[] = $hit;
    }

    return true;
}
/**
 * Runs a repeated mb_ereg search in a given encoding and prints each hit.
 *
 * Subject and pattern are converted from $in_enc to $test_enc before the
 * search. For every match one line "(encoding) (search position) groups" is
 * printed, where groups are the capture groups (the whole match is dropped)
 * joined with "-" and converted back to $in_enc.
 *
 * @param string $test_enc Encoding to search in; also set as regex encoding.
 * @param string $str      Subject, encoded in $in_enc.
 * @param string $look_for Pattern, encoded in $in_enc.
 * @param string $opt      Option string passed to mb_ereg_search_init().
 * @param string $in_enc   Encoding of the inputs and of the printed groups.
 *
 * @return void
 */
function test_search($test_enc, $str, $look_for, $opt, $in_enc = 'EUC-JP')
{
    mb_regex_encoding($test_enc);

    $subject = mb_convert_encoding($str, $test_enc, $in_enc);
    $needle = mb_convert_encoding($look_for, $test_enc, $in_enc);
    mb_ereg_search_init($subject, $needle, $opt);

    for ($hit = mb_ereg_search_pos(); $hit; $hit = mb_ereg_search_pos()) {
        $groups = mb_ereg_search_getregs();
        array_shift($groups);
        $joined = is_array($groups) ? implode('-', $groups) : '';

        printf(
            "(%s) (%d) %s\n",
            $test_enc,
            mb_ereg_search_getpos(),
            mb_convert_encoding($joined, $in_enc, $test_enc)
        );
    }
}
/**
 * Translates a language key and fills its {name} placeholders.
 *
 * The key is first passed through $this->translate(). Each "{...}" token in
 * the translated text is then replaced by $args[name]; a name that is absent
 * from $args (or null) is replaced by "<name>-missing".
 *
 * Placeholders are looked up in the translated text as it was before any
 * substitution, so inserted values are not scanned again.
 *
 * @param string $lang_string Language key to translate.
 * @param array  $args        Placeholder name => replacement value.
 *
 * @return string
 */
public function translateArgs($lang_string, $args)
{
    $lang_string = $this->translate($lang_string);

    if (mb_ereg_search_init($lang_string)) {
        while (false != ($vars = mb_ereg_search_regs("{[^{]*}"))) {
            foreach ($vars as $curly_pattern) {
                // Strip the surrounding braces to get the placeholder name.
                $pattern = mb_substr($curly_pattern, 1, mb_strlen($curly_pattern) - 2);

                // Test for the key before reading it, so a missing
                // placeholder does not raise an undefined-index notice.
                $value = isset($args[$pattern]) ? $args[$pattern] : $pattern . '-missing';

                // Literal replacement: the token is text, not a regex, and
                // the value must not be parsed for back-references.
                $lang_string = str_replace($curly_pattern, $value, $lang_string);
            }
        }
    }

    return $lang_string;
}
/**
 * mb_split() with preg_split()-style flags.
 *
 * Pass one walks the string with mb_ereg_search_pos() and records, in byte
 * lengths, the alternating text pieces and delimiters. Pass two cuts the
 * string with mb_strcut() according to $limit and the PREG_SPLIT_* flags.
 *
 * @param string $pattern mbstring regular expression for the delimiter.
 * @param string $string  Text to split.
 * @param int    $limit   Maximum number of pieces when > 0.
 * @param int    $flags   Bitmask of PREG_SPLIT_NO_EMPTY,
 *                        PREG_SPLIT_DELIM_CAPTURE, PREG_SPLIT_OFFSET_CAPTURE.
 *
 * @return array
 */
private static function mbSplit($pattern, $string, $limit = -1, $flags = 0)
{
    $byteLength = strlen($string); // bytes, not characters
    mb_ereg_search_init($string);

    // Each segment is [byte length, is delimiter, delimiter had a truthy group 1].
    $segments = array();
    $cursor = 0;
    while (true) {
        $hit = mb_ereg_search_pos($pattern, '');
        if ($hit === false) {
            break;
        }

        // Text between the previous delimiter and this one.
        $segments[] = array($hit[0] - $cursor, false, null);
        $cursor = $hit[0] + $hit[1];

        // The delimiter itself.
        $regs = mb_ereg_search_getregs();
        $segments[] = array($hit[1], true, isset($regs[1]) && $regs[1]);

        if ($cursor >= $byteLength) {
            break;
        }
    }
    // Whatever follows the last delimiter (possibly empty).
    $segments[] = array($byteLength - $cursor, false, null);

    // Non-zero exactly when empty pieces are to be kept.
    $keepEmpty = ~$flags & PREG_SPLIT_NO_EMPTY;
    $withOffsets = $flags & PREG_SPLIT_OFFSET_CAPTURE;
    $withDelimiters = $flags & PREG_SPLIT_DELIM_CAPTURE;

    $parts = array();
    $cursor = 0;
    $count = 1;
    foreach ($segments as $segment) {
        list($size, $isDelimiter, $isCaptured) = $segment;

        if ($limit > 0 && !$isDelimiter && ($size || $keepEmpty) && ++$count > $limit) {
            // Limit reached: the rest of the string becomes the final piece.
            if ($size > 0 || $keepEmpty) {
                $rest = mb_strcut($string, $cursor);
                $parts[] = $withOffsets ? array($rest, $cursor) : $rest;
            }
            break;
        }

        if ((!$isDelimiter || ($withDelimiters && $isCaptured)) && ($size || $keepEmpty)) {
            $piece = mb_strcut($string, $cursor, $size);
            $parts[] = $withOffsets ? array($piece, $cursor) : $piece;
        }

        $cursor += $size;
    }

    return $parts;
}
<?php
// Three-call sequence against the mbstring regex API: initialise a search
// with an empty subject, pattern and option string, run an unrelated
// mb_split() with an empty pattern, then ask for the next match registers of
// the search set up by the first call.
// NOTE(review): presumably a regression reproducer for mbregex search state;
// the expected output is not visible here, so confirm against the test it
// belongs to.
mb_ereg_search_init("", "", "");
mb_split("", "");
mb_ereg_search_regs();
/**
 * Parses raw iCalendar text into $this->ical and returns it by reference.
 *
 * Blank lines are dropped, the BEGIN:VCALENDAR / END:VCALENDAR pair is
 * located, and the lines between them are walked with the array's internal
 * pointer. BEGIN:/END: lines maintain the current group and parent group and
 * append placeholder entries to $this->ical; other lines are passed to
 * addItem(), or to concatItem() when they start with a space or tab. Lines
 * starting with "X-" are ignored.
 *
 * $this->ical stays false when no BEGIN/END pair is found.
 *
 * @param string $data Raw calendar text.
 *
 * @return array|false Reference to $this->ical.
 */
public function &load($data)
{
    $this->ical = false;
    // NOTE(review): in mbstring option strings "b" selects POSIX basic syntax,
    // under which "(", "|" and "+" would be literals in the BEGIN/END pattern
    // below. Confirm that this option string is intended.
    $regex_opt = 'mib';

    // Split into lines and drop the blank ones.
    $lines = mb_split('[\\r\\n]+', $data);
    if ($lines === false) {
        return $this->ical;
    }
    $last = count($lines);
    for ($i = 0; $i < $last; $i++) {
        if (trim($lines[$i]) == "") {
            unset($lines[$i]);
        }
    }
    $lines = array_values($lines);
    if (!$lines) {
        // Nothing to parse; avoids indexing an empty array below.
        return $this->ical;
    }

    // Fast path: the calendar spans the whole input. Otherwise scan for it.
    $first = 0;
    $last = count($lines) - 1;
    if (!(mb_ereg_match('^BEGIN:VCALENDAR', $lines[$first], $regex_opt)
        && mb_ereg_match('^END:VCALENDAR', $lines[$last], $regex_opt))
    ) {
        $first = null;
        $last = null;
        // Iterate by value: $line is reused below and must not alias $lines.
        foreach ($lines as $i => $line) {
            if (mb_ereg_match('^BEGIN:VCALENDAR', $line, $regex_opt)) {
                $first = $i;
            }
            if (mb_ereg_match('^END:VCALENDAR', $line, $regex_opt)) {
                $last = $i;
                break;
            }
        }
    }

    if (!is_null($first) && !is_null($last)) {
        $lines = array_slice($lines, $first + 1, $last - $first - 1, true);
        $group = null;
        $parentgroup = null;
        $this->ical = [];
        reset($lines);

        while (true) {
            // Replacement for each(), which was removed in PHP 8.0: stop once
            // the pointer is past the end, otherwise advance it.
            // NOTE(review): as before, the line examined is the one AFTER the
            // pointer's previous position, so the first line of the slice is
            // never processed. Confirm whether that is intended.
            if (key($lines) === null) {
                break;
            }
            next($lines);
            $line = current($lines);

            if ($line === false || substr($line, 0, 2) === 'X-' || trim($line) == '') {
                continue;
            }

            // (VALARM|VTODO|VJOURNAL|VEVENT|VFREEBUSY|VCALENDAR|DAYLIGHT|VTIMEZONE|STANDARD)
            $pattern = '^(BEGIN|END)\\:(.+)$';
            mb_ereg_search_init($line);
            $regs = mb_ereg_search_regs($pattern, $regex_opt);
            if ($regs) {
                // $regs: 0 => BEGIN:VEVENT, 1 => BEGIN, 2 => VEVENT
                switch ($regs[1]) {
                    case 'BEGIN':
                        if (!is_null($group)) {
                            $parentgroup = $group;
                        }
                        $group = trim($regs[2]);
                        // Append a placeholder entry for the new group.
                        if (is_null($parentgroup)) {
                            if (!array_key_exists($group, $this->ical)) {
                                $this->ical[$group] = [null];
                            } else {
                                $this->ical[$group][] = null;
                            }
                        } else {
                            if (!array_key_exists($parentgroup, $this->ical)) {
                                $this->ical[$parentgroup] = [$group => [null]];
                            }
                            if (!array_key_exists($group, $this->ical[$parentgroup])) {
                                $this->ical[$parentgroup][$group] = [null];
                            } else {
                                $this->ical[$parentgroup][$group][] = null;
                            }
                        }
                        break;
                    case 'END':
                        if (is_null($group)) {
                            $parentgroup = null;
                        }
                        $group = null;
                        break;
                }
                continue;
            }

            // Properties such as "ATTENDEE" may continue on following lines
            // that start with a space or tab; fold them into one line.
            // NOTE(review): strpos(':', $line) looks for $line inside ":",
            // so this test is true for almost every line. The argument order
            // is kept as found; confirm the intent before changing it.
            if (!in_array($line[0], [" ", "\t"]) && strpos(':', $line) === false) {
                $r = current($lines);
                $concatenar = next($lines);
                while ($concatenar && in_array($concatenar[0], [" ", "\t"])) {
                    $r .= substr($concatenar, 1);
                    $concatenar = next($lines);
                }
                // Step back so the next advance lands on the first line that
                // was not folded in.
                prev($lines);
                if ($r !== $line) {
                    $line = $r;
                }
            }

            if (!in_array($line[0], [" ", "\t"])) {
                $this->addItem($line, $group, $parentgroup);
            } else {
                $this->concatItem($line);
            }
        }
    }

    return $this->ical;
}
/**
 * Re-cases a string token by token.
 *
 * Tokens are the successive matches of StructuredData::$SPLITTER_REGEX. For
 * each token, in order of precedence:
 *  - in name mode an all-upper-case token longer than one character is
 *    lowered first (with "MC"/"O'" prefixes keeping their 1st and 3rd letter);
 *  - tokens listed in $UPPERCASE_WORDS, or already all upper-case, are
 *    emitted upper-cased;
 *  - when capitalisation is not forced, $NAME_WORDS tokens are kept as they
 *    are and (outside name mode) $LOWERCASE_WORDS tokens are lowered;
 *  - all-lower-case tokens are title-cased; mixed-case tokens are kept.
 * After ":", "?" or "!" (outside name mode) the next token is force-capitalised.
 *
 * @param string $str     Text to convert.
 * @param bool   $isName  Apply the personal-name rules.
 * @param bool   $mustCap Force capitalisation of the first token.
 *
 * @return string
 */
public static function capitalizeTitleCase($str, $isName = false, $mustCap = true)
{
    if (!$str) {
        return '';
    }

    $ret = '';
    mb_ereg_search_init($str, StructuredData::$SPLITTER_REGEX);

    for ($m = mb_ereg_search_regs(); $m; $m = mb_ereg_search_regs()) {
        $w = $m[0];
        $ucw = mb_convert_case($w, MB_CASE_UPPER);
        $lcw = mb_convert_case($w, MB_CASE_LOWER);

        // Turn all-uppercase names into lowercase, keeping Mc/O' capitals.
        if ($isName && mb_strlen($w) > 1 && $w == $ucw) {
            $hasPrefix = mb_strlen($w) > 3
                && (mb_substr($w, 0, 2) == 'MC' || mb_substr($w, 0, 2) == "O'");
            $w = $hasPrefix
                ? mb_substr($w, 0, 1) . mb_substr($lcw, 1, 1) . mb_substr($w, 2, 1) . mb_substr($lcw, 3)
                : $lcw;
        }

        if (isset(StructuredData::$UPPERCASE_WORDS[$ucw]) || $w == $ucw) {
            // upper -> upper
            $ret .= $ucw;
        } elseif (!$mustCap && isset(StructuredData::$NAME_WORDS[$lcw])) {
            // name-word: keep as-is
            $ret .= $w;
        } elseif (!$isName && !$mustCap && isset(StructuredData::$LOWERCASE_WORDS[$lcw])) {
            // upper/mixed/lower -> lower
            $ret .= $lcw;
        } elseif ($w == $lcw) {
            // lower -> mixed
            $ret .= mb_convert_case($w, MB_CASE_TITLE);
        } else {
            // mixed -> mixed
            $ret .= $w;
        }

        // Sentence-ending punctuation forces a capital on the next token.
        $w = trim($w);
        $mustCap = !$isName && ($w == ':' || $w == '?' || $w == '!');
    }

    return $ret;
}
/**
 * Generates a slug from a string.
 *
 * With mbstring support, or with a UTF-8 charset, the runs of word
 * characters, "_" and "-" are extracted and joined with "-". Otherwise a
 * fixed set of punctuation is removed or replaced by "-". Leading and
 * trailing "-" and "_" are trimmed and the result is cut to $maxLength bytes.
 *
 * @access public
 * @param string  $str       String to turn into a slug.
 * @param string  $default   Slug to fall back to when nothing usable remains.
 * @param integer $maxLength Maximum slug length.
 * @return string
 */
public static function slugName($str, $default = NULL, $maxLength = 128)
{
    $str = trim($str);
    if (!strlen($str)) {
        return $default;
    }

    if (__TYPECHO_MB_SUPPORTED__) {
        mb_regex_encoding(self::$charset);
        mb_ereg_search_init($str, "[\\w" . preg_quote('_-') . "]+");

        $pieces = array();
        if (mb_ereg_search()) {
            for ($regs = mb_ereg_search_getregs(); $regs; $regs = mb_ereg_search_regs()) {
                $pieces[] = $regs[0];
            }
        }
        $str = implode('-', $pieces);
    } elseif ('UTF-8' == strtoupper(self::$charset)) {
        if (preg_match_all("/[\\w" . preg_quote('_-') . "]+/u", $str, $matches)) {
            $str = implode('-', $matches[0]);
        }
    } else {
        $str = str_replace(array("'", ":", "\\", "/", '"'), "", $str);
        $str = str_replace(array("+", ",", ' ', ',', ' ', ".", "?", "=", "&", "!", "<", ">", "(", ")", "[", "]", "{", "}"), "-", $str);
    }

    $str = trim($str, '-_');
    if (!strlen($str)) {
        $str = $default;
    }

    return substr($str, 0, $maxLength);
}
/**
 * Turns text into an array of words.
 *
 * Every non-word character is doubled first so that neighbouring words do
 * not compete for the same separator, then the words are extracted with
 * whichever engine is available (PCRE with Unicode properties, mbstring
 * regex, or plain PCRE). Words whose length lies outside the configured
 * fulltext_mysql_min_word_len / fulltext_mysql_max_word_len range are dropped.
 *
 * @param string $text Text to split.
 *
 * @return array List of words.
 */
function split_message($text)
{
	global $config;

	// Separate adjacent apostrophes before doubling the separators.
	$prepared = str_replace('\'\'', '\' \'', trim($text));

	if ($this->pcre_properties)
	{
		$text = preg_replace('#([^\p{L}\p{N}\'*])#u', '$1$1', $prepared);

		$matches = array();
		preg_match_all('#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u', $text, $matches);
		$text = $matches[1];
	}
	else if ($this->mbstring_regex)
	{
		$text = mb_ereg_replace('([^\w\'*])', '\\1\\1', $prepared);

		mb_ereg_search_init($text, '(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)');

		$text = array();
		for ($word = mb_ereg_search_regs(); $word; $word = mb_ereg_search_regs())
		{
			$text[] = $word[1];
		}
	}
	else
	{
		$text = preg_replace('#([^\w\'*])#u', '$1$1', $prepared);

		$matches = array();
		preg_match_all('#(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)#u', $text, $matches);
		$text = $matches[1];
	}

	// Remove too short or too long words.
	$kept = array();
	foreach ($text as $candidate)
	{
		$candidate = trim($candidate);
		$length = utf8_strlen($candidate);

		if ($length < $config['fulltext_mysql_min_word_len'] || $length > $config['fulltext_mysql_max_word_len'])
		{
			continue;
		}

		$kept[] = $candidate;
	}

	return $kept;
}
/**
 * Replaces %name% placeholders in a string with language variables.
 *
 * Every "%...%" token in $str is looked up as $variables[$lang][name] and
 * replaced when such an entry exists. Tokens without an entry are left in
 * place, and so is a token whose name equals the whole input string (guard
 * against self-reference).
 *
 * Placeholders are located in the input as it was before any substitution,
 * so inserted values are not scanned again.
 *
 * @param array  $variables Language code => (name => replacement); taken by
 *                          reference but not modified here.
 * @param string $lang      Language code to read replacements from.
 * @param string $str       Text containing %name% placeholders.
 *
 * @return string
 */
public function substitueVariables(&$variables, $lang, $str)
{
    $langstr = $str;

    if (mb_ereg_search_init($langstr)) {
        while (false != ($vars = mb_ereg_search_regs("%[^%]*%"))) {
            foreach ($vars as $curly_pattern) {
                // $curly_pattern is the full "%name%" token; strip the
                // delimiters to get the variable name.
                $pattern = mb_substr($curly_pattern, 1, mb_strlen($curly_pattern) - 2);

                // Avoid a recursive loop on a self-referencing string.
                if ($pattern != $str && isset($variables[$lang][$pattern])) {
                    // Literal replacement: the token is text, not a regex,
                    // and the value must not be parsed for back-references.
                    $langstr = str_replace($curly_pattern, $variables[$lang][$pattern], $langstr);
                }
            }
        }
    }

    return $langstr;
}
// Script fragment exercising the mbstring regex API and dumping results with
// var_dump(): registers of a search started earlier (outside this fragment),
// mb_ereg_search_setpos(), a pattern supplied at search time, a "\w+" search
// on a UTF-8 subject, mb_ereg() capture groups on a date string, and
// mb_eregi_replace() with back-references in the replacement.
$r = mb_ereg_search_getregs(); // get first result var_dump($r === array("PrÜÝ" . "fung")); var_dump(mb_ereg_search_setpos(15)); $r = mb_ereg_search_regs(); // get next result var_dump($r == array("pÜ")); $str = "PrÜÝ" . "fung abc pÜ"; mb_regex_encoding("UTF-8"); mb_ereg_search_init($str); $r = mb_ereg_search_regs("abc", "ms"); var_dump($r); $str = "PrÜÝ" . "fung abc pÜ"; $reg = "\\w+"; mb_regex_encoding("UTF-8"); mb_ereg_search_init($str, $reg); $r = mb_ereg_search(); $r = mb_ereg_search_getregs(); // get first result var_dump($r === array("PrÜÝ" . "fung")); $date = "1973-04-30"; mb_ereg("([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})", $date, $regs); var_dump($regs[3]); var_dump($regs[2]); var_dump($regs[1]); var_dump($regs[0]); $pattern = "(>[^<]*)(suffix)"; $replacement = "\\1<span class=\"search\">\\2</span>"; $body = ">whateversuffix"; $body = mb_eregi_replace($pattern, $replacement, $body); var_dump($body);
/**
 * Tests $text against a delimited regular expression.
 *
 * With mbstring regex support the delimiters and trailing flags
 * ($this->_re_flags) are cut off the expression and the mb_ereg search API
 * is used; otherwise the expression is handed to preg_match() unchanged.
 *
 * NOTE(review): on success the mbstring path stores the whole register array
 * in $match, while the PCRE path stores only the matched text. Confirm which
 * shape callers expect.
 *
 * @param string $regexp Delimited expression including flags.
 * @param string $text   Subject text.
 * @param mixed  $match  Receives the match on success.
 *
 * @return bool true when the expression matched.
 */
function match($regexp, $text, &$match)
{
    if (is_callable('mb_ereg_search_init')) {
        // Drop the leading delimiter, the closing delimiter and the flags.
        $bare = substr($regexp, 1, strlen($regexp) - 2 - strlen($this->_re_flags));

        mb_ereg_search_init($text);
        if (!mb_ereg_search($bare)) {
            return false;
        }

        $match = mb_ereg_search_getregs();
        return true;
    }

    if (!preg_match($regexp, $text, $match)) {
        return false;
    }

    $match = $match[0];
    return true;
}
/**
 * Highlights the given words inside an HTML fragment.
 *
 * The HTML is tokenised into words, non-word text, tags and
 * comment/template blocks. Consecutive words and inline tags are collected
 * into a "current part"; at every boundary (non-word text, a non-inline tag,
 * a comment/template block) the collected lower-cased word is tested against
 * the search words and the part is emitted through self::markIt() on a hit,
 * or unchanged otherwise. Everything between an opening script/style/textarea
 * tag and its closing tag is copied through untouched.
 *
 * Search words of three or more characters match as prefixes (a "\w*" suffix
 * is appended); shorter words must match the whole word.
 *
 * NOTE(review): search words are inserted into the pattern unescaped, so
 * regex metacharacters in a word are interpreted. Left as found because
 * callers may rely on it.
 *
 * @param string $html  HTML to process.
 * @param array  $words Words to highlight.
 *
 * @return string HTML with the matching parts wrapped by self::markIt().
 */
public static function mark($html, $words = array())
{
    if (!$words || count($words) <= 0) {
        return $html;
    }

    $ismb = function_exists('mb_ereg_search');
    $inlineTags = 'b|i|em|strong|tt|big|small|strike|u|span|a';
    $ignoreTags = 'script|style|textarea';

    // Alternation of all search words, lower-cased.
    $pattern = '';
    foreach ($words as $word) {
        if ($pattern != '') {
            $pattern .= '|';
        }
        $pattern .= $ismb ? mb_strtolower($word, 'UTF-8') : strtolower($word);
        if ($ismb && mb_strlen($word, 'UTF-8') >= 3 || !$ismb && strlen($word) >= 3) {
            $pattern .= '\\w*';
        }
    }

    $newhtml = '';
    $currPart = '';
    $currWord = '';
    $ignore = false;

    if ($ismb) {
        mb_regex_encoding('UTF-8');
        // Group 3 captures the whole tag name. The earlier single-character
        // class could never equal a multi-letter name from the tag lists.
        mb_ereg_search_init($html, '(\\w+)|([^<\\w]+)|<(\\/?[a-zA-Z-]+)[^>]*>|(<!--.*?-->|\\(%.*?%\\)|\\{%.*?%\\})');
        if (mb_ereg_search()) {
            $match = mb_ereg_search_getregs();
            do {
                $tag = !empty($match[3]) ? mb_strtolower($match[3], 'UTF-8') : '';

                // mb_ereg_match() anchors only at the start, so every test
                // adds \z to require a full match, as the PCRE branch does.
                $isBoundary = $ignore
                    || !empty($match[2])
                    || !empty($match[4])
                    || $tag !== '' && !mb_ereg_match("\\/?(?:{$inlineTags})\\z", $tag);

                if ($isBoundary) {
                    if ($currWord && mb_ereg_match("(?:{$pattern})\\z", $currWord)) {
                        $newhtml .= self::markIt($currPart);
                    } else {
                        $newhtml .= $currPart;
                    }
                    $currPart = $currWord = '';

                    if ($ignore) {
                        if (!empty($match[3]) && $match[3] == $ignore) {
                            $ignore = false;
                        }
                    } elseif ($tag !== '' && mb_ereg_match("(?:{$ignoreTags})\\z", $tag) && substr($match[0], -2) != '/>') {
                        // Remember the closing tag that ends the ignored region.
                        $ignore = '/' . $match[3];
                    }
                    $newhtml .= $match[0];
                } else {
                    if (!empty($match[1])) {
                        $currWord .= mb_strtolower($match[1], 'UTF-8');
                    }
                    $currPart .= $match[0];
                }

                $match = mb_ereg_search_regs();
            } while ($match);

            // Flush whatever is still pending after the last token.
            if ($currWord && mb_ereg_match("(?:{$pattern})\\z", $currWord)) {
                $newhtml .= self::markIt($currPart);
            } else {
                $newhtml .= $currPart;
            }
        }
    } else {
        if (preg_match_all('/(\\w+)|([^<\\w]+)|<(\\/?[a-zA-Z-]+)[^>]*>|(<!--.*?-->|\\(%.*?%\\)|\\{%.*?%\\})/i', $html, $matches, PREG_SET_ORDER)) {
            foreach ($matches as $match) {
                // PREG_SET_ORDER omits trailing unmatched groups, hence empty().
                $isBoundary = $ignore
                    || !empty($match[2])
                    || !empty($match[4])
                    || !empty($match[3]) && !preg_match("/^\\/?({$inlineTags})\$/i", $match[3]);

                if ($isBoundary) {
                    // The alternation is grouped so that ^ and $ apply to
                    // every search word, not just the first and last.
                    if ($currWord && preg_match("/^(?:{$pattern})\$/", $currWord)) {
                        $newhtml .= self::markIt($currPart);
                    } else {
                        $newhtml .= $currPart;
                    }
                    $currPart = $currWord = '';

                    if ($ignore) {
                        if (!empty($match[3]) && $match[3] == $ignore) {
                            $ignore = false;
                        }
                    } elseif (!empty($match[3]) && preg_match("/^(?:{$ignoreTags})\$/i", $match[3]) && substr($match[0], -2) != '/>') {
                        $ignore = '/' . $match[3];
                    }
                    $newhtml .= $match[0];
                } else {
                    if (!empty($match[1])) {
                        $currWord .= strtolower($match[1]);
                    }
                    $currPart .= $match[0];
                }
            }

            if ($currWord && preg_match("/^(?:{$pattern})\$/", $currWord)) {
                $newhtml .= self::markIt($currPart);
            } else {
                $newhtml .= $currPart;
            }
        }
    }

    return $newhtml;
}
/**
 * Matches $pattern exactly at a given position of $this->text.
 *
 * The pattern is prefixed with \G so it can only match at the position the
 * search starts from.
 *
 * NOTE(review): the end-of-input guard compares $this->position with
 * $this->length, not the $position argument. Confirm that this is intended.
 *
 * @param string   $pattern  mbstring regular expression (without \G).
 * @param int|null $position Start position; defaults to $this->bytepos.
 * @param string   $options  mbstring regex option string.
 *
 * @return array|false Match registers, or false when there is no match or
 *                     the input is exhausted.
 */
protected function match($pattern, $position = null, $options = 'msi')
{
    /*{{{*/
    $startAt = (null === $position) ? $this->bytepos : $position;

    if ($this->position >= $this->length) {
        return false;
    }

    $anchored = '\\G' . $pattern;
    mb_ereg_search_init($this->text, $anchored, $options);
    mb_ereg_search_setpos($startAt);

    return mb_ereg_search_regs();
}
<?php
// homepage:
//
// Searches a Cyrillic subject for the first occurrence of "ов" with the
// mb_ereg search API and dumps the resulting match registers.
$subject = "Алексей Федорович Карамазов был Алексей Федорович Карамазов был";

mb_ereg_search_init($subject);
$match = mb_ereg_search_regs("ов");

var_dump($match);
if (isset($_POST['submit']) && isset($_POST['page']) && $_POST['page'] == $i) { echo "selected"; } echo ">" . $names[$i] . "</option>"; } echo "</select>\n\t\t\t<input type='submit' name='submit' value='Edit'></form>"; //if there is a GET variable make a POST variable if (isset($_GET['page'])) { $_POST['page'] = $_GET['page']; $_POST['submit'] = "submit"; } //when they select which page, this is the form to edit it if (isset($_POST['submit']) && isset($_POST['page'])) { $page = $names[$_POST['page']]; $menu = $menu_names[$_POST['page']]; //read the file $myFile = "../pages/{$menu}/{$page}.php"; $fh = fopen($myFile, "r"); $theData = fread($fh, filesize($myFile)); fclose($fh); //search it for the div tag mb_ereg_search_init($theData, "<div class='info'>"); $arro = mb_ereg_search_pos("<div class='info'>"); mb_ereg_search_init($theData, "</div>"); $arroc = mb_ereg_search_pos("</div>"); echo "<form action='change_page.php' method='POST'>\n\t\t\t\t<textarea rows='15' cols='40' name='body'>"; //get the substring between the tags and get rid of the whitespace before and after echo trim(strip_tags(substr($theData, $arro[0] + $arro[1], $arroc[0] - ($arro[0] + $arro[1])))); echo "</textarea>\n\t\t\t\t<input type='hidden' name='x' value='{$x[$_POST['page']]}'>\n\t\t\t\t<input type='hidden' name='y' value='{$y[$_POST['page']]}'>\n\t\t\t\t<input type='hidden' name='z' value='{$z[$_POST['page']]}'>\n\t\t\t\t<input type='hidden' name='name' value='{$page}'>\n\t\t\t\t<input type='hidden' name='html' value='n'>\n\t\t\t\t<input type='hidden' name='nav' value='n'>\n\t\t\t\t<input type='submit' name='submit' value='Save'>\n\t\t\t\t</form>"; } include "../includes/layout/b2.php";
<?php
// Runs a \G-anchored whitespace search from a fixed start position of a
// UTF-8 subject and dumps the match registers.
mb_regex_encoding('utf-8');
mb_internal_encoding('utf-8');

$haystack = "foo bar bà€œz baz";
$startAt = 13;
$anchored = '\\G' . '\\s+';

mb_ereg_search_init($haystack, $anchored, 'msi');
mb_ereg_search_setpos($startAt);

var_dump(mb_ereg_search_regs());
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ $lang = $_REQUEST["lang"]; $xml = new SimpleXMLElement(file_get_contents("php://input")); $spell = pspell_new($lang, "", "", "utf-8", PSPELL_NORMAL); $suggestions = array(); $offset = 0; mb_regex_encoding("UTF-8"); foreach (mb_split("\n", $xml->text) as $line) { $len = mb_strlen($line, "UTF-8"); mb_ereg_search_init($line, "\\w+"); while (($wpos = mb_ereg_search_pos()) != FALSE) { $word = mb_substr($line, $wpos[0], $wpos[1]); if (!pspell_check($spell, $word)) { $woffset = mb_strlen(mb_substr($line, 0, $wpos[0]), "UTF-8"); $wlen = mb_strlen($word, "UTF-8"); array_push($suggestions, array($offset + $woffset, $wlen, pspell_suggest($spell, $word))); } } $offset += $len + 1; } $xml = new SimpleXMLElement("<spellresponse/>"); $xml->addAttribute("error", count($suggestions) ? "1" : "0"); foreach ($suggestions as $s) { $c = $xml->addChild("c", join("\t", $s[2])); $c->addAttribute("o", $s[0]);
/**
 * Decides whether a message is ASCII art (AA).
 *
 * A message with fewer than $this->_ln occurrences of $this->_lb is never
 * AA. Otherwise it is AA when it contains more than five occurrences of the
 * space needle, or when self::REGEX matches three times in a row.
 *
 * @param string $msg
 * @return bool
 */
public function detectAA($msg)
{
    if (substr_count($msg, $this->_lb) < $this->_ln) {
        return false;
    }

    if (substr_count($msg, ' ') > 5) {
        return true;
    }

    if (!mb_ereg_search_init($msg, self::REGEX)) {
        return false;
    }

    // Three consecutive hits are required; stop at the first miss.
    for ($hits = 0; $hits < 3; $hits++) {
        if (!mb_ereg_search()) {
            return false;
        }
    }

    return true;
}
/**
 * Collects translatable strings from the source files below a directory.
 *
 * Every file with an extension from self::$ALLOW_EXTENSIONS (except those
 * whose base name contains "jsLangs") is pre-checked against
 * self::$PARSE_REGEXPR with PCRE, then scanned line by line with the mb_ereg
 * search API. Capture group 1 of each hit, with whitespace runs collapsed to
 * one space, is the key; the "path:line" locations are stored under it.
 * Strings found in .js files are also recorded separately.
 *
 * A directory already listed in self::$PARSED_PATHS is not scanned again.
 * The static collectors are reset before returning.
 *
 * @param string $dir Directory to scan recursively.
 *
 * @return array ['parsed_langs' => string => list of "path:line",
 *                'js_langs'     => string => string]
 */
public function findLangs($dir = '')
{
    if (!in_array($dir, self::$PARSED_PATHS)) {
        $baseDir = new \RecursiveIteratorIterator(new \RecursiveDirectoryIterator($dir));

        foreach ($baseDir as $file) {
            if (!$file->isFile()) {
                continue;
            }
            if (!in_array($file->getExtension(), self::$ALLOW_EXTENSIONS) || strstr($file->getBasename(), 'jsLangs')) {
                continue;
            }

            $content = @file($file->getPathname());
            if ($content === false) {
                // Unreadable file: skip it rather than pass false to implode().
                continue;
            }

            // Cheap pre-check on the whole file before the per-line scan.
            $implode_content = implode(' ', $content);
            $lang_exist = FALSE;
            foreach (self::$PARSE_REGEXPR as $regexpr) {
                $lang_exist = $lang_exist || preg_match('/' . $regexpr . '/', $implode_content);
            }
            if (!$lang_exist) {
                continue;
            }

            $path = str_replace("\\", "/", $file->getPathname());
            $isJs = $file->getExtension() == 'js';

            foreach ($content as $line_number => $line) {
                foreach (self::$PARSE_REGEXPR as $regexpr) {
                    mb_regex_encoding("UTF-8");
                    mb_ereg_search_init($line, $regexpr);
                    if (!mb_ereg_search()) {
                        continue;
                    }

                    $lang = mb_ereg_search_getregs(); // first result
                    do {
                        // mb_ereg patterns take no delimiters; the former
                        // '!\s+!' only matched whitespace between two literal
                        // "!" characters, so nothing was ever collapsed.
                        $origin = mb_ereg_replace('\\s+', ' ', $lang[1]);

                        if (empty(self::$FINDED_LANGS[$origin])) {
                            self::$FINDED_LANGS[$origin] = array();
                        }
                        if ($isJs) {
                            self::$FINDED_JS_LANGS[$origin] = $origin;
                        }
                        array_push(self::$FINDED_LANGS[$origin], $path . ':' . ($line_number + 1));

                        $lang = mb_ereg_search_regs(); // next result
                    } while ($lang);
                }
            }
        }
    }

    self::$PARSED_PATHS[] = $dir;

    $data = array('parsed_langs' => self::$FINDED_LANGS, 'js_langs' => self::$FINDED_JS_LANGS);
    self::$FINDED_LANGS = array();
    self::$FINDED_JS_LANGS = array();

    return $data;
}
/**
 * Applies pattern/replacement pairs and returns only the subjects that changed.
 *
 * Each subject is run through every pattern/replacement pair in order. A
 * pair is applied only when its pattern is found in the current value of the
 * subject; subjects that no pattern matched are left out of the result. Keys
 * of the input are preserved.
 *
 * Warning: make sure the callback does not trigger any errors, or PHP will
 * just die with some weird exit code.
 *
 * @param string|string[]         $pattern     Pattern or array of patterns
 * @param callable|string|mixed[] $replacement Replacement (string or callback) or array of replacements
 * @param string|string[]         $subject     Subject or array of subjects
 * @param string                  $option      Option
 * @return string[] Array of filtered subjects
 * @throws MbRegexException When compilation error occurs
 * @link http://php.net/function.mb-ereg-search.php
 * @link http://php.net/function.mb-ereg-replace.php
 * @link http://php.net/function.mb-ereg-replace-callback.php
 */
public static function filter($pattern, $replacement, $subject, $option = '')
{
    static::setUp($pattern);

    $replaceMap = self::prepareReplaceMap($pattern, $replacement);
    $filtered = array();

    foreach ((array) $subject as $key => $current) {
        foreach ($replaceMap as $pair) {
            list($pairPattern, $pairReplacement) = $pair;

            \mb_ereg_search_init($current, $pairPattern, $option);
            if (\mb_ereg_search()) {
                $current = self::execReplace($pairPattern, $pairReplacement, $current, $option);
                $filtered[$key] = $current;
            }
        }
    }

    static::tearDown();

    return $filtered;
}
<?php /* Codeine * @author bergstein@trickyplan.com * @description * @package Codeine * @version 8.x */ setFn('Match', function ($Call) { $Pockets = null; mb_ereg($Call['Pattern'], $Call['Value'], $Pockets, $Call['Regex Options']); return $Pockets; }); setFn('All', function ($Call) { $Results = []; mb_ereg_search_init($Call['Value'], $Call['Pattern'], $Call['Regex Options']); $Result = mb_ereg_search(); if ($Result) { $Result = mb_ereg_search_getregs(); //get first result do { foreach ($Result as $IX => $Value) { $Results[$IX][] = $Value; } $Result = mb_ereg_search_regs(); //get next result } while ($Result); } else { $Results = false; } return $Results;
/**
 * Checks a column for special characters such as the dialect's delimiter
 * and quote character.
 *
 * NOTE(review): the pattern is a bracket expression, so besides the
 * delimiter and the quote character it also matches ".", "*" and "|".
 * Confirm whether an alternation was intended.
 *
 * @access private
 * @param string $column Column value (taken by reference, not modified).
 * @return boolean true when a special character is present.
 */
private function has_special_chars(&$column)
{
    $dialect = $this->dialect;
    $pattern = sprintf('[.*%s|.*%s]', $dialect->delimiter, $dialect->quote_char);

    mb_ereg_search_init($column, $pattern);

    return mb_ereg_search();
}
/**
 * Tests $text against a delimited regular expression and reports the
 * matched text.
 *
 * With mbstring regex support the delimiters and trailing flags
 * ($this->_re_flags) are cut off the expression and the mb_ereg search API
 * is used; otherwise the expression is handed to preg_match() unchanged.
 *
 * $match is taken by reference. It used to be a by-value parameter, so the
 * value assigned to it never reached the caller.
 *
 * @param string $regexp Delimited expression including flags.
 * @param string $text   Subject text.
 * @param string $match  Receives the matched text on success; untouched
 *                       otherwise.
 *
 * @return bool true when the expression matched.
 */
function match($regexp, $text, &$match)
{
    if (!is_callable('mb_ereg_search_init')) {
        $found = array();
        if (preg_match($regexp, $text, $found) !== 1) {
            return false;
        }
        $match = $found[0];
        return true;
    }

    // Drop the leading delimiter, the closing delimiter and the flags.
    $regexp = substr($regexp, 1, strlen($regexp) - 2 - strlen($this->_re_flags));
    mb_ereg_search_init($text, $regexp);
    if (!mb_ereg_search()) {
        return false;
    }

    $regs = mb_ereg_search_getregs();
    $match = $regs[0];
    return true;
}
/**
 * Adds the words of a text to the index with a weight.
 *
 * The text is split into \w+ runs (mb_ereg search when self::$ismb is set,
 * PCRE otherwise) and lower-cased. For every word, $weight is stored under
 * $this->words[word][$fullid], or added to the value already there.
 *
 * @param mixed  $fullid Identifier the words belong to.
 * @param string $text   Text to index; falsy text is ignored.
 * @param mixed  $weight Weight contributed by each occurrence.
 *
 * @return void
 */
private function addWords($fullid, $text, $weight)
{
    if (!$text) {
        return;
    }

    // Step 1: tokenise.
    $tokens = array();
    if (self::$ismb) {
        mb_ereg_search_init($text, "\\w+");
        if (mb_ereg_search()) {
            for ($hit = mb_ereg_search_getregs(); $hit; $hit = mb_ereg_search_regs()) {
                $tokens[] = mb_strtolower($hit[0], 'UTF-8');
            }
        }
    } else {
        preg_match_all("/\\w+/", $text, $found);
        foreach ($found[0] as $raw) {
            $tokens[] = strtolower($raw);
        }
    }

    // Step 2: accumulate weights.
    foreach ($tokens as $token) {
        if (!isset($this->words[$token])) {
            $this->words[$token] = array($fullid => $weight);
        } elseif (!isset($this->words[$token][$fullid])) {
            $this->words[$token][$fullid] = $weight;
        } else {
            $this->words[$token][$fullid] += $weight;
        }
    }
}
/**
 * Check to see if the username has been taken, or if it is disallowed.
 * Also checks if it includes the " character, which we don't allow in usernames.
 * Used for registering, changing names, and posting anonymously with a username
 *
 * The checks run in this order: equality with the allowed name, forbidden
 * characters, the character class selected by $config['allow_name_chars'],
 * existing user names, existing group names, the disallowed-name patterns.
 *
 * @param string $username The username to check
 * @param string $allowed_username An allowed username, default being $user->data['username']
 *
 * @return mixed Either false if validation succeeded or a string which will be used as the error message (with the variable name appended)
 */
function validate_username($username, $allowed_username = false)
{
	global $config, $db, $user, $cache;

	$clean_username = utf8_clean_string($username);
	$allowed_username = ($allowed_username === false) ? $user->data['username_clean'] : utf8_clean_string($allowed_username);

	if ($allowed_username == $clean_username)
	{
		return false;
	}

	// ... fast checks first.
	// NOTE(review): the source showed the same '"' test twice. The first one
	// is written here as the HTML-escaped form, assuming usernames arrive
	// entity-encoded. Confirm against the upstream file.
	if (strpos($username, '&quot;') !== false || strpos($username, '"') !== false || empty($clean_username))
	{
		return 'INVALID_CHARS';
	}

	$mbstring = $pcre = false;

	// generic UTF-8 character types supported?
	if ((version_compare(PHP_VERSION, '5.1.0', '>=') || version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>=')) && @preg_match('/\\p{L}/u', 'a') !== false)
	{
		$pcre = true;
	}
	else if (function_exists('mb_ereg_match'))
	{
		mb_regex_encoding('UTF-8');
		$mbstring = true;
	}

	switch ($config['allow_name_chars'])
	{
		case 'USERNAME_CHARS_ANY':
			$pcre = true;
			$regex = '.+';
		break;

		case 'USERNAME_ALPHA_ONLY':
			$pcre = true;
			$regex = '[A-Za-z0-9]+';
		break;

		case 'USERNAME_ALPHA_SPACERS':
			$pcre = true;
			$regex = '[A-Za-z0-9-[\\]_+ ]+';
		break;

		case 'USERNAME_LETTER_NUM':
			if ($pcre)
			{
				$regex = '[\\p{Lu}\\p{Ll}\\p{N}]+';
			}
			else if ($mbstring)
			{
				$regex = '[[:upper:][:lower:][:digit:]]+';
			}
			else
			{
				$pcre = true;
				$regex = '[a-zA-Z0-9]+';
			}
		break;

		case 'USERNAME_LETTER_NUM_SPACERS':
			if ($pcre)
			{
				$regex = '[-\\]_+ [\\p{Lu}\\p{Ll}\\p{N}]+';
			}
			else if ($mbstring)
			{
				$regex = '[-\\]_+ [[:upper:][:lower:][:digit:]]+';
			}
			else
			{
				$pcre = true;
				$regex = '[-\\]_+ [a-zA-Z0-9]+';
			}
		break;

		case 'USERNAME_ASCII':
		default:
			$pcre = true;
			$regex = '[\\x01-\\x7F]+';
		break;
	}

	if ($pcre)
	{
		if (!preg_match('#^' . $regex . '$#u', $username))
		{
			return 'INVALID_CHARS';
		}
	}
	else if ($mbstring)
	{
		// Anchor the pattern, not the subject. Previously "^" and "$" were
		// glued onto the username and an array was passed as the option
		// string, so any name with one allowed character passed the check.
		mb_ereg_search_init($username, '^' . $regex . '$');
		if (!mb_ereg_search())
		{
			return 'INVALID_CHARS';
		}
	}

	$sql = 'SELECT username
		FROM ' . USERS_TABLE . "\n\t\tWHERE username_clean = '" . $db->sql_escape($clean_username) . "'";
	$result = $db->sql_query($sql);
	$row = $db->sql_fetchrow($result);
	$db->sql_freeresult($result);

	if ($row)
	{
		return 'USERNAME_TAKEN';
	}

	$sql = 'SELECT group_name
		FROM ' . GROUPS_TABLE . "\n\t\tWHERE LOWER(group_name) = '" . $db->sql_escape(utf8_strtolower($username)) . "'";
	$result = $db->sql_query($sql);
	$row = $db->sql_fetchrow($result);
	$db->sql_freeresult($result);

	if ($row)
	{
		return 'USERNAME_TAKEN';
	}

	// Each disallowed entry is used as a regex body matched against the
	// whole cleaned username.
	$bad_usernames = $cache->obtain_disallowed_usernames();

	foreach ($bad_usernames as $bad_username)
	{
		if (preg_match('#^' . $bad_username . '$#', $clean_username))
		{
			return 'USERNAME_DISALLOWED';
		}
	}

	return false;
}
/**
 * Moves the content of <mtext> elements out of a MathML formula.
 *
 * Algorithm: the MathML is scanned tag by tag.
 *  - An allowed container tag (math, mrow) is pushed on a stack.
 *  - The closing tag of the container on top of the stack pops it; when the
 *    stack becomes empty and no math content was skipped, the (empty)
 *    formula is removed.
 *  - <mtext> is replaced by its text content, placed outside the formula:
 *    in front of a still-empty formula, or between a closed and a re-opened
 *    formula when math content precedes it.
 *  - Any other tag is skipped up to its closing tag and remembered as
 *    "omitted content".
 *
 * All positions are byte offsets (mb_ereg_search_pos / strlen / substr);
 * mb_* string functions are only applied to individual tags.
 *
 * @param string $formula MathML, either plain or in the «…» encoded form.
 * @param bool   $encoded true when tags are delimited by « and ».
 *
 * @return string The rearranged formula; the formula as processed so far
 *                when a tag is not closed.
 */
function wrsqz_extractTextFromMathML($formula, $encoded=true){
    // Quick return if there is nothing to do.
    if(strpos($formula,'mtext')===false) return $formula;

    $opentag = $encoded ? '«' : '<';
    $closetag = $encoded ? '»' : '>';
    // Tags an <mtext> can live inside.
    $allowedtags = array('math', 'mrow');
    // Matches a single tag.
    $pattern = $opentag.'([^'.$opentag.$closetag.']*)'.$closetag;
    mb_ereg_search_init($formula, $pattern);

    $stack = array();               // opened container tags: array(name, full tag)
    $omittedcontent = false;        // is there math content before the current point?
    $lasttag = null;                // name of the container on top of the stack
    $length = strlen($formula);
    $beginformula = strpos($formula, $opentag); // byte offset where the current formula starts
    $pos = array(0,0);

    // CAUTION: if you change this function, be very careful with multibyte
    // and non-multibyte functions.
    while(($pos[0]+$pos[1])<$length){
        $pos = mb_ereg_search_pos($pattern);
        if($pos === false){
            // No further tag: nothing left to rearrange.
            break;
        }
        if($pos[0]+$pos[1] < $length){
            // Always true except on the last iteration.
            mb_ereg_search_setpos($pos[0]+$pos[1]);
        }
        $tag = substr($formula, $pos[0], $pos[1]);
        $trimmedTag = mb_substr($tag,1,-1);

        // Skip self-closed tags.
        if(mb_substr($trimmedTag,-1) == '/'){
            continue;
        }
        // Discard attributes.
        if(($spacepos = mb_strpos($trimmedTag,' '))!==false){
            $trimmedTag = mb_substr($trimmedTag,0,$spacepos);
        }

        if(in_array($trimmedTag,$allowedtags)){
            // Allowed container tag.
            $stack[] = array($trimmedTag,$tag);
            $lasttag = $trimmedTag;
        }else if($trimmedTag == '/'.$lasttag){
            // Closing tag of the current container.
            array_pop($stack);
            $top = end($stack);
            $lasttag = ($top === false) ? null : $top[0];

            // Discard empty formulas.
            if(empty($stack) && !$omittedcontent){
                $formula1 = substr($formula, 0, $beginformula);
                if($pos[0]+$pos[1]<$length){
                    // This is not the end.
                    $formula2 = substr($formula, $pos[0]+$pos[1]);
                    $formula = $formula1 . $formula2;
                    $length = strlen($formula);
                    mb_ereg_search_init($formula, $pattern);
                    mb_ereg_search_setpos($beginformula);
                }else{
                    // Last iteration: $length and the search state are now
                    // stale, but the loop ends here.
                    $formula = $formula1;
                }
            }
        }else if($trimmedTag == 'mtext'){
            $pos2 = mb_ereg_search_pos($opentag.'/mtext'.$closetag);
            if($pos2 === false){
                // Unclosed <mtext>: treated like any other unclosed tag.
                return $formula;
            }
            $text = substr($formula, $pos[0]+$pos[1], $pos2[0]-($pos[0]+$pos[1]));

            // Decode some chars in text.
            if($encoded) $text = wrsqz_mathmlDecode($text);
            // NOTE(review): in the source these two literals appeared with
            // their entities already decoded, which made the first call a
            // no-op and the second one a syntax error. They are reconstructed
            // here as entity-to-entity conversions. Confirm against upstream.
            $text = str_replace('&centerdot;','&middot;',$text);
            $text = str_replace('&apos;','&#39;',$text);

            $formula1 = substr($formula, 0, $pos[0]);            // up to <mtext>
            $formula2 = substr($formula, $pos2[0]+$pos2[1]);     // after </mtext>

            if($omittedcontent){
                // A non-empty formula precedes the text: close it before the
                // text (tail) and re-open the same containers after it (head).
                $copystack = $stack;
                $tail1 = '';
                $head2 = '';
                while($stacktag = array_pop($copystack)){
                    $tail1 .= $opentag.'/'.$stacktag[0].$closetag;
                    $head2 = $stacktag[1] . $head2;
                }
                $formula1 = $formula1 . $tail1;
                $formula2 = $head2 . $formula2;
                $formula = $formula1 . $text . $formula2;
                $beginformula = $pos[0]+strlen($tail1)+strlen($text);
                $position = $beginformula+strlen($head2);
            }else{
                // Only an empty formula precedes the text: put the text in
                // front of it.
                $head = substr($formula1, 0, $beginformula); // everything before the empty formula
                $formula1 = substr($formula1, $beginformula);
                $formula = $head . $text . $formula1 . $formula2;
                $beginformula += strlen($text);
                $position = $beginformula + strlen($formula1);
            }

            // Restart the search on the rewritten formula.
            $length = strlen($formula);
            $omittedcontent = false;
            mb_ereg_search_init($formula, $pattern);
            mb_ereg_search_setpos($position);
        }else{
            // Not an allowed tag: jump to its closing tag and remember that
            // math content was skipped.
            $pos = mb_ereg_search_pos($opentag.'/'.$trimmedTag.$closetag);
            if($pos === false){
                return $formula; // XML error (unclosed tag)
            }
            $omittedcontent = true;
            mb_ereg_search_setpos($pos[0]+$pos[1]);
        }
    }
    return $formula;
}