/**
 * Runs a multibyte regex search and prints one line per match.
 *
 * The subject and the pattern are converted from $in_enc to $test_enc before
 * searching. For every match a line "(<encoding>) (<position>) <groups>" is
 * printed, where <position> is the value of mb_ereg_search_getpos() after the
 * match and <groups> are the capture groups (whole match excluded) joined
 * with '-' and converted back to $in_enc.
 *
 * @param string $test_enc encoding used for the regex engine and the search
 * @param string $str      subject string, encoded in $in_enc
 * @param string $look_for pattern, encoded in $in_enc
 * @param string $opt      option string passed to mb_ereg_search_init()
 * @param string $in_enc   encoding of $str and $look_for
 * @return void
 */
function test_search($test_enc, $str, $look_for, $opt, $in_enc = 'EUC-JP')
{
    mb_regex_encoding($test_enc);

    $subject = mb_convert_encoding($str, $test_enc, $in_enc);
    $needle = mb_convert_encoding($look_for, $test_enc, $in_enc);
    mb_ereg_search_init($subject, $needle, $opt);

    while (mb_ereg_search_pos()) {
        $groups = mb_ereg_search_getregs();
        // Drop the whole-match entry; only the capture groups are printed.
        array_shift($groups);

        $joined = '';
        if (is_array($groups)) {
            $joined = implode('-', $groups);
        }

        $offset = mb_ereg_search_getpos();
        $shown = mb_convert_encoding($joined, $in_enc, $test_enc);
        printf("(%s) (%d) %s\n", $test_enc, $offset, $shown);
    }
}
/**
 * mb_split() with support for the preg_split() flags.
 *
 * The subject is scanned with mb_ereg_search_pos(); all offsets and lengths
 * handled here are the byte values that function reports, and the pieces are
 * cut out with mb_strcut().
 *
 * @param string $pattern mb_ereg pattern describing the delimiter
 * @param string $string  subject to split
 * @param int    $limit   maximum number of pieces when > 0; the last piece then holds the rest
 * @param int    $flags   bit mask of PREG_SPLIT_NO_EMPTY, PREG_SPLIT_DELIM_CAPTURE, PREG_SPLIT_OFFSET_CAPTURE
 * @return array the pieces, or array(piece, offset) pairs with PREG_SPLIT_OFFSET_CAPTURE
 */
private static function mbSplit($pattern, $string, $limit = -1, $flags = 0)
{
    $byteLength = strlen($string); // bytes, on purpose

    mb_ereg_search_init($string);

    // Pass 1: describe the subject as alternating segments.
    // Each entry is array(length, is delimiter?, first capture group non-empty?).
    $segments = array();
    $cursor = 0;
    while (true) {
        $hit = mb_ereg_search_pos($pattern, '');
        if ($hit === false) {
            break;
        }
        list($start, $size) = $hit;

        // text between the previous delimiter and this one
        $segments[] = array($start - $cursor, false, null);
        $cursor = $start + $size;

        // the delimiter itself
        $regs = mb_ereg_search_getregs();
        $segments[] = array($size, true, isset($regs[1]) && $regs[1]);

        if ($cursor >= $byteLength) {
            break;
        }
    }
    // trailing text (possibly empty) after the last delimiter
    $segments[] = array($byteLength - $cursor, false, null);

    // Pass 2: turn the segments into the result list.
    $parts = array();
    $offset = 0;
    $pieces = 1;
    foreach ($segments as $segment) {
        list($size, $isDelimiter, $isCaptured) = $segment;

        // Limit reached: emit everything that is left as the final piece and stop.
        if ($limit > 0 && !$isDelimiter && ($size || ~$flags & PREG_SPLIT_NO_EMPTY) && ++$pieces > $limit) {
            if ($size > 0 || ~$flags & PREG_SPLIT_NO_EMPTY) {
                $rest = mb_strcut($string, $offset);
                $parts[] = $flags & PREG_SPLIT_OFFSET_CAPTURE ? array($rest, $offset) : $rest;
            }
            break;
        }

        // Text is always a candidate; delimiters only with DELIM_CAPTURE and a non-empty group 1.
        $wanted = !$isDelimiter || ($flags & PREG_SPLIT_DELIM_CAPTURE && $isCaptured);
        if ($wanted && ($size || ~$flags & PREG_SPLIT_NO_EMPTY)) {
            $piece = mb_strcut($string, $offset, $size);
            $parts[] = $flags & PREG_SPLIT_OFFSET_CAPTURE ? array($piece, $offset) : $piece;
        }

        $offset += $size;
    }

    return $parts;
}
/**
 * Tests $text against a delimited regular expression and reports the matched text.
 *
 * When the mbstring regex functions are unavailable the expression is handed
 * to preg_match() unchanged. Otherwise the delimiters and the trailing flags
 * ($this->_re_flags) are stripped and the bare pattern is run through the
 * mb_ereg search API.
 *
 * $match is an output parameter and is therefore taken by reference; it was
 * previously taken by value, so the result never reached the caller.
 *
 * @param string $regexp delimited pattern, e.g. "/foo/" followed by $this->_re_flags
 * @param string $text   subject
 * @param string $match  receives the text of the whole match on success; untouched on failure
 * @return bool true when the pattern matched
 */
function match($regexp, $text, &$match)
{
    if (!is_callable('mb_ereg_search_init')) {
        if (!preg_match($regexp, $text, $found)) {
            return false;
        }
        $match = $found[0];
        return true;
    }

    // Drop the leading delimiter, the trailing delimiter and the flags.
    $regexp = substr($regexp, 1, strlen($regexp) - 2 - strlen($this->_re_flags));

    mb_ereg_search_init($text, $regexp);
    if (!mb_ereg_search()) {
        return false;
    }

    $regs = mb_ereg_search_getregs();
    $match = $regs[0];
    return true;
}
/**
 * Builds a slug from a string.
 *
 * Runs of word characters, '_' and '-' are kept and joined with '-'; leading
 * and trailing '-' / '_' are trimmed. The result is limited to $maxLength
 * bytes. With mbstring available the cut is made with mb_strcut(), so a
 * multibyte character is never split in half (a plain substr() could leave
 * an invalid trailing byte sequence).
 *
 * @access public
 * @param string $str the string to build the slug from
 * @param string $default slug to use when nothing usable is left
 * @param integer $maxLength maximum length of the slug, in bytes
 * @return string
 */
public static function slugName($str, $default = NULL, $maxLength = 128)
{
    $str = trim($str);

    if (!strlen($str)) {
        return $default;
    }

    if (__TYPECHO_MB_SUPPORTED__) {
        mb_regex_encoding(self::$charset);
        mb_ereg_search_init($str, "[\\w" . preg_quote('_-') . "]+");

        // Collect every run of allowed characters, then join them with '-'.
        $runs = array();
        while ($regs = mb_ereg_search_regs()) {
            $runs[] = $regs[0];
        }
        $str = implode('-', $runs);
    } else {
        if ('UTF-8' == strtoupper(self::$charset)) {
            if (preg_match_all("/[\\w" . preg_quote('_-') . "]+/u", $str, $matches)) {
                $str = implode('-', $matches[0]);
            }
        } else {
            $str = str_replace(array("'", ":", "\\", "/", '"'), "", $str);
            $str = str_replace(array("+", ",", ' ', ',', ' ', ".", "?", "=", "&", "!", "<", ">", "(", ")", "[", "]", "{", "}"), "-", $str);
        }
    }

    $str = trim($str, '-_');
    $str = !strlen($str) ? $default : $str;

    if (__TYPECHO_MB_SUPPORTED__) {
        // Byte-limited like substr(), but stops on a character boundary.
        return mb_strcut((string) $str, 0, $maxLength, self::$charset);
    }

    return substr($str, 0, $maxLength);
}
/* Codeine
 * @author bergstein@trickyplan.com
 * @description
 * @package Codeine
 * @version 8.x
 */

    /*
     * Match: first match of $Call['Pattern'] in $Call['Value'].
     *
     * Returns the register array (whole match at index 0, capture groups after
     * it) or null when the pattern does not match.
     *
     * mb_ereg() accepts at most three arguments, so $Call['Regex Options'] cannot
     * be passed to it; the search API is used instead because
     * mb_ereg_search_init() takes the option string.
     */
    setFn('Match', function ($Call)
    {
        if (!mb_ereg_search_init($Call['Value'], $Call['Pattern'], $Call['Regex Options']))
            return null;

        $Pockets = mb_ereg_search_regs();

        return $Pockets === false ? null : $Pockets;
    });

    /*
     * All: every match of $Call['Pattern'] in $Call['Value'].
     *
     * Returns an array indexed by register number, each entry listing that
     * register's value for every match in order, or false when there is no match.
     */
    setFn('All', function ($Call)
    {
        mb_ereg_search_init($Call['Value'], $Call['Pattern'], $Call['Regex Options']);

        if (!mb_ereg_search())
            return false;

        $Results = [];
        $Result = mb_ereg_search_getregs(); // first match

        do
        {
            foreach ($Result as $IX => $Value)
                $Results[$IX][] = $Value;

            $Result = mb_ereg_search_regs(); // next match
        }
        while ($Result);

        return $Results;
    });
/**
 * Scans a directory tree for translatable strings.
 *
 * Every file below $dir whose extension is listed in self::$ALLOW_EXTENSIONS
 * (and whose name does not contain "jsLangs") is checked against each pattern
 * in self::$PARSE_REGEXPR. For every match, capture group 1 is recorded
 * together with "path:line" of the occurrence; strings found in ".js" files
 * are additionally collected in a separate list.
 *
 * A directory that was already parsed is not scanned again. The collected
 * static buffers are reset before returning.
 *
 * @param string $dir directory to scan
 * @return array array('parsed_langs' => string => list of "path:line", 'js_langs' => string => string)
 */
public function findLangs($dir = '')
{
    if (!in_array($dir, self::$PARSED_PATHS)) {
        $baseDir = new \RecursiveIteratorIterator(new \RecursiveDirectoryIterator($dir));

        foreach ($baseDir as $file) {
            if (!$file->isFile()) {
                continue;
            }
            if (!in_array($file->getExtension(), self::$ALLOW_EXTENSIONS) || strstr($file->getBasename(), 'jsLangs')) {
                continue;
            }

            $content = @file($file->getPathname());
            if ($content === false) {
                // Unreadable file: nothing to scan (previously false reached implode()/foreach).
                continue;
            }

            // Cheap pre-check on the whole file before scanning line by line.
            $implode_content = implode(' ', $content);
            $lang_exist = FALSE;
            foreach (self::$PARSE_REGEXPR as $regexpr) {
                if (preg_match('/' . $regexpr . '/', $implode_content)) {
                    $lang_exist = TRUE;
                    break;
                }
            }
            if (!$lang_exist) {
                continue;
            }

            mb_regex_encoding("UTF-8");
            $path = str_replace("\\", "/", $file->getPathname());
            $is_js = $file->getExtension() == 'js';

            foreach ($content as $line_number => $line) {
                foreach (self::$PARSE_REGEXPR as $regexpr) {
                    mb_ereg_search_init($line, $regexpr);

                    while ($lang = mb_ereg_search_regs()) {
                        // NOTE(review): mb_ereg_replace() takes no delimiters, so this pattern
                        // only matches whitespace enclosed in literal '!' characters. It looks
                        // like '\\s+' was intended; left unchanged because fixing it would
                        // alter the extracted keys - confirm before changing.
                        $origin = mb_ereg_replace('!\\s+!', ' ', $lang[1]);

                        // isset() avoids reading an undefined index for a new string.
                        if (!isset(self::$FINDED_LANGS[$origin]) || !self::$FINDED_LANGS[$origin]) {
                            self::$FINDED_LANGS[$origin] = array();
                        }

                        if ($is_js) {
                            self::$FINDED_JS_LANGS[$origin] = $origin;
                        }

                        self::$FINDED_LANGS[$origin][] = $path . ':' . ($line_number + 1);
                    }
                }
            }
        }
    }

    self::$PARSED_PATHS[] = $dir;

    $data = array('parsed_langs' => self::$FINDED_LANGS, 'js_langs' => self::$FINDED_JS_LANGS);
    self::$FINDED_LANGS = array();
    self::$FINDED_JS_LANGS = array();

    return $data;
}
/**
 * Adds the words of $text to the word index.
 *
 * The text is split into runs of word characters. Each lower-cased word is
 * stored in $this->words as word => array(id => accumulated weight); $weight
 * is added once per occurrence. With self::$ismb set the mb_ereg search API
 * and mb_strtolower() are used, otherwise preg_match_all() and strtolower().
 *
 * @param mixed  $fullid identifier the words belong to (used as array key)
 * @param string $text   text to index; nothing happens when it is empty
 * @param mixed  $weight value added per occurrence
 * @return void
 */
private function addWords($fullid, $text, $weight)
{
    if (!$text) {
        return;
    }

    // Step 1: extract the lower-cased words in order of appearance.
    $terms = array();
    if (self::$ismb) {
        mb_ereg_search_init($text, "\\w+");
        if (mb_ereg_search()) {
            for ($regs = mb_ereg_search_getregs(); $regs; $regs = mb_ereg_search_regs()) {
                $terms[] = mb_strtolower($regs[0], 'UTF-8');
            }
        }
    } else {
        preg_match_all("/\\w+/", $text, $hits);
        foreach ($hits[0] as $raw) {
            $terms[] = strtolower($raw);
        }
    }

    // Step 2: accumulate the weight per word and id.
    foreach ($terms as $term) {
        if (!isset($this->words[$term])) {
            $this->words[$term] = array($fullid => $weight);
            continue;
        }
        if (isset($this->words[$term][$fullid])) {
            $this->words[$term][$fullid] += $weight;
        } else {
            $this->words[$term][$fullid] = $weight;
        }
    }
}
// Fragment of a flat test script: each var_dump() prints the outcome of one
// mbstring regex call. $r is not assigned in the visible part before its first
// use - presumably it holds a search result from earlier in the script.
var_dump($r === array("PrÜÝ" . "fung"));

// Move the search position of the current search, then fetch the next match from there.
var_dump(mb_ereg_search_setpos(15));
$r = mb_ereg_search_regs(); // get next result
var_dump($r == array("pÜ"));

// Search with the pattern and options given to mb_ereg_search_regs() itself
// instead of mb_ereg_search_init().
$str = "PrÜÝ" . "fung abc pÜ";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str);
$r = mb_ereg_search_regs("abc", "ms");
var_dump($r);

// Search with the pattern given at init time; mb_ereg_search() performs the
// match and mb_ereg_search_getregs() returns its registers.
$str = "PrÜÝ" . "fung abc pÜ";
$reg = "\\w+";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str, $reg);
$r = mb_ereg_search();
$r = mb_ereg_search_getregs(); // get first result
var_dump($r === array("PrÜÝ" . "fung"));

// mb_ereg() with three capture groups; the registers are dumped from last to first.
$date = "1973-04-30";
mb_ereg("([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})", $date, $regs);
var_dump($regs[3]);
var_dump($regs[2]);
var_dump($regs[1]);
var_dump($regs[0]);

// mb_eregi_replace() with \1 and \2 back-references in the replacement.
$pattern = "(>[^<]*)(suffix)";
$replacement = "\\1<span class=\"search\">\\2</span>";
$body = ">whateversuffix";
$body = mb_eregi_replace($pattern, $replacement, $body);
var_dump($body);

// Same pattern and replacement set up again; their use is outside the visible part.
$pattern = "(>[^<]*)(suffix)";
$replacement = "\\1<span class=\"search\">\\2</span>";
/**
 * Tests $text against a delimited regular expression and reports the matched text.
 *
 * Without the mbstring regex functions the expression is handed to
 * preg_match() unchanged. Otherwise the delimiters and the trailing flags
 * ($this->_re_flags) are stripped and the bare pattern is run through the
 * mb_ereg search API.
 *
 * Both paths now leave the text of the whole match in $match. Previously the
 * mb path stored the complete register array while the preg path stored a
 * string, so the type seen by the caller depended on the installed extensions.
 *
 * @param string $regexp delimited pattern followed by $this->_re_flags
 * @param string $text   subject
 * @param string $match  receives the text of the whole match on success
 * @return bool true when the pattern matched
 */
function match($regexp, $text, &$match)
{
    if (!is_callable('mb_ereg_search_init')) {
        if (!preg_match($regexp, $text, $match)) {
            return false;
        }
        $match = $match[0];
        return true;
    }

    // Drop the leading delimiter, the trailing delimiter and the flags.
    $regexp = substr($regexp, 1, strlen($regexp) - 2 - strlen($this->_re_flags));

    mb_ereg_search_init($text);
    if (!mb_ereg_search($regexp)) {
        return false;
    }

    $regs = mb_ereg_search_getregs();
    $match = $regs[0];
    return true;
}
/**
 * Highlights the given words inside an HTML fragment.
 *
 * The HTML is tokenised into words, other text, tags and comment/template
 * blocks. Consecutive word tokens, possibly interrupted by inline tags
 * (b, i, em, ...), are collected into one candidate; when the lower-cased
 * candidate equals one of $words - or, for words of three or more characters,
 * starts with it - the collected markup is passed through self::markIt().
 * Everything between an opening script/style/textarea tag and its closing
 * tag is copied through untouched. Input that matches none of the token
 * alternatives (for instance a bare '<') is skipped by the scanner and does
 * not appear in the result.
 *
 * Changes compared with the previous implementation:
 *  - the tag token captures the whole tag name instead of its first letter,
 *    so the inline/ignored tag lists can actually match;
 *  - tag names are compared exactly and case-insensitively instead of through
 *    unanchored alternations;
 *  - the word pattern is anchored as a whole (the old form anchored only the
 *    first and last alternative, or only the start with mbstring);
 *  - words are escaped before being placed into the pattern.
 *
 * @param string $html  HTML to process
 * @param array  $words words to highlight
 * @return string the processed HTML; $html unchanged when there is nothing to look for
 */
public static function mark($html, $words = array())
{
    if (!$words || count($words) <= 0) {
        return $html;
    }

    $ismb = function_exists('mb_ereg_search');
    $inlineTags = array('b', 'i', 'em', 'strong', 'tt', 'big', 'small', 'strike', 'u', 'span', 'a');
    $ignoreTags = array('script', 'style', 'textarea');

    // One alternative per word; words of 3+ characters also match longer words starting with them.
    $alternatives = array();
    foreach ($words as $word) {
        $word = (string) $word;
        if ($word === '') {
            continue;
        }
        $lower = $ismb ? mb_strtolower($word, 'UTF-8') : strtolower($word);
        $length = $ismb ? mb_strlen($word, 'UTF-8') : strlen($word);

        $alternative = preg_quote($lower, '/');
        if ($length >= 3) {
            $alternative .= '\\w*';
        }
        $alternatives[] = $alternative;
    }
    if (!$alternatives) {
        return $html;
    }
    $wordPattern = '\\A(?:' . implode('|', $alternatives) . ')\\z';

    // 1: word, 2: other text, 3: tag name (with leading '/' for closing tags), 4: comment/template block
    $tokenPattern = '(\\w+)|([^<\\w]+)|<(\\/?[a-zA-Z-][a-zA-Z0-9-]*)[^>]*>|(<!--.*?-->|\\(%.*?%\\)|\\{%.*?%\\})';

    $tokens = array();
    if ($ismb) {
        mb_regex_encoding('UTF-8');
        mb_ereg_search_init($html, $tokenPattern);
        while (is_array($regs = mb_ereg_search_regs())) {
            $tokens[] = $regs;
        }
    } else {
        if (!preg_match_all('/' . $tokenPattern . '/i', $html, $tokens, PREG_SET_ORDER)) {
            $tokens = array();
        }
    }

    $newhtml = '';
    $currPart = '';   // markup collected for the current candidate
    $currWord = '';   // lower-cased text of the current candidate
    $ignore = false;  // closing tag name ("/script", ...) that ends the current ignored region

    foreach ($tokens as $match) {
        // Unmatched groups are false (mbstring) or missing (PREG_SET_ORDER); normalise them.
        $wordText = isset($match[1]) && is_string($match[1]) ? $match[1] : '';
        $isText = isset($match[2]) && is_string($match[2]) && $match[2] !== '';
        $tag = isset($match[3]) && is_string($match[3]) ? strtolower($match[3]) : '';
        $isBlock = isset($match[4]) && is_string($match[4]) && $match[4] !== '';

        $isInlineTag = $tag !== '' && in_array(ltrim($tag, '/'), $inlineTags, true);
        $endsCandidate = $ignore !== false || $isText || $isBlock || ($tag !== '' && !$isInlineTag);

        if (!$endsCandidate) {
            // Word or inline tag: keep collecting.
            if ($wordText !== '') {
                $currWord .= $ismb ? mb_strtolower($wordText, 'UTF-8') : strtolower($wordText);
            }
            $currPart .= $match[0];
            continue;
        }

        // Flush the collected candidate, highlighted when it is one of the words.
        $isHit = $currWord !== ''
            && ($ismb ? mb_ereg_match($wordPattern, $currWord) : preg_match('/' . $wordPattern . '/', $currWord));
        $newhtml .= $isHit ? self::markIt($currPart) : $currPart;
        $currPart = $currWord = '';

        if ($ignore !== false) {
            if ($tag === $ignore) {
                $ignore = false;
            }
        } elseif ($tag !== '' && in_array($tag, $ignoreTags, true) && substr($match[0], -2) != '/>') {
            $ignore = '/' . $tag;
        }

        $newhtml .= $match[0];
    }

    // Flush whatever is still collected at the end of the input.
    $isHit = $currWord !== ''
        && ($ismb ? mb_ereg_match($wordPattern, $currWord) : preg_match('/' . $wordPattern . '/', $currWord));
    $newhtml .= $isHit ? self::markIt($currPart) : $currPart;

    return $newhtml;
}
/**
 * Tries to guess the encoding used for an Html document
 *
 * If the document has a "</head" and a charset=... declaration that ends
 * before it, the declared value (upper-cased) is used; this overrides
 * whatever the server said. Otherwise mb_detect_encoding() decides.
 *
 * @param string $html the HTML document to examine
 * @param bool $return_loc_info if a charset declaration was used to
 *     find the encoding, then if $return_loc_info is true, we
 *     return the location of the charset substring. This allows converting
 *     to UTF-8 later so cached pages will display correctly and
 *     redirects without char encoding won't be given a different hash.
 *
 * @return mixed either string or array if string then guessed encoding,
 *     if array guessed encoding, start_pos of where charset info came from,
 *     length
 */
function guessEncodingHtml($html, $return_loc_info = false)
{
    $end_head = stripos($html, "</head");
    if ($end_head) {
        $reg = "/charset(\\s*)=(\\s*)(\\'|\")?((\\w|\\-)+)(\\'|\")?/u";
        $is_match = preg_match($reg, $html, $match);
        if (!$is_match) {
            // preg_match() with /u fails on invalid UTF-8; retry with mbstring.
            $reg = "charset(\\s*)=(\\s*)(\\'|\")?((\\w|\\-)+)(\\'|\")?";
            mb_regex_encoding("UTF-8");
            mb_ereg_search_init($html);
            if (mb_ereg_search($reg)) {
                $match = mb_ereg_search_getregs();
                $is_match = is_array($match) && isset($match[0]);
            }
        }
        if ($is_match) {
            /* Optional groups are reported differently: preg_match() drops
               trailing unmatched groups and mbstring reports them as false.
               Normalising them here lets an unquoted value such as
               <meta charset=utf-8> be recognised too. */
            $open_quote = (isset($match[3]) && is_string($match[3])) ? $match[3] : '';
            $close_quote = (isset($match[6]) && is_string($match[6])) ? $match[6] : '';

            $len_c = strlen($match[0]);
            if (($close_quote == "'" || $close_quote == '"') && $open_quote != $close_quote) {
                // The closing quote belongs to the enclosing attribute, not to the charset.
                $len_c--;
            }
            $start_charset = strpos($html, $match[0]);
            if ($start_charset + $len_c < $end_head) {
                if (isset($match[4])) {
                    $encoding = strtoupper($match[4]);
                    if ($return_loc_info) {
                        return array($encoding, $start_charset, $len_c);
                    }
                    return $encoding;
                }
            }
        }
    }
    return mb_detect_encoding($html, 'auto');
}
/**
 * Attaches the "order" navigation link to every "objednani" template element.
 *
 * The "link_objednat" element is cloned, its query string is set to the
 * product id (plus the photo property value when one is set) and, when a
 * height increment is configured, the "height=N" part of its popup
 * specification is enlarged by increment * number of selectable properties.
 *
 * @param HTML_Indexer|null $index        template index; defaults to $this->_index
 * @param Kiwi_Product|null $product_data product; defaults to $this->_product_data
 * @return void
 * @throws Template_Element_Missing_Exception   when "link_objednat" is missing
 * @throws Template_Invalid_Structure_Exception when an element is duplicated or of the wrong kind
 */
protected function updateTemplate_OrderLink(HTML_Indexer $index = null, Kiwi_Product $product_data = null)
{
    if ($index === null) {
        $index = $this->_index;
    }
    if ($product_data === null) {
        $product_data = $this->_product_data;
    }

    $link_objednat = $index->link_objednat;
    $vyberova_vlastnost_prirustek_height_popup = $index->vyberova_vlastnost_prirustek_height_popup;
    $objednani = $index->objednani;

    if (empty($objednani)) {
        return;
    }

    if (empty($link_objednat)) {
        throw new Template_Element_Missing_Exception('link_objednat');
    }
    if (count($link_objednat) !== 1) {
        throw new Template_Invalid_Structure_Exception('The "link_objednat" element duplicity');
    }
    if (count($vyberova_vlastnost_prirustek_height_popup) > 1) {
        throw new Template_Invalid_Structure_Exception('The "vyberova_vlastnost_prirustek_height_popup" element duplicity');
    }

    if (!empty($vyberova_vlastnost_prirustek_height_popup)) {
        $vyberova_vlastnost_prirustek_height_popup = $vyberova_vlastnost_prirustek_height_popup[0];
        try {
            $prirustek_height = $vyberova_vlastnost_prirustek_height_popup->specification;
        } catch (HTML_No_Such_Element_Attribute_Exception $e) {
            throw new Template_Invalid_Structure_Exception('The "vyberova_vlastnost_prirustek_height_popup" element must be group-derived element');
        }
    } else {
        $prirustek_height = null;
    }

    $link_objednat = $link_objednat[0];
    if ($link_objednat->tag != 'Navpoint_View') {
        throw new Template_Invalid_Structure_Exception('The "link_objednat" element must be of type "Navpoint_View", not "' . $link_objednat->tag . '"');
    }

    $navpoint = clone $link_objednat;

    $npqs = $this->ns . 'p=' . $product_data->ID;
    if ($this->_propval_photo !== null) {
        $npqs .= '&' . $this->ns . 'pvp=' . $this->_propval_photo;
    }
    $navpoint->vo->qs = $npqs;

    if ($prirustek_height !== null && $navpoint->vo->popup !== null) {
        mb_ereg_search_init($navpoint->vo->popup);
        // Only read the registers after a successful search. The former
        // "$match = mb_ereg_search_getregs() !== false" assigned a boolean
        // (operator precedence), which replaced the popup with a bare "height=N".
        if (mb_ereg_search("^(.*)height=([0-9]+)(.*)\$", 'i')) {
            $match = mb_ereg_search_getregs();
            $new_height = (int) $match[2] + (int) $prirustek_height * $product_data->selectablePropertiesCount;
            $new_popup = $match[1] . "height={$new_height}" . $match[3];
            $navpoint->vo->popup = $new_popup;
        }
    }

    foreach ($objednani as $elem) {
        $elem->add($navpoint);
    }
}