/**
 * Compat function to mimic mb_strlen().
 *
 * Only UTF-8 is counted character-wise. For every other charset the byte
 * length reported by strlen() is returned.
 *
 * @param string      $str      The string to measure.
 * @param string|null $encoding Optional. Character encoding. When null, the
 *                              'blog_charset' option is used.
 * @return int Number of characters in $str. In the non-PCRE-u fallback, bytes
 *             that are not part of a well-formed UTF-8 sequence are not counted.
 */
function _mb_strlen($str, $encoding = null) {
	if (null === $encoding) {
		$encoding = get_option('blog_charset');
	}

	// The solution below works only for UTF-8,
	// so in case of a different charset just use built-in strlen().
	if (!in_array($encoding, array('utf8', 'utf-8', 'UTF8', 'UTF-8'), true)) {
		return strlen($str);
	}

	if (_wp_can_use_pcre_u()) {
		// Use the regex unicode support to separate the UTF-8 characters into an array.
		preg_match_all('/./us', $str, $match);
		return count($match[0]);
	}

	$regex = '/(?:
		  [\\x00-\\x7F]                          # single-byte sequences   0xxxxxxx
		| [\\xC2-\\xDF][\\x80-\\xBF]             # double-byte sequences   110xxxxx 10xxxxxx
		| \\xE0[\\xA0-\\xBF][\\x80-\\xBF]        # triple-byte sequences   1110xxxx 10xxxxxx * 2
		| [\\xE1-\\xEC][\\x80-\\xBF]{2}
		| \\xED[\\x80-\\x9F][\\x80-\\xBF]
		| [\\xEE-\\xEF][\\x80-\\xBF]{2}
		| \\xF0[\\x90-\\xBF][\\x80-\\xBF]{2}     # four-byte sequences   11110xxx 10xxxxxx * 3
		| [\\xF1-\\xF3][\\x80-\\xBF]{3}
		| \\xF4[\\x80-\\x8F][\\x80-\\xBF]{2}
	)/x';

	// Split in chunks: each preg_split() call consumes at most ($limit - 1) characters
	// and leaves the unsplit remainder of the string in the last array element.
	$limit = 1000;
	$count = 0;

	do {
		$pieces = preg_split($regex, $str, $limit);

		if (!is_array($pieces)) {
			// PCRE failure: nothing more can be counted.
			break;
		}

		// Fencepost: N pieces means N - 1 delimiters, and every delimiter is one character.
		$found  = count($pieces) - 1;
		$count += $found;

		$str = array_pop($pieces);

		// Only continue when the limit was actually reached. Otherwise the remainder is
		// either empty or unmatched bytes, and splitting it again would never make progress.
		// The remainder is compared against '' (not tested for truthiness) so that a
		// leftover of "0" is still counted.
	} while ($limit - 1 === $found && '' !== $str);

	return $count;
}
/**
 * Looks for a title tag in the buffered content and hands it over for replacement.
 * When no title tag can be located, the content is echoed unchanged.
 *
 * @uses _wp_can_use_pcre_u() WP Core function
 *       (Compat for lower than WP 4.1.0 provided within The SEO Framework)
 *
 * @since 1.0.0
 * @since 1.0.2: Echos $content, always.
 *
 * @param string $content The content with possible title tag.
 * @return void Returns early, without echoing here, when a title tag is found.
 */
public function find_title_tag($content) {

	$found_tag = null;

	if (_wp_can_use_pcre_u()) {
		//* Regex route: lazily match from the opening "<title" up to the first closing tag.
		$hits = preg_match('/<title.*?<\\/title>/is', $content, $matches);

		if (1 === $hits && isset($matches[0])) {
			$found_tag = $matches[0];
		}
	} else {
		//* Offset route: 0.0003s faster, but less reliable.
		$open = stripos($content, '<title');

		if (false !== $open) {
			$close = stripos($content, '</title>', $open);

			if (false !== $close) {
				//* The 8 added bytes cover the "</title>" closing tag itself.
				$slice = substr($content, $open, $close - $open + 8);

				if (false !== $slice) {
					$found_tag = $slice;
				}
			}
		}
	}

	if (null === $found_tag) {
		//* Can't be escaped, as content is unknown.
		echo $content;
		return;
	}

	$this->replace_title_tag($found_tag, $content);
	$this->title_found_and_flushed = true;
}
/**
 * Sanitizes a redirect URL.
 *
 * Tags are stripped first. A value recognized as a bare path is expanded
 * into a full URL with a scheme, and the result is finally passed through
 * either s_url() or esc_url_raw(), depending on a filter.
 *
 * @since 2.2.4
 *
 * @param string $new_value String with potentially unwanted redirect URL.
 * @return string The Sanitized Redirect URL
 */
protected function s_redirect_url($new_value) {

	$url = strip_tags($new_value);

	if ($url) {
		/**
		 * When external redirects are not allowed, sanitize the redirect URL
		 * to only a relative link and remove the first slash.
		 * @requires WP 4.1.0 and up to prevent adding upon itself.
		 */
		if (!$this->allow_external_redirect()) {
			$url = $this->s_relative_url($url);
		}

		//* Determine whether the value is only a path.
		if (_wp_can_use_pcre_u()) {
			//* URL pattern excluding path.
			$pattern = '/'
				. '((((http)(s)?)?)\\:)?'
				. '(\\/\\/)?'
				. '((www.)?)'
				. '(.*\\.[a-zA-Z0-9]*)'
				. '(?:\\/)?'
				. '/';

			$is_path = !preg_match($pattern, $url);
		} else {
			$parts   = wp_parse_url($url);
			$is_path = isset($parts['path']) && !isset($parts['host']);
		}

		//* A relative path is turned into a full URL again.
		if ($is_path) {
			$path   = $url;
			$scheme = null;

			//* First candidate: WPMUdev Domain Mapping.
			$mapped = $this->the_url_wpmudev_domainmap($path, true);
			if ($mapped && is_array($mapped)) {
				$url    = $mapped[0];
				$scheme = $mapped[1];
			}

			//* Second candidate: Donncha Domain Mapping.
			if (!isset($scheme)) {
				$mapped = $this->the_url_donncha_domainmap($path, true);
				if ($mapped && is_array($mapped)) {
					$url    = $mapped[0];
					$scheme = $mapped[1];
				}
			}

			//* No mapping supplied a scheme: build from the cached home URL.
			if (!isset($scheme)) {
				$url    = $this->the_home_url_from_cache(true) . ltrim($path, ' /');
				$scheme = is_ssl() ? 'https' : 'http';
			}

			//* When nothing is found, fall back on WP defaults (is_ssl).
			if (!isset($scheme)) {
				$scheme = '';
			}

			$url = $this->set_url_scheme($url, $scheme);
		}
	}

	/**
	 * Applies filters the_seo_framework_301_noqueries : bool remove query args from 301
	 * @since 2.5.0
	 */
	$noqueries = (bool) apply_filters('the_seo_framework_301_noqueries', true);

	if (!$noqueries) {
		/**
		 * Allow query string parameters. XSS safe.
		 */
		return esc_url_raw($url);
	}

	/**
	 * Remove query args from the URL.
	 *
	 * Returns plain Home URL if external redirects are not allowed and only
	 * a query has been supplied. But that's okay. The URL was rogue anyway :)
	 *
	 * @see The_SEO_Framework_Sanitize::s_url
	 * @since 2.2.4
	 */
	return $this->s_url($url);
}
/**
 * Compat function to mimic mb_strpos().
 *
 * Only UTF-8 is handled character-wise. For every other charset the call is
 * forwarded to strpos() with the arguments untouched.
 *
 * The UTF-8 branch searches on the byte level and converts between character
 * and byte offsets by skipping continuation bytes (10xxxxxx). UTF-8 is
 * self-synchronizing, so a well-formed needle can only match on a character
 * boundary of a well-formed haystack.
 *
 * @param string      $haystack The string to search in.
 * @param string|int  $needle   The string to search for. Non-string scalars are cast to
 *                              integer; a resulting 0 is rejected as an empty needle.
 * @param int         $offset   Optional. Character offset to start searching from.
 *                              Must lie within 0 and the haystack length. Default 0.
 * @param string|null $encoding Optional. Character encoding. When null, the
 *                              'blog_charset' option is used.
 * @return int|false Character position of the first occurrence of $needle at or after
 *                   $offset, counted from the start of $haystack. False when not found,
 *                   or (with an E_USER_WARNING) on an invalid offset or needle.
 */
function _mb_strpos($haystack, $needle, $offset = 0, $encoding = null) {
	if (null === $encoding) {
		$encoding = get_option('blog_charset');
	}

	// The solution below works only for UTF-8,
	// so in case of a different charset just use built-in strpos().
	if (!in_array($encoding, array('utf8', 'utf-8', 'UTF8', 'UTF-8'), true)) {
		return $offset === 0 ? strpos($haystack, $needle) : strpos($haystack, $needle, $offset);
	}

	$haystack = (string) $haystack;
	$offset   = (int) $offset;
	$byte_len = strlen($haystack);

	// Every character has exactly one byte that is not a continuation byte.
	$char_len = $byte_len - (int) preg_match_all('/[\x80-\xBF]/', $haystack, $continuation);

	if ($offset < 0 || $offset > $char_len) {
		trigger_error('mb_strpos(): Offset not contained in string', E_USER_WARNING);
		return false;
	}

	if (!is_string($needle)) {
		if (null !== $needle && !is_scalar($needle)) {
			trigger_error('mb_strpos(): Array to string conversion', E_USER_WARNING);
			return false;
		}

		// Non-string scalars have always been cast to integer, with 0 treated as empty.
		$needle = (int) $needle;
		$needle = 0 === $needle ? '' : (string) $needle;
	}

	// Compare against '' instead of using empty(): the string "0" is a valid needle.
	if ('' === $needle) {
		trigger_error('mb_strpos(): Empty needle', E_USER_WARNING);
		return false;
	}

	// Translate the character offset into a byte offset.
	$byte_offset = 0;
	for ($skipped = 0; $skipped < $offset && $byte_offset < $byte_len; $skipped++) {
		// Step over the lead byte, then over all continuation bytes that follow it.
		$byte_offset++;
		while ($byte_offset < $byte_len && 0x80 === (ord($haystack[$byte_offset]) & 0xC0)) {
			$byte_offset++;
		}
	}

	$byte_pos = strpos($haystack, $needle, $byte_offset);

	if (false === $byte_pos) {
		return false;
	}

	if (0 === $byte_pos) {
		return 0;
	}

	// Translate the byte position back into a character position.
	$prefix = substr($haystack, 0, $byte_pos);

	return $byte_pos - (int) preg_match_all('/[\x80-\xBF]/', $prefix, $continuation);
}