/**
 * UTF-8 aware replacement for strpos() (optionally case-insensitive).
 *
 * Returns the position of the first occurrence of $sub in $str, or false
 * when $sub is not found. Positions are counted in units consumed by
 * utf8_get_char() (presumably one UTF-8 character per call).
 *
 * A negative $offset is resolved relative to the end of $str, which needs
 * an extra utf8_length() pass over the string and is therefore a lot slower.
 *
 * This function may return boolean false, but may also return a non-boolean 0
 * which evaluates to false. Use the === operator for testing the return value
 * of this function.
 *
 * An empty $sub raises a user-level error through trigger_error() and
 * yields false.
 *
 * @param  string $str    The string to search in
 * @param  string $sub    The string to search for
 * @param  int    $offset If presented, it specifies the position in the string
 *                        to begin the search; negative values count from the end
 * @param  bool   $ci     Should the function be case-insensitive?
 * @return int|false      The position or false on failure
 */
function utf8_search($str, $sub, $offset = 0, $ci = false)
{
    $pos = 0;     // Candidate start position of the match within $str
    $matched = 0; // The position of matching UTF-8 character within the $sub

    // Translate a negative offset into one counted from the start, clamped at 0.
    if ($offset < 0) {
        $offset += utf8_length($str);
        if ($offset < 0) {
            $offset = 0;
        }
    }

    // Consume (and discard) everything in front of the requested offset.
    for (; $pos < $offset; ++$pos) {
        if (utf8_get_char($str, $strIdx) === false) {
            return false;
        }
    }

    for (;;) {
        // Decode the needle lazily: only fetch its next code point on demand.
        if (!isset($needle[$matched])) {
            if (utf8_get_char($sub, $subIdx, $codepoint) === false) {
                if ($matched) {
                    // Needle exhausted after matching all of it: success.
                    return $pos;
                }

                trigger_error('utf8_search: The string to search for is empty');

                return false;
            }

            // Each needle slot holds every code point accepted at that spot.
            $accepted = array($codepoint);
            if ($ci) {
                $swapped = unicode_swapcase($codepoint);
                if ($swapped != $codepoint) {
                    $accepted[] = $swapped;
                }
            }
            $needle[$matched] = $accepted;
        }

        // Code points already read from $str are cached so that restarting
        // one position further after a partial match needs no re-decoding.
        $probe = $pos + $matched;
        if (!isset($window[$probe])) {
            if (utf8_get_char($str, $strIdx, $codepoint) === false) {
                return false;
            }
            $window[$probe] = $codepoint;
        }

        if (in_array($window[$probe], $needle[$matched])) {
            ++$matched;
            continue;
        }

        // Mismatch: drop the cached code point at the candidate position
        // and retry the whole needle from the next position.
        unset($window[$pos]);
        ++$pos;
        $matched = 0;
    }
}
/**
 * Checks that utf8_length() returns the expected value for each data set.
 *
 * @dataProvider providerUtf8Length
 *
 * @param mixed $str Input handed to utf8_length()
 * @param mixed $rv  Value utf8_length() is expected to return
 */
public function testUtf8Length($str, $rv)
{
    // assertEquals() takes the expected value first and the actual value
    // second; keeping that order makes failure messages read correctly.
    $this->assertEquals($rv, utf8_length($str));
}
/**
 * UTF-8 aware replacement for substr(), addressed by start and end positions.
 *
 * Returns the portion of the string between $start and $end. The character
 * at $end is included in the result. Negative positions are counted from the
 * end of the string (via utf8_length()) and clamped to 0 when they reach
 * before its beginning. When $end is omitted the slice runs to the end of
 * the string. An empty string is returned when $start lies after $end.
 *
 * @param  string   $str   The UTF-8 encoded string
 * @param  int      $start The start position
 * @param  int|null $end   The end position (inclusive), or null for "to the end"
 * @return string          The portion of the string
 */
function utf8_slice($str, $start, $end = null)
{
    $start = (int) $start;
    $end = ($end === null) ? null : (int) $end;

    // The string length is only needed when a position is given relative
    // to the end. (A null $end never compares as less than 0.)
    if ($start < 0 || $end < 0) {
        $length = utf8_length($str);

        if ($start < 0) {
            $start += $length;
            $start = ($start < 0) ? 0 : $start;
        }

        if ($end < 0) {
            $end += $length;
            $end = ($end < 0) ? 0 : $end;
        }
    }

    if ($end !== null && $start > $end) {
        return '';
    }

    // -1 is never reached by the counter below, i.e. "no upper bound".
    $last = ($end === null) ? -1 : $end;

    $result = '';
    for ($index = 0; ; ++$index) {
        $char = utf8_get_char($str, $cursor);
        if ($char === false) {
            break;
        }

        if ($index >= $start) {
            $result .= $char;
        }

        // Stop right after the character at the end position was handled.
        if ($index == $last) {
            break;
        }
    }

    return $result;
}