/**
 * UTF-8 aware replacement for strpos().
 *
 * Looks for the first occurrence of $sub in $str and returns the position at
 * which it starts. Positions are counted in the units delivered by
 * utf8_get_char(), starting at 0. If $sub is not found, false is returned.
 *
 * A negative $offset is noticeably slower, because the length of $str has to
 * be computed with utf8_length() first.
 *
 * This function may return boolean false, but may also return a non-boolean 0
 * which evaluates to false. Use the === operator for testing the return value
 * of this function.
 *
 * An empty $sub raises a notice via trigger_error() and yields false.
 *
 * @param string $str    The string to search in
 * @param string $sub    The string to search for
 * @param int    $offset Position in $str at which the search begins; a negative
 *                       value counts back from the end of $str and is clamped
 *                       to the start of the string
 * @param bool   $ci     Should the function be case-insensitive?
 * @return int|false The position of the match, or false on failure
 */
function utf8_search($str, $sub, $offset = 0, $ci = false)
{
    // A negative offset is relative to the end of $str, never before its start.
    if ($offset < 0) {
        $offset = max(0, $offset + utf8_length($str));
    }

    // Skip the first $offset characters. $strCursor is intentionally left
    // uninitialised and is shared with the reads in the main loop, so scanning
    // continues from wherever the skipping stopped.
    // NOTE(review): assumes utf8_get_char() advances the cursor passed as its
    // second argument and returns false at the end of the string - confirm.
    for ($start = 0; $start < $offset; ++$start) {
        if (utf8_get_char($str, $strCursor) === false) {
            return false;
        }
    }

    $matched = 0;       // Number of leading characters of $sub matched so far
    $needle = array();  // Lazily decoded $sub: index => acceptable code points
    $window = array();  // Code points of $str already read, keyed by position

    for (;;) {
        // Decode the next character of $sub the first time it is needed.
        if (!isset($needle[$matched])) {
            if (utf8_get_char($sub, $subCursor, $cp) === false) {
                if ($matched !== 0) {
                    // Every character of $sub has been matched.
                    return $start;
                }

                trigger_error('utf8_search: The string to search for is empty');

                return false;
            }

            $accepted = array($cp);
            if ($ci) {
                // For case-insensitive searches the other-case form matches too.
                $otherCase = unicode_swapcase($cp);
                if ($otherCase != $cp) {
                    $accepted[] = $otherCase;
                }
            }
            $needle[] = $accepted;
        }

        // Characters read during a failed partial match stay in $window, so
        // they can be compared again from the next start position without
        // reading $str a second time.
        $index = $start + $matched;
        if (!isset($window[$index])) {
            if (utf8_get_char($str, $strCursor, $cp) === false) {
                return false;
            }
            $window[$index] = $cp;
        }

        if (in_array($window[$index], $needle[$matched])) {
            ++$matched;
        } else {
            // Mismatch: drop the first character of the window and retry the
            // whole of $sub one position further to the right.
            unset($window[$start]);
            ++$start;
            $matched = 0;
        }
    }
}
/**
 * UTF-8 aware replacement for strcmp() and, with $ci enabled, strcasecmp().
 *
 * Walks both strings in parallel, one character at a time, and returns an
 * integral value indicating the relationship between the strings.
 *
 * - A zero value indicates that both strings are equal.
 * - A value greater than zero indicates that the first character that does not
 *   match has a greater value in $str than in $otherstr, or that $otherstr
 *   ended first.
 * - A value less than zero indicates the opposite.
 *
 * In case-insensitive mode two characters are equal when one is the
 * unicode_swapcase() counterpart of the other. Characters that still differ
 * are ordered by a case-independent key, so the sign of the result does not
 * depend on the letter case of the operands ("a" vs "B" and "A" vs "b" both
 * yield a negative value).
 *
 * @param string $str      The UTF-8 encoded string
 * @param string $otherstr Other UTF-8 encoded string
 * @param bool   $ci       Should the function be case-insensitive?
 * @return int
 */
function utf8_compare($str, $otherstr, $ci = false)
{
    $diff = 0;
    $char1 = $char2 = '';

    do {
        // Once a string is exhausted it is not read again; instead the
        // iterations still needed to exhaust the other string are counted,
        // which gives the sign of the result when one string is a prefix of
        // the other.
        if ($char1 !== false) {
            $char1 = utf8_get_char($str, $i, $cp1);
        } else {
            $diff--;
        }

        if ($char2 !== false) {
            $char2 = utf8_get_char($otherstr, $j, $cp2);
        } else {
            $diff++;
        }

        if ($char1 === false) {
            if ($char2 === false) {
                // Both strings are exhausted without a differing character.
                return $diff;
            }
        } elseif ($char2 !== false && $cp1 != $cp2) {
            if (!$ci) {
                return $cp1 > $cp2 ? 1 : -1;
            }

            $swapped2 = unicode_swapcase($cp2);
            if ($cp1 != $swapped2) {
                // Order by the larger code point of each case pair so both
                // case forms of a letter share one sort key.
                // NOTE(review): for ASCII this key is the lowercase letter,
                // like strcasecmp(), assuming unicode_swapcase() maps A-Z
                // to a-z and back - confirm against its definition.
                $key1 = max($cp1, unicode_swapcase($cp1));
                $key2 = max($cp2, $swapped2);

                if ($key1 == $key2) {
                    // Only reachable if unicode_swapcase() is not symmetric
                    // for this pair; keep the raw code point order then.
                    return $cp1 > $cp2 ? 1 : -1;
                }

                return $key1 > $key2 ? 1 : -1;
            }
        }
    } while (true);
}
/**
 * Checks the result of unicode_swapcase() for one data set supplied by the
 * provider.
 *
 * @dataProvider providerUnicodeSwapcase
 *
 * @param mixed $cp Value handed to unicode_swapcase()
 * @param mixed $rv Value unicode_swapcase() is expected to return
 */
public function testUnicodeSwapcase($cp, $rv)
{
    // assertEquals() expects the expected value first and the actual value
    // second; this order keeps failure messages the right way round.
    $this->assertEquals($rv, unicode_swapcase($cp));
}
/**
 * Swaps the case of a string: uppercase characters become lowercase and
 * lowercase characters become uppercase. Characters for which
 * unicode_swapcase() reports no counterpart are copied unchanged.
 *
 * @param string $str The UTF-8 encoded string
 * @return string The swapcased string
 */
function utf8_swapcase($str)
{
    $result = '';

    for (;;) {
        // $cursor and $cp are filled in by utf8_get_char() on each call.
        $char = utf8_get_char($str, $cursor, $cp);
        if ($char === false) {
            break;
        }

        // Re-encode only when the code point actually changed; otherwise the
        // character just read is appended as it is.
        $swapped = unicode_swapcase($cp);
        $result .= ($swapped != $cp) ? utf8_chr($swapped) : $char;
    }

    return $result;
}