/**
 * Checks substring extraction on the four-grapheme word "déjà" through three
 * entry points: the grapheme_substr() function in scope, p::grapheme_substr()
 * and p::grapheme_substr_workaround62759().
 *
 * @covers Patchwork\PHP\Override\Intl::grapheme_substr
 * @covers Patchwork\PHP\Override\Intl::grapheme_substr_workaround62759
 */
function testGrapheme_substr()
{
    $word = "déjà";

    // Each row is array(expected result, start, length).
    // These three rows are only run against the p:: implementations.
    $bugSensitive = array(
        array("jà", -2, 3),
        array("", -1, 0),
        array(false, 1, -4),
    );
    // These rows are run against every implementation.
    $portable = array(
        array("j", -2, -1),
        array("", -2, -2),
        array(false, 5, 0),
        array(false, -5, 0),
    );
    $allCases = array_merge($bugSensitive, $portable);

    // grapheme_substr() as resolved in the current scope.
    $this->assertSame("jà", grapheme_substr($word, 2));
    $this->assertSame("jà", grapheme_substr($word, -2));

    // The next 3 tests are disabled due to http://bugs.php.net/62759 and 55562
    //$this->assertSame( "jà", grapheme_substr($c, -2,  3) );
    //$this->assertSame( "", grapheme_substr($c, -1,  0) );
    //$this->assertSame( false, grapheme_substr($c,  1, -4) );

    foreach ($portable as $case) {
        list($expected, $start, $length) = $case;
        $this->assertSame($expected, grapheme_substr($word, $start, $length));
    }

    // p::grapheme_substr()
    $this->assertSame("jà", p::grapheme_substr($word, 2));
    $this->assertSame("jà", p::grapheme_substr($word, -2));

    foreach ($allCases as $case) {
        list($expected, $start, $length) = $case;
        $this->assertSame($expected, p::grapheme_substr($word, $start, $length));
    }

    // p::grapheme_substr_workaround62759(), always called with an explicit length.
    $this->assertSame("jà", p::grapheme_substr_workaround62759($word, 2, 2147483647));
    $this->assertSame("jà", p::grapheme_substr_workaround62759($word, -2, 2147483647));

    foreach ($allCases as $case) {
        list($expected, $start, $length) = $case;
        $this->assertSame($expected, p::grapheme_substr_workaround62759($word, $start, $length));
    }
}
/**
 * Reliable Unicode substring.
 *
 * Delegates to grapheme_substr(). The length argument is only forwarded when
 * the caller supplied one, so a null length is never handed to the extension.
 *
 * @param string   $input  Text to cut.
 * @param int      $start  Start position, passed through to grapheme_substr().
 * @param int|null $length Length to extract, or null to omit the length argument.
 * @return string Whatever grapheme_substr() returns for these arguments.
 */
public static function substr($input, $start = 0, $length = null)
{
    return null === $length
        ? grapheme_substr($input, $start)
        : grapheme_substr($input, $start, $length);
}
/**
 * Returns the portion of a string selected by an offset and an optional length.
 *
 * @param string   $str    Text to cut.
 * @param int      $offset Start position, passed through to grapheme_substr().
 * @param int|null $length Length to extract, or null to omit the length argument.
 * @return string|false Result of grapheme_substr().
 */
public function substr($str, $offset = 0, $length = null)
{
    // Since the fix for PHP bug #62759, a third argument of 0 or null makes
    // grapheme_substr() return an empty string, so the argument is left out
    // entirely when no length was requested.
    if ($length === null) {
        return grapheme_substr($str, $offset);
    }

    return grapheme_substr($str, $offset, $length);
}
/**
 * Builds a short, tag-free excerpt from one of this object's properties.
 *
 * The property value is passed through strip_tags(). If the result is longer
 * than $length graphemes it is cut to $length graphemes and '...' is appended;
 * otherwise it is returned unchanged.
 *
 * @param string $field  Name of the property to read from $this.
 * @param int    $length Maximum number of graphemes kept before the ellipsis.
 * @return string
 */
public function excerpt($field, $length = 200)
{
    $plain = strip_tags($this->{$field});

    return grapheme_strlen($plain) > $length
        ? grapheme_substr($plain, 0, $length) . '...'
        : $plain;
}
/**
 * UTF-8 aware substring that uses the best string extension available.
 *
 * Order of preference: intl's grapheme_substr(), then mbstring's mb_substr(),
 * and finally the byte-based substr() when neither extension is loaded.
 *
 * @param string   $string Text to cut.
 * @param int      $start  Start position.
 * @param int|null $length Number of units to extract, or null for "to the end".
 * @return string|false The extracted part, as returned by the underlying function.
 */
public static function utf8Substr($string, $start, $length = null)
{
    if (function_exists('grapheme_substr')) {
        // A null length must not reach grapheme_substr(): before PHP 8 it is
        // coerced to 0, which yields an empty string.
        return $length === null
            ? grapheme_substr($string, $start)
            : grapheme_substr($string, $start, $length);
    }

    if (function_exists('mb_substr')) {
        // Old mbstring versions also treat a null length as 0, so spell out
        // "to the end" with the full character count.
        if ($length === null) {
            $length = mb_strlen($string, 'UTF-8');
        }

        return mb_substr($string, $start, $length, 'UTF-8');
    }

    // Last resort: byte-oriented and not safe for multibyte text.
    return $length === null
        ? substr($string, $start)
        : substr($string, $start, $length);
}
/**
 * Shortens a text to at most $aLength graphemes, backs up to a word boundary
 * and appends a suffix.
 *
 * Text that is empty or not longer than $aLength graphemes is returned
 * untouched. Otherwise the text is trimmed and cut to $aLength graphemes. When
 * $aLength is positive, everything from the last whitespace that directly
 * follows a non-whitespace character is dropped, unless a '.' occurs after that
 * point, in which case the cut text is kept whole. A '.' standing right before
 * that whitespace is dropped too, so it does not run into the suffix. Finally
 * $aSuffix is appended.
 *
 * The boundary search runs on the text in reading order with a UTF-8 regular
 * expression, so multibyte characters stay intact.
 *
 * @param string $aText   Text to summarize.
 * @param int    $aLength Maximum number of graphemes to keep before the suffix.
 * @param string $aSuffix Marker appended to shortened text.
 * @return string|false The summarized text (false only if grapheme_substr()
 *                      fails for a non-positive $aLength).
 */
public static function summarize($aText, $aLength, $aSuffix = '...')
{
    $text = $aText;

    if (grapheme_strlen($aText) > 0 && grapheme_strlen($text) > $aLength) {
        $text = grapheme_substr(trim($text), 0, $aLength);

        if ($aLength > 0) {
            // The greedy prefix makes the alternation match at its right-most
            // possible position. Group 2 is only set by the "non-space followed
            // by space" branch; a bare '.' match leaves the text unchanged.
            if (preg_match('/^(.*)(?:(\S)\s|\.)/su', $text, $parts) && isset($parts[2])) {
                // Keep the character before the whitespace, except a full stop.
                $text = $parts[1] . ($parts[2] === '.' ? '' : $parts[2]);
            }

            $text .= $aSuffix;
        }
    }

    return $text;
}
<?php
// Dump the slice of a multibyte sample that starts at grapheme offset 10 and
// stops two graphemes before the end of the string.
$sample = 'Iñtërnâtiônàlizætiøn';
$slice = grapheme_substr($sample, 10, -2);
var_dump($slice);
/**
 * Counts the decimal digits contained in $number.
 *
 * The currency symbol is removed first. Everything after the last occurrence
 * of the separator symbol is then matched against the pattern built from the
 * REGEX_NUMBERS and REGEX_FLAGS regex components, and the number of matches is
 * returned.
 *
 * @param string $number          Formatted number to inspect.
 * @param string $separatorSymbol Monetary/decimal separator symbol.
 * @param string $currencySymbol  Currency symbol to ignore.
 * @return int 0 when the separator does not occur in the number.
 */
protected function countDecimalDigits($number, $separatorSymbol, $currencySymbol)
{
    // Strip the currency symbol (if any) before looking for the separator.
    $bare = str_replace($currencySymbol, '', $number);

    // Position of the last separator, counted in graphemes.
    $separatorAt = grapheme_strrpos($bare, $separatorSymbol);
    if (false === $separatorAt) {
        return 0;
    }

    $fraction = grapheme_substr($bare, $separatorAt + 1);
    $pattern = sprintf(
        '#%s#%s',
        $this->getRegexComponent(self::REGEX_NUMBERS),
        $this->getRegexComponent(self::REGEX_FLAGS)
    );

    return preg_match_all($pattern, $fraction);
}
/**
 * Multibyte-enabled counterpart of `substr()`, backed by grapheme_substr().
 *
 * All three arguments are forwarded unchanged.
 *
 * @link http://php.net/manual/en/function.grapheme-substr.php
 * @param string $string Text to cut.
 * @param integer $start Start position.
 * @param integer $length Length to extract.
 * @return string|boolean Result of grapheme_substr().
 */
public function substr($string, $start, $length)
{
    $slice = grapheme_substr($string, $start, $length);

    return $slice;
}
/**
 * Grapheme-aware counterpart of PHP's str_pad().
 *
 * Lengths are counted with grapheme_strlen(), and a partial repetition of the
 * padding is cut with grapheme_substr(), so multibyte characters are never
 * split.
 *
 * @param string $s    Text to pad.
 * @param int    $len  Target length in graphemes; $s is returned unchanged
 *                     when it is already at least this long.
 * @param string $pad  Padding text, repeated and truncated as needed.
 * @param int    $type STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH. With
 *                     STR_PAD_BOTH an odd remainder goes to the right side.
 * @return string|null The padded text, or null after an E_USER_WARNING when
 *                     the padding is empty or $type is not a known constant.
 */
static function str_pad($s, $len, $pad = ' ', $type = STR_PAD_RIGHT)
{
    $slen = grapheme_strlen($s);
    if ($len <= $slen) {
        return $s;
    }

    $padlen = grapheme_strlen($pad);
    if (!$padlen) {
        // Also reached when grapheme_strlen() cannot measure $pad; without
        // this guard the modulo below would divide by zero.
        user_error(__METHOD__ . '(): Padding string cannot be empty', E_USER_WARNING);

        return null;
    }

    // Builds $count graphemes of padding: whole repetitions of $pad followed
    // by a partial one. The explicit int cast keeps str_repeat() from
    // receiving a fractional float.
    $fill = function ($count) use ($pad, $padlen) {
        $rest = $count % $padlen;

        return str_repeat($pad, (int) ($count / $padlen)) . ($rest ? grapheme_substr($pad, 0, $rest) : '');
    };

    $freelen = (int) ($len - $slen);

    if (STR_PAD_RIGHT == $type) {
        return $s . $fill($freelen);
    }

    if (STR_PAD_LEFT == $type) {
        return $fill($freelen) . $s;
    }

    if (STR_PAD_BOTH == $type) {
        // Left side gets the rounded-down half, right side gets the rest.
        $left = (int) floor($freelen / 2);

        return $fill($left) . $s . $fill($freelen - $left);
    }

    user_error(__METHOD__ . '(): Padding type has to be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH', E_USER_WARNING);

    return null;
}
function ut_main() { $res_str = ''; $char_a_diaeresis = "ä"; // 'LATIN SMALL LETTER A WITH DIAERESIS' (U+00E4) $char_a_ring = "å"; // 'LATIN SMALL LETTER A WITH RING ABOVE' (U+00E5) $char_o_diaeresis = "ö"; // 'LATIN SMALL LETTER O WITH DIAERESIS' (U+00F6) $char_O_diaeresis = "Ö"; // 'LATIN CAPITAL LETTER O WITH DIAERESIS' (U+00D6) $char_angstrom_sign = "Å"; // 'ANGSTROM SIGN' (U+212B) $char_A_ring = "Å"; // 'LATIN CAPITAL LETTER A WITH RING ABOVE' (U+00C5) $char_ohm_sign = "Ω"; // 'OHM SIGN' (U+2126) $char_omega = "Ω"; // 'GREEK CAPITAL LETTER OMEGA' (U+03A9) $char_combining_ring_above = "̊"; // 'COMBINING RING ABOVE' (U+030A) $char_fi_ligature = "fi"; // 'LATIN SMALL LIGATURE FI' (U+FB01) $char_long_s_dot = "ẛ"; // 'LATIN SMALL LETTER LONG S WITH DOT ABOVE' (U+1E9B) // the word 'hindi' using Devanagari characters: $hindi = "हिन्दी"; $char_a_ring_nfd = "å"; $char_A_ring_nfd = "Å"; $char_o_diaeresis_nfd = "ö"; $char_O_diaeresis_nfd = "Ö"; $char_diaeresis = "̈"; //===================================================================================== $res_str .= "\n" . 'function grapheme_strlen($string) {}' . "\n\n"; $res_str .= "\"hindi\" in devanagari strlen " . grapheme_strlen($hindi) . "\n"; $res_str .= "\"ab\" + \"hindi\" + \"cde\" strlen " . grapheme_strlen('ab' . $hindi . 'cde') . "\n"; $res_str .= "\"\" strlen " . grapheme_strlen("") . "\n"; $res_str .= "char_a_ring_nfd strlen " . grapheme_strlen($char_a_ring_nfd) . "\n"; $res_str .= "char_a_ring_nfd + \"bc\" strlen " . grapheme_strlen($char_a_ring_nfd . 'bc') . "\n"; $res_str .= "\"abc\" strlen " . grapheme_strlen('abc') . "\n"; //===================================================================================== $res_str .= "\n" . 'function grapheme_strpos($haystack, $needle, $offset = 0) {}' . "\n\n"; $tests = array(array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "o", "o", 5), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, "o", "false"), array("a" . $char_a_ring_nfd . "bc" . 
$char_o_diaeresis_nfd, $char_o_diaeresis_nfd, 4), array($char_o_diaeresis_nfd . "a" . $char_a_ring_nfd . "bc", $char_a_ring_nfd, 2), array("a" . $char_a_ring_nfd . "bc", $char_a_ring_nfd, 1), array("abc", $char_a_ring_nfd, "false"), array($char_a_ring_nfd . "bc", "a", "false"), array("abc", "d", "false"), array("abc", "c", 2), array("abc", "b", 1), array("abc", "a", 0), array("abc", "a", 0, 0), array("abc", "a", 1, "false"), array("ababc", "a", 1, 2), array("ao" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "o", "o", 2, 6), array($char_o_diaeresis_nfd . $char_a_ring_nfd . "a" . $char_a_ring_nfd . "bc", $char_a_ring_nfd, 2, 3), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "opq", "op", 5), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "opq", "opq", 5), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, "abc", "false"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "bc" . $char_o_diaeresis_nfd, $char_o_diaeresis_nfd . "bc" . $char_o_diaeresis_nfd, 4), array($char_o_diaeresis_nfd . "a" . $char_a_ring_nfd . "bc", $char_a_ring_nfd . "bc", 2), array("a" . $char_a_ring_nfd . "bc", $char_a_ring_nfd . "bc", 1), array("abc", $char_a_ring_nfd . "bc", "false"), array($char_a_ring_nfd . "bc", "abcdefg", "false"), array("abc", "defghijklmnopq", "false"), array("abc", "ab", 0), array("abc", "bc", 1), array("abc", "abc", 0), array("abc", "abcd", "false"), array("abc", "ab", 0, 0), array("abc", "abc", 0, 0), array("abc", "abc", 1, "false"), array("ababc", "ab", 1, 2), array("ababc", "abc", 1, 2), array("ao" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "o" . $char_a_ring_nfd . "bc", "o" . $char_a_ring_nfd . "bc", 2, 6), array($char_o_diaeresis_nfd . $char_a_ring_nfd . "a" . $char_a_ring_nfd . "bc" . $char_a_ring_nfd . "def", $char_a_ring_nfd . "bc" . 
$char_a_ring_nfd, 2, 3)); foreach ($tests as $test) { $arg1 = urlencode($test[1]); $arg0 = urlencode($test[0]); $res_str .= "find \"{$arg1}\" in \"{$arg0}\" - grapheme_strpos"; if (3 == count($test)) { $result = grapheme_strpos($test[0], $test[1]); } else { $res_str .= " from {$test['2']}"; $result = grapheme_strpos($test[0], $test[1], $test[2]); } $res_str .= " = "; if ($result === false) { $res_str .= 'false'; } else { $res_str .= $result; } $res_str .= " == " . $test[count($test) - 1] . check_result($result, $test[count($test) - 1]) . "\n"; } //===================================================================================== $res_str .= "\n" . 'function grapheme_stripos($haystack, $needle, $offset = 0) {}' . "\n\n"; $tests = array(array("ao" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O", "o", 2, 6), array($char_o_diaeresis_nfd . $char_a_ring_nfd . "a" . $char_A_ring_nfd . "bc", $char_a_ring_nfd, 2, 3), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O", "o", 5), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, "O", "false"), array("a" . $char_a_ring_nfd . "bc" . $char_O_diaeresis_nfd, $char_o_diaeresis_nfd, 4), array($char_o_diaeresis_nfd . "a" . $char_a_ring_nfd . "bc", $char_A_ring_nfd, 2), array("a" . $char_A_ring_nfd . "bc", $char_a_ring_nfd, 1), array("Abc", $char_a_ring_nfd, "false"), array($char_a_ring_nfd . "bc", "A", "false"), array("abc", "D", "false"), array("abC", "c", 2), array("abc", "B", 1), array("Abc", "a", 0), array("abc", "A", 0, 0), array("Abc", "a", 1, "false"), array("ababc", "A", 1, 2), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", "oP", 5), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", "opQ", 5), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, "abc", "false"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "bC" . $char_o_diaeresis_nfd, $char_O_diaeresis_nfd . "bc" . $char_o_diaeresis_nfd, 4), array($char_o_diaeresis_nfd . 
"a" . $char_a_ring_nfd . "Bc", $char_A_ring_nfd . "bc", 2), array("a" . $char_a_ring_nfd . "BC", $char_a_ring_nfd . "bc", 1), array("abc", $char_a_ring_nfd . "BC", "false"), array($char_a_ring_nfd . "BC", "aBCdefg", "false"), array("aBC", "Defghijklmnopq", "false"), array("abC", "Ab", 0), array("aBC", "bc", 1), array("abC", "Abc", 0), array("abC", "aBcd", "false"), array("ABc", "ab", 0, 0), array("aBc", "abC", 0, 0), array("abc", "aBc", 1, "false"), array("ABabc", "AB", 1, 2), array("abaBc", "aBc", 1, 2), array("ao" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "o" . $char_A_ring_nfd . "bC", "O" . $char_a_ring_nfd . "bC", 2, 6), array($char_o_diaeresis_nfd . $char_a_ring_nfd . "a" . $char_A_ring_nfd . "bC" . $char_a_ring_nfd . "def", $char_a_ring_nfd . "Bc" . $char_a_ring_nfd, 2, 3)); foreach ($tests as $test) { $arg1 = urlencode($test[1]); $arg0 = urlencode($test[0]); $res_str .= "find \"{$arg1}\" in \"{$arg0}\" - grapheme_stripos"; if (3 == count($test)) { $result = grapheme_stripos($test[0], $test[1]); } else { $res_str .= " from {$test['2']}"; $result = grapheme_stripos($test[0], $test[1], $test[2]); } $res_str .= " = "; if ($result === false) { $res_str .= 'false'; } else { $res_str .= $result; } $res_str .= " == " . $test[count($test) - 1] . check_result($result, $test[count($test) - 1]) . "\n"; } //===================================================================================== $res_str .= "\n" . 'function grapheme_strrpos($haystack, $needle, $offset = 0) {}' . "\n\n"; $tests = array(array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "o", "o", 5), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, "o", "false"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, $char_o_diaeresis_nfd, 4), array($char_o_diaeresis_nfd . "a" . $char_a_ring_nfd . "bc", $char_a_ring_nfd, 2), array("a" . $char_a_ring_nfd . "bc", $char_a_ring_nfd, 1), array("abc", $char_a_ring_nfd, "false"), array($char_a_ring_nfd . 
"bc", "a", "false"), array("abc", "d", "false"), array("abc", "c", 2), array("abc", "b", 1), array("abc", "a", 0), array("abc", "a", 0, 0), array("abc", "a", 1, "false"), array("ababc", "a", 1, 2), array("ao" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "o", "o", 2, 6), array($char_o_diaeresis_nfd . $char_a_ring_nfd . "a" . $char_a_ring_nfd . "bc", $char_a_ring_nfd, 2, 3), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "opq", "op", 5), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "opq", "opq", 5), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, "abc", "false"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "bc" . $char_o_diaeresis_nfd, $char_o_diaeresis_nfd . "bc" . $char_o_diaeresis_nfd, 4), array($char_o_diaeresis_nfd . "a" . $char_a_ring_nfd . "bc", $char_a_ring_nfd . "bc", 2), array("a" . $char_a_ring_nfd . "bc", $char_a_ring_nfd . "bc", 1), array("abc", $char_a_ring_nfd . "bc", "false"), array($char_a_ring_nfd . "bc", "abcdefg", "false"), array("abc", "defghijklmnopq", "false"), array("abc", "ab", 0), array("abc", "bc", 1), array("abc", "abc", 0), array("abc", "abcd", "false"), array("abc", "ab", 0, 0), array("abc", "abc", 0, 0), array("abc", "abc", 1, "false"), array("ababc", "ab", 1, 2), array("ababc", "abc", 1, 2), array("ao" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "o" . $char_a_ring_nfd . "bc", "o" . $char_a_ring_nfd . "bc", 2, 6), array($char_o_diaeresis_nfd . $char_a_ring_nfd . "a" . $char_a_ring_nfd . "bc" . $char_a_ring_nfd . "def", $char_a_ring_nfd . "bc" . 
$char_a_ring_nfd, 2, 3)); foreach ($tests as $test) { $arg1 = urlencode($test[1]); $arg0 = urlencode($test[0]); $res_str .= "find \"{$arg1}\" in \"{$arg0}\" - grapheme_strrpos"; if (3 == count($test)) { $result = grapheme_strrpos($test[0], $test[1]); } else { $res_str .= " from {$test['2']}"; $result = grapheme_strrpos($test[0], $test[1], $test[2]); } $res_str .= " = "; if ($result === false) { $res_str .= 'false'; } else { $res_str .= $result; } $res_str .= " == " . $test[count($test) - 1] . check_result($result, $test[count($test) - 1]) . "\n"; } //===================================================================================== $res_str .= "\n" . 'function grapheme_strripos($haystack, $needle, $offset = 0) {}' . "\n\n"; $tests = array(array("ao" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O", "o", 2, 6), array($char_o_diaeresis_nfd . $char_a_ring_nfd . "a" . $char_A_ring_nfd . "bc", $char_a_ring_nfd, 2, 3), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O", "o", 5), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, "O", "false"), array("a" . $char_a_ring_nfd . "bc" . $char_O_diaeresis_nfd, $char_o_diaeresis_nfd, 4), array($char_o_diaeresis_nfd . "a" . $char_a_ring_nfd . "bc", $char_A_ring_nfd, 2), array("a" . $char_A_ring_nfd . "bc", $char_a_ring_nfd, 1), array("Abc", $char_a_ring_nfd, "false"), array($char_a_ring_nfd . "bc", "A", "false"), array("abc", "D", "false"), array("abC", "c", 2), array("abc", "B", 1), array("Abc", "a", 0), array("abc", "A", 0, 0), array("Abc", "a", 1, "false"), array("ababc", "A", 1, 2), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", "oP", 5), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", "opQ", 5), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, "abc", "false"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "bC" . $char_o_diaeresis_nfd, $char_O_diaeresis_nfd . "bc" . $char_o_diaeresis_nfd, 4), array($char_o_diaeresis_nfd . 
"a" . $char_a_ring_nfd . "Bc", $char_A_ring_nfd . "bc", 2), array("a" . $char_a_ring_nfd . "BC", $char_a_ring_nfd . "bc", 1), array("abc", $char_a_ring_nfd . "BC", "false"), array($char_a_ring_nfd . "BC", "aBCdefg", "false"), array("aBC", "Defghijklmnopq", "false"), array("abC", "Ab", 0), array("aBC", "bc", 1), array("abC", "Abc", 0), array("abC", "aBcd", "false"), array("ABc", "ab", 0, 0), array("aBc", "abC", 0, 0), array("abc", "aBc", 1, "false"), array("ABabc", "AB", 1, 2), array("abaBc", "aBc", 1, 2), array("ao" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "o" . $char_A_ring_nfd . "bC", "O" . $char_a_ring_nfd . "bC", 2, 6), array($char_o_diaeresis_nfd . $char_a_ring_nfd . "a" . $char_A_ring_nfd . "bC" . $char_a_ring_nfd . "def", $char_a_ring_nfd . "Bc" . $char_a_ring_nfd, 2, 3)); foreach ($tests as $test) { $arg1 = urlencode($test[1]); $arg0 = urlencode($test[0]); $res_str .= "find \"{$arg1}\" in \"{$arg0}\" - grapheme_strripos"; if (3 == count($test)) { $result = grapheme_strripos($test[0], $test[1]); } else { $res_str .= " from {$test['2']}"; $result = grapheme_strripos($test[0], $test[1], $test[2]); } $res_str .= " = "; if ($result === false) { $res_str .= 'false'; } else { $res_str .= $result; } $res_str .= " == " . $test[count($test) - 1] . check_result($result, $test[count($test) - 1]) . "\n"; } //===================================================================================== $res_str .= "\n" . 'function grapheme_substr($string, $start, $length = -1) {}' . "\n\n"; $tests = array(array("abc", 3, "false"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, 5, "false"), array("ao" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O", 2, $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O"), array($char_o_diaeresis_nfd . $char_a_ring_nfd . "a" . $char_A_ring_nfd . "bc", 2, "a" . $char_A_ring_nfd . "bc"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O", 5, "O"), array("a" . $char_a_ring_nfd . "bc" . 
$char_o_diaeresis_nfd, 5, "false"), array("a" . $char_a_ring_nfd . "bc" . $char_O_diaeresis_nfd, 4, $char_O_diaeresis_nfd), array($char_o_diaeresis_nfd . "a" . $char_a_ring_nfd . "bc", 2, $char_a_ring_nfd . "bc"), array("a" . $char_A_ring_nfd . "bc", 1, $char_A_ring_nfd . "bc"), array("Abc", -5, "false"), array($char_a_ring_nfd . "bc", 3, "false"), array("abc", 4, "false"), array("abC", 2, "C"), array("abc", 1, "bc"), array("Abc", 1, 1, "b"), array("abc", 0, 2, "ab"), array("Abc", -4, 1, "false"), array("ababc", 1, 2, "ba"), array("ababc", 0, 10, "ababc"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, 10, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, "Opq"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -1, "Op"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -2, "O"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -3, ""), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -4, "false"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -1, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Op"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -2, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -3, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -4, "a" . $char_a_ring_nfd . "bc"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -5, "a" . $char_a_ring_nfd . "b"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -6, "a" . $char_a_ring_nfd), array("a" . 
$char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -7, "a"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -8, ""), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -9, "false"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -7, $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -6, "bc" . $char_o_diaeresis_nfd . "Opq"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -5, "c" . $char_o_diaeresis_nfd . "Opq"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -4, $char_o_diaeresis_nfd . "Opq"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -3, "Opq"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -2, "pq"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -1, "q"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -999, "false"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 8, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 7, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Op"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 6, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 5, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 4, "a" . $char_a_ring_nfd . "bc"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 3, "a" . $char_a_ring_nfd . "b"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . 
"Opq", -8, 2, "a" . $char_a_ring_nfd), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 1, "a"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 0, ""), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -999, "false"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -1, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Op"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -2, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -3, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -4, "a" . $char_a_ring_nfd . "bc"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -5, "a" . $char_a_ring_nfd . "b"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -6, "a" . $char_a_ring_nfd), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -7, "a"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -8, ""), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -9, "false")); foreach ($tests as $test) { $arg0 = urlencode($test[0]); $res_str .= "substring of \"{$arg0}\" from \"{$test['1']}\" - grapheme_substr"; if (3 == count($test)) { $result = grapheme_substr($test[0], $test[1]); } else { $res_str .= " with length {$test['2']}"; $result = grapheme_substr($test[0], $test[1], $test[2]); } $res_str .= " = "; if ($result === false) { $res_str .= 'false'; } else { $res_str .= urlencode($result); } $res_str .= " == " . urlencode($test[count($test) - 1]) . check_result($result, $test[count($test) - 1]) . "\n"; } //===================================================================================== $res_str .= "\n" . 'function grapheme_strstr($haystack, $needle, $before_needle = FALSE) {}' . 
"\n\n"; $tests = array(array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "o", "o", "o"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, "o", "false"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, $char_o_diaeresis_nfd, $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . "a" . $char_a_ring_nfd . "bc", $char_a_ring_nfd, $char_a_ring_nfd . "bc"), array("a" . $char_a_ring_nfd . "bc", $char_a_ring_nfd, $char_a_ring_nfd . "bc"), array("abc", $char_a_ring_nfd, "false"), array($char_a_ring_nfd . "bc", "a", "false"), array("abc", "d", "false"), array("abc", "c", "c"), array("abc", "b", "bc"), array("abc", "a", "abc"), array("abc", "ab", "abc"), array("abc", "abc", "abc"), array("abc", "bc", "bc"), array("abc", "a", FALSE, "abc"), array("abc", "a", TRUE, ""), array("abc", "b", TRUE, "a"), array("abc", "c", TRUE, "ab"), array("ababc", "bab", TRUE, "a"), array("ababc", "abc", TRUE, "ab"), array("ababc", "abc", FALSE, "abc"), array("ab" . $char_a_ring_nfd . "c", "d", "false"), array("bc" . $char_a_ring_nfd . "a", "a", "a"), array("a" . $char_a_ring_nfd . "bc", "b", "bc"), array($char_a_ring_nfd . "bc", "a", "false"), array($char_a_ring_nfd . "abc", "ab", "abc"), array("abc" . $char_a_ring_nfd, "abc", "abc" . $char_a_ring_nfd), array("a" . $char_a_ring_nfd . "bc", $char_a_ring_nfd . "bc", $char_a_ring_nfd . "bc"), array("a" . $char_a_ring_nfd . "bc", $char_a_ring_nfd, FALSE, $char_a_ring_nfd . "bc"), array("a" . $char_a_ring_nfd . "bc", "a", TRUE, ""), array($char_a_ring_nfd . "abc", "b", TRUE, $char_a_ring_nfd . "a"), array("ab" . $char_a_ring_nfd . "c", "c", TRUE, "ab" . $char_a_ring_nfd), array("aba" . $char_a_ring_nfd . "bc", "ba" . $char_a_ring_nfd . "b", TRUE, "a"), array("ababc" . $char_a_ring_nfd, "abc" . $char_a_ring_nfd, TRUE, "ab"), array("abab" . $char_a_ring_nfd . "c", "ab" . $char_a_ring_nfd . "c", FALSE, "ab" . $char_a_ring_nfd . 
"c")); foreach ($tests as $test) { $arg1 = urlencode($test[1]); $arg0 = urlencode($test[0]); $res_str .= "find \"{$arg1}\" in \"{$arg0}\" - grapheme_strstr"; if (3 == count($test)) { $result = grapheme_strstr($test[0], $test[1]); } else { $res_str .= " before flag is " . ($test[2] ? "TRUE" : "FALSE"); $result = grapheme_strstr($test[0], $test[1], $test[2]); } $res_str .= " = "; if ($result === false) { $res_str .= 'false'; } else { $res_str .= urlencode($result); } $res_str .= " == " . urlencode($test[count($test) - 1]) . check_result($result, $test[count($test) - 1]) . "\n"; } //===================================================================================== $res_str .= "\n" . 'function grapheme_stristr($haystack, $needle, $before_needle = FALSE) {}' . "\n\n"; $tests = array(array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, $char_O_diaeresis_nfd, $char_o_diaeresis_nfd), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O", "o", "O"), array("a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, "o", "false"), array($char_o_diaeresis_nfd . "a" . $char_a_ring_nfd . "bc", $char_a_ring_nfd, $char_a_ring_nfd . "bc"), array("a" . $char_a_ring_nfd . "bc", $char_A_ring_nfd, $char_a_ring_nfd . "bc"), array("abc", $char_a_ring_nfd, "false"), array($char_a_ring_nfd . "bc", "A", "false"), array("abc", "d", "false"), array("abc", "C", "c"), array("aBc", "b", "Bc"), array("abc", "A", "abc"), array("abC", "ab", "abC"), array("abc", "aBc", "abc"), array("abC", "bc", "bC"), array("abc", "A", FALSE, "abc"), array("abc", "a", TRUE, ""), array("aBc", "b", TRUE, "a"), array("abc", "C", TRUE, "ab"), array("aBabc", "bab", TRUE, "a"), array("ababc", "aBc", TRUE, "ab"), array("ababc", "abC", FALSE, "abc"), array("ab" . $char_a_ring_nfd . "c", "d", "false"), array("bc" . $char_a_ring_nfd . "A", "a", "A"), array("a" . $char_a_ring_nfd . "bc", "B", "bc"), array($char_A_ring_nfd . "bc", "a", "false"), array($char_a_ring_nfd . "abc", "Ab", "abc"), array("abc" . 
$char_A_ring_nfd, "abc", "abc" . $char_A_ring_nfd), array("a" . $char_a_ring_nfd . "bc", $char_A_ring_nfd . "bc", $char_a_ring_nfd . "bc"), array("a" . $char_A_ring_nfd . "bc", $char_a_ring_nfd, FALSE, $char_A_ring_nfd . "bc"), array("a" . $char_a_ring_nfd . "bc", "A", TRUE, ""), array($char_a_ring_nfd . "aBc", "b", TRUE, $char_a_ring_nfd . "a"), array("ab" . $char_a_ring_nfd . "c", "C", TRUE, "ab" . $char_a_ring_nfd), array("aba" . $char_A_ring_nfd . "bc", "ba" . $char_a_ring_nfd . "b", TRUE, "a"), array("ababc" . $char_a_ring_nfd, "aBc" . $char_A_ring_nfd, TRUE, "ab"), array("abAB" . $char_A_ring_nfd . "c", "ab" . $char_a_ring_nfd . "c", FALSE, "AB" . $char_A_ring_nfd . "c")); foreach ($tests as $test) { $arg1 = urlencode($test[1]); $arg0 = urlencode($test[0]); $res_str .= "find \"{$arg1}\" in \"{$arg0}\" - grapheme_stristr"; if (3 == count($test)) { $result = grapheme_stristr($test[0], $test[1]); } else { $res_str .= " before flag is " . ($test[2] ? "TRUE" : "FALSE"); $result = grapheme_stristr($test[0], $test[1], $test[2]); } $res_str .= " = "; if ($result === false) { $res_str .= 'false'; } else { $res_str .= urlencode($result); } $res_str .= " == " . urlencode($test[count($test) - 1]) . check_result($result, $test[count($test) - 1]) . "\n"; } //===================================================================================== $res_str .= "\n" . 'function grapheme_extract($haystack, $size, $extract_type = GRAPHEME_EXTR_COUNT, $start = 0[, $next])' . 
"\n\n"; $tests = array(array("abc", 3, "abc"), array("abc", 2, "ab"), array("abc", 1, "a"), array("abc", 0, ""), array("abc", 1, 0, "a"), array("abc", 1, 1, "b"), array("abc", 1, 2, "c"), array("abc", 0, 2, ""), array("abc", 3, 0, 3, "abc"), array("abc", 2, 0, 2, "ab"), array("abc", 1, 0, 1, "a"), array("abc", 0, 0, 0, ""), array("abc", 1, 0, 1, "a"), array("abc", 1, 1, 2, "b"), array("abc", 1, 2, 3, "c"), array("abc", 0, 2, 2, ""), array("http://news.bbc.co.uk/2/hi/middle_east/7831588.stm", 48, 48, 50, "tm"), array($char_a_ring_nfd . "bc", 3, $char_a_ring_nfd . "bc"), array($char_a_ring_nfd . "bc", 2, $char_a_ring_nfd . "b"), array($char_a_ring_nfd . "bc", 1, $char_a_ring_nfd . ""), array($char_a_ring_nfd . "bc", 3, 0, 5, $char_a_ring_nfd . "bc"), array($char_a_ring_nfd . "bc", 2, 0, 4, $char_a_ring_nfd . "b"), array($char_a_ring_nfd . "bc", 1, 0, 3, $char_a_ring_nfd . ""), array($char_a_ring_nfd . "bcde", 2, 3, 5, "bc"), array($char_a_ring_nfd . "bcde", 2, 4, 6, "cd"), array($char_a_ring_nfd . "bcde" . $char_a_ring_nfd . "f", 4, 5, 11, "de" . $char_a_ring_nfd . "f"), array($char_a_ring_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 3, $char_a_ring_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd), array($char_a_ring_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 2, $char_a_ring_nfd . $char_o_diaeresis_nfd), array($char_a_ring_nfd . $char_o_diaeresis_nfd . "c", 1, $char_a_ring_nfd . ""), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 1, 0, $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 1, 2, $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 1, 3, $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 1, 4, $char_diaeresis), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . 
$char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 2, 0, $char_o_diaeresis_nfd . $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 2, 2, $char_o_diaeresis_nfd . $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 2, 3, $char_o_diaeresis_nfd . $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 2, 4, $char_diaeresis . $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 2, 7, $char_diaeresis . $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 2, 8, $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 2, 10, $char_diaeresis), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 2, 11, "false")); $next = -1; foreach ($tests as $test) { $arg0 = urlencode($test[0]); $res_str .= "extract from \"{$arg0}\" \"{$test['1']}\" graphemes - grapheme_extract"; if (3 == count($test)) { $result = grapheme_extract($test[0], $test[1]); } elseif (4 == count($test)) { $res_str .= " starting at byte position {$test['2']}"; $result = grapheme_extract($test[0], $test[1], GRAPHEME_EXTR_COUNT, $test[2]); } else { $res_str .= " starting at byte position {$test['2']} with \$next"; $result = grapheme_extract($test[0], $test[1], GRAPHEME_EXTR_COUNT, $test[2], $next); } $res_str .= " = "; if ($result === false) { $res_str .= 'false'; } else { $res_str .= urlencode($result); } $res_str .= " == " . urlencode($test[count($test) - 1]) . 
check_result($result, $test[count($test) - 1]); if (5 == count($test)) { $res_str .= " \$next={$next} == {$test['3']} "; if ($next != $test[3]) { $res_str .= "***FAILED***"; } } $res_str .= "\n"; } //===================================================================================== $res_str .= "\n" . 'function grapheme_extract($haystack, $size, $extract_type = GRAPHEME_EXTR_MAXBYTES, $start = 0)' . "\n\n"; $tests = array(array("abc", 3, "abc"), array("abc", 2, "ab"), array("abc", 1, "a"), array("abc", 0, ""), array($char_a_ring_nfd . "bc", 5, $char_a_ring_nfd . "bc"), array($char_a_ring_nfd . "bc", 4, $char_a_ring_nfd . "b"), array($char_a_ring_nfd . "bc", 1, ""), array($char_a_ring_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 9, $char_a_ring_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd), array($char_a_ring_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 10, $char_a_ring_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd), array($char_a_ring_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 11, $char_a_ring_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd), array($char_a_ring_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 6, $char_a_ring_nfd . $char_o_diaeresis_nfd), array($char_a_ring_nfd . $char_o_diaeresis_nfd . "c", 3, $char_a_ring_nfd . ""), array($char_a_ring_nfd . $char_o_diaeresis_nfd . "c", 4, $char_a_ring_nfd . ""), array($char_a_ring_nfd . $char_o_diaeresis_nfd . "c", 5, $char_a_ring_nfd . ""), array($char_a_ring_nfd . $char_o_diaeresis_nfd . "c", 6, $char_a_ring_nfd . $char_o_diaeresis_nfd), array($char_a_ring_nfd . $char_o_diaeresis_nfd . "c", 7, $char_a_ring_nfd . $char_o_diaeresis_nfd . "c"), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 3, 0, $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 3, 2, $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . 
$char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 3, 3, $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 3, 4, $char_diaeresis), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 6, 0, $char_o_diaeresis_nfd . $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 6, 2, $char_o_diaeresis_nfd . $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 6, 3, $char_o_diaeresis_nfd . $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 5, 4, $char_diaeresis . $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 5, 7, $char_diaeresis . $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 3, 8, $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 2, 10, $char_diaeresis), array($char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 2, 11, "false")); foreach ($tests as $test) { $arg0 = urlencode($test[0]); $res_str .= "extract from \"{$arg0}\" \"{$test['1']}\" graphemes - grapheme_extract GRAPHEME_EXTR_MAXBYTES"; if (3 == count($test)) { $result = grapheme_extract($test[0], $test[1], GRAPHEME_EXTR_MAXBYTES); } else { $res_str .= " starting at byte position {$test['2']}"; $result = grapheme_extract($test[0], $test[1], GRAPHEME_EXTR_MAXBYTES, $test[2]); } $res_str .= " = "; if ($result === false) { $res_str .= 'false'; } else { $res_str .= urlencode($result); } $res_str .= " == " . urlencode($test[count($test) - 1]) . check_result($result, $test[count($test) - 1]) . 
"\n"; } //===================================================================================== $res_str .= "\n" . 'function grapheme_extract($haystack, $size, $extract_type = GRAPHEME_EXTR_MAXCHARS, $start = 0)' . "\n\n"; $tests = array(array("abc", 3, "abc"), array("abc", 2, "ab"), array("abc", 1, "a"), array("abc", 0, ""), array("abc" . $char_o_diaeresis_nfd, 0, ""), array("abc" . $char_o_diaeresis_nfd, 1, "a"), array("abc" . $char_o_diaeresis_nfd, 2, "ab"), array("abc" . $char_o_diaeresis_nfd, 3, "abc"), array("abc" . $char_o_diaeresis_nfd, 4, "abc"), array("abc" . $char_o_diaeresis_nfd, 5, "abc" . $char_o_diaeresis_nfd), array("abc" . $char_o_diaeresis_nfd, 6, "abc" . $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . "abc", 0, ""), array($char_o_diaeresis_nfd . "abc", 1, ""), array($char_o_diaeresis_nfd . "abc", 2, $char_o_diaeresis_nfd), array($char_o_diaeresis_nfd . "abc", 3, $char_o_diaeresis_nfd . "a"), array($char_o_diaeresis_nfd . "abc", 4, $char_o_diaeresis_nfd . "ab"), array($char_o_diaeresis_nfd . "abc" . $char_a_ring_nfd . "xyz", 5, $char_o_diaeresis_nfd . "abc"), array($char_o_diaeresis_nfd . "abc" . $char_a_ring_nfd . "xyz", 6, $char_o_diaeresis_nfd . "abc"), array($char_o_diaeresis_nfd . "abc" . $char_a_ring_nfd . "xyz", 7, $char_o_diaeresis_nfd . "abc" . $char_a_ring_nfd), array($char_o_diaeresis_nfd . "abc" . $char_a_ring_nfd . "xyz", 8, $char_o_diaeresis_nfd . "abc" . $char_a_ring_nfd . "x"), array("abc", 3, 0, "abc"), array("abc", 2, 1, "bc"), array("abc", 1, 2, "c"), array("abc", 0, 3, "false"), array("abc", 1, 3, "false"), array("abc", 1, 999, "false"), array($char_o_diaeresis_nfd . "abc", 1, 6, "false"), array($char_o_diaeresis_nfd . "abc", 1, 999, "false"), array($char_o_diaeresis_nfd . "abc" . $char_a_ring_nfd . "xyz", 8, 0, $char_o_diaeresis_nfd . "abc" . $char_a_ring_nfd . "x"), array($char_o_diaeresis_nfd . "abc" . $char_a_ring_nfd . "xyz", 8, 1, $char_diaeresis . "abc" . $char_a_ring_nfd . "xy"), array($char_o_diaeresis_nfd . 
"abc" . $char_a_ring_nfd . "xyz", 8, 2, "abc" . $char_a_ring_nfd . "xyz"), array($char_o_diaeresis_nfd . "abc" . $char_a_ring_nfd . "xyz", 8, 3, "abc" . $char_a_ring_nfd . "xyz"), array($char_o_diaeresis_nfd . "abc" . $char_a_ring_nfd . "xyz", 8, 4, "bc" . $char_a_ring_nfd . "xyz"), array($char_o_diaeresis_nfd . "abc" . $char_a_ring_nfd . "xyz", 8, 5, "c" . $char_a_ring_nfd . "xyz"), array($char_o_diaeresis_nfd . "abc" . $char_a_ring_nfd . "xyz", 8, 6, $char_a_ring_nfd . "xyz")); foreach ($tests as $test) { $arg0 = urlencode($test[0]); $res_str .= "extract from \"{$arg0}\" \"{$test['1']}\" graphemes - grapheme_extract GRAPHEME_EXTR_MAXCHARS"; if (3 == count($test)) { $result = grapheme_extract($test[0], $test[1], GRAPHEME_EXTR_MAXCHARS); } else { $res_str .= " starting at byte position {$test['2']}"; $result = grapheme_extract($test[0], $test[1], GRAPHEME_EXTR_MAXCHARS, $test[2]); } $res_str .= " = "; if ($result === false) { $res_str .= 'false'; } else { $res_str .= urlencode($result); } $res_str .= " == " . urlencode($test[count($test) - 1]) . check_result($result, $test[count($test) - 1]) . "\n"; } //===================================================================================== return $res_str; }
/**
 * Extracts a portion of a string, measuring position and length in
 * characters of the given charset.
 *
 * The mbstring, iconv and intl extensions are tried in that order; the first
 * one that yields a usable result wins. If at least one extension was
 * available but none produced a result, an empty string is returned. Only
 * when no extension is available at all does the byte-based substr() run.
 *
 * @param string  $string   The string to take the part from.
 * @param integer $start    The part's start position, zero based.
 * @param integer $length   The part's length. When null, it is computed as
 *                          the string's length minus $start.
 * @param string  $charset  The charset used to calculate the part's
 *                          position and length.
 *
 * @return string  The string's part.
 */
public static function substr($string, $start, $length = null, $charset = 'UTF-8')
{
    if ($length === null) {
        $length = self::length($string, $charset) - $start;
    }

    if ($length === 0) {
        return '';
    }

    $failed = false;

    /* First choice: mbstring. An empty result is treated as a failure
     * because mb_substr() reports errors that way. */
    if (Horde_Util::extensionExists('mbstring')) {
        $part = @mb_substr($string, $start, $length, self::_mbstringCharset($charset));
        if (strlen($part) !== 0) {
            return $part;
        }
        $failed = true;
    }

    /* Second choice: iconv, which signals failure with false. */
    if (Horde_Util::extensionExists('iconv')) {
        $part = @iconv_substr($string, $start, $length, $charset);
        if (false !== $part) {
            return $part;
        }
        $failed = true;
    }

    /* Third choice: intl. The input is converted to UTF-8 for
     * grapheme_substr() and the result converted back afterwards. The inner
     * conversion stays inside the @-suppressed expression on purpose. */
    if (Horde_Util::extensionExists('intl')) {
        $utf8Part = @grapheme_substr(self::convertCharset($string, $charset, 'UTF-8'), $start, $length);
        $part = self::convertCharset($utf8Part, 'UTF-8', $charset);
        if (false !== $part) {
            return $part;
        }
        $failed = true;
    }

    if ($failed) {
        return '';
    }

    return substr($string, $start, $length);
}
/**
 * Extracts a substring via grapheme_substr().
 *
 * The assertions require the starting position to lie inside the string (or
 * to equal its length when no length or a length of `0` is given) and the
 * requested range to stay within the string. Whenever grapheme_substr() yields
 * something that is_cstring() rejects, an empty string is returned instead.
 *
 * @param  string $string The string to be looked into.
 * @param  int $startPos The position of the substring's first character.
 * @param  int $length **OPTIONAL.** The length of the substring. When omitted, grapheme_substr() is called
 * without a length argument.
 *
 * @return string The substring.
 */
public static function substr($string, $startPos, $length = null)
{
    // No local variable may be introduced before these assertions: each one
    // captures get_defined_vars().
    assert('is_cstring($string) && is_int($startPos) && (!isset($length) || is_int($length))', vs(isset($this), get_defined_vars()));
    assert('(0 <= $startPos && $startPos < self::length($string)) || ' . '($startPos == self::length($string) && (!isset($length) || $length == 0))', vs(isset($this), get_defined_vars()));
    assert('!isset($length) || ($length >= 0 && $startPos + $length <= self::length($string))', vs(isset($this), get_defined_vars()));

    $piece = isset($length)
        ? grapheme_substr($string, $startPos, $length)
        : grapheme_substr($string, $startPos);

    if (is_cstring($piece)) {
        return $piece;
    }

    return "";
}
/**
 * Returns the portion of a string specified by the offset and length
 * parameters, measured in grapheme units.
 *
 * @param string   $str    The string to extract from.
 * @param int      $offset Start position, passed through to grapheme_substr().
 * @param int|null $length Number of graphemes to extract; when null, no
 *                         length is passed to grapheme_substr().
 * @return string|false The value returned by grapheme_substr().
 */
public function substr($str, $offset = 0, $length = null)
{
    // grapheme_substr() only accepts a null length from PHP 8.0 on. Before
    // that, null is coerced to 0 (or rejected under strict_types), so the
    // argument has to be omitted rather than forwarded.
    if ($length === null) {
        return grapheme_substr($str, $offset);
    }

    return grapheme_substr($str, $offset, $length);
}
/**
 * Generate dataset.
 *
 * Formats:
 * - (positive and negative) currency amounts with their own currency symbol
 * - (positive and negative) currency amounts with ISO currency symbol
 * - (positive and negative) numbers (without currency symbol)
 * - (positive and negative) numbers expressed in scientific notation (without currency symbol)
 *
 * Each row has the shape [locale, bool, bool, expected value, input string].
 * The trailing "Filtered" / "Not filtered" comments mark whether the expected
 * value is the parsed number or the untouched input string.
 *
 * @return array
 */
public function valuesProvider()
{
    $data = [];
    $values = [0, 0.1, 0.01, 1000, 1234.61, 12345678.9];

    // Add the negated counterpart of every value (array_unique drops the duplicate zero).
    $values = array_unique(array_merge($values, array_map(function ($i) {
        return -$i;
    }, $values)));

    foreach ($this->locales as $locale) {
        $formatter = \NumberFormatter::create($locale, \NumberFormatter::CURRENCY);

        // Remember the formatter's original settings; several are overridden and restored below.
        $currencySymbol = $formatter->getSymbol(\NumberFormatter::CURRENCY_SYMBOL);
        $isoSymbol = $formatter->getTextAttribute(\NumberFormatter::CURRENCY_CODE);
        $groupSep = $formatter->getSymbol(\NumberFormatter::MONETARY_GROUPING_SEPARATOR_SYMBOL);
        $numDecimals = $formatter->getAttribute(\NumberFormatter::FRACTION_DIGITS);
        $posPre = $formatter->getTextAttribute(\NumberFormatter::POSITIVE_PREFIX);
        $negPre = $formatter->getTextAttribute(\NumberFormatter::NEGATIVE_PREFIX);
        $posSuf = $formatter->getTextAttribute(\NumberFormatter::POSITIVE_SUFFIX);
        $negSuf = $formatter->getTextAttribute(\NumberFormatter::NEGATIVE_SUFFIX);

        $exponantiatior = \NumberFormatter::create($locale, \NumberFormatter::SCIENTIFIC);

        foreach ($values as $value) {
            // Restore currency symbol
            $formatter->setSymbol(\NumberFormatter::CURRENCY_SYMBOL, $currencySymbol);

            if (is_float($value)) {
                // If value is float and current currency does not have cents, skip it
                if ($numDecimals === 0) {
                    continue;
                }

                // Create a currency with fewer decimal places than required (w/ currency symbol)
                $formatter->setAttribute(\NumberFormatter::FRACTION_DIGITS, $numDecimals - 1);
                $currency = preg_replace('/^[\\xC2\\xA0\\s]+|[\\xC2\\xA0\\s]+$/u', '', $formatter->format($value));
                $data[] = [$locale, true, true, $currency, $currency]; // Not filtered
                $data[] = [$locale, true, false, $currency, $currency]; // Not filtered
                $data[] = [$locale, false, false, (double) sprintf('%.' . ($numDecimals - 1) . 'f', $value), $currency]; // Filtered
                $data[] = [$locale, false, true, (double) sprintf('%.' . ($numDecimals - 1) . 'f', $value), $currency]; // Filtered

                // Create a currency with fewer decimal places than required (w/o currency symbol)
                // The '#' delimiter is passed to preg_quote() so a symbol containing it cannot break the pattern.
                $currency = preg_replace('#' . preg_quote($currencySymbol, '#') . '#u', '', $currency);
                $currency = preg_replace('/^[\\xC2\\xA0\\s]+|[\\xC2\\xA0\\s]+$/u', '', $currency);
                $data[] = [$locale, true, true, $currency, $currency]; // Not filtered
                $data[] = [$locale, true, false, $currency, $currency]; // Not filtered
                $data[] = [$locale, false, false, (double) sprintf('%.' . ($numDecimals - 1) . 'f', $value), $currency]; // Filtered
                $data[] = [$locale, false, true, $currency, $currency]; // Not filtered

                // Create a currency with more decimal places than required (w/ currency symbol)
                $formatter->setAttribute(\NumberFormatter::FRACTION_DIGITS, $numDecimals + 1);
                $currency = preg_replace('/^[\\xC2\\xA0\\s]+|[\\xC2\\xA0\\s]+$/u', '', $formatter->format($value));
                $data[] = [$locale, true, true, $currency, $currency]; // Not filtered
                $data[] = [$locale, true, false, $currency, $currency]; // Not filtered
                $data[] = [$locale, false, false, (double) sprintf('%.' . ($numDecimals + 1) . 'f', $value), $currency]; // Filtered
                $data[] = [$locale, false, true, (double) sprintf('%.' . ($numDecimals + 1) . 'f', $value), $currency]; // Filtered

                // Create a currency with more decimal places than required (w/o currency symbol)
                $currency = preg_replace('#' . preg_quote($currencySymbol, '#') . '#u', '', $currency);
                $currency = preg_replace('/^[\\xC2\\xA0\\s]+|[\\xC2\\xA0\\s]+$/u', '', $currency);
                $data[] = [$locale, true, true, $currency, $currency]; // Not filtered
                $data[] = [$locale, true, false, $currency, $currency]; // Not filtered
                $data[] = [$locale, false, false, (double) sprintf('%.' . ($numDecimals + 1) . 'f', $value), $currency]; // Filtered
                $data[] = [$locale, false, true, $currency, $currency]; // Not filtered
            }

            // Restore correct number of maximum decimal places
            $formatter->setAttribute(\NumberFormatter::FRACTION_DIGITS, $numDecimals);

            // Create completely formatted currency value (w/ currency symbol)
            $currency = $formatter->formatCurrency($value, $isoSymbol);
            $data[] = [$locale, true, true, $value, $currency]; // Filtered

            // Create currency value with letters inside
            $randomPos = rand(0, grapheme_strlen($currency) - 1);
            $currency = grapheme_substr($currency, 0, $randomPos) . 'X' . grapheme_substr($currency, $randomPos);
            // This row used to be appended to a misspelled variable and was never part of the dataset.
            $data[] = [$locale, true, true, $currency, $currency]; // Not filtered

            // Create currency value (w/ currency symbol) (w/o group separators)
            if (grapheme_strpos($currency, $groupSep) !== false) {
                // An empty string removes the separator (null would be coerced to the same value).
                $formatter->setSymbol(\NumberFormatter::MONETARY_GROUPING_SEPARATOR_SYMBOL, '');
                $currency = $formatter->formatCurrency($value, $isoSymbol);
                $data[] = [$locale, true, true, $value, $currency]; // Filtered
                $formatter->setSymbol(\NumberFormatter::MONETARY_GROUPING_SEPARATOR_SYMBOL, $groupSep);
            }

            // Create currency value with ISO currency symbol
            $formatter->setSymbol(\NumberFormatter::CURRENCY_SYMBOL, $isoSymbol);
            $currency = $formatter->format($value);
            $data[] = [$locale, true, true, $value, $currency]; // Filtered

            // Create currency value with ISO currency symbol (w/o group separators)
            if (grapheme_strpos($currency, $groupSep) !== false) {
                $formatter->setSymbol(\NumberFormatter::MONETARY_GROUPING_SEPARATOR_SYMBOL, '');
                $currency = $formatter->format($value);
                $data[] = [$locale, true, true, $value, $currency]; // Filtered
                $formatter->setSymbol(\NumberFormatter::MONETARY_GROUPING_SEPARATOR_SYMBOL, $groupSep);
            }

            // Create currency values with wrong ISO currency symbol or other text after it
            $currency = $currency . 'S';
            $data[] = [$locale, true, true, $currency, $currency]; // Not filtered

            // Create currency value w/o any currency symbol
            $formatter->setSymbol(\NumberFormatter::CURRENCY_SYMBOL, '');
            $currency = $formatter->format($value);
            $data[] = [$locale, true, true, $currency, $currency]; // Not filtered
            $data[] = [$locale, true, false, $value, $currency]; // Filtered when currency symbol is not mandatory

            if ($value >= 0) {
                // Create currency value expressed in scientific notation w/o any currency symbol
                $currency = $exponantiatior->format($value, \NumberFormatter::TYPE_DOUBLE);
                $data[] = [$locale, true, true, $currency, $currency]; // Not filtered
                $data[] = [$locale, true, false, $currency, $currency]; // Not filtered

                // Create currency value expressed in scientific notation with proper currency symbol
                $currency = $posPre . $currency . $posSuf;
                $data[] = [$locale, true, true, $currency, $currency]; // Not filtered
                $data[] = [$locale, true, false, $currency, $currency]; // Not filtered
            } else {
                // Create negative currency value expressed in scientific notation with proper currency symbol
                $currency = $exponantiatior->format(abs($value), \NumberFormatter::TYPE_DOUBLE);
                $currency = $negPre . $currency . $negSuf;
                $data[] = [$locale, true, true, $currency, $currency]; // Not filtered
                $data[] = [$locale, true, false, $currency, $currency]; // Not filtered
            }
        }
    }

    return $data;
}
<?php
// Dumps what substr() and grapheme_substr() return for two edge cases:
// a negative length that reaches back past the start offset (1, -4) and a
// zero length combined with a negative start (-1, 0). The grapheme variant is
// exercised with an ASCII-only and an accented subject, and the intl error
// message is dumped right after each (1, -4) call.
$ascii = 'deja';

var_dump(substr($ascii, 1, -4));
var_dump(substr($ascii, -1, 0));

foreach ([$ascii, 'déjà'] as $subject) {
    var_dump(grapheme_substr($subject, 1, -4));
    var_dump(intl_get_error_message());
    var_dump(grapheme_substr($subject, -1, 0));
}
/**
 * Multibyte-safe counterpart of PHP's substr() that works on grapheme units.
 *
 * Returns the portion of the given string selected by the offset and length
 * parameters. Where grapheme_substr() would return FALSE, or where an
 * Exception is raised while calling it, an empty string is returned instead.
 *
 * @param string $string The string to return the substring of
 * @param int $offset The character position where to start
 * @param int|null $length Length of the substring to return beginning from the given offset;
 *                         null means "no length argument", 0 always yields an empty string
 * @return string
 */
public static function getSubstring(string $string, int $offset, int $length = null)
{
    // A zero length must produce an empty string; per the original author's
    // note grapheme_substr() would ignore a length of "0", so it is never
    // called in that case.
    if ($length === 0) {
        return '';
    }

    // The call is wrapped in try/catch because, per the original author's
    // note, grapheme_substr() emits an E_NOTICE (converted to an exception by
    // the surrounding system) when $offset is not available in the string,
    // e.g. an empty string with offset "0".
    try {
        $substring = $length === null
            ? grapheme_substr($string, $offset)
            : grapheme_substr($string, $offset, $length);
    } catch (Exception $e) {
        return '';
    }

    if ($substring === false) {
        return '';
    }

    return $substring;
}
<?php
// Dumps grapheme_substr() results for a length (20) that exceeds the
// remaining graphemes, once for an ASCII string and once for a Hangul string.
$latinTail = grapheme_substr('FOK', 1, 20);
$hangulTail = grapheme_substr('한국어', 1, 20);

var_dump($latinTail, $hangulTail);
/**
 * Intl based workaround for http://bugs.php.net/62759 and 55562.
 *
 * Normalises start and length against the grapheme count of $s before
 * delegating to grapheme_substr(), so the native function only ever receives
 * a non-negative start and a positive, in-range length.
 *
 * @param string $s     Subject string.
 * @param int    $start Start position in graphemes; negative counts from the end.
 * @param int    $len   Length in graphemes; negative leaves that many off the end.
 *                      The value 2147483647 means "no length given".
 *
 * @return string|false The substring, '' for a zero-length range, or false when
 *                      the start or the resulting length falls outside the string.
 */
static function grapheme_substr_workaround62759($s, $start, $len)
{
    // Sentinel length: defer to the two-argument native call.
    if (2147483647 == $len) {
        return grapheme_substr($s, $start);
    }

    $graphemeCount = grapheme_strlen($s);
    $offset = (int) $start;

    // A negative start is relative to the end of the string.
    if ($offset < 0) {
        $offset += $graphemeCount;
    }

    if ($offset < 0) {
        return false;
    }

    if ($offset >= $graphemeCount) {
        return false;
    }

    $available = $graphemeCount - $offset;
    $count = $len;

    // A negative length trims that many graphemes off the end.
    if ($count < 0) {
        $count += $available;
    }

    if ($count === 0) {
        return '';
    }

    if ($count < 0) {
        return false;
    }

    // Never ask for more graphemes than remain after the offset.
    return grapheme_substr($s, $offset, $count > $available ? $available : $count);
}
/**
 * Get part of string
 *
 * @link http://php.net/manual/en/function.mb-substr.php
 *
 * @param string  $str       <p>
 *                           The string being checked.
 *                           </p>
 * @param int     $start     <p>
 *                           The first position used in str.
 *                           </p>
 * @param int     $length    [optional] <p>
 *                           The maximum length of the returned string. When null,
 *                           the length of str is used.
 *                           </p>
 * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
 *
 * @return string The portion of str specified by the start and length
 *                parameters; an empty string when str is empty.
 */
public static function substr($str, $start = 0, $length = null, $cleanUtf8 = false)
{
    // Caches whether the native grapheme_substr() shows PHP bug #62759.
    static $bug62759;

    $str = (string) $str;

    if (!isset($str[0])) {
        return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
        // iconv and mbstring are not tolerant to invalid encoding
        // further, their behaviour is inconsistent with that of PHP's substr
        $str = self::clean($str);
    }

    if ($length === null) {
        $length = (int) self::strlen($str);
    } else {
        $length = (int) $length;
    }

    if (self::$support['mbstring'] === true) {
        return mb_substr($str, $start, $length, 'UTF-8');
    }

    // This branch calls intl's grapheme_substr() although it is gated on the
    // iconv flag. Without the function_exists() check a host with iconv but
    // without intl would hit a fatal "undefined function" error here instead
    // of reaching the fallback below.
    // NOTE(review): the gate presumably should be an intl support flag — confirm.
    if (self::$support['iconv'] === true && function_exists('grapheme_substr')) {
        if (!isset($bug62759)) {
            $bug62759 = 'à' === grapheme_substr('éà', 1, -2);
        }

        if ($bug62759) {
            return (string) Intl::grapheme_substr_workaround62759($str, $start, $length);
        }

        return (string) grapheme_substr($str, $start, $length);
    }

    // fallback
    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return implode(array_slice($array, $start, $length));
}
/**
 * Thin wrapper around grapheme_substr().
 *
 * @param string $string Subject string.
 * @param int    $from   Start position, forwarded as grapheme_substr()'s offset.
 * @param int    $to     Forwarded as grapheme_substr()'s length argument (despite
 *                       the name, it is not an end position). Defaults to 2147483647.
 *
 * @return mixed The value returned by grapheme_substr().
 */
function ht_substr($string, $from, $to = 2147483647)
{
    $slice = grapheme_substr($string, $from, $to);

    return $slice;
}
<?php
// Dumps three grapheme-function results for the same German sentence:
// the case-insensitive position of 'pariS ', the case-insensitive remainder
// starting at 'paRis ', and the substring starting at the case-insensitive
// position of 'Paris'.
$haystack = 'Auf der Straße nach Paris habe ich mit dem Fahrer gesprochen';

$needlePosition = grapheme_stripos($haystack, 'pariS ');
$remainder = grapheme_stristr($haystack, 'paRis ');
$parisOffset = grapheme_stripos($haystack, 'Paris');
$fromParis = grapheme_substr($haystack, $parisOffset);

var_dump($needlePosition, $remainder, $fromParis);