/**
 * Takes a codepoint and returns its corresponding UTF-8 encoded character.
 * Astral planes are supported, i.e. the integer input can be > 0xFFFF.
 * Occurrences of the BOM (U+FEFF) are ignored: an empty string is returned for it.
 * Surrogates (U+D800..U+DFFF), negative values and values above U+10FFFF are not
 * valid codepoints; U+FFFD REPLACEMENT CHARACTER is returned for them instead.
 * @param int $codepoint    The Unicode codepoint.
 * @return string           Returns the corresponding UTF-8 character (1 to 4 bytes),
 *                          an empty string for the BOM, or the UTF-8 encoding of U+FFFD for invalid input.
 * @author Henri Sivonen, mailto:hsivonen@iki.fi
 * @link http://hsivonen.iki.fi/php-utf8/
 * @author Ivan Tcholakov, 2009, modifications for the Dokeos LMS.
 * @see _api_utf8_from_unicode()
 * This is a UTF-8 aware version of the function chr().
 * @link http://php.net/manual/en/function.chr.php
 */
function _api_utf8_chr($codepoint)
{
    // UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER, the general substitute
    // character in the Unicode Standard.
    $replacement = "\xEF\xBF\xBD";

    // Out of range. Negative values must be rejected explicitly: otherwise they
    // would satisfy the "<= 0x7ff" test below and produce an invalid byte sequence.
    if ($codepoint < 0 || $codepoint > 0x10ffff) {
        return $replacement;
    }

    // ASCII range (including control chars): 1 byte.
    if ($codepoint <= 0x7f) {
        return chr($codepoint);
    }

    // 2 byte sequence.
    if ($codepoint <= 0x7ff) {
        return chr(0xc0 | ($codepoint >> 6))
            . chr(0x80 | ($codepoint & 0x3f));
    }

    // Byte order mark: skipped, nothing is emitted for it.
    if ($codepoint == 0xfeff) {
        return '';
    }

    // Surrogates are illegal in UTF-8.
    if ($codepoint >= 0xd800 && $codepoint <= 0xdfff) {
        return $replacement;
    }

    // 3 byte sequence.
    if ($codepoint <= 0xffff) {
        return chr(0xe0 | ($codepoint >> 12))
            . chr(0x80 | (($codepoint >> 6) & 0x3f))
            . chr(0x80 | ($codepoint & 0x3f));
    }

    // 4 byte sequence (the upper bound 0x10ffff was checked at the top).
    return chr(0xf0 | ($codepoint >> 18))
        . chr(0x80 | (($codepoint >> 12) & 0x3f))
        . chr(0x80 | (($codepoint >> 6) & 0x3f))
        . chr(0x80 | ($codepoint & 0x3f));
}
/**
 * Finds the first occurrence of a string within another string.
 * @param string $haystack      The string to search in.
 * @param mixed $needle         The string to search for. A non-string value is cast to an integer and
 *                              treated as the ordinal value of a character (a codepoint when the encoding is UTF-8).
 * @param bool $before_needle   (optional) Selects which portion of $haystack is returned. The default value is FALSE.
 * @param string $encoding      (optional) The character encoding to work with. When it is omitted or empty,
 *                              the value reported by _api_mb_internal_encoding() is used.
 * @return mixed                Returns the selected portion of $haystack, or FALSE if $needle is not found
 *                              or resolves to an empty string.
 * Notes:
 * If $before_needle is TRUE, the part of $haystack before the first occurrence of $needle is returned (the needle itself is excluded).
 * If $before_needle is FALSE, the part of $haystack from the first occurrence of $needle to the end is returned.
 * The byte-oriented strstr() is used for single-byte encodings and for encodings that _api_mb_supports()
 * does not report as supported; mb_strstr() is used otherwise.
 * This function is aimed at replacing the functions strstr() and mb_strstr() for human-language strings.
 * @link http://php.net/manual/en/function.strstr
 * @link http://php.net/manual/en/function.mb-strstr
 */
function api_strstr($haystack, $needle, $before_needle = false, $encoding = null)
{
    if (empty($encoding)) {
        $encoding = _api_mb_internal_encoding();
    }

    // A non-string needle is an ordinal value: turn it into the character it designates.
    if (!is_string($needle)) {
        $ordinal = (int) $needle;
        $needle = api_is_utf8($encoding) ? _api_utf8_chr($ordinal) : chr($ordinal);
    }

    // Nothing to look for. $needle is always a string at this point.
    if ($needle === '') {
        return false;
    }

    // Multibyte search only for a non-single-byte encoding that the mbstring layer supports.
    if (!_api_is_single_byte_encoding($encoding) && _api_mb_supports($encoding)) {
        return mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // Single-byte encodings and unsupported encodings both use the byte-oriented search.
    return strstr($haystack, $needle, $before_needle);
}