/**
 * Determines the character encoding of an XML-formatted text.
 *
 * The checks are applied in this order:
 *  1. If the text matches the _PCRE_XML_ENCODING pattern, the first captured
 *     group is taken as the declared encoding.
 *  2. Otherwise, if api_is_valid_utf8() accepts the text, 'UTF-8' is reported.
 *  3. Otherwise the supplied default is used; when the default is empty, the
 *     value of _api_mb_internal_encoding() stands in for it.
 *
 * @param string $string            The input XML-formatted text.
 * @param string $default_encoding  (optional) The encoding to report when nothing can be detected.
 *                                  If it is not specified, the internal encoding is assumed.
 * @return string                   The detected encoding. Results of steps 1 and 3 are passed
 *                                  through api_refine_encoding_id(); step 2 returns the literal 'UTF-8'.
 * @todo The second parameter is to be eliminated. See api_detect_encoding_html().
 */
function api_detect_encoding_xml($string, $default_encoding = null)
{
    // An encoding found by the pattern always takes precedence.
    $declaration = array();
    $has_declaration = preg_match(_PCRE_XML_ENCODING, $string, $declaration);
    if ($has_declaration) {
        return api_refine_encoding_id($declaration[1]);
    }

    // Nothing declared: text that validates as UTF-8 is reported as such.
    if (api_is_valid_utf8($string)) {
        return 'UTF-8';
    }

    // Last resort: the caller's default, or the internal encoding.
    $fallback = empty($default_encoding) ? _api_mb_internal_encoding() : $default_encoding;

    return api_refine_encoding_id($fallback);
}
/**
 * Sorts an array in natural order, using a language-aware collator when one is available.
 *
 * When INTL_INSTALLED is truthy and _api_get_alpha_numerical_collator() yields an object,
 * the array is sorted with uasort() and the '_api_cmp' callback. In every other case the
 * function falls back to PHP's natsort(). Both sort in place and keep the array keys.
 *
 * This function is aimed at replacing natsort() for sorting human-language strings.
 *
 * @param array  $array     The input array; it is sorted in place.
 * @param string $language  (optional) The language in which the comparison is to be made.
 *                          It is handed to _api_get_alpha_numerical_collator() unchanged.
 * @param string $encoding  (optional) The character encoding used internally. If it is empty,
 *                          the value of _api_mb_internal_encoding() is used.
 * @return bool             Returns TRUE on success, FALSE on error.
 * @link http://php.net/manual/en/function.natsort.php
 */
function api_natsort(&$array, $language = null, $encoding = null)
{
    // Without the intl extension there is nothing better than the byte-wise natural sort.
    if (!INTL_INSTALLED) {
        return natsort($array);
    }

    if (empty($encoding)) {
        $encoding = _api_mb_internal_encoding();
    }

    $natural_collator = _api_get_alpha_numerical_collator($language);
    if (!is_object($natural_collator)) {
        // No usable collator for this language: same fallback as above.
        return natsort($array);
    }

    // The comparison callback takes no extra arguments, so the collator and the
    // encoding are published through globals (presumably read by _api_cmp()).
    global $_api_collator, $_api_encoding;
    $_api_collator = $natural_collator;
    $_api_encoding = $encoding;

    return uasort($array, '_api_cmp');
}
/**
 * Converts an XML-formatted text from UTF-8 into the requested encoding.
 *
 * The actual work is delegated to self::_api_convert_encoding_xml(), called with
 * 'UTF-8' as the source encoding. If an encoding is declared inside the text, it
 * is modified accordingly.
 *
 * @param string $string       The text being converted.
 * @param string $to_encoding  (optional) The encoding the text is converted to. If it is
 *                             empty, the value of _api_mb_internal_encoding() is used.
 * @return string              Returns the converted XML text.
 */
static function api_utf8_decode_xml($string, $to_encoding = null)
{
    // Resolve the target encoding first so the converter always gets a concrete value.
    $target_encoding = empty($to_encoding) ? _api_mb_internal_encoding() : $to_encoding;

    return self::_api_convert_encoding_xml($string, $target_encoding, 'UTF-8');
}
/**
 * Splits a string by a regular expression, UTF-8 aware when it is applicable.
 *
 * When api_is_utf8() reports that the effective encoding is UTF-8, the 'u' modifier is
 * appended to the pattern; the pattern is therefore expected to end with its closing
 * delimiter (optionally followed by other modifiers).
 *
 * @param string $pattern   The pattern to search for, as a string.
 * @param string $subject   The input string.
 * @param int    $limit     (optional) If specified, only substrings up to $limit are returned, with the
 *                          rest of the string being placed in the last substring. A limit of -1, 0 or
 *                          null means "no limit".
 * @param int    $flags     (optional) Any combination of the following flags (combined with the bitwise | operator):
 *                          PREG_SPLIT_NO_EMPTY - only non-empty pieces are returned;
 *                          PREG_SPLIT_DELIM_CAPTURE - parenthesized expressions in the delimiter pattern are
 *                          captured and returned as well;
 *                          PREG_SPLIT_OFFSET_CAPTURE - for every occurring match the string offset is also returned.
 *                          Note that this changes the return value into an array where every element is an array
 *                          consisting of the matched string at offset 0 and its string offset into subject at offset 1.
 * @param string $encoding  (optional) The character encoding used internally by this function. If it is empty,
 *                          the value of _api_mb_internal_encoding() is used.
 * @return array|false      Returns an array containing substrings of $subject split along boundaries matched
 *                          by $pattern, or FALSE when preg_split() fails.
 * @link http://php.net/preg_split
 */
function api_preg_split($pattern, $subject, $limit = -1, $flags = 0, $encoding = null)
{
    if (empty($encoding)) {
        $encoding = _api_mb_internal_encoding();
    }

    // The documented contract accepts null as "no limit", but preg_split() declares $limit
    // as a non-nullable int: passing null is deprecated since PHP 8.1 and is a TypeError
    // under strict_types. Map it to -1, which has the same "no limit" meaning.
    if ($limit === null) {
        $limit = -1;
    }

    // Only switch PCRE into UTF-8 mode when the data really is UTF-8.
    $effective_pattern = api_is_utf8($encoding) ? $pattern . 'u' : $pattern;

    return preg_split($effective_pattern, $subject, $limit, $flags);
}
/**
 * Returns the encoding currently used by the system.
 *
 * Lookup order:
 *  1. the value of api_get_setting('platform_charset'); a non-empty value is remembered
 *     in a static variable and reused by later calls;
 *  2. the global variable $charset, when it is not empty;
 *  3. the library's internal value, as returned by _api_mb_internal_encoding().
 *
 * Note: results of steps 2 and 3 are not remembered, so the setting is looked up again
 * on the next call.
 *
 * @return string The system's encoding.
 */
function api_get_system_encoding()
{
    static $system_encoding;

    // Fast path: the platform setting has already been resolved once.
    if (isset($system_encoding)) {
        return $system_encoding;
    }

    $platform_charset = api_get_setting('platform_charset');
    if (!empty($platform_charset)) {
        $system_encoding = $platform_charset;
        return $system_encoding;
    }

    // No platform setting available: fall back without caching the result.
    global $charset;

    return empty($charset) ? _api_mb_internal_encoding() : $charset;
}