/**
 * UTF-8 aware replacement for strrpos(): finds the character position of the
 * last occurrence of a needle in a string. Wrapper around mb_strrpos().
 *
 * Assumes mbstring internal encoding is set to UTF-8. The haystack is passed
 * through utf8_bad_strip() before searching, so returned positions refer to
 * the stripped string.
 *
 * @param string $str haystack
 * @param string $search needle (you should validate this with utf8_is_valid)
 * @param integer|boolean $offset (optional) offset from the left, in
 *        characters; FALSE or 0 searches the whole string. Negative values
 *        are not given strrpos() semantics here (mb_substr() would count them
 *        from the end while the result is still shifted by $offset).
 * @return mixed integer position or FALSE on failure
 * @package utf8
 * @subpackage strings
 */
function utf8_strrpos($str, $search, $offset = false) {
    // Strip invalid characters
    $str = utf8_bad_strip($str);

    if (!$offset) {
        // Emulate behaviour of strrpos rather than raising a warning on an
        // empty haystack. The check is explicit because empty() would also
        // reject the perfectly valid one-character haystack "0".
        if ($str === '' || $str === null || $str === false) {
            return false;
        }

        return mb_strrpos($str, $search);
    }

    if (!is_int($offset)) {
        trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_WARNING);
        return false;
    }

    // Search only the part right of the offset, then translate the hit back
    // into a position within the full string.
    $tail = mb_substr($str, $offset);
    $pos = mb_strrpos($tail, $search);

    if ($pos === false) {
        return false;
    }

    return $pos + $offset;
}
/**
 * Remove invalid byte sequences from a UTF-8 string and return what is left.
 *
 * Loads the phputf8 "bad" utilities once, then delegates to the
 * utf8_bad_strip() function (presumably provided by that file).
 *
 * @param $str string input string
 * @return string
 */
static function utf8_bad_strip($str) {
    // A "./"-prefixed path is resolved against the current working directory.
    require_once './lib/pkp/lib/phputf8/utils/bad.php';

    // Inside a method this unqualified call resolves to the function, not to
    // this method of the same name.
    $stripped = utf8_bad_strip($str);

    return $stripped;
}
/**
 * Prepares and cleans the string
 *
 * Runs the content plugins over the text, normalises quotes, decodes HTML
 * entities, strips tags and surplus whitespace, removes invalid UTF-8 bytes
 * and finally HTML-escapes and trims the result.
 *
 * The cleaned value is kept in a function-static variable: once a string has
 * been cleaned, later calls return that cached value and ignore $string
 * unless $clean_again is true.
 *
 * @param string $string
 * @param bool $clean_again
 *
 * @return mixed|string
 */
private function cleanString($string, $clean_again = false)
{
    static $string_clean = false;

    if ($string_clean === false || $clean_again == true) {
        // Replace plugins with correct content
        JPluginHelper::importPlugin('content');
        $string = JHtml::_('content.prepare', $string, '');

        // Windows-1252 "smart" quotes and the em dash are the single bytes
        // 0x91-0x94 and 0x97. In valid UTF-8 those byte values only occur as
        // continuation bytes of multi-byte characters (e.g. Cyrillic letters),
        // so replacing them there would corrupt the text. Only convert them
        // when the string is not valid UTF-8.
        if (preg_match('//u', (string) $string) !== 1) {
            $cp1252_search = array(chr(145), chr(146), chr(147), chr(148), chr(151));
            $cp1252_replace = array("'", "'", '"', '"', '-');
            $string = str_replace($cp1252_search, $cp1252_replace, $string);
        }

        // html_entity_decode() with ENT_COMPAT leaves single-quote entities
        // untouched, so convert the common spellings by hand first.
        // NOTE(review): the original literal here was unreadable (a bare
        // quote); the usual apostrophe entities are assumed - confirm.
        $string = str_replace(array('&#039;', '&#39;', '&apos;'), "'", $string);
        $string = html_entity_decode($string, ENT_COMPAT, 'UTF-8');

        // Strip HTML tags and remove invisible chars
        $string = preg_replace('@\\s+(\\r\\n|\\r|\\n|\\t)@', ' ', strip_tags($string));

        // Exchange double quotes and remove white spaces for the description
        $string = str_replace('"', "'", $string);
        $string = preg_replace('@\\s+@', ' ', $string);

        // Remove all bad UTF8 characters with the help of the UTF8 library
        jimport('phputf8.utils.bad');
        $string = utf8_bad_strip($string);

        $string_clean = trim(htmlspecialchars($string));
    }

    return $string_clean;
}
/**
 * Removes "bad" characters (invalid UTF-8, control characters, invisible or
 * layout-disturbing code points) from a string, or recursively from every
 * element of an array.
 *
 * The replacement table is written with explicit byte escapes so that the
 * invisible characters stay reviewable and cannot be silently altered by
 * editors or transport.
 *
 * @param array|string $array value to clean; arrays are processed recursively
 * @return array|string the cleaned value, same shape as the input
 */
function remove_bad_characters($array) {
    static $bad_utf8_chars;

    if (!isset($bad_utf8_chars)) {
        $bad_utf8_chars = array(
            // Removed entirely
            "\xcc\xb7"     => '',  // U+0337 COMBINING SHORT SOLIDUS OVERLAY
            "\xcc\xb8"     => '',  // U+0338 COMBINING LONG SOLIDUS OVERLAY
            "\xe1\x85\x9f" => '',  // U+115F HANGUL CHOSEONG FILLER
            "\xe1\x85\xa0" => '',  // U+1160 HANGUL JUNGSEONG FILLER
            "\xe2\x80\x8b" => '',  // U+200B ZERO WIDTH SPACE
            "\xe2\x80\x8c" => '',  // U+200C ZERO WIDTH NON-JOINER
            "\xe2\x80\x8d" => '',  // U+200D ZERO WIDTH JOINER
            "\xe2\x80\x8e" => '',  // U+200E LEFT-TO-RIGHT MARK
            "\xe2\x80\x8f" => '',  // U+200F RIGHT-TO-LEFT MARK
            "\xe2\x80\xaa" => '',  // U+202A LEFT-TO-RIGHT EMBEDDING
            "\xe2\x80\xab" => '',  // U+202B RIGHT-TO-LEFT EMBEDDING
            "\xe2\x80\xac" => '',  // U+202C POP DIRECTIONAL FORMATTING
            "\xe2\x80\xad" => '',  // U+202D LEFT-TO-RIGHT OVERRIDE
            "\xe2\x80\xae" => '',  // U+202E RIGHT-TO-LEFT OVERRIDE
            "\xe2\x80\xaf" => '',  // U+202F NARROW NO-BREAK SPACE
            "\xe2\x81\x9f" => '',  // U+205F MEDIUM MATHEMATICAL SPACE
            "\xe2\x81\xa0" => '',  // U+2060 WORD JOINER
            "\xe3\x85\xa4" => '',  // U+3164 HANGUL FILLER
            "\xef\xbb\xbf" => '',  // U+FEFF ZERO WIDTH NO-BREAK SPACE
            "\xef\xbe\xa0" => '',  // U+FFA0 HALFWIDTH HANGUL FILLER
            "\xef\xbf\xb9" => '',  // U+FFF9 INTERLINEAR ANNOTATION ANCHOR
            "\xef\xbf\xba" => '',  // U+FFFA INTERLINEAR ANNOTATION SEPARATOR
            "\xef\xbf\xbb" => '',  // U+FFFB INTERLINEAR ANNOTATION TERMINATOR
            "\xef\xbf\xbc" => '',  // U+FFFC OBJECT REPLACEMENT CHARACTER
            "\xef\xbf\xbd" => '',  // U+FFFD REPLACEMENT CHARACTER
            // Exotic spaces are normalised to a plain ASCII space
            "\xe2\x80\x80" => ' ', // U+2000 EN QUAD
            "\xe2\x80\x81" => ' ', // U+2001 EM QUAD
            "\xe2\x80\x82" => ' ', // U+2002 EN SPACE
            "\xe2\x80\x83" => ' ', // U+2003 EM SPACE
            "\xe2\x80\x84" => ' ', // U+2004 THREE-PER-EM SPACE
            "\xe2\x80\x85" => ' ', // U+2005 FOUR-PER-EM SPACE
            "\xe2\x80\x86" => ' ', // U+2006 SIX-PER-EM SPACE
            "\xe2\x80\x87" => ' ', // U+2007 FIGURE SPACE
            "\xe2\x80\x88" => ' ', // U+2008 PUNCTUATION SPACE
            "\xe2\x80\x89" => ' ', // U+2009 THIN SPACE
            "\xe2\x80\x8a" => ' ', // U+200A HAIR SPACE
            "\xe3\x80\x80" => ' ', // U+3000 IDEOGRAPHIC SPACE
        );
    }

    // Arrays are cleaned element by element; keys are left untouched.
    if (is_array($array)) {
        return array_map('remove_bad_characters', $array);
    }

    // Strip out any invalid characters
    $array = utf8_bad_strip($array);

    // Remove control characters (tab, line feed and carriage return are kept)
    $array = preg_replace('%[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]%', '', $array);

    // Replace some "bad" characters
    $array = str_replace(array_keys($bad_utf8_chars), array_values($bad_utf8_chars), $array);

    return $array;
}
/**
 * Remove invalid byte sequences from a UTF-8 string and return what is left.
 *
 * Loads utils/bad.php once, then delegates to the utf8_bad_strip() function.
 *
 * NOTE(review): this looks like a class method whose class header is not
 * visible here. As a plain global function it would call itself, and would
 * clash with a function of the same name if utils/bad.php defines one -
 * confirm the enclosing scope.
 *
 * @param $str string input string
 * @return string
 */
function utf8_bad_strip($str) {
    require_once 'utils/bad.php';

    $stripped = utf8_bad_strip($str);

    return $stripped;
}