示例#1
0
/**
* UTF-8 aware counterpart of strrpos(), implemented as a wrapper around mb_strrpos().
* Finds the character position of the last occurrence of a needle in a string.
* Invalid UTF-8 bytes are stripped from the haystack before it is searched, so
* returned positions refer to the stripped string.
* Assumes mbstring internal encoding is set to UTF-8.
* @param string haystack
* @param string needle (you should validate this with utf8_is_valid)
* @param integer (optional) offset: number of characters, counted from the left,
*        to skip before searching. Negative offsets are not supported: the
*        haystack is cut with mb_substr() and the offset is added back to the
*        match position, which does not yield a meaningful index for them.
* @return mixed integer position or FALSE on failure
* @package utf8
* @subpackage strings
*/
function utf8_strrpos($str, $search, $offset = false)
{
    // Strip invalid characters
    $str = utf8_bad_strip($str);

    if ($offset) {
        if (!is_int($offset)) {
            trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_WARNING);
            return false;
        }

        // Search only the part after the offset, then translate the match
        // position back into the coordinates of the full string.
        $tail = mb_substr($str, $offset);
        $pos = mb_strrpos($tail, $search);

        return $pos === false ? false : $pos + $offset;
    }

    // Emulate behaviour of strrpos rather than raising warning.
    // Compare against '' explicitly: empty() also treats the string "0" as
    // empty, which made a haystack of "0" unsearchable.
    if ((string) $str === '') {
        return false;
    }

    return mb_strrpos($str, $search);
}
示例#2
0
 /**
  * Removes every invalid byte from a UTF-8 string and hands back what is left.
  *
  * Thin wrapper: includes the phputf8 "bad" utilities once and forwards the
  * input to the utf8_bad_strip() function (presumably defined by that file).
  * @param $str string input string
  * @return string
  */
 static function utf8_bad_strip($str)
 {
     // Path is relative, so it is resolved by PHP's include rules at call time.
     require_once './lib/pkp/lib/phputf8/utils/bad.php';

     $stripped = utf8_bad_strip($str);

     return $stripped;
 }
 /**
  * Prepares and cleans the string: runs the content plugins, normalises
  * quotes, decodes entities, strips HTML tags, collapses whitespace, removes
  * invalid UTF-8 and finally HTML-escapes and trims the result.
  *
  * The cleaned value is memoised in a function-level static. Once a value has
  * been computed, later calls return it unchanged - whatever $string is -
  * unless $clean_again is true.
  *
  * @param string $string      Raw text to clean
  * @param bool   $clean_again Force a fresh clean and overwrite the cached value
  *
  * @return mixed|string The cleaned, trimmed and HTML-escaped string
  */
 private function cleanString($string, $clean_again = false)
 {
     static $string_clean = false;
     if ($string_clean === false || $clean_again == true) {
         // Replace plugins with correct content
         JPluginHelper::importPlugin('content');
         $string = JHtml::_('content.prepare', $string, '');
         // Convert quotes and decode HTML entities.
         // NOTE(review): the last search entry was an unterminated literal (''')
         // in the reviewed source - most likely an apostrophe entity whose markup
         // was decoded. Both numeric spellings are listed, one per trailing
         // replacement entry; confirm against the upstream file.
         // NOTE(review): chr(145)..chr(151) are single bytes (Windows-1252 quotes
         // and dash); on UTF-8 input they may also match continuation bytes of
         // multi-byte characters - verify the expected input encoding.
         $search = array(chr(145), chr(146), chr(147), chr(148), chr(151), '&#39;', '&#039;');
         $replace = array("'", "'", '"', '"', '-', "'", "'");
         $string = html_entity_decode(str_replace($search, $replace, $string), ENT_COMPAT, 'UTF-8');
         // Strip HTML tags and remove invisible chars
         $string = preg_replace('@\\s+(\\r\\n|\\r|\\n|\\t)@', ' ', strip_tags($string));
         // Exchange double quotes and remove white spaces for the description
         $string = str_replace('"', "'", $string);
         $string = preg_replace('@\\s+@', ' ', $string);
         // Remove all bad UTF8 characters with the help of the UTF8 library
         jimport('phputf8.utils.bad');
         $string = utf8_bad_strip($string);
         // Escape for HTML output and cache the result for subsequent calls
         $string_clean = trim(htmlspecialchars($string));
     }
     return $string_clean;
 }
/**
 * Removes invalid UTF-8 bytes, control characters and a fixed set of
 * invisible or look-alike characters from a string. Arrays are processed
 * recursively, element by element, with their keys preserved.
 *
 * @param mixed $array String to clean, or an (optionally nested) array of strings
 * @return mixed Cleaned string, or an array with every element cleaned
 */
function remove_bad_characters($array)
{
    static $bad_utf8_chars;
    if (!isset($bad_utf8_chars)) {
        // Keys are spelled as explicit UTF-8 byte escapes instead of literal
        // characters: most of them are invisible, so literal copies are easily
        // lost or normalised by editors and tooling.
        // NOTE(review): U+FEFF and U+FFF9..U+FFFC showed up as empty keys in the
        // reviewed copy; they are restored here from their position in the
        // ascending code-point order - confirm against the upstream table.
        $bad_utf8_chars = array(
            // Removed entirely
            "\xcc\xb7" => '',     // U+0337 COMBINING SHORT SOLIDUS OVERLAY
            "\xcc\xb8" => '',     // U+0338 COMBINING LONG SOLIDUS OVERLAY
            "\xe1\x85\x9f" => '', // U+115F HANGUL CHOSEONG FILLER
            "\xe1\x85\xa0" => '', // U+1160 HANGUL JUNGSEONG FILLER
            "\xe2\x80\x8b" => '', // U+200B ZERO WIDTH SPACE
            "\xe2\x80\x8c" => '', // U+200C ZERO WIDTH NON-JOINER
            "\xe2\x80\x8d" => '', // U+200D ZERO WIDTH JOINER
            "\xe2\x80\x8e" => '', // U+200E LEFT-TO-RIGHT MARK
            "\xe2\x80\x8f" => '', // U+200F RIGHT-TO-LEFT MARK
            "\xe2\x80\xaa" => '', // U+202A LEFT-TO-RIGHT EMBEDDING
            "\xe2\x80\xab" => '', // U+202B RIGHT-TO-LEFT EMBEDDING
            "\xe2\x80\xac" => '', // U+202C POP DIRECTIONAL FORMATTING
            "\xe2\x80\xad" => '', // U+202D LEFT-TO-RIGHT OVERRIDE
            "\xe2\x80\xae" => '', // U+202E RIGHT-TO-LEFT OVERRIDE
            "\xe2\x80\xaf" => '', // U+202F NARROW NO-BREAK SPACE
            "\xe2\x81\x9f" => '', // U+205F MEDIUM MATHEMATICAL SPACE
            "\xe2\x81\xa0" => '', // U+2060 WORD JOINER
            "\xe3\x85\xa4" => '', // U+3164 HANGUL FILLER
            "\xef\xbb\xbf" => '', // U+FEFF ZERO WIDTH NO-BREAK SPACE
            "\xef\xbe\xa0" => '', // U+FFA0 HALFWIDTH HANGUL FILLER
            "\xef\xbf\xb9" => '', // U+FFF9 INTERLINEAR ANNOTATION ANCHOR
            "\xef\xbf\xba" => '', // U+FFFA INTERLINEAR ANNOTATION SEPARATOR
            "\xef\xbf\xbb" => '', // U+FFFB INTERLINEAR ANNOTATION TERMINATOR
            "\xef\xbf\xbc" => '', // U+FFFC OBJECT REPLACEMENT CHARACTER
            "\xef\xbf\xbd" => '', // U+FFFD REPLACEMENT CHARACTER
            // Replaced by a plain ASCII space
            "\xe2\x80\x80" => ' ', // U+2000 EN QUAD
            "\xe2\x80\x81" => ' ', // U+2001 EM QUAD
            "\xe2\x80\x82" => ' ', // U+2002 EN SPACE
            "\xe2\x80\x83" => ' ', // U+2003 EM SPACE
            "\xe2\x80\x84" => ' ', // U+2004 THREE-PER-EM SPACE
            "\xe2\x80\x85" => ' ', // U+2005 FOUR-PER-EM SPACE
            "\xe2\x80\x86" => ' ', // U+2006 SIX-PER-EM SPACE
            "\xe2\x80\x87" => ' ', // U+2007 FIGURE SPACE
            "\xe2\x80\x88" => ' ', // U+2008 PUNCTUATION SPACE
            "\xe2\x80\x89" => ' ', // U+2009 THIN SPACE
            "\xe2\x80\x8a" => ' ', // U+200A HAIR SPACE
            "\xe3\x80\x80" => ' ', // U+3000 IDEOGRAPHIC SPACE
        );
    }
    // Arrays are cleaned element by element (recursing into nested arrays)
    if (is_array($array)) {
        return array_map('remove_bad_characters', $array);
    }
    // Strip out any invalid characters
    $array = utf8_bad_strip($array);
    // Remove control characters (tab, line feed and carriage return are kept)
    $array = preg_replace('%[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]%', '', $array);
    // Replace some "bad" characters
    $array = str_replace(array_keys($bad_utf8_chars), array_values($bad_utf8_chars), $array);
    return $array;
}
示例#5
0
 /**
  * Returns the given UTF-8 string with all bad bytes stripped out.
  *
  * Includes utils/bad.php once and delegates the actual work to the
  * utf8_bad_strip() function (presumably provided by that file).
  * @param $str string input string
  * @return string
  */
 function utf8_bad_strip($str)
 {
     // Relative include: resolved through PHP's include path / working directory.
     require_once 'utils/bad.php';

     $clean = utf8_bad_strip($str);

     return $clean;
 }