示例#1
0
/**
 * Finds first occurrence of 8bit data in the string
 *
 * Scans $haystack byte by byte and reports the position of the first byte
 * with a value above 127, or of the first '&' that starts an html entity
 * representing an 8bit character. An entity counts as 8bit when it is
 * <ul>
 *   <li>a decimal reference "&#NNN;" with a value bigger than 127,</li>
 *   <li>a hexadecimal reference "&#xHH;" with a value bigger than 0x7f
 *       (the non-standard "&xHH;" spelling is tolerated as well),</li>
 *   <li>a named entity that is not one of the 7bit html special chars
 *       returned by get_html_translation_table(HTML_SPECIALCHARS) but is
 *       listed by sq_get_html_translation_table() for the 'utf-8' charset.</li>
 * </ul>
 *
 * Search starts at $offset. $maxlength is an absolute end position in
 * $haystack (exclusive), not a length counted from $offset. If $maxlength
 * is not defined or bigger than the string length, search ends when the
 * string ends.
 *
 * Check returned data type in order to avoid confusion between bool(false)
 * (not found) and int(0) (first char in the string).
 * @param string $haystack string that is searched
 * @param integer $offset position of the first inspected byte
 * @param integer $maxlength position where the search stops (exclusive),
 *  boolean false means "until the end of the string"
 * @return mixed integer with first 8bit character position or boolean false
 * @since 1.5.2 and 1.4.7
 */
function sq_strpos_8bit($haystack, $offset = 0, $maxlength = false)
{
    $length = strlen($haystack);
    if ($maxlength === false || $length < $maxlength) {
        $maxlength = $length;
    }

    for ($i = $offset; $i < $maxlength; $i++) {
        $char = $haystack[$i];

        /* any byte in the 0x80-0xff range is 8bit data. stop here and return position */
        if (ord($char) > 127) {
            return $i;
        }

        if ($char !== '&') {
            continue;
        }

        /* possible html entity. the patterns below are anchored to the '&' */
        $substring = substr($haystack, $i);

        /*
         * Evaluation order matters: the translation tables are only consulted
         * when neither numeric form identified an 8bit character.
         */
        if ((preg_match('/^&#(\\d+);/', $substring, $match) && $match[1] > 127)
            /* hexdec() converts the digits to a number; comparing them as a string is meaningless */
            || (preg_match('/^&#?x([0-9a-f]+);/i', $substring, $match) && hexdec($match[1]) > 0x7f)
            || (preg_match('/^&([a-z]+);/i', $substring, $match)
                && !in_array($match[0], get_html_translation_table(HTML_SPECIALCHARS), true)
                && in_array($match[0], sq_get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'utf-8'), true))
        ) {
            return $i;
        }
    }

    return false;
}
示例#2
0
/**
 * sq_htmlentities
 *
 * Replaces every character that has an entry in the translation table
 * returned by sq_get_html_translation_table(HTML_ENTITIES, ...) with the
 * corresponding html entity.
 * Minimal php requirement - v.4.0.5
 *
 * @param string $string string that has to be sanitized
 * @param integer $quote_style quote encoding style, handed to the translation
 *              table lookup unchanged. Possible values (without quotes):
 *              <ul>
 *                <li>ENT_COMPAT - (default) encode double quotes</li>
 *                <li>ENT_NOQUOTES - don't encode double or single quotes</li>
 *                <li>ENT_QUOTES - encode double and single quotes</li>
 *              </ul>
 * @param string $charset charset used for encoding, handed to the translation
 *              table lookup unchanged. defaults to 'us-ascii', 'auto' uses
 *              $default_charset global value.
 * @return string sanitized string
 */
function sq_htmlentities($string, $quote_style = ENT_COMPAT, $charset = 'us-ascii')
{
    // character => entity map for the requested quote style and charset
    $entity_map = sq_get_html_translation_table(HTML_ENTITIES, $quote_style, $charset);

    // split the map into parallel search/replacement lists
    $characters = array_keys($entity_map);
    $entities = array_values($entity_map);

    // substitute each listed character with its entity
    return str_replace($characters, $entities, $string);
}