/**
 * Finds first occurrence of 8bit data in the string
 *
 * Scans the string byte by byte and returns the position of the first byte
 * with the high bit set (0x80-0xFF), or of the first html entity that
 * represents an 8bit character, whichever comes first.
 * Search start is defined by $offset argument. Search ends at $maxlength position.
 * If $maxlength is not defined or bigger than provided string, search ends when
 * string ends.
 *
 * Recognized entities:
 * <ul>
 *  <li>"&#(decimal number);" where the number is bigger than 127</li>
 *  <li>"&#x(hex number);" and the legacy "&x(hex number);" spelling, where
 *      the number is bigger than 0x7f</li>
 *  <li>named entities that are listed by sq_get_html_translation_table()
 *      for 'utf-8' and are not 7bit html special chars</li>
 * </ul>
 *
 * Check returned data type in order to avoid confusion between bool(false)
 * (not found) and int(0) (first char in the string).
 * @param string $haystack string that is searched
 * @param integer $offset position where the search starts
 * @param integer $maxlength position where the search stops (exclusive),
 *  boolean false means "end of string"
 * @return mixed integer with first 8bit character position or boolean false
 * @since 1.5.2 and 1.4.7
 */
function sq_strpos_8bit($haystack, $offset = 0, $maxlength = false) {
    $length = strlen($haystack);
    if ($maxlength === false || $length < $maxlength) {
        $maxlength = $length;
    }

    /* translation tables are built only when a named entity is actually seen */
    $special_chars = null;
    $all_entities = null;

    for ($i = $offset; $i < $maxlength; $i++) {
        $char = $haystack[$i];

        /* raw 8bit byte. stop here and return position */
        if (ord($char) > 127) {
            return $i;
        }

        if ($char != '&') {
            continue;
        }

        $substring = substr($haystack, $i);

        /* 1. decimal numeric entity bigger than 127 */
        if (preg_match('/^&#(\\d+);/', $substring, $match) && $match[1] > 127) {
            return $i;
        }

        /*
         * 2. hexadecimal numeric entity bigger than 0x7f. The value must be
         * converted with hexdec(); comparing the hex digits as a string
         * does not order them numerically.
         */
        if (preg_match('/^&#?x([0-9a-f]+);/i', $substring, $match) && hexdec($match[1]) > 0x7f) {
            return $i;
        }

        /* 3. named entity that is not a 7bit html special char */
        if (preg_match('/^&([a-z]+);/i', $substring, $match)) {
            if ($special_chars === null) {
                $special_chars = get_html_translation_table(HTML_SPECIALCHARS);
                $all_entities = sq_get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'utf-8');
            }
            if (!in_array($match[0], $special_chars) && in_array($match[0], $all_entities)) {
                return $i;
            }
        }
    }

    return false;
}
/**
 * sq_htmlentities
 *
 * Replaces every character that has an entry in the html translation table
 * with its HTML entity.
 * Minimal php requirement - v.4.0.5
 *
 * The translation table is obtained from sq_get_html_translation_table()
 * and applied with str_replace(), which processes the pairs in table order.
 *
 * @param string $string string that has to be sanitized
 * @param integer $quote_style quote encoding style. Possible values (without quotes):
 *  <ul>
 *   <li>ENT_COMPAT - (default) encode double quotes</li>
 *   <li>ENT_NOQUOTES - don't encode double or single quotes</li>
 *   <li>ENT_QUOTES - encode double and single quotes</li>
 *  </ul>
 * @param string $charset charset used for encoding. defaults to 'us-ascii', 'auto' uses $default_charset global value.
 * @return string sanitized string
 */
function sq_htmlentities($string, $quote_style = ENT_COMPAT, $charset = 'us-ascii') {
    // character => entity map for the requested quote style and charset
    $entity_map = sq_get_html_translation_table(HTML_ENTITIES, $quote_style, $charset);

    // split the map into parallel search/replace lists
    $search = array_keys($entity_map);
    $replace = array_values($entity_map);

    return str_replace($search, $replace, $string);
}