/**
 * Enhanced strip_tags() :: converts HTML code to plain text and also reverts special entities like &nbsp; and more
 *
 * Steps, in order:
 *   1. drops the elements whose content is not readable text (head, style, script, form, ...) including their content
 *   2. converts <br> to a new line or to a space, depending on $ynewline
 *   3. strips all the remaining tags
 *   4. restores the usual html entities and strips tags again (decoding may re-create tags that were encoded)
 *   5. restores the accented characters using the SmartUnicode::accented_html_entities() map
 *   6. replaces any other remaining entity with a space and normalizes the white space
 *
 * @param STRING $yhtmlcode :: HTML Code to be stripped of tags
 * @param YES/NO $ynewline :: 'yes' to convert <br> to new lines \n, otherwise (any other value) will convert <br> to spaces
 *
 * @return STRING :: The processed HTML Code (plain text, trimmed)
 */
public static function striptags($yhtmlcode, $ynewline = 'yes') {
    //--
    $yhtmlcode = (string) $yhtmlcode;
    $ynewline = (string) $ynewline;
    //-- a tag name ends only where no other name character follows ; without this '<head' also matches '<header' and the content of that element is lost
    $name_end = '(?![\w:\-])';
    //-- fix xhtml tag ends and add spaces between tags (avoid gluing together the words of adjacent elements after stripping)
    $yhtmlcode = (string) str_replace(array(' />', '/>', '>'), array('>', '>', '> '), $yhtmlcode);
    //-- remove special tags: the paired ones are removed with their content, the void ones (link, img) just as tags
    $paired_tags = array('head', 'style', 'script', 'noscript', 'frameset', 'frame', 'iframe', 'canvas', 'audio', 'video', 'applet', 'param', 'object', 'form');
    $void_tags = array('link', 'img');
    $html_regex_h = array();
    foreach ($paired_tags as $tag) {
        $html_regex_h[] = '#<' . $tag . $name_end . '[^>]*?>.*?</' . $tag . $name_end . '[^>]*?>#si';
    } //end foreach
    foreach ($void_tags as $tag) {
        $html_regex_h[] = '#<' . $tag . $name_end . '[^>]*?>#si';
    } //end foreach
    $yhtmlcode = (string) preg_replace($html_regex_h, ' ', $yhtmlcode); // a single replacement string applies to every pattern
    //-- normalize line endings to \n ; tabs and form feeds become spaces
    $yhtmlcode = (string) str_replace(array("\r\n", "\r", "\t", "\f"), array("\n", "\n", ' ', ' '), $yhtmlcode);
    //-- replace new line tags ; must also match a <br> having attributes or inner spaces (ex: <br clear="all">), otherwise the line break is silently dropped by strip_tags()
    $br_text = ($ynewline === 'yes') ? "\n" : ' ';
    $yhtmlcode = (string) preg_replace(
        array('#<br' . $name_end . '[^>]*>#i', '#</br' . $name_end . '[^>]*>#i'),
        array($br_text, ''),
        $yhtmlcode
    );
    //-- strip the tags
    $yhtmlcode = (string) strip_tags($yhtmlcode);
    //-- restore some usual html entities ; the search list must contain the entity names, not the characters they stand for ; the literal (c) / euro / middle dot characters are transliterated the same way as their entities
    $regex_h = array('&nbsp;', '&amp;', '&quot;', '&lt;', '&gt;', '&copy;', '&euro;', '&middot;', '©', '€', '·');
    $regex_r = array(' ', '&', '"', '<', '>', '(c)', 'EURO', '.', '(c)', 'EURO', '.');
    $yhtmlcode = (string) str_ireplace($regex_h, $regex_r, $yhtmlcode);
    $yhtmlcode = (string) strip_tags($yhtmlcode); // fix: after reversing the entities, new tags can appear that were encoded, so needs run again for safety ...
    //-- restore html unicode entities ; the values of the map are searched and replaced with the keys (presumably entity => character pairs stored as character => entity ; verify against SmartUnicode)
    $html_accents = (array) SmartUnicode::accented_html_entities();
    $yhtmlcode = (string) str_replace((array) array_values($html_accents), (array) array_keys($html_accents), $yhtmlcode);
    //-- clean any other remaining html entities (named or numeric) by replacing them with a space
    $yhtmlcode = (string) preg_replace('/&\\#?([0-9a-z]+);/i', ' ', $yhtmlcode);
    //-- cleanup multiple spaces with just one space
    $yhtmlcode = (string) preg_replace('/[ \\t]+/', ' ', $yhtmlcode); // replace any run of spaces / tabs ; since PHP 5.2.4 can be /[\h]+/
    $yhtmlcode = (string) preg_replace('/^\\s*[\\n]{2,}/m', '', $yhtmlcode); // fix: replace multiple consecutive new lines that may also be preceded by optional leading spaces
    $yhtmlcode = (string) preg_replace('/[^\\S\\r\\n]+$/m', '', $yhtmlcode); // remove trailing spaces on each line
    //--
    return (string) trim($yhtmlcode);
    //--
}