示例#1
0
 /**
  * Enhanced strip_tags() :: converts HTML to plain text and also reverts special entities like nbsp; and more
  *
  * Steps, in order: normalize tag ends ; remove non-text elements together with their content
  * (head, style, script, ...) ; convert <br> tags ; strip all remaining tags ; decode entities ;
  * normalize the whitespace.
  *
  * @param STRING 		$yhtmlcode		:: HTML Code to be stripped of tags
  * @param YES/NO 		$ynewline		:: 'yes' to convert <br> to new lines \n, otherwise (any other value) will convert <br> to spaces
  *
  * @return STRING 						:: The processed text (trimmed)
  */
 public static function striptags($yhtmlcode, $ynewline = 'yes')
 {
     //--
     $yhtmlcode = (string) $yhtmlcode;
     $ynewline = (string) $ynewline;
     //-- fix xhtml tag ends and add spaces between tags (every '>' gets a trailing space so adjacent texts do not glue together)
     $yhtmlcode = (string) str_replace([' />', '/>', '>'], ['>', '>', '> '], $yhtmlcode);
     //-- remove special tags
     // elements that are removed together with everything between the opening and the closing tag
     $paired_tags = ['head', 'style', 'script', 'noscript', 'frameset', 'frame', 'iframe', 'canvas', 'audio', 'video', 'applet', 'param', 'object', 'form'];
     // elements without content, only the tag itself is removed
     $single_tags = ['link', 'img'];
     // fix: the (?=[\s/>]) lookahead ends the tag name, otherwise <head would also match <header> and drop its visible content
     $html_regex_h = [];
     foreach ($paired_tags as $tag) {
         $html_regex_h[] = '#<' . $tag . '(?=[\s/>])[^>]*>.*?</' . $tag . '(?=[\s/>])[^>]*>#si';
     }
     foreach ($single_tags as $tag) {
         $html_regex_h[] = '#<' . $tag . '(?=[\s/>])[^>]*>#si';
     }
     // each removed element is replaced by a single space
     $yhtmlcode = (string) preg_replace($html_regex_h, ' ', $yhtmlcode);
     //-- normalize line endings to \n ; tabs and form feeds become spaces
     $yhtmlcode = str_replace(["\r\n", "\r", "\t", "\f"], ["\n", "\n", ' ', ' '], $yhtmlcode);
     //-- replace new line tags (at this point <br/> and <br /> are already normalized to <br>)
     $br_replacement = ((string) $ynewline == 'yes') ? "\n" : ' ';
     $yhtmlcode = (string) str_ireplace(['<br>', '</br>'], [$br_replacement, ''], $yhtmlcode);
     //-- strip the tags
     $yhtmlcode = (string) strip_tags($yhtmlcode);
     //-- restore some usual html entities
     // NOTE(review): the pairs are applied in list order, so an escaped entity such as &amp;lt; is decoded twice and ends up as < ; confirm this is intended
     $regex_h = ['&nbsp;', '&amp;', '&quot;', '&lt;', '&gt;', '&copy;', '&euro;', '&middot;'];
     $regex_r = [' ', '&', '"', '<', '>', '(c)', 'EURO', '.'];
     $yhtmlcode = (string) str_ireplace($regex_h, $regex_r, $yhtmlcode);
     // fix: after all fixes when reversing entities, new tags can appear that were encoded, so needs run again for safety ...
     $yhtmlcode = (string) strip_tags($yhtmlcode);
     //-- restore html unicode entities (the map values are searched for and replaced by the map keys)
     $html_accents = (array) SmartUnicode::accented_html_entities();
     $yhtmlcode = (string) str_replace((array) array_values($html_accents), (array) array_keys($html_accents), $yhtmlcode);
     //-- clean any other remaining html entities (named or numeric), each one becomes a space
     $yhtmlcode = (string) preg_replace('/&\\#?([0-9a-z]+);/i', ' ', $yhtmlcode);
     //-- cleanup multiple spaces with just one space
     // replace any horizontal whitespace character ' since PHP 5.2.4 can be /[\h]+/
     $yhtmlcode = (string) preg_replace('/[ \\t]+/', ' ', $yhtmlcode);
     // fix: remove runs of two or more consecutive new lines found at a line start, that may also contain before optional leading spaces
     $yhtmlcode = (string) preg_replace('/^\\s*[\\n]{2,}/m', '', $yhtmlcode);
     // remove trailing spaces on each line
     $yhtmlcode = (string) preg_replace('/[^\\S\\r\\n]+$/m', '', $yhtmlcode);
     //--
     return (string) trim($yhtmlcode);
     //--
 }