/**
 * Runs the regexp sets held in the global $regexp_arrays over a response body.
 *
 * When CONTENT_TYPE is not 'text/html', every non-null entry of the set whose
 * key equals CONTENT_TYPE is applied to the whole body through
 * regular_express() and the result is returned.
 *
 * For HTML the body is split with PREG_SPLIT_DELIM_CAPTURE, so odd-indexed
 * segments are the captured delimiters (comments that do not start with
 * "<!--[if", <style> blocks, <script> blocks) and even-indexed segments are
 * the markup between them. Each entry is then routed by its [1] field:
 *   - 1: applied to the even-indexed (plain markup) segments;
 *   - 2: applied to segments starting with "<script" and, once the on-event
 *        attributes have been split out, to the code inside those attributes.
 * Entries of the 'text/css' set are additionally applied to segments starting
 * with "<style". The 'text/javascript' set is never used here.
 *
 * The "<prefix>_remove_scripts" and "<prefix>_remove_objects" cookies (prefix
 * is COOK_PREF) strip on-event attributes, <noscript> tags and <script>
 * blocks, and <embed>/<object> elements respectively.
 *
 * NOTE: on-event attributes are split out of the even segments once, when the
 * 'application/x-javascript' set is first reached, and are re-joined over the
 * even segments at the very end. Anything a later entry writes directly into
 * an even-indexed segment is therefore discarded by that re-join, so HTML
 * entries must come before the JavaScript set in $regexp_arrays.
 *
 * @param string $html Response body to rewrite.
 * @return string The rewritten body.
 */
function parse_all_html($html)
{
    global $regexp_arrays;

    if (CONTENT_TYPE != 'text/html') {
        // each() no longer exists in PHP 8; foreach visits the same pairs in the same order.
        foreach ($regexp_arrays as $key => $arr) {
            if ($key != CONTENT_TYPE) {
                continue;
            }
            foreach ($arr as $regarr) {
                if ($regarr == null) {
                    continue;
                }
                $html = regular_express($regarr, $html);
            }
        }

        return $html;
    }

    // The cookies cannot change while this function runs, so read them once.
    $remove_scripts = !empty($_COOKIE[COOK_PREF . '_remove_scripts']);
    $remove_objects = !empty($_COOKIE[COOK_PREF . '_remove_objects']);

    $splitarr = preg_split('/(<!--(?!\\[if).*?-->|<style.*?<\\/style>|<script.*?<\\/script>)/is', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
    unset($html);

    // Segments are only ever overwritten in place, so the count is loop-invariant.
    $segment_count = count($splitarr);

    // $firstrun stays true until the first non-null entry has been processed;
    // $firstjsrun until the first non-null 'application/x-javascript' entry has.
    $firstrun = true;
    $firstjsrun = true;

    foreach ($regexp_arrays as $key => $arr) {
        if ($key == 'text/javascript') {
            continue;
        }

        // Split the on-event attributes out of every even segment the first
        // time the JavaScript set is reached (odd indexes of each sub-array
        // hold the captured attribute text).
        if ($firstjsrun && $key == 'application/x-javascript') {
            if ($remove_scripts) {
                // Scripts are being stripped: this set and every later one is skipped.
                break;
            }
            $splitarr2 = array();
            for ($i = 0; $i < $segment_count; $i += 2) {
                $splitarr2[$i] = preg_split('/' . REGEXP_SCRIPT_ONEVENT . '/is', $splitarr[$i], -1, PREG_SPLIT_DELIM_CAPTURE);
            }
        }

        // First run only: strip on-event attributes and <noscript> tags,
        // and/or <embed>/<object> elements, from the even segments.
        if ($firstrun && ($remove_scripts || $remove_objects)) {
            for ($i = 0; $i < $segment_count; $i += 2) {
                if ($remove_scripts) {
                    $splitarr[$i] = preg_replace('/(?:' . REGEXP_SCRIPT_ONEVENT . '|<.?noscript>)/is', '', $splitarr[$i]);
                }
                if ($remove_objects) {
                    $splitarr[$i] = preg_replace('/<(embed|object).*?<\\/\\1>/is', '', $splitarr[$i]);
                }
            }
        }

        foreach ($arr as $regexp_array) {
            if ($regexp_array == null) {
                continue;
            }

            for ($i = 0; $i < $segment_count; $i++) {
                if ($i % 2 == 0 && isset($splitarr2) && $regexp_array[1] == 2) {
                    // Rewrite the code inside each on-event attribute of this segment.
                    $piece_count = count($splitarr2[$i]);
                    for ($j = 1; $j < $piece_count; $j += 2) {
                        // $begin is everything up to and including the
                        // character after "=", which is the quote if there is one.
                        $begin = preg_replace('/^([^=]+=.).*$/i', '\\1', $splitarr2[$i][$j]);
                        $quote = substr($begin, -1);
                        if ($quote != '"' && $quote != '\'') {
                            // Unquoted value: that character belongs to the code.
                            $quote = '';
                            $begin = substr($begin, 0, -1);
                        }

                        $code = preg_replace('/^[^=]+=' . ($quote === '' ? '(.*)$/i' : '.(.*).$/i'), '\\1', $splitarr2[$i][$j]);

                        // Keep a "javascript:" prefix outside the rewritten code.
                        if (substr($code, 0, 11) == 'javascript:') {
                            $begin .= 'javascript:';
                            $code = substr($code, 11);
                        }

                        // The first JavaScript entry sees the code wrapped in semicolons.
                        if ($firstjsrun) {
                            $code = ";{$code};";
                        }

                        $splitarr2[$i][$j] = $begin . regular_express($regexp_array, $code) . $quote;
                    }
                } elseif ($firstrun && $remove_scripts && strtolower(substr($splitarr[$i], 0, 7)) == '<script') {
                    // Drop the whole script block. An empty string (not null)
                    // keeps the later substr() calls free of PHP 8.1 deprecations.
                    $splitarr[$i] = '';
                } elseif (
                    $i % 2 == 0 && $regexp_array[1] == 1
                    || $regexp_array[1] == 2 && strtolower(substr($splitarr[$i], 0, 7)) == '<script'
                    || $key == 'text/css' && strtolower(substr($splitarr[$i], 0, 6)) == '<style'
                ) {
                    $splitarr[$i] = regular_express($regexp_array, $splitarr[$i]);
                }

                // Script purge cleanup: on the first run, append the purge call to
                // every segment ending in </script> that has no "src" before its first ">".
                if (
                    $firstrun
                    && !$remove_scripts
                    && strtolower(substr($splitarr[$i], -9)) == '</script>'
                    && !preg_match('/^[^>]*src/i', $splitarr[$i])
                ) {
                    $splitarr[$i] = preg_replace('/' . END_OF_SCRIPT_TAG . '$/i', ';' . COOK_PREF . '.purge();//--></script>', $splitarr[$i]);
                }
            }

            $firstrun = false;
            if ($firstjsrun && $key == 'application/x-javascript') {
                $firstjsrun = false;
            }
        }
    }

    // Re-join the on-event pieces over the even segments. $splitarr2 only exists
    // if an 'application/x-javascript' set was reached; without the isset() guard
    // a missing set would blank every even segment (or throw on PHP 8).
    if (!$remove_scripts && isset($splitarr2)) {
        for ($i = 0; $i < $segment_count; $i += 2) {
            $splitarr[$i] = implode('', $splitarr2[$i]);
        }
    }

    return implode('', $splitarr);
}
/**
 * Runs the regexp sets held in the global $regexp_arrays over a response body.
 *
 * When CONTENT_TYPE is not 'text/html', every non-null entry of the set whose
 * key equals CONTENT_TYPE is applied to the whole body through
 * regular_express() and the result is returned.
 *
 * For HTML the body is split with PREG_SPLIT_DELIM_CAPTURE, so odd-indexed
 * segments are the captured delimiters (comments, <style> blocks, <script>
 * blocks) and even-indexed segments are the markup between them. If the
 * "<prefix>_remove_scripts" cookie (prefix is COOK_PREF) is set, no split is
 * done and the whole body is treated as a single even-indexed segment.
 * Each entry is routed by its [1] field:
 *   - 1: applied to the even-indexed segments;
 *   - 2: on even-indexed segments, applied to each on-event attribute found,
 *        or to the whole segment when it contains none; also applied to
 *        segments starting with "<script".
 * Entries of the 'text/css' set are additionally applied to segments starting
 * with "<style". The 'text/javascript' set is never used here.
 *
 * @param string $html Response body to rewrite.
 * @return string The rewritten body.
 */
function parse_all_html($html)
{
    global $regexp_arrays;

    if (CONTENT_TYPE != 'text/html') {
        // each() no longer exists in PHP 8; foreach visits the same pairs in the same order.
        foreach ($regexp_arrays as $key => $arr) {
            if ($key != CONTENT_TYPE) {
                continue;
            }
            foreach ($arr as $regarr) {
                if ($regarr == null) {
                    // Skip disabled/empty entries instead of handing null to regular_express().
                    continue;
                }
                $html = regular_express($regarr, $html);
            }
        }

        return $html;
    }

    if (!empty($_COOKIE[COOK_PREF . '_remove_scripts'])) {
        $splitarr = array($html);
    } else {
        $splitarr = preg_split('/(<!--.*?-->|<style.*?<\\/style>|<script.*?<\\/script>)/is', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
    }
    unset($html);

    // Segments are only ever overwritten in place, so the count is loop-invariant.
    $segment_count = count($splitarr);

    foreach ($regexp_arrays as $key => $arr) {
        if ($key == 'text/javascript') {
            continue;
        }

        foreach ($arr as $regexp_array) {
            if ($regexp_array == null) {
                // Skip disabled/empty entries; they have no [1] field to route on.
                continue;
            }

            for ($i = 0; $i < $segment_count; $i++) {
                if ($regexp_array[1] == 2 && $i % 2 == 0) {
                    // Odd indexes of $pieces hold the captured on-event attributes.
                    $pieces = preg_split('/( on[a-z]{3,20}=(?:"(?:[^"]+)"|\'(?:[^\']+)\'|[^"\' >][^ >]+[^"\' >]))/is', $splitarr[$i], -1, PREG_SPLIT_DELIM_CAPTURE);
                    $piece_count = count($pieces);

                    if ($piece_count < 2) {
                        // No on-event attribute in this segment: rewrite it as a whole.
                        $splitarr[$i] = regular_express($regexp_array, $splitarr[$i]);
                    } else {
                        for ($j = 1; $j < $piece_count; $j += 2) {
                            $pieces[$j] = regular_express($regexp_array, $pieces[$j]);
                        }
                        // '' instead of null: a null separator is deprecated since PHP 8.1.
                        $splitarr[$i] = implode('', $pieces);
                    }
                } elseif (
                    $regexp_array[1] == 1 && $i % 2 == 0
                    || strtolower(substr($splitarr[$i], 0, 7)) == '<script' && $regexp_array[1] == 2
                    || strtolower(substr($splitarr[$i], 0, 6)) == '<style' && $key == 'text/css'
                ) {
                    $splitarr[$i] = regular_express($regexp_array, $splitarr[$i]);
                }
            }
        }
    }

    return implode('', $splitarr);
}