Example #1
0
/**
 * Run the configured rewrite rules over a fetched document.
 *
 * For any CONTENT_TYPE other than 'text/html' the rules registered under the
 * matching key of $regexp_arrays are applied to the whole body and the result
 * is returned.
 *
 * For 'text/html' the document is split into alternating segments: even
 * indexes hold plain markup, odd indexes hold the captured delimiters
 * (non-conditional comments, <style> blocks and <script> blocks). Each rule is
 * then routed by its element [1]:
 *   - 1: applied to the plain-markup (even) segments;
 *   - 2: applied to <script> segments and to the values of on<event>
 *        attributes found in plain-markup segments;
 *   - any rule under the 'text/css' key is also applied to <style> segments.
 *
 * The on<event> pieces are split off once, when the 'application/x-javascript'
 * key is first reached, and merged back over the plain-markup segments at the
 * end. Changes made directly to a plain-markup segment after that split are
 * therefore overwritten, so this only works if every rule that edits markup
 * directly is registered before the 'application/x-javascript' key.
 *
 * Reads the COOK_PREF-prefixed '_remove_scripts' and '_remove_objects'
 * cookies. regular_express() is defined elsewhere; it is presumably given one
 * rule plus a string and returns the rewritten string (confirm against its
 * definition).
 *
 * @param string $html Document body to rewrite.
 * @return string Rewritten document body.
 */
function parse_all_html($html)
{
    global $regexp_arrays;

    // Non-HTML payloads: apply only the rule set keyed by the content type.
    if (CONTENT_TYPE != 'text/html') {
        // foreach replaces the reset()/each() idiom; each() no longer exists in PHP 8.
        foreach ($regexp_arrays as $key => $arr) {
            if ($key == CONTENT_TYPE) {
                foreach ($arr as $regarr) {
                    if ($regarr == null) {
                        continue;
                    }
                    $html = regular_express($regarr, $html);
                }
            }
        }
        return $html;
    }

    $remove_scripts = !empty($_COOKIE[COOK_PREF . '_remove_scripts']);
    $remove_objects = !empty($_COOKIE[COOK_PREF . '_remove_objects']);

    // Even indexes: plain markup. Odd indexes: comment / <style> / <script> blocks.
    $splitarr = preg_split('/(<!--(?!\\[if).*?-->|<style.*?<\\/style>|<script.*?<\\/script>)/is', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($splitarr === false) {
        // PCRE failure: degrade to treating the whole document as one markup segment.
        $splitarr = array($html);
    }
    unset($html);
    $segment_count = count($splitarr);

    $firstrun = true;   // true until the first non-null rule has been processed
    $firstjsrun = true; // true until the first 'application/x-javascript' rule has been processed

    foreach ($regexp_arrays as $key => $arr) {
        if ($key == 'text/javascript') {
            continue;
        }

        // On reaching the javascript rules, split the on<event> attributes out
        // of every plain-markup segment so type-2 rules can target them.
        if ($firstjsrun && $key == 'application/x-javascript') {
            if ($remove_scripts) {
                // Scripts are being stripped: skip this and all remaining rule sets.
                break;
            }
            $splitarr2 = array();
            for ($i = 0; $i < $segment_count; $i += 2) {
                $pieces = preg_split('/' . REGEXP_SCRIPT_ONEVENT . '/is', $splitarr[$i], -1, PREG_SPLIT_DELIM_CAPTURE);
                $splitarr2[$i] = ($pieces === false) ? array($splitarr[$i]) : $pieces;
            }
        }

        // Before the first rule runs: strip on<event> attributes and <noscript>
        // tags, and/or <embed>/<object> elements, from the plain-markup segments.
        if ($firstrun && ($remove_scripts || $remove_objects)) {
            for ($i = 0; $i < $segment_count; $i += 2) {
                if ($remove_scripts) {
                    $splitarr[$i] = preg_replace('/(?:' . REGEXP_SCRIPT_ONEVENT . '|<.?noscript>)/is', '', $splitarr[$i]);
                }
                if ($remove_objects) {
                    $splitarr[$i] = preg_replace('/<(embed|object).*?<\\/\\1>/is', '', $splitarr[$i]);
                }
            }
        }

        foreach ($arr as $regexp_array) {
            if ($regexp_array == null) {
                continue;
            }

            for ($i = 0; $i < $segment_count; $i++) {
                if ($i % 2 == 0 && isset($splitarr2) && $regexp_array[1] == 2) {
                    // Type-2 rule on a markup segment: rewrite only the on<event> values.
                    $piece_count = count($splitarr2[$i]);
                    if ($piece_count > 1) {
                        for ($j = 1; $j < $piece_count; $j += 2) {
                            // $begin = attribute name, '=' and the character after it.
                            $begin = preg_replace('/^([^=]+=.).*$/i', '\\1', $splitarr2[$i][$j]);
                            $quote = substr($begin, -1);
                            if ($quote != '"' && $quote != '\'') {
                                // Unquoted value: that character belongs to the code, not to $begin.
                                $quote = '';
                                $begin = substr($begin, 0, -1);
                            }
                            // $code = attribute value without its surrounding quotes.
                            $code = preg_replace('/^[^=]+=' . ($quote === '' ? '(.*)$/i' : '.(.*).$/i'), '\\1', $splitarr2[$i][$j]);
                            if (substr($code, 0, 11) == 'javascript:') {
                                // Keep the scheme prefix out of the text handed to the rule.
                                $begin .= 'javascript:';
                                $code = substr($code, 11);
                            }
                            if ($firstjsrun) {
                                // Wrapped once, on the first javascript rule only.
                                // NOTE(review): presumably so rules anchored on ';' can match - confirm.
                                $code = ";{$code};";
                            }
                            $splitarr2[$i][$j] = $begin . regular_express($regexp_array, $code) . $quote;
                        }
                    }
                } elseif ($firstrun && $remove_scripts && strtolower(substr($splitarr[$i], 0, 7)) == '<script') {
                    // Script removal requested: drop the whole <script> block.
                    $splitarr[$i] = '';
                } elseif ($i % 2 == 0 && $regexp_array[1] == 1 || $regexp_array[1] == 2 && strtolower(substr($splitarr[$i], 0, 7)) == '<script' || $key == 'text/css' && strtolower(substr($splitarr[$i], 0, 6)) == '<style') {
                    $splitarr[$i] = regular_express($regexp_array, $splitarr[$i]);
                }

                // Script purge cleanup: on the first rule only, append a purge()
                // call to every inline (src-less) <script> block that is kept.
                if ($firstrun && !$remove_scripts && strtolower(substr($splitarr[$i], -9)) == '</script>' && !preg_match('/^[^>]*src/i', $splitarr[$i])) {
                    $splitarr[$i] = preg_replace('/' . END_OF_SCRIPT_TAG . '$/i', ';' . COOK_PREF . '.purge();//--></script>', $splitarr[$i]);
                }
            }

            $firstrun = false;
            if ($firstjsrun && $key == 'application/x-javascript') {
                $firstjsrun = false;
            }
        }
    }

    // Merge the rewritten on<event> pieces back over the plain-markup segments.
    // Guarded with isset(): when no 'application/x-javascript' rule set exists
    // the pieces were never built, and the segments must be left as they are.
    if (!$remove_scripts && isset($splitarr2)) {
        for ($i = 0; $i < $segment_count; $i += 2) {
            $splitarr[$i] = implode('', $splitarr2[$i]);
        }
    }

    return implode('', $splitarr);
}
Example #2
0
/**
 * Run the configured rewrite rules over a fetched document.
 *
 * For any CONTENT_TYPE other than 'text/html' the rules registered under the
 * matching key of $regexp_arrays are applied to the whole body and the result
 * is returned.
 *
 * For 'text/html' the document is split into alternating segments: even
 * indexes hold plain markup, odd indexes hold the captured delimiters
 * (comments, <style> blocks and <script> blocks). When the COOK_PREF-prefixed
 * '_remove_scripts' cookie is set the document is not split and is handled as
 * a single plain-markup segment. Each rule is routed by its element [1]:
 *   - 1: applied to the plain-markup (even) segments;
 *   - 2: applied to <script> segments, and inside plain-markup segments to the
 *        on<event> attributes only - or to the whole segment when it contains
 *        no on<event> attribute;
 *   - any rule under the 'text/css' key is also applied to <style> segments.
 *
 * regular_express() is defined elsewhere; it is presumably given one rule plus
 * a string and returns the rewritten string (confirm against its definition).
 *
 * @param string $html Document body to rewrite.
 * @return string Rewritten document body.
 */
function parse_all_html($html)
{
    global $regexp_arrays;

    // Non-HTML payloads: apply only the rule set keyed by the content type.
    if (CONTENT_TYPE != 'text/html') {
        // foreach replaces the reset()/each() idiom; each() no longer exists in PHP 8.
        foreach ($regexp_arrays as $key => $arr) {
            if ($key == CONTENT_TYPE) {
                foreach ($arr as $regarr) {
                    $html = regular_express($regarr, $html);
                }
            }
        }
        return $html;
    }

    if (!empty($_COOKIE[COOK_PREF . '_remove_scripts'])) {
        $splitarr = array($html);
    } else {
        // Even indexes: plain markup. Odd indexes: comment / <style> / <script> blocks.
        $splitarr = preg_split('/(<!--.*?-->|<style.*?<\\/style>|<script.*?<\\/script>)/is', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($splitarr === false) {
            // PCRE failure: degrade to treating the whole document as one markup segment.
            $splitarr = array($html);
        }
    }
    unset($html);
    $segment_count = count($splitarr);

    foreach ($regexp_arrays as $key => $arr) {
        if ($key == 'text/javascript') {
            continue;
        }

        foreach ($arr as $regexp_array) {
            for ($i = 0; $i < $segment_count; $i++) {
                if ($regexp_array[1] == 2 && $i % 2 == 0) {
                    // Type-2 rule on a markup segment: isolate the on<event> attributes.
                    $splitarr2 = preg_split('/( on[a-z]{3,20}=(?:"(?:[^"]+)"|\'(?:[^\']+)\'|[^"\' >][^ >]+[^"\' >]))/is', $splitarr[$i], -1, PREG_SPLIT_DELIM_CAPTURE);
                    if ($splitarr2 === false || count($splitarr2) < 2) {
                        // No handler found (or the split failed): apply the rule to the whole segment.
                        $splitarr[$i] = regular_express($regexp_array, $splitarr[$i]);
                    } else {
                        // Odd pieces are the captured on<event> attributes; rewrite only those.
                        $piece_count = count($splitarr2);
                        for ($j = 1; $j < $piece_count; $j += 2) {
                            $splitarr2[$j] = regular_express($regexp_array, $splitarr2[$j]);
                        }
                        $splitarr[$i] = implode('', $splitarr2);
                    }
                    unset($splitarr2);
                } elseif ($regexp_array[1] == 1 && $i % 2 == 0 || strtolower(substr($splitarr[$i], 0, 7)) == '<script' && $regexp_array[1] == 2 || strtolower(substr($splitarr[$i], 0, 6)) == '<style' && $key == 'text/css') {
                    $splitarr[$i] = regular_express($regexp_array, $splitarr[$i]);
                }
            }
        }
    }

    return implode('', $splitarr);
}