/**
 * Parse a complete HTML document and return the rewritten markup.
 *
 * Steps, in order:
 *  1. One-off clean-up on the whole document: record the charset in the
 *     global $charset (if not already set), drop empty block comments and
 *     conditional comments, rename "disableOverride" / "RTCPeerConnection",
 *     optionally strip title/meta/icon tags, and hand <base>, meta-refresh
 *     and <form> matches to the html_stripBase / html_metaRefresh / html_form
 *     callbacks.
 *  2. Split the document at <script> and <style> blocks. Markup between
 *     blocks goes through $this->HTML(), block contents go through
 *     $this->JS() or $this->CSS().
 *  3. Run postParse() when the global $foundPlugin is set and the function
 *     exists, strip "scheme://host/" prefixes from attribute values, and
 *     call encodePage() when the 'encodePage' option is on.
 *  4. Add the caller-supplied $insert / injected javascript / $footer.
 *
 * Reads $this->htmlOptions keys 'stripTitle', 'encodePage' and 'stripJS'.
 *
 * @param string       $input  The HTML document to parse.
 * @param string|false $insert Markup placed directly after the opening <body>
 *                             tag (prepended when there is no <body> and no
 *                             <frameset>). Pass false to skip this step.
 * @param mixed        $inject When truthy, the output of injectionJS() is
 *                             placed directly after the opening <head> tag
 *                             (prepended when there is no <head>).
 * @param string       $footer Markup placed directly before </body>
 *                             (appended when there is no </body>).
 *
 * @return string The parsed document.
 */
function HTMLDocument($input, $insert = '', $inject = false, $footer = '') {

    # Raise the PCRE backtrack limit for large documents on PHP <= 5.3.7
    if (strlen($input) > 65536) {
        if (version_compare(PHP_VERSION, '5.3.7') <= 0) {
            ini_set('pcre.backtrack_limit', 1000000);
        }
    }

    #
    # Apply parsing that only needs to be done once..
    #

    # Record the charset (first from a <meta http-equiv>, then from <meta charset>)
    global $charset;

    if (!isset($charset)) {
        $meta_equiv = preg_match('#(<meta[^>]*http\\-equiv\\s*=[^>]*>)#is', $input, $tmp, PREG_OFFSET_CAPTURE) ? $tmp[0][0] : null;
        if (isset($meta_equiv)) {
            $charset = preg_match('#charset\\s*=\\s*["\']+([^"\'\\s>]*)#is', $meta_equiv, $tmp, PREG_OFFSET_CAPTURE) ? $tmp[1][0] : null;
        }
    }

    if (!isset($charset)) {
        $meta_charset = preg_match('#<meta[^>]*charset\\s*=\\s*["\']+([^"\'\\s>]*)#is', $input, $tmp, PREG_OFFSET_CAPTURE) ? $tmp[1][0] : null;
        if (isset($meta_charset)) {
            $charset = $meta_charset;
        }
    }

    # Remove empty script comments
    $input = preg_replace('#/\\*\\s*\\*/#s', '', $input);

    # Remove conditional comments (the content of "[if !IE]" sections is kept)
    $input = preg_replace('#<\\!\\-\\-\\[if \\!IE\\]>\\s*\\-\\->(.*?)<\\!\\[endif\\]\\-\\->#s', '$1', $input);
    $input = preg_replace('#<\\!\\-\\-\\[if.*?<\\!\\[endif\\]\\-\\->#s', '', $input);

    # Prevent websites from calling disableOverride()
    $input = preg_replace('#disableOverride#s', 'disabled___disableOverride', $input);

    # Prevent websites from making STUN requests
    $input = preg_replace('#RTCPeerConnection#s', 'disabled___RTCPeerConnection', $input);

    # Remove titles if option is enabled
    if ($this->htmlOptions['stripTitle'] || $this->htmlOptions['encodePage']) {
        $input = preg_replace('#<title.*?</title>#is', '', $input, 1);
        $input = preg_replace('#<meta[^>]*name=["\'](title|description|keywords)["\'][^>]*>#is', '', $input, 3);
        $input = preg_replace('#<link[^>]*rel=["\'](icon|shortcut icon)["\'][^>]*>#is', '', $input, 2);
    }

    # Remove and record a <base> href
    $input = preg_replace_callback('#<base href\\s*=\\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,2048}|[^\\\']{1,2048})|[^\\s"\\\'>]{1,2048}))(?(1)\\1|)[^>]*>#i', 'html_stripBase', $input, 1);

    # Proxy url= values in meta redirects
    $input = preg_replace_callback('#content\\s*=\\s*(["\\\'])?[0-9]+\\s*;\\s*url=([\\\'"]|&\\#39;)?((?(?<=")[^"]+|(?(?<=\\\')[^\\\']+|[^\\\'" >]+)))(?(2)\\2|)(?(1)\\1|)#i', 'html_metaRefresh', $input, 1);

    # Process forms
    $input = preg_replace_callback('#<form([^>]*)>(.*?)</form>#is', 'html_form', $input);

    # Remove scripts blocks (avoids individual processing below)
    if ($this->htmlOptions['stripJS']) {
        $input = preg_replace('#<script[^>]*>.*?</script>#is', '', $input);
    }

    #
    # Split up the document into its different types and parse them
    #

    # Build up new document into this var
    $new    = '';
    $offset = 0;

    # Find instances of script or style blocks
    while (preg_match('#<(s(?:cript|tyle))[^>]*>#i', $input, $match, PREG_OFFSET_CAPTURE, $offset)) {

        # What type of block is this?
        $block = strtolower($match[1][0]);

        # Start position of content
        $outerStart = $match[0][1];
        $innerStart = $outerStart + strlen($match[0][0]);

        # Find the matching end tag, falling back to the next "</" of any kind
        $innerEnd = stripos($input, "</{$block}>", $innerStart);

        if ($innerEnd === false) {
            $innerEnd = stripos($input, '</', $innerStart);
        }

        if ($innerEnd === false) {
            # Unterminated block: drop everything from the next <script> tag to
            # the end of the document. Only the not-yet-processed tail is
            # touched; running the pattern over the whole input would match an
            # earlier, already handled <script> and shrink $input below $offset,
            # losing the markup that still has to be emitted.
            $tail  = preg_replace('#<script[^>]*>.*?$#is', '', substr($input, $offset));
            $input = substr($input, 0, $offset) . $tail;
            break;
        }

        # Parse everything up till here and add to the new document
        $new .= $this->HTML(substr($input, $offset, $innerStart - $offset));

        # Find parsing function
        $parseFunction = $block == 'style' ? 'CSS' : 'JS';

        # Add the parsed block
        $new .= $this->{$parseFunction}(substr($input, $innerStart, $innerEnd - $innerStart));

        # Move offset to the end tag; the tag itself is emitted with the next HTML chunk
        $offset = $innerEnd;
    }

    # And add the final chunk (between last script/style block and end of doc)
    $new .= $this->HTML(substr($input, $offset));

    # Replace input with the updated document
    $input = $new;

    global $foundPlugin;

    if ($foundPlugin && function_exists('postParse')) {
        $input = postParse($input, 'html');
        $foundPlugin = false;
    }

    # Make URLs relative
    $input = preg_replace('#=\\s*(["\'])?\\s*https?://[^"\'>/]*/#i', '=$1/', $input);

    # Encode the page
    if ($this->htmlOptions['encodePage']) {
        $input = encodePage($input);
    }

    #
    # Now add our own code bits
    #
    # The pieces below are spliced in at the matched offset instead of being
    # passed to preg_replace() as part of the replacement string: sequences
    # such as "$1" or "\1" inside the inserted text would otherwise be
    # expanded as back-references and corrupt the output.
    #

    # Insert our mini form after the <body>
    if ($insert !== false) {

        # Keep a real boolean: a raw stripos() result of 0 (document starting
        # with <frameset) would otherwise be treated as "no frameset" below
        $useFrames = stripos($input, '<frameset') !== false;

        if ($useFrames) {
            # Flag the frames so only first displays mini-form
            $input = preg_replace_callback('#<frame[^>]+src\\s*=\\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,2048}|[^\\\']{1,2048})|[^\\s"\\\'>]{1,2048}))(?(1)\\1|)#i', 'html_flagFrames', $input);
        }

        if (preg_match('#<body[^>]*>#i', $input, $tmp, PREG_OFFSET_CAPTURE)) {
            # Add directly after the opening body tag
            $input = substr_replace($input, $insert, $tmp[0][1] + strlen($tmp[0][0]), 0);
        } elseif (!$useFrames) {
            # No <body> found: prepend (if not a frameset)
            $input = $insert . $input;
        }
    }

    # Insert our javascript library
    if ($inject) {

        # Generate javascript to insert
        $inject = injectionJS();

        # Add our proxy javascript after <head>; the lookahead stops the
        # pattern from matching other tags that start with "head" (<header>)
        if (preg_match('#<head(?=[\\s>/])[^>]*>#i', $input, $tmp, PREG_OFFSET_CAPTURE)) {
            $input = substr_replace($input, $inject, $tmp[0][1] + strlen($tmp[0][0]), 0);
        } else {
            # If no <head>, just prepend
            $input = $inject . $input;
        }
    }

    # Add anything to the footer?
    if ($footer) {

        if (preg_match('#</body[^>]*>#i', $input, $tmp, PREG_OFFSET_CAPTURE)) {
            # Place directly before the closing body tag
            $input = substr_replace($input, $footer, $tmp[0][1], 0);
        } else {
            # If no </body>, just append the footer
            $input .= $footer;
        }
    }

    # Return new document
    return $input;
}
# CSS file # CSS file case 'css': # Run through CSS parser $document = $parser->CSS($document); break; # Javascript file # Javascript file case 'javascript': # Run through javascript parser $document = $parser->JS($document); break; } # Apply postparsing from plugins if ($foundPlugin && function_exists('postParse')) { $document = postParse($document, $fetch->parseType); } # Send output if (!DEBUG_MODE) { # Do we want to gzip this? Yes, if all of the following are true: # - gzip option enabled # - client supports gzip # - zlib extension loaded # - output compression not automated if ($CONFIG['gzip_return'] && isset($_SERVER['HTTP_ACCEPT_ENCODING']) && strpos($_SERVER['HTTP_ACCEPT_ENCODING'], 'gzip') !== false && extension_loaded('zlib') && !ini_get('zlib.output_compression')) { # Send compressed (using level 3 compression - can be adjusted # to give smaller/larger files but will take longer/shorter time!) header('Content-Encoding: gzip'); echo gzencode($document, 3); } else { # Send uncompressed
# CSS document # CSS document case 'css': $fetch->return = parseCSS($fetch->return); break; # Javascript document # Javascript document case 'javascript': $fetch->return = parseJS($fetch->return); break; } # Strip badwords $fetch->return = str_replace($badWords, '####', $fetch->return); # Post parsing if (!empty($foundPlugin) && function_exists('postParse')) { $fetch->return = postParse($fetch->return, $fetch->docType); } # Print debug info if (DEBUG_MODE) { echo '<pre>', print_r($fetch, true), '</pre>'; ## Send output } else { # Do we want to compress? Yes if option is set, browser supports it, and zlib is available but compression not automated if (optGZIP && isset($_SERVER['HTTP_ACCEPT_ENCODING']) && strpos($_SERVER['HTTP_ACCEPT_ENCODING'], 'gzip') !== false && extension_loaded('zlib') && !ini_get('zlib.output_compression')) { echo ob_gzhandler($fetch->return, 5); } else { ## Send content-length header header('Content-Length: ' . strlen($fetch->return)); echo $fetch->return; } }