コード例 #1
0
ファイル: parser.php プロジェクト: simonsmh/simonsmh.tk
 /**
  * Parse a complete HTML document and add the proxy's own markup to it.
  *
  * Runs the whole-document rewrites that only need to happen once (charset
  * detection, comment/title stripping, <base>, meta refresh and form
  * handling), then walks the document and hands the HTML, <script> and
  * <style> chunks to $this->HTML(), $this->JS() and $this->CSS()
  * respectively. Finally the supplied form, the injected javascript and the
  * footer are added.
  *
  * Reads $this->htmlOptions ('stripTitle', 'encodePage', 'stripJS') and the
  * globals $charset (written when not already set) and $foundPlugin (reset
  * to false after postParse() has been called).
  *
  * @param string       $input  The HTML document to parse
  * @param string|false $insert Markup to insert after the opening <body> tag; false skips this step
  * @param mixed        $inject When truthy, the output of injectionJS() is inserted after <head>
  * @param string       $footer Markup to insert before </body>; skipped when empty
  * @return string The rewritten document
  */
 function HTMLDocument($input, $insert = '', $inject = false, $footer = '')
 {
     # Large documents: raise the PCRE backtrack limit on PHP <= 5.3.7
     if (strlen($input) > 65536) {
         if (version_compare(PHP_VERSION, '5.3.7') <= 0) {
             ini_set('pcre.backtrack_limit', 1000000);
         }
     }
     #
     # Apply parsing that only needs to be done once..
     #
     # Record the charset (only if nothing has set it already)
     # NOTE(review): both charset patterns require at least one quote directly
     # after "charset=", so the unquoted form inside
     # content="text/html; charset=..." does not appear to match - confirm
     # whether that is intended before relaxing ["\']+ to ["\']*.
     global $charset;
     if (!isset($charset)) {
         # First look inside a <meta http-equiv=...> tag
         $meta_equiv = preg_match('#(<meta[^>]*http\\-equiv\\s*=[^>]*>)#is', $input, $tmp, PREG_OFFSET_CAPTURE) ? $tmp[0][0] : null;
         if (isset($meta_equiv)) {
             $charset = preg_match('#charset\\s*=\\s*["\']+([^"\'\\s>]*)#is', $meta_equiv, $tmp, PREG_OFFSET_CAPTURE) ? $tmp[1][0] : null;
         }
     }
     if (!isset($charset)) {
         # Otherwise accept a charset attribute on any <meta> tag
         $meta_charset = preg_match('#<meta[^>]*charset\\s*=\\s*["\']+([^"\'\\s>]*)#is', $input, $tmp, PREG_OFFSET_CAPTURE) ? $tmp[1][0] : null;
         if (isset($meta_charset)) {
             $charset = $meta_charset;
         }
     }
     # Remove empty script comments
     $input = preg_replace('#/\\*\\s*\\*/#s', '', $input);
     # Remove conditional comments: keep the content of [if !IE] blocks,
     # drop every other conditional block entirely
     $input = preg_replace('#<\\!\\-\\-\\[if \\!IE\\]>\\s*\\-\\->(.*?)<\\!\\[endif\\]\\-\\->#s', '$1', $input);
     $input = preg_replace('#<\\!\\-\\-\\[if.*?<\\!\\[endif\\]\\-\\->#s', '', $input);
     # Prevent websites from calling disableOverride()
     $input = preg_replace('#disableOverride#s', 'disabled___disableOverride', $input);
     # Prevent websites from making STUN requests
     $input = preg_replace('#RTCPeerConnection#s', 'disabled___RTCPeerConnection', $input);
     # Remove titles if option is enabled
     if ($this->htmlOptions['stripTitle'] || $this->htmlOptions['encodePage']) {
         $input = preg_replace('#<title.*?</title>#is', '', $input, 1);
         $input = preg_replace('#<meta[^>]*name=["\'](title|description|keywords)["\'][^>]*>#is', '', $input, 3);
         $input = preg_replace('#<link[^>]*rel=["\'](icon|shortcut icon)["\'][^>]*>#is', '', $input, 2);
     }
     # Remove and record a <base> href
     $input = preg_replace_callback('#<base href\\s*=\\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,2048}|[^\\\']{1,2048})|[^\\s"\\\'>]{1,2048}))(?(1)\\1|)[^>]*>#i', 'html_stripBase', $input, 1);
     # Proxy url= values in meta redirects
     $input = preg_replace_callback('#content\\s*=\\s*(["\\\'])?[0-9]+\\s*;\\s*url=([\\\'"]|&\\#39;)?((?(?<=")[^"]+|(?(?<=\\\')[^\\\']+|[^\\\'" >]+)))(?(2)\\2|)(?(1)\\1|)#i', 'html_metaRefresh', $input, 1);
     # Process forms
     $input = preg_replace_callback('#<form([^>]*)>(.*?)</form>#is', 'html_form', $input);
     # Remove scripts blocks (avoids individual processing below)
     if ($this->htmlOptions['stripJS']) {
         $input = preg_replace('#<script[^>]*>.*?</script>#is', '', $input);
     }
     #
     # Split up the document into its different types and parse them
     #
     # Build up new document into this var
     $new = '';
     $offset = 0;
     # Find instances of script or style blocks
     while (preg_match('#<(s(?:cript|tyle))[^>]*>#i', $input, $match, PREG_OFFSET_CAPTURE, $offset)) {
         # What type of block is this?
         $block = strtolower($match[1][0]);
         # Start position of the opening tag and of the content after it
         $outerStart = $match[0][1];
         $innerStart = $outerStart + strlen($match[0][0]);
         # Find the position of the matching end tag
         $innerEnd = stripos($input, "</{$block}>", $innerStart);
         if ($innerEnd === false) {
             # No matching end tag: fall back to the next closing tag of any kind
             $innerEnd = stripos($input, '</', $innerStart);
             if ($innerEnd === false) {
                 # Nothing closes this block at all. Drop an unterminated
                 # script from its opening tag to the end of the document.
                 # Cutting at $outerStart (always >= $offset) keeps the HTML
                 # between the previous block and this tag available for the
                 # final chunk below.
                 if ($block == 'script') {
                     $input = substr($input, 0, $outerStart);
                 }
                 break;
             }
         }
         # Parse everything up till here and add to the new document
         $new .= $this->HTML(substr($input, $offset, $innerStart - $offset));
         # Find parsing function
         $parseFunction = $block == 'style' ? 'CSS' : 'JS';
         # Add the parsed block
         $new .= $this->{$parseFunction}(substr($input, $innerStart, $innerEnd - $innerStart));
         # Move offset to new position (the end tag is parsed as HTML next time round)
         $offset = $innerEnd;
     }
     # And add the final chunk (between last script/style block and end of doc)
     $new .= $this->HTML(substr($input, $offset));
     # Replace input with the updated document
     $input = $new;
     # Give plugins a chance to post-process the document
     global $foundPlugin;
     if ($foundPlugin && function_exists('postParse')) {
         $input = postParse($input, 'html');
         $foundPlugin = false;
     }
     # Make URLs relative
     $input = preg_replace('#=\\s*(["\'])?\\s*https?://[^"\'>/]*/#i', '=$1/', $input);
     # Encode the page
     if ($this->htmlOptions['encodePage']) {
         $input = encodePage($input);
     }
     #
     # Now add our own code bits
     #
     # The strings added below are escaped with addcslashes(..., '\\$') so
     # that any "$1" or "\1" they contain is inserted literally instead of
     # being treated as a back-reference by preg_replace().
     #
     # Insert our mini form after the <body>
     if ($insert !== false) {
         # Check for a frameset (strict test so a match at offset 0 still counts)
         $useFrames = stripos($input, '<frameset') !== false;
         if ($useFrames) {
             # Flag the frames so only first displays mini-form
             $input = preg_replace_callback('#<frame[^>]+src\\s*=\\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,2048}|[^\\\']{1,2048})|[^\\s"\\\'>]{1,2048}))(?(1)\\1|)#i', 'html_flagFrames', $input);
         }
         # Attempt to add after body
         $input = preg_replace('#(<body[^>]*>)#i', '$1' . addcslashes($insert, '\\$'), $input, 1, $tmp);
         # Check it inserted and prepend if not (unless this is a frameset)
         if (!$tmp && !$useFrames) {
             $input = $insert . $input;
         }
     }
     # Insert our javascript library
     if ($inject) {
         # Generate javascript to insert
         $inject = injectionJS();
         # Add our proxy javascript after <head>
         $input = preg_replace('#(<head[^>]*>)#i', '$1' . addcslashes($inject, '\\$'), $input, 1, $tmp);
         # If no <head>, just prepend
         if (!$tmp) {
             $input = $inject . $input;
         }
     }
     # Add anything to the footer?
     if ($footer) {
         $input = preg_replace('#(</body[^>]*>)#i', addcslashes($footer, '\\$') . '$1', $input, 1, $tmp);
         # If no </body>, just append the footer
         if (!$tmp) {
             $input .= $footer;
         }
     }
     # Return new document
     return $input;
 }
コード例 #2
0
ファイル: parser.php プロジェクト: alberthier/fullmetalgalaxy
 /**
  * Parse a complete HTML document and add the proxy's own markup to it.
  *
  * Applies the whole-document rewrites that only need to happen once (title
  * stripping, <base>, meta refresh and form handling), then walks the
  * document and hands the HTML, <script> and <style> chunks to $this->HTML(),
  * $this->JS() and $this->CSS() respectively. Finally the supplied form, the
  * injected javascript and the footer are added.
  *
  * Reads $this->htmlOptions ('stripTitle', 'stripJS', 'encodePage').
  *
  * @param string       $input  The HTML document to parse
  * @param string|false $insert Markup to insert after the opening <body> tag; false skips this step
  * @param mixed        $inject When truthy, the output of injectionJS() is inserted after <head>
  * @param string       $footer Markup to insert before </body>; skipped when empty
  * @return string The rewritten document
  */
 function HTMLDocument($input, $insert = '', $inject = false, $footer = '')
 {
     //
     // Apply parsing that only needs to be done once..
     //
     // Remove titles if option is enabled
     if ($this->htmlOptions['stripTitle']) {
         $input = preg_replace('#<title.*?</title>#is', '', $input, 1);
     }
     // Remove and record a <base> href
     $input = preg_replace_callback('#<base href\\s*=\\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\\s"\\\'>]{1,1000}))(?(1)\\1|)[^>]*>#i', 'html_stripBase', $input, 1);
     // Proxify url= values in meta redirects
     $input = preg_replace_callback('#content\\s*=\\s*(["\\\'])?[0-9]+\\s*;\\s*url=([\\\'"]|&\\#39;)?((?(?<=")[^"]+|(?(?<=\\\')[^\\\']+|[^\\\'" >]+)))(?(2)\\2|)(?(1)\\1|)#i', 'html_metaRefresh', $input, 1);
     // Process forms
     $input = preg_replace_callback('#<form([^>]*)>(.*?)</form>#is', 'html_form', $input);
     // Remove scripts blocks (avoids individual processing below)
     if ($this->htmlOptions['stripJS']) {
         $input = preg_replace('#<script[^>]*>.*?</script>#is', '', $input);
     }
     //
     // Split up the document into its different types and parse them
     //
     // Build up new document into this var
     $new = '';
     $offset = 0;
     // Find instances of script or style blocks
     while (preg_match('#<(s(?:cript|tyle))[^>]*>#i', $input, $match, PREG_OFFSET_CAPTURE, $offset)) {
         // What type of block is this?
         $block = strtolower($match[1][0]);
         // Start position of the opening tag and of the content after it
         $outerStart = $match[0][1];
         $innerStart = $outerStart + strlen($match[0][0]);
         // Find the position of the matching end tag
         $innerEnd = stripos($input, "</{$block}>", $innerStart);
         if ($innerEnd === false) {
             // Unterminated block: treat the rest of the document as its
             // content. Without this, $offset would become false (i.e. 0)
             // and the loop would match the same opening tag forever.
             $innerEnd = strlen($input);
         }
         // Parse everything up till here and add to the new document
         $new .= $this->HTML(substr($input, $offset, $innerStart - $offset));
         // Find parsing function
         $parseFunction = $block == 'style' ? 'CSS' : 'JS';
         // Add the parsed block
         $new .= $this->{$parseFunction}(substr($input, $innerStart, $innerEnd - $innerStart));
         // Move offset to new position (the end tag is parsed as HTML next time round)
         $offset = $innerEnd;
     }
     // And add the final chunk (between last script/style block and end of doc)
     $new .= $this->HTML(substr($input, $offset));
     // Replace input with the updated document
     $input = $new;
     // Encode the page
     if ($this->htmlOptions['encodePage']) {
         $input = encodePage($input);
     }
     //
     // Now add our own code bits
     //
     // The strings added below are escaped with addcslashes(..., '\\$') so
     // that any "$1" or "\1" they contain is inserted literally instead of
     // being treated as a back-reference by preg_replace().
     //
     // Insert our mini form after the <body>
     if ($insert !== false) {
         // Check for a frameset (strict test so a match at offset 0 still counts)
         $useFrames = stripos($input, '<frameset') !== false;
         if ($useFrames) {
             // Flag the frames so only first displays mini-form
             $input = preg_replace_callback('#<frame[^>]+src\\s*=\\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\\s"\\\'>]{1,1000}))(?(1)\\1|)#i', 'html_flagFrames', $input);
         }
         // Attempt to add after body
         $input = preg_replace('#(<body[^>]*>)#i', '$1' . addcslashes($insert, '\\$'), $input, 1, $tmp);
         // Check it inserted and prepend if not (unless this is a frameset)
         if (!$tmp && !$useFrames) {
             $input = $insert . $input;
         }
     }
     // Insert our javascript library
     if ($inject) {
         // Generate javascript to insert
         $inject = injectionJS();
         // Add our proxy javascript after <head>
         $input = preg_replace('#(<head[^>]*>)#i', '$1' . addcslashes($inject, '\\$'), $input, 1, $tmp);
         // If no <head>, just prepend
         if (!$tmp) {
             $input = $inject . $input;
         }
     }
     // Add anything to the footer?
     if ($footer) {
         $input = preg_replace('#(</body[^>]*>)#i', addcslashes($footer, '\\$') . '$1', $input, 1, $tmp);
         // If no </body>, just append the footer
         if (!$tmp) {
             $input .= $footer;
         }
     }
     // Return new document
     return $input;
 }