Esempio n. 1
0
 /**
  * Rewrites a chunk of JavaScript so that selected operations are routed
  * through wrapper calls.
  *
  * Depending on the active flags:
  *  - assignments to .innerHTML are wrapped in parseHTML();
  *  - assignments to .src/.href/.action/.background/.poster and to location
  *    are wrapped in parseURL();
  *  - the argument of location.replace() is wrapped in parseURL();
  *  - the argument of eval() is wrapped in parseJS();
  *  - calls to .open(), .write() and .writeln() get an extra "gl" argument.
  * Finally every literal "document.domain" is replaced with "ignore".
  *
  * Flags come from $this->jsFlagState: an array is used as the flag list
  * as-is; any other value enables 'ajax', 'watch' and 'setters', and null
  * additionally enables 'base'.
  *
  * Relies on str_checkprev(), str_checknext(), analyze_js() and
  * analyzeAssign_js(), which are defined outside this method.
  *
  * @param string $input JavaScript source to rewrite
  * @return string the rewritten source, or '' when the stripJS option is set
  */
 function JS($input)
 {
     # Stripping?
     if ($this->htmlOptions['stripJS']) {
         return '';
     }
     # Get our flags
     $flags = $this->jsFlagState;
     # Unless we know we don't need to, apply all the browser-specific flags
     if (!is_array($this->jsFlagState)) {
         $flags = array('ajax', 'watch', 'setters');
     }
     # If override is disabled, add a "base" flag
     if ($this->jsFlagState === null) {
         $flags[] = 'base';
     }
     # Start parsing!
     $search = array();
     # Create shortcuts to various search patterns:
     #	  "before"	  - matches preceding character (string of single char) [ignoring whitespace]
     #	  "after"	  - matches next character (string of single char) [ignoring whitespace]
     #	  "id"		  - key for identifying the original match (e.g. if we have >1 of the same key)
     $assignmentPattern = array('before' => '.', 'after' => '=');
     $methodPattern = array('before' => '.', 'after' => '(');
     $functionPattern = array('after' => '(');
     # Configure strings to search for, starting with always replaced commands
     $search['innerHTML'][] = $assignmentPattern;
     $search['location'][] = array('after' => '.', 'id' => 'replace()');
     # ^ This is only for location.replace() - other forms are handled later
     # Look for attribute assignments
     if (in_array('setters', $flags)) {
         $search['src'][] = $assignmentPattern;
         $search['href'][] = $assignmentPattern;
         $search['action'][] = $assignmentPattern;
         $search['background'][] = $assignmentPattern;
         $search['poster'][] = $assignmentPattern;
     }
     # Look for location changes
     # location.href will be handled above, location= is handled here
     if (in_array('watch', $flags)) {
         $search['location'][] = array('after' => '=', 'id' => 'assignment');
     }
     # Look for .open() if either AJAX (XMLHttpRequest.open) or
     # base (window.open) flags are present
     if (in_array('ajax', $flags) || in_array('base', $flags)) {
         $search['open'][] = $methodPattern;
     }
     # Add the basic code if no override
     if (in_array('base', $flags)) {
         $search['eval'][] = $functionPattern;
         $search['writeln'][] = $methodPattern;
         $search['write'][] = $methodPattern;
     }
     # Set up starting parameters
     $offset = 0;
     $length = strlen($input);
     $searchStrings = array_keys($search);
     while ($offset < $length) {
         # Start off by assuming no more items (i.e. the next position
         # of interest is the end of the document)
         $commandPos = $length;
         # Loop through the search subjects
         foreach ($searchStrings as $item) {
             # Find the next occurrence of $item that is not the tail of a
             # longer identifier (i.e. not preceded by a letter or underscore).
             # An embedded occurrence must not hide later, valid ones, so keep
             # searching past it. The $tmp > 0 guard avoids reading $input[-1],
             # which is the LAST character of the string on PHP >= 7.1.
             $tmp = strpos($input, $item, $offset);
             while ($tmp !== false && $tmp > 0 && ($input[$tmp - 1] == '_' || ctype_alpha($input[$tmp - 1]))) {
                 $tmp = strpos($input, $item, $tmp + 1);
             }
             # Closer to the currently held 'next' position?
             if ($tmp !== false && $tmp < $commandPos) {
                 $commandPos = $tmp;
                 $command = $item;
             }
         }
         # No matches found? Finish parsing.
         if ($commandPos == $length) {
             break;
         }
         # We've found the main point of interest; now use the
         # search parameters to check the surrounding chars to validate
         # the match.
         $valid = false;
         foreach ($search[$command] as $pattern) {
             # Check the preceding chars
             if (isset($pattern['before']) && str_checkprev($input, $pattern['before'], $commandPos - 1) === false) {
                 continue;
             }
             # Check next chars
             if (isset($pattern['after']) && ($charPos = str_checknext($input, $pattern['after'], $commandPos + strlen($command), false, false)) === false) {
                 continue;
             }
             # Position of the first non-whitespace char after the "after" char
             $postCharPos = $charPos + 1 + strspn($input, " \t\r\n", $charPos + 1);
             # Still here? Match must be OK so generate a match ID
             if (isset($pattern['id'])) {
                 $valid = $command . $pattern['id'];
             } else {
                 $valid = $command;
             }
             break;
         }
         # What we do next depends on which match (if any) we've found...
         switch ($valid) {
             # Assignment
             case 'src':
             case 'href':
             case 'background':
             case 'poster':
             case 'action':
             case 'locationassignment':
             case 'innerHTML':
                 # Check our post-char position for = as well (could be equality
                 # test rather than assignment, i.e. == )
                 if (!isset($input[$postCharPos]) || $input[$postCharPos] == '=') {
                     break;
                 }
                 # Find the end of this statement
                 $endPos = analyzeAssign_js($input, $charPos);
                 $valueLength = $endPos - $postCharPos;
                 # Produce replacement command
                 $replacement = sprintf('parse%s(%s)', $command == 'innerHTML' ? 'HTML' : 'URL', substr($input, $postCharPos, $valueLength));
                 # Make the replacement
                 $input = substr_replace($input, $replacement, $postCharPos, $valueLength);
                 # Re-measure the document rather than guessing the growth
                 $length = strlen($input);
                 # Move offset to just past the wrapped value
                 $offset = $postCharPos + strlen($replacement);
                 # Go get next match
                 continue 2;
             # Function calls - we don't know for certain if these are in fact members of the
             # appropriate objects (window/XMLHttpRequest for .open(), document for .write() and
             # .writeln) so we won't change anything. Main.js still overrides these functions but
             # does nothing with them by default. We add an extra parameter to tell our override
             # to kick in.
             case 'open':
             case 'write':
             case 'writeln':
                 # Find the end position (the closing ")" for the function call)
                 $endPos = analyze_js($input, $charPos);
                 # Insert our additional argument just before that. If the call
                 # has no arguments of its own, omit the leading comma.
                 $glStr = ',"gl"';
                 if (strspn($input, ";\n\r\\+{}()[]", $charPos) >= $endPos - $charPos) {
                     $glStr = '"gl"';
                 }
                 $input = substr_replace($input, $glStr, $endPos - 1, 0);
                 # Adjust the document length
                 $length = strlen($input);
                 # And move the offset
                 $offset = $endPos + strlen($glStr);
                 # Get next match
                 continue 2;
             # Eval() is just as easy since we can just wrap the entire thing in parseJS().
             case 'eval':
                 # Ensure this is a call to eval(), not anotherfunctionendingineval().
                 # Letters and "_" were already excluded by the search loop; this
                 # also rejects a preceding digit (including 0).
                 if ($commandPos > 0 && (ctype_alnum($input[$commandPos - 1]) || $input[$commandPos - 1] == '_')) {
                     break;
                 }
                 # Find the end position (the closing ")" for the function call)
                 $endPos = analyze_js($input, $charPos);
                 $valueLength = $endPos - $postCharPos;
                 # Generate our replacement
                 $replacement = sprintf('parseJS(%s)', substr($input, $postCharPos, $valueLength));
                 # Make the replacement
                 $input = substr_replace($input, $replacement, $postCharPos, $valueLength);
                 # Adjust the document length
                 $length = strlen($input);
                 # And move the offset past the wrapped argument
                 $offset = $postCharPos + strlen($replacement);
                 continue 2;
             # location.replace() is a tricky one. We have the position of the char
             # after . as $postCharPos and need to ensure we're calling replace(),
             # then parse the entire URL
             case 'locationreplace()':
                 # Validate the match (\G anchors the pattern at $postCharPos)
                 if (!preg_match('#\\Greplace\\s*\\(#', $input, $tmp, 0, $postCharPos)) {
                     break;
                 }
                 # Move $postCharPos to inside the brackets of .replace()
                 $postCharPos += strlen($tmp[0]);
                 # Find the end position (the closing ")" for the function call)
                 $endPos = analyze_js($input, $postCharPos);
                 $valueLength = $endPos - $postCharPos;
                 # Generate our replacement
                 $replacement = sprintf('parseURL(%s)', substr($input, $postCharPos, $valueLength));
                 # Make the replacement
                 $input = substr_replace($input, $replacement, $postCharPos, $valueLength);
                 # Adjust the document length ("parseURL(" + ")" adds 10 chars,
                 # so measure instead of hard-coding)
                 $length = strlen($input);
                 # And move the offset past the wrapped argument
                 $offset = $postCharPos + strlen($replacement);
                 continue 2;
         }
         # Still here? A match didn't validate so adjust offset to just after
         # current position
         $offset = $commandPos + 1;
     }
     # Ignore document.domain
     $input = str_replace('document.domain', 'ignore', $input);
     # Return changed
     return $input;
 }
Esempio n. 2
0
 /**
  * Rewrites a chunk of JavaScript so that URL- and HTML-bearing operations
  * are routed through wrapper calls.
  *
  *  - Assignments to .innerHTML are wrapped in parseHTML().
  *  - The argument of location.replace() is wrapped in parseURL().
  *  - With the 'watch' flag, assignments to location are wrapped in parseURL().
  *  - With the 'setters' flag, assignments to .src/.href/.action/.background
  *    are wrapped in parseURL().
  *  - With the 'ajax' flag, for every "name = new XMLHttpRequest" found, the
  *    second argument of each name.open() call is wrapped in
  *    parseURL(..., "ajax").
  * Finally every literal "document.domain" is replaced with "ignore".
  *
  * Side effect: when $this->jsFlags is false it is replaced by the full flag
  * list ('ajax', 'watch', 'setters').
  *
  * Relies on str_checkprev(), str_checknext() and analyze_js(), which are
  * defined outside this method.
  *
  * @param string $input JavaScript source to rewrite
  * @return string the rewritten source, or '' when the stripJS option is set
  */
 function JS($input)
 {
     // Stripping?
     if ($this->htmlOptions['stripJS']) {
         return '';
     }
     // If browser capabilities are unknown, apply all parsing
     if ($this->jsFlags === false) {
         $this->jsFlags = array('ajax', 'watch', 'setters');
     }
     // Start parsing!
     // Strings to search for
     $search = array('innerHTML', 'location');
     // Look for attribute assignments
     if (in_array('setters', $this->jsFlags)) {
         $search[] = 'src';
         $search[] = 'href';
         $search[] = 'action';
         $search[] = 'background';
     }
     // Ajax operations - can't look for XMLHttpRequest.open() directly
     // since the object is likely to be renamed.
     if (in_array('ajax', $this->jsFlags)) {
         $search[] = 'XMLHttpRequest';
     }
     // Names of XMLHttpRequest instances whose .open() calls were already rewritten
     $ajaxObjects = array();
     // Set up starting parameters
     $offset = 0;
     $length = strlen($input);
     while ($offset < $length) {
         // Start off by assuming no more items (i.e. the next position
         // of interest is the end of the document)
         $commandPos = $length;
         // Loop through the search subjects
         foreach ($search as $item) {
             // Any more instances of this?
             if (($tmp = strpos($input, $item, $offset)) === false) {
                 // Nope, skip to next item
                 continue;
             }
             // Closer to the currently held 'next' position?
             if ($tmp < $commandPos) {
                 $commandPos = $tmp;
                 $command = $item;
             }
         }
         // No matches found? Finish parsing.
         if ($commandPos == $length) {
             break;
         }
         // Assume no need to change the value by resetting the
         // position of the value to change
         $valuePos = false;
         // Validate the match and find the start of the value
         switch ($command) {
             // Either location.replace() or .location =
             case 'location':
                 // Check for location.replace() now, .location= will be handled
                 // by the same logic as used for all other assignments
                 if (($tmp = str_checknext($input, '.', $commandPos + strlen($command), true)) && substr($input, $tmp + 1, 7) == 'replace') {
                     // Now move inside the bracket ( +8 accounts for [.\s]replace )
                     if ($tmp = str_checknext($input, '(', $tmp + 8)) {
                         $valuePos = $tmp + 1;
                         // $tmp is position of the bracket so value is +1
                     }
                     // And we're done validating...
                     break;
                 }
                 // Not a .replace(); only continue to the location= handling
                 // when the 'watch' flag is set
                 if (!in_array('watch', $this->jsFlags)) {
                     break;
                 }
             // Assignments (the 'location' case deliberately falls through to here)
             case 'innerHTML':
             case 'src':
             case 'href':
             case 'background':
             case 'action':
                 // Is this a location? Special validation rules apply since location
                 // is a property of the window and can be accessed by "location" alone.
                 if ($command == 'location') {
                     // Difficult to be absolutely certain without much more complex parsing
                     // but look at the previous char - first reverse back to it.
                     // Stop at the start of the input: a negative string offset would
                     // wrap around to the end of the string on PHP >= 7.1.
                     $tmp = $commandPos - 1;
                     while ($tmp >= 0 && strspn($input[$tmp], "\r\n\t ")) {
                         --$tmp;
                     }
                     // Disallow commas (e.g. from the parameters of a window.open())
                     if ($tmp >= 0 && $input[$tmp] == ',') {
                         break;
                     }
                 } else {
                     if (str_checkprev($input, '.', $commandPos - 1) === false) {
                         // Everything else must be a property of another object so must be
                         // preceded by a period.
                         break;
                     }
                 }
                 // We've verified that the current command is a property.
                 // Now check that it's an assignment (as opposed to just a get)
                 if (($tmp = str_checknext($input, '=', $commandPos + strlen($command), true)) === false) {
                     break;
                 }
                 // Record the start position of the value
                 $valuePos = $tmp + 1;
                 // Validate the value start position by ensuring
                 // (A) the position exists!
                 // (B) the next char is not also an = (could be a test for equality)
                 if (!isset($input[$valuePos]) || $input[$valuePos] == '=') {
                     $valuePos = false;
                     break;
                 }
                 // We have verified the statement and found a start position.
                 break;
             // Special cases
             // XMLHttpRequest - we need to record any instances of the XMLHttpRequest object
             // and only then can we attempt to find the .open() calls and proxify the URL
             case 'XMLHttpRequest':
                 // Take up to 40 chars of context before the match, ending right after
                 // "XMLHttpRequest" (14 chars) so the $-anchored pattern can match.
                 // Clamp the start at 0: a negative substr() start counts from the
                 // END of the string.
                 $contextStart = max(0, $commandPos - 40);
                 $context = substr($input, $contextStart, $commandPos - $contextStart + 14);
                 // Ensure we're creating a new instance of the object and attempt to find the
                 // assigned name
                 if (str_checkprev($input, 'w', $commandPos - 1) === false || !preg_match('#\\b([a-zA-Z0-9_-]{1,20})\\s*=\\s*new\\s+XMLHttpRequest$#', $context, $obj)) {
                     break;
                 }
                 // Check we haven't already processed this one
                 if (isset($ajaxObjects[$obj[1]])) {
                     break;
                 }
                 // And save it for next time
                 $ajaxObjects[$obj[1]] = true;
                 // Start at top
                 $openOffset = 0;
                 // The captured name only contains [a-zA-Z0-9_-], none of which is
                 // special outside a character class, so it is safe to embed as-is.
                 $openPattern = '#\\b' . $obj[1] . '\\s*\\.\\s*open\\s*\\(#';
                 // Now proxify all calls to .open()
                 while (preg_match($openPattern, $input, $tmp, PREG_OFFSET_CAPTURE, $openOffset)) {
                     // Increase offset
                     $openOffset = $tmp[0][1] + strlen($tmp[0][0]);
                     // Find start/end positions for the URL (2nd parameter)
                     $pos = analyze_js($input, $openOffset, 2);
                     // Convert the end position to a length
                     $valueLength = $pos[1] - $pos[0];
                     // Wrap it in our URL parser
                     $wrapped = 'parseURL(' . substr($input, $pos[0], $valueLength) . ',"ajax")';
                     // Replace it
                     $input = substr_replace($input, $wrapped, $pos[0], $valueLength);
                     // Net growth of the document: 'parseURL(' + ',"ajax")' = 17 chars
                     $growth = strlen($wrapped) - $valueLength;
                     // And adjust the total length to compensate
                     $length += $growth;
                     // Move offset past the newly replaced string
                     $openOffset += $growth;
                     // A rewrite located before the current match shifts the match
                     // too; keep its position accurate for the offset update below
                     if ($pos[1] <= $commandPos) {
                         $commandPos += $growth;
                     }
                 }
                 break;
         }
         // Make any changes?
         if ($valuePos) {
             // We know the start, now find the end
             $endPos = analyze_js($input, $valuePos);
             // And from that, the length
             $valueLength = $endPos - $valuePos;
             // Create the parsed wrapper
             $wrapped = ($command == 'innerHTML' ? 'parseHTML' : 'parseURL') . '(' . substr($input, $valuePos, $valueLength) . ')';
             // Make the substitution
             $input = substr_replace($input, $wrapped, $valuePos, $valueLength);
             // And adjust the length by the net growth
             $length += strlen($wrapped) - $valueLength;
             // Move offset past the wrapped value. $endPos predates the
             // substitution, so it cannot be used directly here.
             $offset = $valuePos + strlen($wrapped);
         } else {
             // Nothing replaced; move offset past the most recently found match
             $offset = $commandPos + strlen($command);
         }
     }
     // Ignore document.domain
     $input = str_replace('document.domain', 'ignore', $input);
     // Return changed
     return $input;
 }