/**
 * Rewrite a chunk of JavaScript so that URL/HTML assignments and selected
 * calls are routed through the parse*() wrapper functions.
 *
 * Which rewrites are applied depends on $this->jsFlagState:
 *  - if it is not an array, the 'ajax', 'watch' and 'setters' flags are used;
 *  - if it is null, the 'base' flag is added on top of those.
 *
 * Rewrites performed:
 *  - .innerHTML = v                      -> value wrapped in parseHTML()   (always)
 *  - location.replace(v)                 -> argument wrapped in parseURL() (always)
 *  - .src/.href/.action/.background/.poster = v
 *                                        -> value wrapped in parseURL()    ('setters')
 *  - location = v                        -> value wrapped in parseURL()    ('watch')
 *  - .open(...)                          -> extra "gl" argument            ('ajax' or 'base')
 *  - .write(...) / .writeln(...)         -> extra "gl" argument            ('base')
 *  - eval(...)                           -> argument wrapped in parseJS()  ('base')
 *  - every literal 'document.domain' is replaced with 'ignore'
 *
 * Relies on str_checkprev(), str_checknext(), analyze_js() and
 * analyzeAssign_js(), which are defined outside this block.
 *
 * @param string $input JavaScript source to rewrite
 * @return string The rewritten source, or '' when the stripJS option is enabled
 */
function JS($input) {

    # Stripping?
    if ($this->htmlOptions['stripJS']) {
        return '';
    }

    # Get our flags
    $flags = $this->jsFlagState;

    # Unless we know we don't need to, apply all the browser-specific flags
    if (!is_array($flags)) {
        $flags = array('ajax', 'watch', 'setters');
    }

    # If override is disabled, add a "base" flag
    if ($this->jsFlagState === null) {
        $flags[] = 'base';
    }

    # Start parsing!
    $search = array();

    # Create shortcuts to various search patterns:
    #   "before" - matches preceding character (string of single char) [ignoring whitespace]
    #   "after"  - matches next character (string of single char) [ignoring whitespace]
    #   "id"     - key for identifying the original match (e.g. if we have >1 of the same key)
    $assignmentPattern = array('before' => '.', 'after' => '=');
    $methodPattern     = array('before' => '.', 'after' => '(');
    $functionPattern   = array('after' => '(');

    # Configure strings to search for, starting with always replaced commands
    $search['innerHTML'][] = $assignmentPattern;
    $search['location'][]  = array('after' => '.', 'id' => 'replace()');
    # ^ This is only for location.replace() - other forms are handled later

    # Look for attribute assignments
    if (in_array('setters', $flags)) {
        $search['src'][]        = $assignmentPattern;
        $search['href'][]       = $assignmentPattern;
        $search['action'][]     = $assignmentPattern;
        $search['background'][] = $assignmentPattern;
        $search['poster'][]     = $assignmentPattern;
    }

    # Look for location changes
    # location.href will be handled above, location= is handled here
    if (in_array('watch', $flags)) {
        $search['location'][] = array('after' => '=', 'id' => 'assignment');
    }

    # Look for .open() if either AJAX (XMLHttpRequest.open) or
    # base (window.open) flags are present
    if (in_array('ajax', $flags) || in_array('base', $flags)) {
        $search['open'][] = $methodPattern;
    }

    # Add the basic code if no override
    if (in_array('base', $flags)) {
        $search['eval'][]    = $functionPattern;
        $search['writeln'][] = $methodPattern;
        $search['write'][]   = $methodPattern;
    }

    # Set up starting parameters
    $offset        = 0;
    $length        = strlen($input);
    $searchStrings = array_keys($search);

    while ($offset < $length) {

        # Start off by assuming no more items (i.e. the next position
        # of interest is the end of the document)
        $commandPos = $length;

        # Loop through the search subjects
        foreach ($searchStrings as $item) {

            # Find the next occurrence that starts a word. An occurrence that
            # is the tail of a longer identifier (e.g. "xsrc") is stepped over
            # rather than abandoning the item, otherwise it would hide every
            # later, valid occurrence of the same item.
            $pos = $offset;
            while (($pos = strpos($input, $item, $pos)) !== false) {

                # Nothing precedes a match at the very start of the input.
                # (Reading $input[-1] would give the LAST char on PHP >= 7.1.)
                if ($pos === 0) {
                    break;
                }

                $prev = $input[$pos - 1];
                if ($prev != '_' && !ctype_alpha($prev)) {
                    break;
                }

                # Embedded in another word - try the next occurrence
                ++$pos;
            }

            # No (more) usable instances of this item
            if ($pos === false) {
                continue;
            }

            # Closer to the currently held 'next' position?
            if ($pos < $commandPos) {
                $commandPos = $pos;
                $command    = $item;
            }
        }

        # No matches found? Finish parsing.
        if ($commandPos == $length) {
            break;
        }

        # We've found the main point of interest; now use the
        # search parameters to check the surrounding chars to validate
        # the match.
        $valid = false;

        foreach ($search[$command] as $pattern) {

            # Check the preceding chars
            if (isset($pattern['before']) && str_checkprev($input, $pattern['before'], $commandPos - 1) === false) {
                continue;
            }

            # Check next chars
            if (isset($pattern['after']) && ($charPos = str_checknext($input, $pattern['after'], $commandPos + strlen($command), false, false)) === false) {
                continue;
            }

            # First non-whitespace position after the matched "after" char
            $postCharPos = $charPos + 1 + strspn($input, " \t\r\n", $charPos + 1);

            # Still here? Match must be OK so generate a match ID
            if (isset($pattern['id'])) {
                $valid = $command . $pattern['id'];
            } else {
                $valid = $command;
            }

            break;
        }

        # What we do next depends on which match (if any) we've found...
        switch ($valid) {

            # Assignment
            case 'src':
            case 'href':
            case 'background':
            case 'poster':
            case 'action':
            case 'locationassignment':
            case 'innerHTML':

                # Check our post-char position for = as well (could be equality
                # test rather than assignment, i.e. == )
                if (!isset($input[$postCharPos]) || $input[$postCharPos] == '=') {
                    break;
                }

                # Find the end of this statement
                $endPos      = analyzeAssign_js($input, $charPos);
                $valueLength = $endPos - $postCharPos;

                # Produce replacement command
                $replacement = sprintf('parse%s(%s)', $command == 'innerHTML' ? 'HTML' : 'URL', substr($input, $postCharPos, $valueLength));

                # Make the replacement
                $input = substr_replace($input, $replacement, $postCharPos, $valueLength);

                # Re-measure the document. Adding strlen($replacement) would
                # overshoot (the old value is removed as well) and let $offset
                # run past the real end of the string.
                $length = strlen($input);

                # Move offset to just past the inserted wrapper
                $offset = $postCharPos + strlen($replacement);

                # Go get next match
                continue 2;

            # Function calls - we don't know for certain if these are in fact members of the
            # appropriate objects (window/XMLHttpRequest for .open(), document for .write() and
            # .writeln) so we won't change anything. Main.js still overrides these functions but
            # does nothing with them by default. We add an extra parameter to tell our override
            # to kick in.
            case 'open':
            case 'write':
            case 'writeln':

                # Find the end position (the closing ")" for the function call)
                $endPos = analyze_js($input, $charPos);

                # Insert our additional argument just before that. If everything
                # up to the end position is punctuation from the list below,
                # no leading comma is emitted.
                $glStr = ',"gl"';
                if (strspn($input, ";\n\r\\+{}()[]", $charPos) >= $endPos - $charPos) {
                    $glStr = '"gl"';
                }

                $input = substr_replace($input, $glStr, $endPos - 1, 0);

                # Adjust the document length
                $length = strlen($input);

                # And move the offset
                $offset = $endPos + strlen($glStr);

                # Get next match
                continue 2;

            # Eval() is just as easy since we can just wrap the entire thing in parseJS().
            case 'eval':

                # Ensure this is a call to eval(), not anotherfunctionendingineval().
                # Only look back when there IS a previous char, and treat every
                # identifier char (including 0 and $) as part of a longer name.
                if ($commandPos > 0 && strpos('abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_$', $input[$commandPos - 1]) !== false) {
                    break;
                }

                # Find the end position (the closing ")" for the function call)
                $endPos      = analyze_js($input, $charPos);
                $valueLength = $endPos - $postCharPos;

                # Generate our replacement
                $replacement = sprintf('parseJS(%s)', substr($input, $postCharPos, $valueLength));

                # Make the replacement
                $input = substr_replace($input, $replacement, $postCharPos, $valueLength);

                # Adjust the document length
                $length = strlen($input);

                # And move the offset past the wrapper
                $offset = $postCharPos + strlen($replacement);

                continue 2;

            # location.replace() is a tricky one. We have the position of the char
            # after . as $postCharPos and need to ensure we're calling replace(),
            # then parse the entire URL
            case 'locationreplace()':

                # Validate the match (\G anchors the pattern at $postCharPos)
                if (!preg_match('#\\Greplace\\s*\\(#', $input, $tmp, 0, $postCharPos)) {
                    break;
                }

                # Move $postCharPos to inside the brackets of .replace()
                $postCharPos += strlen($tmp[0]);

                # Find the end position (the closing ")" for the function call)
                $endPos      = analyze_js($input, $postCharPos);
                $valueLength = $endPos - $postCharPos;

                # Generate our replacement
                $replacement = sprintf('parseURL(%s)', substr($input, $postCharPos, $valueLength));

                # Make the replacement
                $input = substr_replace($input, $replacement, $postCharPos, $valueLength);

                # Adjust the document length ("parseURL(" + ")" adds 10 chars,
                # so measure instead of hard-coding the growth)
                $length = strlen($input);

                # And move the offset past the wrapper
                $offset = $postCharPos + strlen($replacement);

                continue 2;
        }

        # Still here? A match didn't validate so adjust offset to just after
        # current position
        $offset = $commandPos + 1;
    }

    # Ignore document.domain
    $input = str_replace('document.domain', 'ignore', $input);

    # Return changed
    return $input;
}
/**
 * Rewrite a chunk of JavaScript so that URL/HTML assignments and AJAX
 * requests are routed through the parse*() wrapper functions.
 *
 * If $this->jsFlags is false it is replaced (on the object) with the full
 * flag set 'ajax', 'watch', 'setters'.
 *
 * Rewrites performed:
 *  - location.replace(v)              -> argument wrapped in parseURL()
 *  - .innerHTML = v                   -> value wrapped in parseHTML()
 *  - .src/.href/.action/.background = v
 *                                     -> value wrapped in parseURL()   ('setters')
 *  - location = v                     -> value wrapped in parseURL()   ('watch')
 *  - name = new XMLHttpRequest        -> for every "name.open(" call, the span
 *                                        reported by analyze_js(..., 2) is
 *                                        wrapped as parseURL(..., "ajax") ('ajax')
 *  - every literal 'document.domain' is replaced with 'ignore'
 *
 * Relies on str_checkprev(), str_checknext() and analyze_js(), which are
 * defined outside this block.
 *
 * @param string $input JavaScript source to rewrite
 * @return string The rewritten source, or '' when the stripJS option is enabled
 */
function JS($input) {

    // Stripping?
    if ($this->htmlOptions['stripJS']) {
        return '';
    }

    // If browser capabilities are unknown, apply all parsing
    if ($this->jsFlags === false) {
        $this->jsFlags = array('ajax', 'watch', 'setters');
    }

    // Start parsing!

    // Strings to search for
    $search = array('innerHTML', 'location');

    // Look for attribute assignments
    if (in_array('setters', $this->jsFlags)) {
        $search[] = 'src';
        $search[] = 'href';
        $search[] = 'action';
        $search[] = 'background';
    }

    // Ajax operations - can't look for XMLHttpRequest.open() directly
    // since the object is likely to be renamed.
    if (in_array('ajax', $this->jsFlags)) {
        $search[] = 'XMLHttpRequest';
    }

    // Names of XMLHttpRequest variables whose .open() calls are already done
    $ajaxObjects = array();

    // Set up starting parameters
    $offset = 0;
    $length = strlen($input);

    while ($offset < $length) {

        // Start off by assuming no more items (i.e. the next position
        // of interest is the end of the document)
        $commandPos = $length;

        // Loop through the search subjects
        foreach ($search as $item) {

            // Any more instances of this?
            if (($tmp = strpos($input, $item, $offset)) === false) {
                // Nope, skip to next item
                continue;
            }

            // Closer to the currently held 'next' position?
            if ($tmp < $commandPos) {
                $commandPos = $tmp;
                $command    = $item;
            }
        }

        // No matches found? Finish parsing.
        if ($commandPos == $length) {
            break;
        }

        // Assume no need to change the value by resetting the
        // position of the value to change
        $valuePos = false;

        // Validate the match and find the start of the value
        switch ($command) {

            // Either location.replace() or .location =
            case 'location':

                // Check for location.replace() now, .location= will be handled
                // by the same logic as used for all other assignments
                if (($tmp = str_checknext($input, '.', $commandPos + strlen($command), true)) && substr($input, $tmp + 1, 7) == 'replace') {

                    // Now move inside the bracket ( +8 accounts for [.\s]replace )
                    if ($tmp = str_checknext($input, '(', $tmp + 8)) {
                        $valuePos = $tmp + 1; // $tmp is position of the bracket so value is +1
                    }

                    // And we're done validating...
                    break;
                }

                // Not a .replace(), try .location= but only if the 'watch'
                // flag is set
                if (!in_array('watch', $this->jsFlags)) {
                    break;
                }

                // Deliberate fall-through into the assignment handling

            // Assignments
            case 'innerHTML':
            case 'src':
            case 'href':
            case 'background':
            case 'action':

                // Is this a location? Special validation rules apply since location
                // is a property of the window and can be accessed by "location" alone.
                if ($command == 'location') {

                    // Difficult to be absolutely certain without much more complex parsing
                    // but look at the previous char - first reverse back to it.
                    // Stop at the start of the input: negative string offsets
                    // would otherwise read from the END of the string.
                    for ($tmp = $commandPos - 1; $tmp >= 0 && strspn($input[$tmp], "\r\n\t "); --$tmp) {
                    }

                    // Disallow commas (e.g. from the parameters of a window.open())
                    if ($tmp >= 0 && $input[$tmp] == ',') {
                        break;
                    }

                } else {

                    // Everything else must be a property of another object so must be
                    // preceded by a period.
                    if (str_checkprev($input, '.', $commandPos - 1) === false) {
                        break;
                    }
                }

                // We've verified that the current command is a property.
                // Now check that it's an assignment (as opposed to just a get)
                if (($tmp = str_checknext($input, '=', $commandPos + strlen($command), true)) === false) {
                    break;
                }

                // Record the start position of the value
                $valuePos = $tmp + 1;

                // Validate the value start position by ensuring
                //  (A) the position exists!
                //  (B) the next char is not also an = (could be a test for equality)
                if (!isset($input[$valuePos]) || $input[$valuePos] == '=') {
                    $valuePos = false;
                    break;
                }

                // We have verified the statement and found a start position.
                break;

            // Special cases

            // XMLHttpRequest - we need to record any instances of the XMLHttpRequest object
            // and only then can we attempt to find the .open() calls and proxify the URL
            case 'XMLHttpRequest':

                // Text that ends exactly at the end of this "XMLHttpRequest"
                // token and reaches back at most 40 bytes. The window is
                // clamped to the start of the input so a match near the
                // beginning never produces a negative substr() start, and it
                // always ends at the token so the "$" anchor below can match.
                $windowStart = max(0, $commandPos - 40);
                $window      = substr($input, $windowStart, $commandPos + strlen($command) - $windowStart);

                // Ensure we're creating a new instance of the object and attempt to find the
                // assigned name
                if (str_checkprev($input, 'w', $commandPos - 1) === false || !preg_match('#\\b([a-zA-Z0-9_-]{1,20})\\s*=\\s*new\\s+XMLHttpRequest$#', $window, $obj)) {
                    break;
                }

                // Check we haven't already processed this one
                if (isset($ajaxObjects[$obj[1]])) {
                    break;
                }

                // And save it for next time
                $ajaxObjects[$obj[1]] = true;

                // Start at top
                $openOffset = 0;

                // Now proxify all calls to .open()
                while (preg_match('#\\b' . $obj[1] . '\\s*\\.\\s*open\\s*\\(#', $input, $tmp, PREG_OFFSET_CAPTURE, $openOffset)) {

                    // Increase offset to just past the opening bracket
                    $openOffset = $tmp[0][1] + strlen($tmp[0][0]);

                    // Find start/end positions for the URL (2nd parameter)
                    $pos = analyze_js($input, $openOffset, 2);

                    // Convert the end position to a length
                    $valueLength = $pos[1] - $pos[0];

                    // Wrap it in our URL parser
                    $wrapped = 'parseURL(' . substr($input, $pos[0], $valueLength) . ',"ajax")';

                    // Number of bytes the document grows by (17 for this wrapper)
                    $growth = strlen($wrapped) - $valueLength;

                    // Replace it
                    $input = substr_replace($input, $wrapped, $pos[0], $valueLength);

                    // And adjust the total length to compensate
                    $length += $growth;

                    // A rewrite that sits before the current match pushes the
                    // match to the right; keep $commandPos pointing at it so
                    // the main offset is not left inside already-handled text.
                    if ($pos[0] < $commandPos) {
                        $commandPos += $growth;
                    }

                    // Move offset past the newly replaced string
                    $openOffset += $growth;
                }

                break;
        }

        // Make any changes?
        if ($valuePos) {

            // We know the start, now find the end
            $endPos = analyze_js($input, $valuePos);

            // And from that, the length
            $valueLength = $endPos - $valuePos;

            // Create the parsed wrapper
            $wrapped = ($command == 'innerHTML' ? 'parseHTML' : 'parseURL') . '(' . substr($input, $valuePos, $valueLength) . ')';

            // Make the substitution
            $input = substr_replace($input, $wrapped, $valuePos, $valueLength);

            // And adjust the length ("parseHTML(" + ")" = 11, "parseURL(" + ")" = 10)
            $length += $command == 'innerHTML' ? 11 : 10;

            // Move offset past the most recently found match. This is based on
            // the pre-substitution end position, so scanning resumes inside
            // the wrapped text.
            // NOTE(review): presumably intentional, so assignments nested in
            // the value are also rewritten - confirm before changing.
            $offset = $endPos + 1;

        } else {

            // Nothing changed: continue just after the matched keyword
            $offset = $commandPos + strlen($command);
        }
    }

    // Ignore document.domain
    $input = str_replace('document.domain', 'ignore', $input);

    // Return changed
    return $input;
}