/**
 * Strips from the input every character that does not appear in the whitelist.
 *
 * The whitelist may be given either as an array of single characters or as a
 * string, in which case it is first split into its individual characters
 * using the encoding reported by Codec::detectEncoding(). The input is
 * walked one character at a time in the form returned by
 * Codec::normalizeEncoding() (handled here as UTF-32), and the surviving
 * characters are converted back to the encoding originally detected for
 * the input.
 *
 * @param string       $input     text to be filtered; any non-string value is
 *                                treated as an empty string.
 * @param string|array $whitelist characters that are allowed to remain.
 *
 * @return string the characters of $input that are present in $whitelist, or
 *                an empty string when nothing survives or the conversion back
 *                to the original encoding does not yield a string.
 */
public function whitelist($input, $whitelist)
{
    // Non-string input is handled as if it were empty.
    if (!is_string($input)) {
        $input = '';
    }

    // A string whitelist is exploded into a list of single characters.
    if (is_string($whitelist)) {
        $whitelistEncoding = Codec::detectEncoding($whitelist);
        $whitelistLength = mb_strlen($whitelist, $whitelistEncoding);
        $allowedChars = array();
        $position = 0;
        while ($position < $whitelistLength) {
            $allowedChars[] = mb_substr($whitelist, $position, 1, $whitelistEncoding);
            $position++;
        }
        $whitelist = $allowedChars;
    }

    // Remember the caller's encoding so the result can be handed back in it.
    $originalEncoding = Codec::detectEncoding($input);
    $normalizedInput = Codec::normalizeEncoding($input);
    $characterCount = mb_strlen($normalizedInput, 'UTF-32');

    $kept = '';
    for ($offset = 0; $offset < $characterCount; ++$offset) {
        $character = mb_substr($normalizedInput, $offset, 1, 'UTF-32');
        if (!Codec::containsCharacter($character, $whitelist)) {
            continue;
        }
        $kept .= $character;
    }

    // Nothing survived the filter: there is nothing to convert back.
    if ($kept === '') {
        return '';
    }

    $converted = mb_convert_encoding($kept, $originalEncoding, 'UTF-32');

    // Fall back to an empty string if the conversion did not produce a string.
    return is_string($converted) ? $converted : '';
}
/**
 * Exercises UnixCodec directly and through the encoder's encodeForOS(),
 * then checks that Base64 encoding round-trips single bytes and short
 * byte sequences.
 */
public function testUnixCodec()
{
    $encoder = ESAPI::getEncoder();
    $unixCodec = new UnixCodec();

    // A null input is expected to encode to null.
    $this->assertEquals(null, $encoder->encodeForOS($unixCodec, null));

    // Neither of these inputs is expected to yield a decoded character.
    foreach (array("n", "") as $undecodable) {
        $result = $unixCodec->decodeCharacter(Codec::normalizeEncoding($undecodable));
        $this->assertEquals(null, $result['decodedCharacter']);
    }

    // not that it matters, but the java test would encode alphanums with such an immune param.
    $immuneChars = array("");
    $encodedChar = $unixCodec->encodeCharacter($immuneChars, "<");
    $this->assertEquals("<", $unixCodec->decode($encodedChar));

    // Decoding a plain path leaves it unchanged.
    $path = "/etc/passwd";
    $this->assertEquals($path, $unixCodec->decode($path));

    // Encoding and then decoding restores the original path.
    $immuneChars = array();
    $path = "/etc/passwd";
    $encodedPath = $unixCodec->encode($immuneChars, $path);
    $this->assertEquals($path, $unixCodec->decode($encodedPath));

    // The encoder and the codec itself must agree on these encodings.
    // TODO: Check that this is acceptable for Unix hosts
    $agreedEncodings = array(
        array("c:\\jeff", "c\\:\\\\jeff"),
        array("dir & foo", "dir\\ \\&\\ foo"),
    );
    foreach ($agreedEncodings as $case) {
        $raw = $case[0];
        $escaped = $case[1];
        $this->assertEquals($escaped, $encoder->encodeForOS($unixCodec, $raw));
        $this->assertEquals($escaped, $unixCodec->encode($immuneChars, $raw));
    }

    // Unix paths (that must be encoded safely)
    // TODO: Check that this is acceptable for Unix
    $this->assertEquals("\\/etc\\/hosts", $encoder->encodeForOS($unixCodec, "/etc/hosts"));
    $this->assertEquals(
        "\\/etc\\/hosts\\;\\ ls\\ -l",
        $encoder->encodeForOS($unixCodec, "/etc/hosts; ls -l")
    );

    // Base64 output must match this pattern and decode back to the exact
    // input for every possible byte value.
    $base64Pattern = '/^[a-zA-Z0-9\\/+]*={0,2}$/';

    // Every single byte on its own.
    foreach (range(0, 255) as $i) {
        $plain = chr($i);
        $base64 = $encoder->encodeForBase64($plain);
        $this->assertRegExp($base64Pattern, $base64, "Input was character with ordinal: {$i} - %s");
        $this->assertEquals($plain, $encoder->decodeFromBase64($base64));
    }

    // Every byte preceded by an ASCII letter.
    foreach (range(0, 255) as $i) {
        $plain = 'a' . chr($i);
        $base64 = $encoder->encodeForBase64($plain);
        $this->assertRegExp($base64Pattern, $base64, "Input was 'a' concat with character with ordinal: {$i} - %s");
        $this->assertEquals($plain, $encoder->decodeFromBase64($base64));
    }

    // Every byte preceded by a non-ASCII character.
    foreach (range(0, 255) as $i) {
        $plain = 'ϑ' . chr($i);
        $base64 = $encoder->encodeForBase64($plain);
        $this->assertRegExp($base64Pattern, $base64, "Input was char known as 'ϑ' concat with character with ordinal: {$i} - %s");
        $this->assertEquals($plain, $encoder->decodeFromBase64($base64));
    }
}