/**
 * Cleans raw text removing nasties.
 *
 * Given raw text (eg typed in by a user) this function cleans it up and removes any nasty tags
 * that could mess up Moodle pages through XSS attacks.
 *
 * The result must be used as a HTML text fragment, this function can not cleanup random
 * parts of html tags such as url or src attributes.
 *
 * NOTE: the format parameter was deprecated because we can safely clean only HTML.
 * The only format value that changes the outcome is FORMAT_PLAIN: in that case the text is
 * returned after the string cast without any cleaning.
 *
 * @param string $text The text to be cleaned
 * @param int|string $format deprecated parameter, should always contain FORMAT_HTML or FORMAT_MOODLE
 * @param array $options Array of options, handed on unchanged to purify_html(); currently only
 *                       option supported is 'allowid' (if true, does not remove id attributes
 *                       when cleaning)
 * @return string The cleaned up text
 */
function clean_text($text, $format = FORMAT_HTML, $options = array()) {
    // Normalise the input first so that every return path yields a string.
    $text = (string) $text;

    // TODO: we need to standardise cleanup of text when loading it into editor first.
    // Until then no warning is raised for unexpected formats, e.g.
    // debugging('clean_text() is designed to work only with html');.

    // Loose comparison on purpose: $format may arrive as an int or as a numeric string.
    if ($format == FORMAT_PLAIN) {
        return $text;
    }

    // Only run the purifier when the quick pre-check says the text needs it.
    if (is_purify_html_necessary($text)) {
        $text = purify_html($text, $options);
    }

    // Originally we tried to neutralise some script events here, it was a wrong approach because
    // it was trivial to work around that (for example using style based XSS exploits).
    // We must not give false sense of security here - all developers MUST understand how to use
    // rawurlencode(), htmlentities(), htmlspecialchars(), p(), s(), moodle_url, html_writer and friends!!!

    return $text;
}
/**
 * Test internal function used for clean_text() speedup.
 *
 * The inputs are listed in two tables and checked in order:
 *  - "shortcut" inputs, for which is_purify_html_necessary() must return false and
 *    purify_html() must hand the text back unchanged;
 *  - "failure" inputs, for which is_purify_html_necessary() must return true.
 * Every assertion carries a message naming the offending input, so a failure inside
 * the loop can be traced back to a specific case.
 *
 * @return void
 */
function test_is_purify_html_necessary() {
    // First our shortcuts.
    $shortcuts = array(
        "",
        "666",
        "abc\ndef \" ' ",
        "abc\n<p>def</p>efg<p>hij</p>",
        "<br />abc\n<p>def<em>efg</em><strong>hi<br />j</strong></p>",
    );

    foreach ($shortcuts as $text) {
        $shown = var_export($text, true);
        $this->assertFalse(
            is_purify_html_necessary($text),
            'Purification should not be reported as necessary for ' . $shown
        );
        $this->assertSame(
            $text,
            purify_html($text),
            'purify_html() should return the text unchanged for ' . $shown
        );
    }

    // Now failures.
    $failures = array(
        // NOTE(review): a whitespace-only string looks out of place among the entity and
        // markup cases below; this literal may originally have been an HTML entity
        // (e.g. a non-breaking space entity) that was decoded somewhere - confirm.
        " ",
        "Gin & Tonic",
        "Gin > Tonic",
        "Gin < Tonic",
        "<div>abc</div>",
        "<span>abc</span>",
        "<br>abc",
        "<p class='xxx'>abc</p>",
        "<p>abc<em></p></em>",
        "<p>abc",
    );

    foreach ($failures as $text) {
        $this->assertTrue(
            is_purify_html_necessary($text),
            'Purification should be reported as necessary for ' . var_export($text, true)
        );
    }
}