/**
 * Checks that the HTMLCleaner returned by HTMLCleaner::inst() repairs
 * two kinds of malformed markup: a mismatched closing tag and an
 * unclosed paragraph.
 *
 * The test is marked as skipped when HTMLCleaner::inst() returns a
 * falsy value, i.e. no cleaner implementation is available.
 */
public function testHTMLClean()
{
    $cleaner = HTMLCleaner::inst();

    if (!$cleaner) {
        $this->markTestSkipped('No HTMLCleaner library available (tidy or HTMLBeautifier)');
        return;
    }

    // assertEquals() takes ($expected, $actual, $message); keeping that order
    // makes the failure output label the two values correctly.
    $this->assertEquals(
        '<p>wrong <b>nesting</b></p>',
        $cleaner->cleanHTML('<p>wrong <b>nesting</i></p>'),
        "HTML cleaned properly"
    );

    $this->assertEquals(
        '<p>unclosed paragraph</p>',
        $cleaner->cleanHTML('<p>unclosed paragraph'),
        "HTML cleaned properly"
    );
}
/**
 * Tidy up potentially invalid HTML so that it does not confuse diffing.
 *
 * Cleaner resolution when none is supplied:
 *  1. if the class named by self::$html_cleaner_class exists, a new
 *     instance of it is created;
 *  2. otherwise HTMLCleaner::inst() is asked for a cleaner.
 *
 * When no cleaner can be resolved, the content is round-tripped through
 * SS_HTMLValue instead (presumably DOMDocument-backed, giving only very
 * basic tidying - confirm against SS_HTMLValue).
 *
 * Finally, self-closing <ins /> and <del /> tags are stripped from the result.
 *
 * @param string $content HTML content
 * @param object $cleaner Optional cleaner instance exposing cleanHTML(),
 *                        used instead of self::$html_cleaner_class
 * @return string The cleaned HTML
 */
public static function cleanHTML($content, $cleaner = null)
{
    $activeCleaner = $cleaner;

    if (!$activeCleaner) {
        // Prefer the configured cleaner class; otherwise let HTMLCleaner
        // pick whatever implementation is available.
        $activeCleaner = class_exists(self::$html_cleaner_class)
            ? new self::$html_cleaner_class()
            : HTMLCleaner::inst();
    }

    if (!$activeCleaner) {
        // Most basic level of cleaning: re-serialise through SS_HTMLValue.
        $htmlValue = new SS_HTMLValue($content);
        $cleaned = $htmlValue->getContent();
    } else {
        $cleaned = $activeCleaner->cleanHTML($content);
    }

    // Remove empty <ins /> and <del /> tags because browsers hate them
    return preg_replace('/<(ins|del)[^>]*\\/>/', '', $cleaned);
}