/**
 * Checks that the available HTMLCleaner repairs mis-nested and unclosed tags.
 *
 * The test is skipped when HTMLCleaner::inst() yields a falsy value, which
 * (per the skip message) means no cleaning library is installed.
 */
public function testHTMLClean()
 {
     $cleaner = HTMLCleaner::inst();
     if (!$cleaner) {
         // markTestSkipped() aborts the test by throwing, so nothing below runs.
         $this->markTestSkipped('No HTMLCleaner library available (tidy or HTMLBeautifier)');
     }

     // PHPUnit's signature is assertEquals($expected, $actual, $message):
     // the expected markup goes first so failure output is labelled correctly.
     $this->assertEquals(
         '<p>wrong <b>nesting</b></p>',
         $cleaner->cleanHTML('<p>wrong <b>nesting</i></p>'),
         "HTML cleaned properly"
     );
     $this->assertEquals(
         '<p>unclosed paragraph</p>',
         $cleaner->cleanHTML('<p>unclosed paragraph'),
         "HTML cleaned properly"
     );
 }
Ejemplo n.º 2
0
 /**
  *  Tidy up potentially invalid HTML so that it does not confuse diffing.
  *
  *  The cleaner is chosen in this order:
  *   1. the $cleaner argument, when one is supplied;
  *   2. an instance of self::$html_cleaner_class created through the
  *      Injector, when that class is configured and exists;
  *   3. whatever HTMLCleaner::inst() provides.
  *
  *  If none of these yields a cleaner, the content is only round-tripped
  *  through an 'HTMLValue' object obtained from the Injector (a very basic
  *  level of tidying). In every case, self-closing <ins /> and <del /> tags
  *  are stripped from the result.
  *
  * @param string $content HTML content
  * @param HTMLCleaner $cleaner Optional instance of a HTMLCleaner class to
  *    use, overriding self::$html_cleaner_class
  * @return mixed|string The cleaned content, as returned by preg_replace()
  */
 public static function cleanHTML($content, $cleaner = null)
 {
     if (!$cleaner) {
         // Prefer the configured cleaner class; otherwise fall back to
         // whichever cleaner HTMLCleaner can provide (possibly none).
         $configuredClass = self::$html_cleaner_class;
         $cleaner = ($configuredClass && class_exists($configuredClass))
             ? Injector::inst()->create($configuredClass)
             : HTMLCleaner::inst();
     }

     // With no cleaner at all, do the most basic clean-up by passing the
     // markup through an HTMLValue instance and reading it back.
     $cleaned = $cleaner
         ? $cleaner->cleanHTML($content)
         : Injector::inst()->create('HTMLValue', $content)->getContent();

     // Remove empty <ins /> and <del /> tags because browsers hate them
     return preg_replace('/<(ins|del)[^>]*\\/>/', '', $cleaned);
 }