Ejemplo n.º 1
 /**
  * Runs a set of malformed HTML fragments through SS_HTMLValue and checks
  * the markup that getContent() hands back for each of them.
  *
  * In the fixture map the key is the raw input and the value is the
  * serialised markup expected from getContent().
  */
 public function testInvalidHTMLSaving()
 {
     $value = new SS_HTMLValue();
     $invalid = array(
         '<p>Enclosed Value</p></p>' => '<p>Enclosed Value</p>',
         '<p><div class="example"></div></p>' => '<p/><div class="example"/>',
         '<html><html><body><falsetag "attribute=""attribute""">' => '<falsetag/>',
         '<body<body<body>/bodu>/body>' => '/bodu&gt;/body&gt;',
     );

     foreach ($invalid as $input => $expected) {
         // Load the fragment before asserting. Without this step $input is
         // never used and every iteration would inspect an empty document.
         // NOTE(review): setContent() is not visible in this excerpt; confirm
         // it is the loader SS_HTMLValue exposes.
         $value->setContent($input);
         $this->assertEquals($expected, $value->getContent(), 'Invalid HTML can be saved');
     }
 }
 // Asserts that SS_HTMLValue::getContent() returns markup whose line breaks
 // are all LF ("\n").
 // NOTE(review): nothing in this excerpt loads markup into $value and
 // $platformEOL is never read, so the statement that supplied the
 // mixed-newline input appears to be missing -- confirm against the full test.
 public function testMixedNewlines()
     $value = new SS_HTMLValue();
     // Line ending expected in the serialised output.
     $eol = "\n";
     $platformEOL = PHP_EOL;
     // native EOL for platform. Windows is \r\n (CR-LF). UNIX is LF
     $this->assertEquals("<p>paragraph</p>{$eol}<ul><li>1</li>{$eol}</ul>", $value->getContent(), 'Newlines get converted');
  /**
   * Finds the links that are of interest for the link tracking automation. Checks for brokenness and attaches
   * extracted metadata so consumers can decide what to do with the DOM element (provided as DOMReference).
   *
   * Each <a> element carrying an href is classified at most once, in this order: definitely broken
   * (empty or root-relative href), [sitetree_link] shortcode, [file_link] shortcode, local "#anchor".
   * Links matching none of these are not reported. Anchor names are escaped with preg_quote() before
   * they are searched for, so characters such as ".", "/" or "#" in an anchor are matched literally.
   *
   * @param SS_HTMLValue $htmlValue Object to parse the links from.
   * @return array List of associative arrays, one per recognised link, with the following field layout:
   *     Type: string, name of the link type ('broken', 'sitetree', 'file' or 'localanchor')
   *     Target: any, a reference to the target object, depends on the Type
   *     Anchor: string, anchor part of the link
   *     DOMReference: DOMElement, reference to the link to apply changes.
   *     Broken: boolean, a flag highlighting whether the link should be treated as broken.
   */
  public function process(SS_HTMLValue $htmlValue)
  {
      $results = array();

      $links = $htmlValue->getElementsByTagName('a');
      if (!$links) {
          return $results;
      }

      foreach ($links as $link) {
          // Anchors without an href (e.g. named anchors) are not links to track.
          if (!$link->hasAttribute('href')) {
              continue;
          }

          $href = Director::makeRelative($link->getAttribute('href'));

          // Definitely broken links.
          if ($href == '' || $href[0] == '/') {
              $results[] = array(
                  'Type' => 'broken',
                  'Target' => null,
                  'Anchor' => null,
                  'DOMReference' => $link,
                  'Broken' => true,
              );
              continue;
          }

          // Link to a page on this site.
          $matches = array();
          if (preg_match('/\\[sitetree_link(?:\\s*|%20|,)?id=([0-9]+)\\](#(.*))?/i', $href, $matches)) {
              $anchor = empty($matches[3]) ? null : $matches[3];
              $page = DataObject::get_by_id('SiteTree', $matches[1]);

              if (!$page) {
                  // Page doesn't exist.
                  $broken = true;
              } elseif ($anchor !== null) {
                  // Broken anchor on the target page when no name/id attribute carries it.
                  $anchorPattern = '/(name|id)="' . preg_quote($anchor, '/') . '"/';
                  $broken = !preg_match($anchorPattern, $page->Content);
              } else {
                  $broken = false;
              }

              $results[] = array(
                  'Type' => 'sitetree',
                  'Target' => $matches[1],
                  'Anchor' => $anchor,
                  'DOMReference' => $link,
                  'Broken' => $broken,
              );
              continue;
          }

          // Link to a file on this site.
          $matches = array();
          if (preg_match('/\\[file_link(?:\\s*|%20|,)?id=([0-9]+)\\]/i', $href, $matches)) {
              $results[] = array(
                  'Type' => 'file',
                  'Target' => $matches[1],
                  'Anchor' => null,
                  'DOMReference' => $link,
                  'Broken' => !DataObject::get_by_id('File', $matches[1]),
              );
              continue;
          }

          // Local anchor.
          $matches = array();
          if (preg_match('/^#(.*)/i', $href, $matches)) {
              // '#' is the delimiter here, so it has to be escaped as well.
              $anchorPattern = '#(name|id)="' . preg_quote($matches[1], '#') . '"#';
              $results[] = array(
                  'Type' => 'localanchor',
                  'Target' => null,
                  'Anchor' => $matches[1],
                  'DOMReference' => $link,
                  'Broken' => !preg_match($anchorPattern, $htmlValue->getContent()),
              );
              continue;
          }
      }

      return $results;
  }
 /**
  * Writes this field's HTML into the same-named field on $record and
  * refreshes the record's link and file tracking.
  *
  * Steps performed:
  *  - <a> elements: [sitetree_link] shortcodes are collected as linked pages
  *    (dropping a stale "ss-broken" class), hrefs under ASSETS_DIR are looked
  *    up as files, and missing targets set HasBrokenLink / HasBrokenFile.
  *  - <img> elements: a trailing "?r=<n>" is stripped from src, the image is
  *    resampled when width/height differ from the stored file, empty alt and
  *    title attributes are added, and resolved files are collected.
  *  - The LinkTracking / ImageTracking relations are rewritten for this field
  *    when the record has an ID and declares them.
  *
  * @param object $record Record to save into; must provide escapeTypeForField()
  *                       and many_many() as used below.
  * @throws Exception When the target field's escape type is not 'xml'.
  */
 public function saveInto($record)
 {
     if ($record->escapeTypeForField($this->name) != 'xml') {
         throw new Exception('HtmlEditorField->saveInto(): This field should save into a HTMLText or HTMLVarchar field.');
     }

     $linkedPages = array();
     $linkedFiles = array();
     $htmlValue = new SS_HTMLValue($this->value);
     $assetsPrefix = ASSETS_DIR . '/';

     // Populate link tracking for internal links & links to asset files.
     $links = $htmlValue->getElementsByTagName('a');
     if ($links) {
         foreach ($links as $link) {
             $href = Director::makeRelative($link->getAttribute('href'));
             if (!$href) {
                 continue;
             }

             $matches = array();
             if (preg_match('/\\[sitetree_link id=([0-9]+)\\]/i', $href, $matches)) {
                 $ID = $matches[1];

                 // Clear out any broken link class. Working on whole class
                 // tokens keeps the neighbouring classes separated; a plain
                 // regex removal turned "a ss-broken b" into "ab".
                 $class = $link->getAttribute('class');
                 if ($class) {
                     $tokens = preg_split('/\\s+/', $class, -1, PREG_SPLIT_NO_EMPTY);
                     $link->setAttribute('class', implode(' ', array_diff($tokens, array('ss-broken'))));
                 }

                 $linkedPages[] = $ID;
                 if (!DataObject::get_by_id('SiteTree', $ID)) {
                     $record->HasBrokenLink = true;
                 }
             } elseif (substr($href, 0, strlen($assetsPrefix)) == $assetsPrefix) {
                 $candidateFile = File::find(Convert::raw2sql(urldecode($href)));
                 if ($candidateFile) {
                     $linkedFiles[] = $candidateFile->ID;
                 } else {
                     $record->HasBrokenFile = true;
                 }
             } elseif ($href[0] == '/') {
                 // $href is known to be non-empty here, so only the
                 // root-relative case is left to flag.
                 $record->HasBrokenLink = true;
             }
         }
     }

     // Resample images, add default attributes and add to assets tracking.
     $images = $htmlValue->getElementsByTagName('img');
     if ($images) {
         foreach ($images as $img) {
             // strip any ?r=n data from the src attribute
             $img->setAttribute('src', preg_replace('/([^\\?]*)\\?r=[0-9]+$/i', '$1', $img->getAttribute('src')));

             $path = urldecode(Director::makeRelative($img->getAttribute('src')));
             $image = File::find($path);
             if (!$image && substr($path, 0, strlen($assetsPrefix)) == $assetsPrefix) {
                 $record->HasBrokenFile = true;
             }

             // Resample the images if the width & height have changed.
             $width = $img->getAttribute('width');
             $height = $img->getAttribute('height');
             if ($image && $width && $height && ($width != $image->getWidth() || $height != $image->getHeight())) {
                 // Make sure that the resized image actually returns an image.
                 $resized = $image->ResizedImage($width, $height);
                 if ($resized) {
                     $img->setAttribute('src', $resized->getRelativePath());
                 }
             }

             // Add default empty title & alt attributes.
             if (!$img->getAttribute('alt')) {
                 $img->setAttribute('alt', '');
             }
             if (!$img->getAttribute('title')) {
                 $img->setAttribute('title', '');
             }

             // Only resolved files with a src are tracked; an image that
             // File::find() could not resolve has no ID to record.
             if ($image && $img->getAttribute('src')) {
                 $linkedFiles[] = $image->ID;
             }
         }
     }

     // Save file & link tracking data.
     if ($record->ID && $record->many_many('LinkTracking') && ($tracker = $record->LinkTracking())) {
         // The field name is escaped once and reused for DELETE and INSERT.
         $SQL_fieldName = Convert::raw2sql($this->name);
         $filter = sprintf('"FieldName" = \'%s\' AND "SiteTreeID" = %d', $SQL_fieldName, $record->ID);
         DB::query("DELETE FROM \"{$tracker->tableName}\" WHERE {$filter}");

         // NOTE(review): the INSERT targets a hard-coded table while the
         // DELETE uses $tracker->tableName -- confirm they are the same table.
         foreach ($linkedPages as $item) {
             DB::query("INSERT INTO \"SiteTree_LinkTracking\" (\"SiteTreeID\",\"ChildID\", \"FieldName\")\n\t\t\t\t\tVALUES ({$record->ID}, {$item}, '{$SQL_fieldName}')");
         }
     }

     if ($record->ID && $record->many_many('ImageTracking') && ($tracker = $record->ImageTracking())) {
         $filter = sprintf('"FieldName" = \'%s\' AND "SiteTreeID" = %d', Convert::raw2sql($this->name), $record->ID);
         DB::query("DELETE FROM \"{$tracker->tableName}\" WHERE {$filter}");

         foreach ($linkedFiles as $item) {
             $tracker->add($item, array('FieldName' => $this->name));
         }
     }

     $record->{$this->name} = $htmlValue->getContent();
 }
Ejemplo n.º 5
 /**
  * Purifies the given markup with HTMLPurifier and returns it as serialised
  * by SS_HTMLValue.
  *
  * @param string $content HTML content to clean.
  * @return string Result of SS_HTMLValue::getContent() for the purified markup.
  */
 public function cleanHTML($content)
 {
     $purifier = new HTMLPurifier();
     $purified = $purifier->purify($content);

     $htmlValue = new SS_HTMLValue($purified);

     return $htmlValue->getContent();
 }
Ejemplo n.º 6
  /**
   * Attempt to clean invalid HTML, which messes up diffs.
   *
   * When no cleaner is passed in, one is created from self::$html_cleaner_class
   * if that class exists, otherwise HTMLCleaner::inst() is asked for one. If
   * there is still no cleaner, the content is only round-tripped through
   * SS_HTMLValue. Self-closing <ins /> and <del /> tags are stripped from the
   * result in every case.
   *
   * @param string $content HTML content
   * @param object $cleaner Optional instance of a HTMLCleaner class to
   *                        use, overriding self::$html_cleaner_class
   * @return string The cleaned content.
   */
  public static function cleanHTML($content, $cleaner = null)
  {
      if (!$cleaner) {
          $cleanerClass = self::$html_cleaner_class;
          // Prefer the configured class; otherwise take whatever cleaner
          // HTMLCleaner::inst() provides.
          $cleaner = class_exists($cleanerClass)
              ? new $cleanerClass()
              : HTMLCleaner::inst();
      }

      if (!$cleaner) {
          // At most basic level of cleaning, re-serialise through SS_HTMLValue.
          $htmlValue = new SS_HTMLValue($content);
          $cleaned = $htmlValue->getContent();
      } else {
          $cleaned = $cleaner->cleanHTML($content);
      }

      // Remove empty <ins /> and <del /> tags because browsers hate them
      return preg_replace('/<(ins|del)[^>]*\\/>/', '', $cleaned);
  }
Ejemplo n.º 7
 // Asserts that SS_HTMLValue::getContent() returns markup whose line breaks
 // are all LF ("\n").
 // NOTE(review): nothing in this excerpt loads markup into $value, so the
 // statement that supplied the mixed-newline input appears to be missing --
 // confirm against the full test.
 public function testMixedNewlines()
     $value = new SS_HTMLValue();
     $this->assertEquals("<p>paragraph</p>\n<ul><li>1</li>\n</ul>", $value->getContent(), 'Newlines get converted');
  /**
   * Cleans and returns XHTML which is needed for use in DOMDocument.
   *
   * Three strategies are tried in order, and the first one that produces
   * output wins:
   *  1. the PHP tidy extension,
   *  2. a `tidy` binary invoked through the shell,
   *  3. a plain round trip through SS_HTMLValue.
   * Output from the first two is passed through $this->rewriteShortcodes().
   *
   * @param string $content  Markup to tidy.
   * @param string $encoding Character encoding of $content; dashes are removed
   *                         before the name is handed to tidy (e.g. "UTF-8"
   *                         becomes "UTF8").
   * @return string
   */
  protected function tidy($content, $encoding = 'UTF-8')
  {
      // Tidy's encoding names contain no dash ("utf8", "latin1", ...). The
      // name is normalised once so the extension and the CLI get the same value.
      $tidyEncoding = str_replace('-', '', $encoding);

      // Try to use the extension first
      if (extension_loaded('tidy')) {
          $parsed = tidy_parse_string($content, array(
              'clean' => true,
              'output-xhtml' => true,
              'show-body-only' => false,
              'wrap' => 0,
              'input-encoding' => $tidyEncoding,
              'output-encoding' => $tidyEncoding,
              'doctype' => 'omit',
              'anchor-as-name' => false,
          ));

          // A failed parse yields false; fall through to the next strategy
          // instead of returning an empty document.
          if ($parsed !== false) {
              return $this->rewriteShortcodes('' . $parsed);
          }
      }

      // No usable PHP extension, attempt to use CLI tidy.
      $retval = null;
      $output = null;
      // Best-effort probe: errors are suppressed on purpose, and a missing
      // binary simply leaves $retval non-zero.
      @exec('tidy --version', $output, $retval);
      if ($retval === 0) {
          $input = escapeshellarg($content);
          $encodingArg = escapeshellarg($tidyEncoding);
          // Doesn't work on Windows, sorry, stick to the extension.
          $tidy = @`echo {$input} | tidy -q --show-body-only no --tidy-mark no --doctype omit --input-encoding {$encodingArg} --output-encoding {$encodingArg} --wrap 0 --anchor-as-name no --clean yes --output-xhtml yes`;

          // The shell call gives null on failure or when nothing was printed.
          if (is_string($tidy) && $tidy !== '') {
              return $this->rewriteShortcodes($tidy);
          }
      }

      // Fall back to default
      $doc = new SS_HTMLValue($content);
      return $doc->getContent();
  }