Ejemplo n.º 1
0
 /**
  * Checks that the href of the first anchor can still be read after loading
  * several deliberately malformed HTML fragments into an SS_HTMLValue.
  */
 public function testInvalidHTMLTagNames()
 {
     $value = new SS_HTMLValue();
     $invalid = array('<p><div><a href="test-link"></p></div>', '<html><div><a href="test-link"></a></a></html_>', '""\'\'\'"""\'""<<<>/</<htmlbody><a href="test-link"<<>');
     foreach ($invalid as $input) {
         $value->setContent($input);
         $anchors = $value->getElementsByTagName('a');
         $anchor = $anchors ? $anchors->item(0) : null;
         // Report a missing anchor as a normal assertion failure; calling
         // getAttribute() on null would abort the whole run with a fatal error.
         $this->assertNotNull($anchor, 'An anchor element can be found in malformed HTML');
         $this->assertEquals('test-link', $anchor->getAttribute('href'), 'Link data can be extraced from malformed HTML');
     }
 }
 /**
  * Feeds content mixing the platform-native line ending with CR-LF into an
  * SS_HTMLValue and expects every line break to come back as LF.
  */
 public function testMixedNewlines()
 {
     // Native EOL for the platform: CR-LF on Windows, LF on UNIX.
     $nativeBreak = PHP_EOL;
     $lf = "\n";
     $htmlValue = new SS_HTMLValue();
     $htmlValue->setContent('<p>paragraph</p>' . $nativeBreak . "<ul><li>1</li>\r\n</ul>");
     $expected = '<p>paragraph</p>' . $lf . '<ul><li>1</li>' . $lf . '</ul>';
     $this->assertEquals($expected, $htmlValue->getContent(), 'Newlines get converted');
 }
 /**
  * Rewrites the links in the owner email's body so that they point at
  * "newsletter-link" tracking URLs, then stores the rewritten HTML back on
  * the email.
  *
  * Links whose href contains '{$' or 'mailto:' are left untouched. One
  * Newsletter_TrackedLink record is looked up (or created and written) per
  * unique href; when a Member matching the recipient address is found, that
  * member's NewsletterTrackingToken is included in the tracking URL.
  */
 public function onBeforeSend()
 {
     $email = $this->owner;
     $letter = $email->Newsletter();
     $body = new SS_HTMLValue($email->Body()->forTemplate());
     if (!($body && $letter)) {
         return;
     }

     $member = null;
     if ($email->To()) {
         $memberFilter = sprintf('"Email" = \'%s\'', Convert::raw2sql($email->To()));
         $member = DataObject::get_one('Member', $memberFilter);
     }

     // Pass one: group the anchor elements by the href they share.
     $anchorsByHref = array();
     foreach ($body->getElementsByTagName('a') as $anchor) {
         $href = $anchor->getAttribute('href');
         $hasKeyword = strpos($href, '{$') !== false;
         $isMailto = strpos($href, 'mailto:') !== false;
         if ($hasKeyword || $isMailto) {
             // ignore links with keywords
             continue;
         }
         $anchorsByHref[$href][] = $anchor;
     }

     // Pass two: resolve a tracking record for each unique href and point
     // every anchor that used it at the tracking URL instead.
     foreach ($anchorsByHref as $href => $anchors) {
         $trackFilter = sprintf('"NewsletterID" = %d AND "Original" = \'%s\'', $letter->ID, Convert::raw2sql($href));
         $track = DataObject::get_one('Newsletter_TrackedLink', $trackFilter);
         if (!$track) {
             $track = new Newsletter_TrackedLink();
             $track->Original = $href;
             $track->NewsletterID = $letter->ID;
             $track->write();
         }

         // Embed the member-specific token only when a member was found.
         $trackHref = $member
             ? Controller::join_links(Director::baseURL(), 'newsletter-link', $member->NewsletterTrackingToken, $track->Hash)
             : Controller::join_links(Director::baseURL(), 'newsletter-link', $track->Hash);

         foreach ($anchors as $anchor) {
             $anchor->setAttribute('href', $trackHref);
         }
     }

     $rewritten = $body->getDocument()->saveHTML();
     $email->setBody(DBField::create('HTMLText', $rewritten));
 }
 /**
  * Filters the document of the given SS_HTMLValue in place against this
  * instance's element and attribute rules.
  *
  * Elements that do not match their rule are removed: script and style
  * elements are dropped together with their contents, any other element is
  * replaced by its own children. Elements that do match have their
  * non-matching attributes removed, then default and forced attributes applied.
  * Nothing is done when neither $this->elements nor $this->elementPatterns is set.
  *
  * @param SS_HTMLValue $html The value whose document is filtered
  */
 public function sanitise(SS_HTMLValue $html)
 {
     // No element rules at all: nothing to filter against.
     if (!$this->elements && !$this->elementPatterns) {
         return;
     }
     $doc = $html->getDocument();
     foreach ($html->query('//body//*') as $node) {
         $rule = $this->getRuleForElement($node->tagName);

         if (!$this->elementMatchesRule($node, $rule)) {
             // Disallowed script/style: discard the element and its contents.
             $dropContents = $node->tagName === 'script' || $node->tagName === 'style';
             if ($dropContents) {
                 $node->parentNode->removeChild($node);
                 continue;
             }
             // Any other disallowed element: move its children into a
             // fragment and put that fragment where the element used to be.
             $fragment = $doc->createDocumentFragment();
             while ($node->firstChild) {
                 $fragment->appendChild($node->firstChild);
             }
             $node->parentNode->replaceChild($fragment, $node);
             continue;
         }

         // Allowed element: pad it if the rule asks for that and it is empty.
         if ($rule->paddEmpty && !$node->firstChild) {
             $node->nodeValue = '&nbsp;';
         }

         // Walk the attributes from last to first so removals do not shift
         // the indexes still to be visited.
         $attributes = $node->attributes;
         for ($index = $attributes->length - 1; $index >= 0; $index--) {
             $attribute = $attributes->item($index);
             $attributeRule = $this->getRuleForAttribute($rule, $attribute->name);
             if (!$this->attributeMatchesRule($attribute, $attributeRule)) {
                 $node->removeAttributeNode($attribute);
             }
         }

         // Fill in defaults only where the attribute has no truthy value yet.
         foreach ($rule->attributesDefault as $name => $default) {
             if (!$node->getAttribute($name)) {
                 $node->setAttribute($name, $default);
             }
         }

         // Forced attributes always overwrite whatever is present.
         foreach ($rule->attributesForced as $name => $forced) {
             $node->setAttribute($name, $forced);
         }
     }
 }
 /**
  * Finds the links that are of interest for the link tracking automation. Checks for brokenness and attaches
  * extracted metadata so consumers can decide what to do with the DOM element (provided as DOMReference).
  *
  * Anchors without an href attribute, and hrefs that match none of the recognised forms below, are skipped.
  *
  * @param SS_HTMLValue $htmlValue Object to parse the links from.
  * @return array List of associative arrays, one per recognised link, each with the following field layout:
  *		Type: string, name of the link type ('broken', 'sitetree', 'file' or 'localanchor')
  *		Target: any, a reference to the target object, depends on the Type
  *		Anchor: string, anchor part of the link
  *		DOMReference: DOMElement, reference to the link to apply changes.
  *		Broken: boolean, a flag highlighting whether the link should be treated as broken.
  */
 public function process(SS_HTMLValue $htmlValue)
 {
     $results = array();
     $links = $htmlValue->getElementsByTagName('a');
     if (!$links) {
         return $results;
     }
     foreach ($links as $link) {
         if (!$link->hasAttribute('href')) {
             continue;
         }
         $href = Director::makeRelative($link->getAttribute('href'));
         // Definitely broken links.
         if ($href == '' || $href[0] == '/') {
             $results[] = array('Type' => 'broken', 'Target' => null, 'Anchor' => null, 'DOMReference' => $link, 'Broken' => true);
             continue;
         }
         // Link to a page on this site.
         $matches = array();
         if (preg_match('/\\[sitetree_link(?:\\s*|%20|,)?id=([0-9]+)\\](#(.*))?/i', $href, $matches)) {
             $anchor = empty($matches[3]) ? null : $matches[3];
             $page = DataObject::get_by_id('SiteTree', $matches[1]);
             if (!$page) {
                 // Page doesn't exist.
                 $broken = true;
             } elseif ($anchor !== null) {
                 // Broken anchor on the target page. The anchor text comes straight from the href, so it is
                 // quoted to be matched literally; otherwise metacharacters or a '/' would corrupt the pattern.
                 $broken = !preg_match('/(name|id)="' . preg_quote($anchor, '/') . '"/', $page->Content);
             } else {
                 $broken = false;
             }
             $results[] = array('Type' => 'sitetree', 'Target' => $matches[1], 'Anchor' => $anchor, 'DOMReference' => $link, 'Broken' => $broken);
             continue;
         }
         // Link to a file on this site.
         $matches = array();
         if (preg_match('/\\[file_link(?:\\s*|%20|,)?id=([0-9]+)\\]/i', $href, $matches)) {
             $results[] = array('Type' => 'file', 'Target' => $matches[1], 'Anchor' => null, 'DOMReference' => $link, 'Broken' => !DataObject::get_by_id('File', $matches[1]));
             continue;
         }
         // Local anchor.
         $matches = array();
         if (preg_match('/^#(.*)/i', $href, $matches)) {
             // Quote the anchor text (including the '#' delimiter) so it is matched literally.
             $anchorPattern = '#(name|id)="' . preg_quote($matches[1], '#') . '"#';
             $results[] = array('Type' => 'localanchor', 'Target' => null, 'Anchor' => $matches[1], 'DOMReference' => $link, 'Broken' => !preg_match($anchorPattern, $htmlValue->getContent()));
             continue;
         }
     }
     return $results;
 }
 /**
  * Saves this field's HTML into the given record and refreshes its link and
  * file tracking data.
  *
  * Along the way it flags broken links/files on the record (HasBrokenLink,
  * HasBrokenFile), strips the "ss-broken" class from sitetree links, strips
  * "?r=n" suffixes from image sources, swaps in a resized image where the
  * width/height attributes differ from the stored image, and adds empty
  * alt/title attributes to images that have none.
  *
  * @param object $record Record to save into; must report an 'xml' escape type for this field
  * @throws Exception When the target field's escape type is not 'xml'
  */
 public function saveInto($record)
 {
     if ($record->escapeTypeForField($this->name) != 'xml') {
         throw new Exception('HtmlEditorField->saveInto(): This field should save into a HTMLText or HTMLVarchar field.');
     }
     $linkedPages = array();
     $linkedFiles = array();
     $htmlValue = new SS_HTMLValue($this->value);
     // Populate link tracking for internal links & links to asset files.
     if ($links = $htmlValue->getElementsByTagName('a')) {
         foreach ($links as $link) {
             $href = Director::makeRelative($link->getAttribute('href'));
             if (!$href) {
                 continue;
             }
             if (preg_match('/\\[sitetree_link id=([0-9]+)\\]/i', $href, $matches)) {
                 $ID = $matches[1];
                 // Clear out any broken link classes. Only the whole "ss-broken" token is removed,
                 // together with the whitespace before it, so neighbouring class names are neither
                 // fused together nor partially eaten.
                 if ($class = $link->getAttribute('class')) {
                     $link->setAttribute('class', trim(preg_replace('/(^|\\s+)ss-broken(?=\\s|$)/', '', $class)));
                 }
                 $linkedPages[] = $ID;
                 if (!DataObject::get_by_id('SiteTree', $ID)) {
                     $record->HasBrokenLink = true;
                 }
             } elseif (substr($href, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR . '/') {
                 $candidateFile = File::find(Convert::raw2sql(urldecode($href)));
                 if ($candidateFile) {
                     $linkedFiles[] = $candidateFile->ID;
                 } else {
                     $record->HasBrokenFile = true;
                 }
             } elseif ($href[0] == '/') {
                 // $href is known to be non-empty here, so only the leading slash needs checking.
                 $record->HasBrokenLink = true;
             }
         }
     }
     // Resample images, add default attributes and add to assets tracking.
     if ($images = $htmlValue->getElementsByTagName('img')) {
         foreach ($images as $img) {
             // strip any ?r=n data from the src attribute
             $img->setAttribute('src', preg_replace('/([^\\?]*)\\?r=[0-9]+$/i', '$1', $img->getAttribute('src')));
             $path = urldecode(Director::makeRelative($img->getAttribute('src')));
             $image = File::find($path);
             if (!$image) {
                 // Only a missing file under the assets directory counts as broken.
                 if (substr($path, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR . '/') {
                     $record->HasBrokenFile = true;
                 }
                 continue;
             }
             // Resample the images if the width & height have changed.
             $width = $img->getAttribute('width');
             $height = $img->getAttribute('height');
             if ($width && $height && ($width != $image->getWidth() || $height != $image->getHeight())) {
                 //Make sure that the resized image actually returns an image:
                 $resized = $image->ResizedImage($width, $height);
                 if ($resized) {
                     $img->setAttribute('src', $resized->getRelativePath());
                 }
             }
             // Add default empty title & alt attributes.
             if (!$img->getAttribute('alt')) {
                 $img->setAttribute('alt', '');
             }
             if (!$img->getAttribute('title')) {
                 $img->setAttribute('title', '');
             }
             //If the src attribute is not set, then we won't add this to the list:
             if ($img->getAttribute('src')) {
                 // Add to the tracked files.
                 $linkedFiles[] = $image->ID;
             }
         }
     }
     // Save file & link tracking data.
     if ($record->ID && $record->many_many('LinkTracking') && ($tracker = $record->LinkTracking())) {
         // Escape the field name once and use it for both the DELETE filter and the INSERTs.
         $SQL_fieldName = Convert::raw2sql($this->name);
         $filter = sprintf('"FieldName" = \'%s\' AND "SiteTreeID" = %d', $SQL_fieldName, $record->ID);
         DB::query("DELETE FROM \"{$tracker->tableName}\" WHERE {$filter}");
         foreach ($linkedPages as $item) {
             DB::query("INSERT INTO \"SiteTree_LinkTracking\" (\"SiteTreeID\",\"ChildID\", \"FieldName\")\n\t\t\t\t\tVALUES ({$record->ID}, {$item}, '{$SQL_fieldName}')");
         }
     }
     if ($record->ID && $record->many_many('ImageTracking') && ($tracker = $record->ImageTracking())) {
         $filter = sprintf('"FieldName" = \'%s\' AND "SiteTreeID" = %d', Convert::raw2sql($this->name), $record->ID);
         DB::query("DELETE FROM \"{$tracker->tableName}\" WHERE {$filter}");
         foreach ($linkedFiles as $item) {
             $tracker->add($item, array('FieldName' => $this->name));
         }
     }
     $record->{$this->name} = $htmlValue->getContent();
 }
Ejemplo n.º 7
0
 /**
  * Purifies the given markup with HTMLPurifier, loads the result into an
  * SS_HTMLValue and returns that value's content.
  *
  * @param string $content Markup to clean
  * @return string Content reported by SS_HTMLValue for the purified markup
  */
 public function cleanHTML($content)
 {
     $purifier = new HTMLPurifier();
     $purified = $purifier->purify($content);
     $value = new SS_HTMLValue($purified);
     return $value->getContent();
 }
Ejemplo n.º 8
0
 /**
  * Attempts to clean invalid HTML, which messes up diffs.
  *
  * When no cleaner is passed in, one is created from the class named in
  * self::$html_cleaner_class if that class exists, otherwise it is obtained
  * from HTMLCleaner::inst(). If no cleaner is available at all, the content
  * is simply round-tripped through SS_HTMLValue. In every case, self-closing
  * <ins /> and <del /> tags are removed from the result afterwards.
  *
  * @param string $content HTML content
  * @param object $cleaner Optional cleaner instance to use instead of the
  * 	configured one; it must provide a cleanHTML() method
  * @return string The cleaned content
  */
 public static function cleanHTML($content, $cleaner = null)
 {
     if (!$cleaner) {
         $cleanerClass = self::$html_cleaner_class;
         // Prefer the configured class; otherwise ask HTMLCleaner for an instance.
         $cleaner = class_exists($cleanerClass) ? new $cleanerClass() : HTMLCleaner::inst();
     }
     if (!$cleaner) {
         // Most basic level of cleaning: pass the content through SS_HTMLValue.
         $fallback = new SS_HTMLValue($content);
         $content = $fallback->getContent();
     } else {
         $content = $cleaner->cleanHTML($content);
     }
     // Remove empty <ins /> and <del /> tags because browsers hate them
     return preg_replace('/<(ins|del)[^>]*\\/>/', '', $content);
 }
Ejemplo n.º 9
0
 /**
  * Feeds content containing both LF and CR-LF line endings into an
  * SS_HTMLValue and expects every line break to come back as LF.
  */
 public function testMixedNewlines()
 {
     $mixed = "<p>paragraph</p>\n<ul><li>1</li>\r\n</ul>";
     $expected = "<p>paragraph</p>\n<ul><li>1</li>\n</ul>";
     $htmlValue = new SS_HTMLValue();
     $htmlValue->setContent($mixed);
     $this->assertEquals($expected, $htmlValue->getContent(), 'Newlines get converted');
 }
 /**
  * Cleans the given content into XHTML suitable for use in DOMDocument.
  *
  * Three strategies are tried in order: the PHP tidy extension, the `tidy`
  * command-line tool, and finally a plain round trip through SS_HTMLValue.
  * Output of the first two is passed through rewriteShortcodes().
  *
  * @param string $content Markup to clean
  * @param string $encoding Input and output encoding handed to tidy
  * @return string
  */
 protected function tidy($content, $encoding = 'UTF-8')
 {
     // Try to use the extension first
     if (extension_loaded('tidy')) {
         $options = array('clean' => true, 'output-xhtml' => true, 'show-body-only' => false, 'wrap' => 0, 'input-encoding' => $encoding, 'output-encoding' => $encoding, 'doctype' => 'omit', 'anchor-as-name' => false);
         $document = tidy_parse_string($content, $options);
         $document->cleanRepair();
         return $this->rewriteShortcodes((string) $document);
     }

     // No PHP extension available, probe for a CLI tidy.
     $exitCode = null;
     $versionOutput = null;
     @exec('tidy --version', $versionOutput, $exitCode);
     if ($exitCode !== 0) {
         // Fall back to default
         $doc = new SS_HTMLValue($content);
         return $doc->getContent();
     }

     $input = escapeshellarg($content);
     // The command line receives the encoding name without hyphens.
     $encoding = escapeshellarg(str_replace('-', '', $encoding));
     // Doesn't work on Windows, sorry, stick to the extension.
     $command = "echo {$input} | tidy -q --show-body-only no --tidy-mark no --doctype omit --input-encoding {$encoding} --output-encoding {$encoding} --wrap 0 --anchor-as-name no --clean yes --output-xhtml yes";
     return $this->rewriteShortcodes(@shell_exec($command));
 }