/**
 * Checks that link data can still be read from badly malformed markup.
 *
 * Every sample contains an anchor whose href is "test-link", surrounded by
 * mismatched, unclosed or garbage tags. After loading the sample into an
 * SS_HTMLValue, the first <a> element must expose that href.
 */
public function testInvalidHTMLTagNames()
{
    $value = new SS_HTMLValue();
    $invalid = array(
        '<p><div><a href="test-link"></p></div>',
        '<html><div><a href="test-link"></a></a></html_>',
        '""\'\'\'"""\'""<<<>/</<htmlbody><a href="test-link"<<>',
    );

    foreach ($invalid as $input) {
        $value->setContent($input);
        $anchor = $value->getElementsByTagName('a')->item(0);

        // item(0) is null when no anchor was parsed; assert first so the
        // test fails cleanly instead of dying on a method call on null.
        $this->assertNotNull($anchor, 'An anchor element can be found in malformed HTML');
        $this->assertEquals(
            'test-link',
            $anchor->getAttribute('href'),
            'Link data can be extracted from malformed HTML'
        );
    }
}
/**
 * Rewrites the links in an outgoing newsletter email so they point at
 * tracking URLs.
 *
 * For each distinct href in the email body a Newsletter_TrackedLink record is
 * looked up (by newsletter ID and original URL) or created. Every anchor that
 * used that href is then pointed at "newsletter-link/<hash>", or at
 * "newsletter-link/<member token>/<hash>" when the recipient address matches
 * a Member record.
 *
 * Anchors are left untouched when they have no href attribute, or when the
 * href contains "{$" (template keyword) or "mailto:".
 *
 * Does nothing when the email has no associated newsletter.
 */
public function onBeforeSend()
{
    $email = $this->owner;
    $letter = $email->Newsletter();

    // Without a newsletter there is nothing to track against, so bail out
    // before doing any parsing work.
    if (!$letter) {
        return;
    }

    $body = new SS_HTMLValue($email->Body()->forTemplate());
    $links = array();
    $member = null;

    if ($email->To()) {
        $member = DataObject::get_one(
            'Member',
            sprintf('"Email" = \'%s\'', Convert::raw2sql($email->To()))
        );
    }

    // First build up a set of all the unique links within the newsletter,
    // along with the elements that link to them.
    foreach ($body->getElementsByTagName('a') as $link) {
        // Named anchors (<a name="...">) carry no href; giving them a
        // tracking href would turn them into clickable links.
        if (!$link->hasAttribute('href')) {
            continue;
        }

        $href = $link->getAttribute('href');

        // Ignore links with template keywords and mail links.
        if (strpos($href, '{$') !== false || strpos($href, 'mailto:') !== false) {
            continue;
        }

        if (array_key_exists($href, $links)) {
            $links[$href][] = $link;
        } else {
            $links[$href] = array($link);
        }
    }

    // Then actually do the processing. Create a unique tracking object for
    // each link. Attempt to embed a member-specific tracking token if
    // the newsletter is being sent to a member.
    foreach ($links as $href => $elements) {
        $track = DataObject::get_one(
            'Newsletter_TrackedLink',
            sprintf(
                '"NewsletterID" = %d AND "Original" = \'%s\'',
                $letter->ID,
                Convert::raw2sql($href)
            )
        );

        if (!$track) {
            $track = new Newsletter_TrackedLink();
            $track->Original = $href;
            $track->NewsletterID = $letter->ID;
            $track->write();
        }

        if ($member) {
            $trackHref = Controller::join_links(
                Director::baseURL(),
                'newsletter-link',
                $member->NewsletterTrackingToken,
                $track->Hash
            );
        } else {
            $trackHref = Controller::join_links(
                Director::baseURL(),
                'newsletter-link',
                $track->Hash
            );
        }

        foreach ($elements as $element) {
            $element->setAttribute('href', $trackHref);
        }
    }

    // Serialise the modified DOM back into the email body.
    $dom = $body->getDocument();
    $email->setBody(DBField::create('HTMLText', $dom->saveHTML()));
}
/**
 * Finds the links that are of interest for the link tracking automation. Checks for brokenness and attaches
 * extracted metadata so consumers can decide what to do with the DOM element (provided as DOMReference).
 *
 * Anchors without an href attribute are skipped. Links that match none of the recognised shapes
 * (empty or root-relative href, [sitetree_link], [file_link], local "#anchor") are not reported at all.
 *
 * @param SS_HTMLValue $htmlValue Object to parse the links from.
 * @return array Numerically indexed list of found links, each an associative array with the following layout:
 *     Type: string, name of the link type ('broken', 'sitetree', 'file' or 'localanchor')
 *     Target: any, a reference to the target object, depends on the Type (the matched ID, or null)
 *     Anchor: string, anchor part of the link (null when absent)
 *     DOMReference: DOMElement, reference to the link to apply changes.
 *     Broken: boolean, a flag highlighting whether the link should be treated as broken.
 */
public function process(SS_HTMLValue $htmlValue)
{
    $results = array();
    $links = $htmlValue->getElementsByTagName('a');
    if (!$links) {
        return $results;
    }

    foreach ($links as $link) {
        if (!$link->hasAttribute('href')) {
            continue;
        }

        $href = Director::makeRelative($link->getAttribute('href'));

        // Definitely broken links.
        if ($href == '' || $href[0] == '/') {
            $results[] = array(
                'Type' => 'broken',
                'Target' => null,
                'Anchor' => null,
                'DOMReference' => $link,
                'Broken' => true,
            );
            continue;
        }

        // Link to a page on this site.
        $matches = array();
        if (preg_match('/\\[sitetree_link(?:\\s*|%20|,)?id=([0-9]+)\\](#(.*))?/i', $href, $matches)) {
            $anchor = empty($matches[3]) ? null : $matches[3];
            $page = DataObject::get_by_id('SiteTree', $matches[1]);

            if (!$page) {
                // Page doesn't exist.
                $broken = true;
            } elseif ($anchor !== null) {
                // Broken anchor on the target page. The anchor text is user
                // content, so it must be quoted before being used in a pattern.
                $anchorPattern = '/(name|id)="' . preg_quote($anchor, '/') . '"/';
                $broken = !preg_match($anchorPattern, $page->Content);
            } else {
                $broken = false;
            }

            $results[] = array(
                'Type' => 'sitetree',
                'Target' => $matches[1],
                'Anchor' => $anchor,
                'DOMReference' => $link,
                'Broken' => $broken,
            );
            continue;
        }

        // Link to a file on this site.
        $matches = array();
        if (preg_match('/\\[file_link(?:\\s*|%20|,)?id=([0-9]+)\\]/i', $href, $matches)) {
            $results[] = array(
                'Type' => 'file',
                'Target' => $matches[1],
                'Anchor' => null,
                'DOMReference' => $link,
                'Broken' => !DataObject::get_by_id('File', $matches[1]),
            );
            continue;
        }

        // Local anchor.
        $matches = array();
        if (preg_match('/^#(.*)/i', $href, $matches)) {
            // Quote the anchor for the '#' delimiter used by this pattern.
            $anchorPattern = '#(name|id)="' . preg_quote($matches[1], '#') . '"#';
            $results[] = array(
                'Type' => 'localanchor',
                'Target' => null,
                'Anchor' => $matches[1],
                'DOMReference' => $link,
                'Broken' => !preg_match($anchorPattern, $htmlValue->getContent()),
            );
            continue;
        }
    }

    return $results;
}
/**
 * Saves the editor's HTML into the given record and refreshes its link and
 * file tracking data.
 *
 * While saving, the markup is post-processed:
 *  - [sitetree_link id=N] anchors are recorded as linked pages and have any
 *    "ss-broken" class removed; a missing page sets $record->HasBrokenLink.
 *  - Anchors pointing into the assets directory are recorded as linked files;
 *    a missing file sets $record->HasBrokenFile.
 *  - Other anchors whose relative href starts with "/" set HasBrokenLink.
 *  - Images get a trailing "?r=N" stripped from src, are resampled when the
 *    width/height attributes differ from the file's dimensions, receive empty
 *    alt/title defaults, and are recorded as linked files.
 *
 * When the record has an ID and the respective many_many relation, the rows
 * for this field in LinkTracking / ImageTracking are replaced with the newly
 * collected IDs.
 *
 * @param object $record Record to write into; the processed HTML is assigned
 *                       to the property named after this field.
 * @throws Exception When the target field's escape type is not 'xml'.
 */
public function saveInto($record)
{
    if ($record->escapeTypeForField($this->name) != 'xml') {
        throw new Exception('HtmlEditorField->saveInto(): This field should save into a HTMLText or HTMLVarchar field.');
    }

    $linkedPages = array();
    $linkedFiles = array();
    $htmlValue = new SS_HTMLValue($this->value);
    $assetsPrefix = ASSETS_DIR . '/';

    // Populate link tracking for internal links & links to asset files.
    if ($links = $htmlValue->getElementsByTagName('a')) {
        foreach ($links as $link) {
            $href = Director::makeRelative($link->getAttribute('href'));
            if (!$href) {
                continue;
            }

            if (preg_match('/\\[sitetree_link id=([0-9]+)\\]/i', $href, $matches)) {
                $ID = $matches[1];

                // Clear out any broken link classes. Work on whole class
                // tokens so neighbouring class names are never merged and
                // longer names that merely start with "ss-broken" survive.
                if ($class = $link->getAttribute('class')) {
                    $classes = preg_split('/\s+/', $class, -1, PREG_SPLIT_NO_EMPTY);
                    $link->setAttribute('class', implode(' ', array_diff($classes, array('ss-broken'))));
                }

                $linkedPages[] = $ID;
                if (!DataObject::get_by_id('SiteTree', $ID)) {
                    $record->HasBrokenLink = true;
                }
            } elseif (substr($href, 0, strlen($assetsPrefix)) == $assetsPrefix) {
                $candidateFile = File::find(Convert::raw2sql(urldecode($href)));
                if ($candidateFile) {
                    $linkedFiles[] = $candidateFile->ID;
                } else {
                    $record->HasBrokenFile = true;
                }
            } elseif ($href[0] == '/') {
                // $href is known to be non-empty here, so only the
                // root-relative case can mark the link as broken.
                $record->HasBrokenLink = true;
            }
        }
    }

    // Resample images, add default attributes and add to assets tracking.
    if ($images = $htmlValue->getElementsByTagName('img')) {
        foreach ($images as $img) {
            // Strip any ?r=n data from the src attribute.
            $img->setAttribute('src', preg_replace('/([^\\?]*)\\?r=[0-9]+$/i', '$1', $img->getAttribute('src')));

            $path = urldecode(Director::makeRelative($img->getAttribute('src')));
            $image = File::find($path);
            if (!$image) {
                // Only a missing file inside the assets directory counts as broken.
                if (substr($path, 0, strlen($assetsPrefix)) == $assetsPrefix) {
                    $record->HasBrokenFile = true;
                }
                continue;
            }

            // Resample the images if the width & height have changed.
            $width = $img->getAttribute('width');
            $height = $img->getAttribute('height');
            if ($width && $height && ($width != $image->getWidth() || $height != $image->getHeight())) {
                // Make sure that the resized image actually returns an image.
                $resized = $image->ResizedImage($width, $height);
                if ($resized) {
                    $img->setAttribute('src', $resized->getRelativePath());
                }
            }

            // Add default empty title & alt attributes.
            if (!$img->getAttribute('alt')) {
                $img->setAttribute('alt', '');
            }
            if (!$img->getAttribute('title')) {
                $img->setAttribute('title', '');
            }

            // If the src attribute is not set, then we won't add this to the list.
            if ($img->getAttribute('src')) {
                // Add to the tracked files.
                $linkedFiles[] = $image->ID;
            }
        }
    }

    // Save file & link tracking data.
    if ($record->ID && $record->many_many('LinkTracking') && ($tracker = $record->LinkTracking())) {
        // Escape once and use the same value for both DELETE and INSERT.
        $SQL_fieldName = Convert::raw2sql($this->name);
        $filter = sprintf('"FieldName" = \'%s\' AND "SiteTreeID" = %d', $SQL_fieldName, $record->ID);
        DB::query("DELETE FROM \"{$tracker->tableName}\" WHERE {$filter}");

        foreach ($linkedPages as $item) {
            DB::query("INSERT INTO \"SiteTree_LinkTracking\" (\"SiteTreeID\",\"ChildID\", \"FieldName\")\n\t\t\t\t\tVALUES ({$record->ID}, {$item}, '{$SQL_fieldName}')");
        }
    }

    if ($record->ID && $record->many_many('ImageTracking') && ($tracker = $record->ImageTracking())) {
        $filter = sprintf(
            '"FieldName" = \'%s\' AND "SiteTreeID" = %d',
            Convert::raw2sql($this->name),
            $record->ID
        );
        DB::query("DELETE FROM \"{$tracker->tableName}\" WHERE {$filter}");

        foreach ($linkedFiles as $item) {
            $tracker->add($item, array('FieldName' => $this->name));
        }
    }

    $record->{$this->name} = $htmlValue->getContent();
}