/**
 * Strips from the given HTMLValue every element and attribute that the configured
 * whitelist rules do not allow. The DOM nodes are modified directly; nothing is returned.
 *
 * Does nothing at all when neither $this->elements nor $this->elementPatterns is set.
 *
 * For every element below <body>:
 *  - a disallowed <script> or <style> is removed together with its contents;
 *  - any other disallowed element is unwrapped (its children take its place);
 *  - an allowed element is optionally padded when empty, has its non-matching
 *    attributes removed, then receives its default and forced attributes.
 *
 * @param HTMLValue $html - The HTMLValue to remove any non-whitelisted elements & attributes from
 */
public function sanitise(HTMLValue $html)
{
    // No rules configured at all: leave the markup untouched
    if (!($this->elements || $this->elementPatterns)) {
        return;
    }

    $document = $html->getDocument();

    /** @var DOMElement $node */
    foreach ($html->query('//body//*') as $node) {
        $rule = $this->getRuleForElement($node->tagName);

        if (!$this->elementMatchesRule($node, $rule)) {
            // Script and style contents are never kept, so drop the whole node
            if (in_array($node->tagName, array('script', 'style'), true)) {
                $node->parentNode->removeChild($node);
                continue;
            }

            // Unwrap: move every child into a fragment...
            $fragment = $document->createDocumentFragment();
            while (($child = $node->firstChild)) {
                $fragment->appendChild($child);
            }

            // ...and let the fragment's contents take the element's place
            $node->parentNode->replaceChild($fragment, $node);
            continue;
        }

        // The element is allowed. Pad it first if the rule asks for that and it has no children
        if ($rule->paddEmpty && !$node->firstChild) {
            $node->nodeValue = ' ';
        }

        // Drop attributes that fail their rule; walk backwards because removal shifts later indexes
        $attributes = $node->attributes;
        for ($index = $attributes->length - 1; $index >= 0; --$index) {
            $attribute = $attributes->item($index);
            $attributeRule = $this->getRuleForAttribute($rule, $attribute->name);

            if (!$this->attributeMatchesRule($attribute, $attributeRule)) {
                $node->removeAttributeNode($attribute);
            }
        }

        // Defaults only apply where the current attribute value is falsy (missing, empty or "0")
        foreach ($rule->attributesDefault as $name => $defaultValue) {
            if ($node->getAttribute($name)) {
                continue;
            }
            $node->setAttribute($name, $defaultValue);
        }

        // Forced attributes always overwrite whatever is there
        foreach ($rule->attributesForced as $name => $forcedValue) {
            $node->setAttribute($name, $forcedValue);
        }
    }
}
/**
 * Substitute shortcodes found inside attribute values with their calculated content.
 *
 * Attributes are not handled through markers: the matching is simpler to do directly
 * in the DOM once the XML has been parsed. Only attributes whose value contains both
 * "[" and "]" are inspected.
 *
 * @param HTMLValue $htmlvalue
 */
protected function replaceAttributeTagsWithContent($htmlvalue)
{
    $self = $this;
    $candidates = $htmlvalue->query('//@*[contains(.,"[")][contains(.,"]")]');

    foreach ($candidates as $attribute) {
        $value = $attribute->nodeValue;
        $tags = $this->extractTags($value);

        // No shortcodes in this attribute value: leave it as it is
        if (!$tags) {
            continue;
        }

        // Context handed to the shortcode handler: the attribute node and its owning element
        $extra = array('node' => $attribute, 'element' => $attribute->ownerElement);

        $attribute->nodeValue = $this->replaceTagsWithText(
            $value,
            $tags,
            function ($position, $tag) use ($self, $extra) {
                return $self->getShortcodeReplacementText($tag, $extra, false);
            }
        );
    }
}