/**
 * @covers ::createPlaceholder
 * @dataProvider providerCreatePlaceholderGeneratesValidHtmlMarkup
 *
 * Checks that placeholder markup survives a DOMDocument round trip unchanged.
 *
 * If the generated markup were not valid HTML, loading and re-serializing a
 * document containing placeholders could alter them. The markup recorded in
 * #attached would then no longer match the markup found in the processed
 * HTML, and the placeholders could not be replaced.
 */
public function testCreatePlaceholderGeneratesValidHtmlMarkup(array $element) {
  $placeholder = $this->placeholderGenerator->createPlaceholder($element);
  $markup = $placeholder['#markup'];

  // The string form of the generated markup is the reference value.
  $expected = (string) $markup;
  // Parse the markup into a DOM and serialize it straight back.
  $round_tripped = Html::serialize(Html::load($markup));

  $this->assertEquals($expected, $round_tripped);
}
/**
 * {@inheritdoc}
 */
public function process($text, $langcode) {
  // For every element that references a file entity by UUID this filter:
  // - points an existing 'src' attribute at the file's current URL, and
  // - adds the file's cache tags to the result, once per distinct UUID.
  $result = new FilterProcessResult($text);

  // Cheap string check so text without file references is never parsed.
  if (stristr($text, 'data-entity-type="file"') !== FALSE) {
    $dom = Html::load($text);
    $xpath = new \DOMXPath($dom);
    $processed_uuids = [];

    foreach ($xpath->query('//*[@data-entity-type="file" and @data-entity-uuid]') as $node) {
      $uuid = $node->getAttribute('data-entity-uuid');
      $has_src = $node->hasAttribute('src');

      // Cache tags are only collected for the first occurrence of each UUID.
      // The UUID is marked as processed even if the file cannot be loaded.
      $first_occurrence = !isset($processed_uuids[$uuid]);
      $processed_uuids[$uuid] = TRUE;

      // Nothing to update and nothing to collect: skip the entity lookup.
      if (!$has_src && !$first_occurrence) {
        continue;
      }

      // Look the file up once per node and reuse it for both purposes.
      $file = $this->entityManager->loadEntityByUuid('file', $uuid);
      if (!$file) {
        continue;
      }

      // If there is a 'src' attribute, set it to the file entity's current
      // URL. This ensures the URL works even after the file location changes.
      if ($has_src) {
        $node->setAttribute('src', file_url_transform_relative(file_create_url($file->getFileUri())));
      }

      if ($first_occurrence) {
        $result->addCacheTags($file->getCacheTags());
      }
    }

    $result->setProcessedText(Html::serialize($dom));
  }

  return $result;
}
/**
 * {@inheritdoc}
 */
public function process($text, $langcode) {
  // Stamp a fixed test attribute onto every element whose tag name is listed
  // in this filter's 'tags' setting.
  $dom = Html::load($text);

  foreach ($this->settings['tags'] as $tag_name) {
    foreach ($dom->getElementsByTagName($tag_name) as $element) {
      $element->setAttribute('test_attribute', 'test attribute value');
    }
  }

  $processed = Html::serialize($dom);
  return new FilterProcessResult($processed);
}
/**
 * {@inheritdoc}
 */
public function process($text, $langcode) {
  $result = new FilterProcessResult($text);

  // Skip the DOM work entirely when no data-caption attribute can be present.
  if (stristr($text, 'data-caption') === FALSE) {
    return $result;
  }

  $dom = Html::load($text);
  $xpath = new \DOMXPath($dom);
  // Inline tags that are allowed by default, plus <br>.
  $allowed_caption_tags = ['a', 'em', 'strong', 'cite', 'code', 'br'];

  foreach ($xpath->query('//*[@data-caption]') as $element) {
    // Take the caption off the element; the attribute itself must not remain.
    $raw_caption = $element->getAttribute('data-caption');
    $element->removeAttribute('data-caption');

    // Sanitize: escape, decode the HTML encoding again, then restrict the
    // caption to the allowed set of tags.
    $caption = Html::decodeEntities(Html::escape($raw_caption));
    $caption = FilteredMarkup::create(Xss::filter($caption, $allowed_caption_tags));

    // An empty caption means there is nothing to render.
    if (Unicode::strlen($caption) === 0) {
      continue;
    }

    // The class attribute moves from the captioned element to the rendered
    // wrapper, so that e.g. the filter_align filter can cooperate.
    $tag = $element->tagName;
    $classes = $element->getAttribute('class');
    $element->removeAttribute('class');

    // When the element is directly wrapped in a link, caption the link.
    $target = $element;
    if ($element->parentNode->tagName === 'a') {
      $target = $element->parentNode;
    }

    $render_array = [
      '#theme' => 'filter_caption',
      '#node' => FilteredMarkup::create($target->C14N()),
      '#tag' => $tag,
      '#caption' => $caption,
      '#classes' => $classes,
    ];
    $rendered = drupal_render($render_array);

    // Parse the rendered HTML in its own document, then import each top-level
    // node (with its children) into the original document, placing it just
    // before the node being replaced.
    $rendered_body = Html::load($rendered)->getElementsByTagName('body')->item(0);
    foreach ($rendered_body->childNodes as $rendered_child) {
      $imported = $dom->importNode($rendered_child, TRUE);
      $target->parentNode->insertBefore($imported, $target);
    }

    // The original node has been superseded by the imported markup.
    $target->parentNode->removeChild($target);
  }

  $result->setProcessedText(Html::serialize($dom))
    ->addAttachments(['library' => ['filter/caption']]);

  return $result;
}
/**
 * {@inheritdoc}
 */
public function process($text, $langcode) {
  $result = new FilterProcessResult($text);

  // No data-align attribute anywhere: return the text untouched.
  if (stristr($text, 'data-align') === FALSE) {
    return $result;
  }

  $dom = Html::load($text);
  $xpath = new \DOMXPath($dom);

  foreach ($xpath->query('//*[@data-align]') as $element) {
    // The attribute is always removed, whether or not its value is valid.
    $alignment = $element->getAttribute('data-align');
    $element->removeAttribute('data-align');

    // Only the three known alignments are turned into a class.
    if (!in_array($alignment, ['left', 'center', 'right'])) {
      continue;
    }

    // Append the alignment class to whatever classes are already present.
    $existing = $element->getAttribute('class');
    $class_list = [];
    if (strlen($existing) > 0) {
      $class_list = explode(' ', $existing);
    }
    $class_list[] = 'align-' . $alignment;
    $element->setAttribute('class', implode(' ', $class_list));
  }

  $result->setProcessedText(Html::serialize($dom));
  return $result;
}
/**
 * {@inheritdoc}
 */
public function process($text, $langcode) {
  // Rewrites the src of every site-internal <img> to an absolute URL.
  $result = new FilterProcessResult($text);

  // Cheap string check so text without images is never parsed.
  if (stristr($text, '<img ') !== FALSE) {
    $dom = Html::load($text);

    foreach ($dom->getElementsByTagName('img') as $image) {
      $src = $image->getAttribute('src');

      // The src must be non-empty.
      if (Unicode::strlen($src) === 0) {
        continue;
      }

      // Leave sources that are already external alone. The check is anchored
      // at the start of the value, so an internal path that merely contains
      // "http://" (e.g. in its query string) is still made absolute, and
      // protocol-relative sources ("//host/path") are treated as external.
      if (preg_match('@^(?:https?:)?//@i', $src)) {
        continue;
      }

      try {
        $url = Url::fromUri('internal:' . $src, ['absolute' => TRUE]);
        $image->setAttribute('src', $url->toString());
      }
      catch (\InvalidArgumentException $e) {
        // The src is not something that can be expressed as an internal: URI
        // (for example a relative path without a leading slash, or a data:
        // URI). Keep it unchanged instead of aborting the whole filter.
        continue;
      }
    }

    $result->setProcessedText(Html::serialize($dom));
  }

  return $result;
}
/**
 * {@inheritdoc}
 */
public function process($text, $langcode) {
  // Wraps each non-external <img> that declares a width or a height in a
  // link pointing at the image's own src.
  $result = new FilterProcessResult($text);
  $dom = Html::load($text);
  $xpath = new \DOMXPath($dom);

  /** @var \DOMNode $image */
  foreach ($xpath->query('//img') as $image) {
    $width = $image->getAttribute('width');
    $height = $image->getAttribute('height');
    $src = $image->getAttribute('src');

    // External images are never wrapped.
    if (UrlHelper::isExternal($src)) {
      continue;
    }
    // Neither dimension is set: nothing to do for this image.
    if (!$width && !$height) {
      continue;
    }

    // Put the link where the image was, then move the image inside the link.
    /** @var \DOMNode $link */
    $link = $dom->createElement('a');
    $link->setAttribute('href', $src);
    $image->parentNode->replaceChild($link, $image);
    $link->appendChild($image);
  }

  $result->setProcessedText(Html::serialize($dom));
  return $result;
}
/**
 * Filters tag attributes down to those accepted by the HTML restrictions.
 *
 * @param string $text
 *   The HTML text string to be filtered.
 *
 * @return string
 *   The HTML with attributes filtered according to the settings, trimmed of
 *   surrounding whitespace.
 */
public function filterAttributes($text) {
  $restrictions = $this->getHTMLRestrictions();
  // Attributes listed under '*' apply to every tag; take them out of the
  // per-tag list so the loop below only sees real tag names.
  $global_allowed_attributes = array_filter($restrictions['allowed']['*']);
  unset($restrictions['allowed']['*']);

  $html_dom = Html::load($text);
  $xpath = new \DOMXPath($html_dom);

  foreach ($restrictions['allowed'] as $allowed_tag => $tag_attributes) {
    // FALSE means "no tag-specific attributes". The globally allowed
    // attributes still apply, so a tag can never disallow attributes
    // completely.
    if ($tag_attributes === FALSE) {
      $tag_attributes = [];
    }

    $exact = [];
    $prefix = [];
    foreach ($global_allowed_attributes + $tag_attributes as $name => $values) {
      $prepared = $this->prepareAttributeValues($values);
      // A trailing * is a wildcard, but only when it has some prefix.
      $is_wildcard = substr($name, -1) === '*' && $name[0] !== '*';
      if ($is_wildcard) {
        $prefix[str_replace('*', '', $name)] = $prepared;
      }
      else {
        $exact[$name] = $prepared;
      }
    }
    // Order the prefixes by key, descending.
    krsort($prefix);
    $allowed_attributes = ['exact' => $exact, 'prefix' => $prefix];

    // Only elements of this tag that carry at least one attribute need to be
    // visited.
    $elements = $xpath->query('//' . $allowed_tag . '[@*]');
    foreach ($elements as $element) {
      $this->filterElementAttributes($element, $allowed_attributes);
    }
  }

  // Optionally mark every link as rel="nofollow".
  if ($this->settings['filter_html_nofollow']) {
    foreach ($html_dom->getElementsByTagName('a') as $anchor) {
      $anchor->setAttribute('rel', 'nofollow');
    }
  }

  return trim(Html::serialize($html_dom));
}
/**
 * {@inheritdoc}
 */
public function processEmbeds($text) {
  $dom = Html::load($text);
  $xpath = new \DOMXPath($dom);

  foreach ($xpath->query('//oembed') as $oembed) {
    // The text content of the <oembed> element is what gets resolved.
    $embed = $this->getEmbedObject($oembed->nodeValue);

    // Leave the element alone unless an embed with HTML was obtained.
    if (empty($embed) || empty($embed->html)) {
      continue;
    }

    $this->swapEmbedHtml($oembed, $embed);
  }

  return Html::serialize($dom);
}
/**
 * {@inheritdoc}
 */
public function process($text, $langcode) {
  // Current nesting depth of embedded-entity rendering. It is static so that
  // nested invocations of this method (an embedded entity whose own text
  // contains embeds) share the same counter.
  static $depth = 0;

  $result = new FilterProcessResult($text);

  // Cheap string checks so text without embeds is never parsed.
  if (strpos($text, 'data-entity-type') !== FALSE && (strpos($text, 'data-entity-embed-display') !== FALSE || strpos($text, 'data-view-mode') !== FALSE)) {
    $dom = Html::load($text);
    $xpath = new \DOMXPath($dom);

    foreach ($xpath->query('//drupal-entity[@data-entity-type and (@data-entity-uuid or @data-entity-id) and (@data-entity-embed-display or @data-view-mode)]') as $node) {
      /** @var \DOMElement $node */
      $entity_type = $node->getAttribute('data-entity-type');
      $entity = NULL;
      // Stays empty when loading or rendering fails, so a broken embed is
      // replaced by nothing.
      $entity_output = '';

      try {
        // Load the entity either by UUID (preferred) or ID.
        $id = $node->getAttribute('data-entity-uuid') ?: $node->getAttribute('data-entity-id');
        $entity = $this->loadEntity($entity_type, $id);
        if (!$entity) {
          throw new EntityNotFoundException(sprintf('Unable to load embedded %s entity %s.', $entity_type, $id));
        }

        // Protect ourselves from recursive rendering. The counter is restored
        // in the finally block, so a failure below (including the recursion
        // guard itself) cannot leave it permanently raised and cause false
        // recursion errors for later embeds.
        $depth++;
        try {
          if ($depth > 20) {
            throw new RecursiveRenderingException(sprintf('Recursive rendering detected when rendering embedded %s entity %s.', $entity_type, $entity->id()));
          }

          // If a UUID was not used, but is available, add it to the HTML.
          if (!$node->getAttribute('data-entity-uuid') && ($uuid = $entity->uuid())) {
            $node->setAttribute('data-entity-uuid', $uuid);
          }

          // Merge the cacheability of both the entity and the access check
          // into the filter result.
          $access = $entity->access('view', NULL, TRUE);
          $access_metadata = CacheableMetadata::createFromObject($access);
          $entity_metadata = CacheableMetadata::createFromObject($entity);
          $result = $result->merge($entity_metadata)->merge($access_metadata);

          // The element's attributes form the rendering context; the
          // language code is only added when the element does not set one.
          $context = $this->getNodeAttributesAsArray($node);
          $context += ['data-langcode' => $langcode];
          $entity_output = $this->renderEntityEmbed($entity, $context);
        }
        finally {
          $depth--;
        }
      }
      catch (\Exception $e) {
        // Log the problem and carry on with the remaining embeds.
        watchdog_exception('entity_embed', $e);
      }

      $this->replaceNodeContent($node, $entity_output);
    }

    $result->setProcessedText(Html::serialize($dom));
  }

  return $result;
}
/**
 * Points the images in a piece of text at resized copies of their files.
 *
 * Only <img> elements whose data-entity-uuid attribute resolves to a file
 * entity are considered. When the dimensions of the image loaded from the
 * src attribute differ from the element's width/height attributes, a resized
 * copy of the file is created under public://resize/ (or reused when it
 * already exists) and the src attribute is rewritten to that copy.
 *
 * @param string $text
 *   The text to be updated with the new img src tags.
 *
 * @return string
 *   The HTML with the img src attributes updated.
 */
private function getImages($text) {
  $dom = Html::load($text);
  $xpath = new \DOMXPath($dom);

  /** @var \DOMNode $node */
  foreach ($xpath->query('//img') as $node) {
    $file = $this->entityRepository->loadEntityByUuid('file', $node->getAttribute('data-entity-uuid'));
    // If the image has no matching file entity, don't try to resize it.
    if (is_null($file)) {
      continue;
    }

    $image = $this->imageFactory->get($node->getAttribute('src'));
    // The image already has the requested dimensions: nothing to resize.
    if ($image->getWidth() == $node->getAttribute('width') && $image->getHeight() == $node->getAttribute('height')) {
      continue;
    }

    // Build the destination path, keeping the original sub-directory and
    // encoding the requested dimensions in the file name.
    $target = file_uri_target($file->getFileUri());
    $dirname = dirname($target) != '.' ? dirname($target) . '/' : '';
    $info = pathinfo($file->getFileUri());
    $resize_file_path = 'public://resize/' . $dirname . $info['filename'] . '-' . $node->getAttribute('width') . 'x' . $node->getAttribute('height') . '.' . $info['extension'];

    // Reuse the resized copy if it was already generated.
    if (file_exists($resize_file_path)) {
      $node->setAttribute('src', file_url_transform_relative(file_create_url($resize_file_path)));
      continue;
    }

    // Delete this once https://www.drupal.org/node/2211657#comment-11510213
    // is fixed.
    $dirname = $this->fileSystem->dirname($resize_file_path);
    if (!file_exists($dirname)) {
      file_prepare_directory($dirname, FILE_CREATE_DIRECTORY);
    }

    // Copy the original file to the destination and resize the copy.
    $copy = file_unmanaged_copy($file->getFileUri(), $resize_file_path, FILE_EXISTS_REPLACE);
    // The copy can fail (e.g. the destination is not writable). Keep the
    // original src in that case instead of pointing it at a bogus URL.
    if ($copy === FALSE) {
      continue;
    }

    $copy_image = $this->imageFactory->get($copy);
    $copy_image->resize($node->getAttribute('width'), $node->getAttribute('height'));
    $copy_image->save();
    $node->setAttribute('src', file_url_transform_relative(file_create_url($copy)));
  }

  return Html::serialize($dom);
}
/**
 * Runs a very permissive XSS/HTML filter over all data-* attribute values.
 *
 * @param string $html
 *   The string to apply the data-attributes filtering to.
 *
 * @return string
 *   The filtered string. It is returned as-is when it does not contain
 *   "data-" at all.
 */
protected static function filterXssDataAttributes($html) {
  // Nothing that could be a data attribute: skip parsing altogether.
  if (stristr($html, 'data-') === FALSE) {
    return $html;
  }

  $dom = Html::load($html);
  $xpath = new \DOMXPath($dom);
  $data_attributes = $xpath->query('//@*[starts-with(name(.), "data-")]');

  foreach ($data_attributes as $attribute) {
    // Data attributes hold an HTML-encoded value. DOMAttr::value already
    // returns the decoded value, so it can be XSS-filtered directly and is
    // then stored back in encoded form.
    $filtered = Xss::filterAdmin($attribute->value);
    $attribute->value = Html::escape($filtered);
  }

  return Html::serialize($dom);
}
/**
 * {@inheritdoc}
 */
public function process($text, $langcode) {
  $result = new FilterProcessResult($text);

  // Neither attribute can be present: return the text untouched.
  if (stristr($text, 'data-caption') === FALSE && stristr($text, 'data-align') === FALSE) {
    return $result;
  }

  $caption_found = FALSE;
  $dom = Html::load($text);
  $xpath = new \DOMXPath($dom);
  // Inline tags that are allowed by default, plus <br>.
  $allowed_caption_tags = ['a', 'em', 'strong', 'cite', 'code', 'br'];

  foreach ($xpath->query('//*[@data-caption or @data-align]') as $element) {
    $caption = NULL;
    $align = NULL;

    // Read and strip data-caption, then sanitize its value: escape it,
    // decode the HTML encoding again and restrict it to the allowed tags.
    if ($element->hasAttribute('data-caption')) {
      $raw_caption = $element->getAttribute('data-caption');
      $element->removeAttribute('data-caption');
      $caption = String::decodeEntities(String::checkPlain($raw_caption));
      $caption = Xss::filter($caption, $allowed_caption_tags);
      // An empty caption counts as no caption.
      if (Unicode::strlen($caption) === 0) {
        $caption = NULL;
      }
    }

    // Read and strip data-align; only 'left', 'center' and 'right' are kept.
    if ($element->hasAttribute('data-align')) {
      $align = $element->getAttribute('data-align');
      $element->removeAttribute('data-align');
      if (!in_array($align, ['left', 'center', 'right'])) {
        $align = NULL;
      }
    }

    // Without a valid caption the element is not re-rendered; a valid
    // alignment is then simply expressed as an extra class.
    if ($caption === NULL) {
      if ($align !== NULL) {
        $existing = $element->getAttribute('class');
        $class_list = strlen($existing) > 0 ? explode(' ', $existing) : [];
        $class_list[] = 'align-' . $align;
        $element->setAttribute('class', implode(' ', $class_list));
      }
      continue;
    }
    $caption_found = TRUE;

    // Re-render the element together with its caption and alignment.
    $render_array = [
      '#theme' => 'filter_caption',
      '#node' => SafeMarkup::set($element->C14N()),
      '#tag' => $element->tagName,
      '#caption' => $caption,
      '#align' => $align,
    ];
    $rendered = drupal_render($render_array);

    // Parse the rendered HTML in its own document and take the first node of
    // its body, import it (with children) into the original document, and
    // swap it in for the original element.
    $replacement = Html::load($rendered)->getElementsByTagName('body')->item(0)->childNodes->item(0);
    $replacement = $dom->importNode($replacement, TRUE);
    $element->parentNode->replaceChild($replacement, $element);
  }

  $result->setProcessedText(Html::serialize($dom));

  // The caption library is only needed when a caption was rendered.
  if ($caption_found) {
    $result->addAssets(['library' => ['filter/caption']]);
  }

  return $result;
}
/**
 * Truncates HTML text to a number of words.
 *
 * @param string $html
 *   Text to be updated.
 * @param int $limit
 *   Amount of text to allow, in words.
 * @param string $ellipsis
 *   Characters to use at the end of the text.
 *
 * @return mixed
 *   The original $html when the limit is not positive or is not below the
 *   word count of the tag-stripped text; otherwise the serialized result.
 */
public function truncateWords($html, $limit, $ellipsis = '...') {
  // The word count is taken on the text with its tags stripped.
  $nothing_to_truncate = $limit <= 0 || $limit >= $this->countWords(strip_tags($html));
  if ($nothing_to_truncate) {
    return $html;
  }

  $document = $this->init($html, $limit, $ellipsis);

  // Hand the start node over for processing.
  $this->domNodeTruncateWords($this->startNode);

  return Html::serialize($document);
}
/**
 * Tests DomHelperTrait::replaceNodeContent().
 *
 * Replaces the content of the fixture node and compares the serialized
 * fixture document against the expected output.
 *
 * @dataProvider providerTestReplaceNodeContent
 */
public function testReplaceNodeContent($content, $expected_output) {
  $this->replaceNodeContent($this->node, $content);

  $actual_output = Html::serialize($this->document);

  $this->assertEquals($expected_output, $actual_output);
}