Esempio n. 1
0
 /**
  * {@inheritdoc}
  */
 public function sanitise($value, $isWysiwyg = false)
 {
     if ($isWysiwyg) {
         $tags = $this->getWyswigAllowedTags();
     } else {
         $tags = $this->getAllowedTags();
     }

     // Record whether the raw input is free of encoded HTML entities, i.e.
     // decoding it changes nothing. In that case the sanitiser would
     // introduce entities into the cleaned HTML that were not there before,
     // so the output has to be decoded again afterwards.
     // Ideally this would be fixed upstream through
     // \DomDocument::substituteEntities, but PHP's implementation
     // disregards that setting, which leaves this crude, albeit contained,
     // workaround as the only option.
     $inputHasNoEntities = html_entity_decode($value, ENT_NOQUOTES) === $value;

     $cleaner = new Maid([
         'output-format'   => 'html',
         'allowed-tags'    => $tags,
         'allowed-attribs' => $this->getAllowedAttributes(),
     ]);
     $cleaned = $cleaner->clean($value);

     return $inputHasNoEntities ? html_entity_decode($cleaned, ENT_NOQUOTES) : $cleaned;
 }
 /**
  * Creates markup-safe content from one or more fields of a record and
  * wraps it in a CDATA section.
  *
  * Every requested field that exists on the record is cleaned down to a
  * fixed whitelist of tags and attributes (style and script are not on that
  * whitelist) and the cleaned fragments are concatenated in the order the
  * fields were given. Fields the record does not have are skipped silently.
  * Can optionally reduce the result to a (cleaned) excerpt.
  *
  * @param Content      $record        Bolt Content object
  * @param string|array $fields        Comma separated list (or array) of fields to clean up
  * @param integer      $excerptLength Number of chars of the excerpt; 0 or less disables trimming
  *
  * @return \Twig_Markup The cleaned content, wrapped in '<![CDATA[ ' ... ' ]]>'
  */
 public function ampSafe($record, $fields = '', $excerptLength = 0)
 {
     // Make sure we have an array of fields. Even if it's only one.
     if (!is_array($fields)) {
         $fields = explode(',', $fields);
     }
     $fields = array_map('trim', $fields);

     // The cleaner's configuration does not depend on the field being
     // processed, so it is built once instead of once per iteration.
     $maid = new Maid([
         'output-format'   => 'html',
         'allowed-tags'    => ['a', 'b', 'br', 'hr', 'h1', 'h2', 'h3', 'h4', 'p', 'strong', 'em', 'i', 'u', 'strike', 'ul', 'ol', 'li', 'img'],
         'allowed-attribs' => ['id', 'class', 'name', 'value', 'href', 'src'],
     ]);

     $result = '';
     foreach ($fields as $field) {
         if (!array_key_exists($field, $record->values)) {
             continue;
         }
         $result .= $maid->clean($record->values[$field]);
     }

     if ($excerptLength > 0) {
         $result = Html::trimText($result, $excerptLength);
     }

     return new \Twig_Markup('<![CDATA[ ' . $result . ' ]]>', 'utf-8');
 }
Esempio n. 3
0
File: lib.php Progetto: LeonB/site
/**
 * Truncate a given HTML fragment to the desired length (measured as character
 * count), additionally performing some cleanup.
 *
 * The libxml error-handling (and, where applicable, entity-loader) settings
 * are changed for the duration of the call and are restored afterwards, even
 * when an exception is thrown while processing.
 *
 * @param string $html The HTML fragment to clean up
 * @param int $desiredLength The desired number of characters, or NULL to do
 *                           just the cleanup (but no truncating).
 * @param string $ellipseStr If non-empty, this string will be appended to the
 *                           last collected node when the document gets
 *                           truncated.
 * @param bool $stripTags If TRUE, remove *all* HTML tags. Otherwise, keep a
 *                        whitelisted 'safe' set.
 * @param bool $nbsp If TRUE, convert all whitespace runs to non-breaking
 *                   spaces ('&nbsp;' entities).
 *
 * @return string The cleaned (and possibly truncated) HTML fragment
 */
function trimToHTML($html, $desiredLength = null, $ellipseStr = "…", $stripTags = false, $nbsp = false)
{
    // Do not load external entities - this would be a security risk.
    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, where
    // external entity loading is already disabled by default, so it is only
    // toggled on older runtimes to avoid deprecation notices.
    $canToggleEntityLoader = \PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader');
    $prevEntityLoaderDisabled = $canToggleEntityLoader ? libxml_disable_entity_loader(true) : null;
    // Don't crash on invalid HTML, but recover gracefully
    $prevInternalErrors = libxml_use_internal_errors(true);

    try {
        // We use htmlmaid to clean up the HTML, but because we also have to
        // step through the DOM ourselves to perform the trimming, we do the
        // DOM loading ourselves rather than leave it to Maid.
        $doc = new \DOMDocument();
        // We need a bit of wrapping here to keep DOMDocument from adding rogue nodes
        // around our HTML. By doing it explicitly, we keep things under control.
        $doc->loadHTML('<!DOCTYPE html><html>' . '<head><meta http-equiv="Content-type" content="text/html;charset=utf-8"/></head>' . '<body><div>' . $html . '</div></body>' . '</html>');

        $options = [];
        if ($stripTags) {
            $options['allowed-tags'] = [];
        } else {
            $options['allowed-tags'] = ['a', 'div', 'p', 'b', 'i', 'hr', 'br', 'strong', 'em'];
        }
        $options['allowed-attribs'] = ['href', 'src', 'id', 'class', 'style'];
        $maid = new Maid($options);
        // <html> -> <head> -> (next sibling) <body> -> our wrapper <div>
        $cleanedNodes = $maid->clean($doc->documentElement->firstChild->nextSibling->firstChild);

        // To collect the cleaned nodes from a node list into a containing node,
        // we have to create yet another document, because cloning nodes inside
        // the same ownerDocument for some reason modifies our node list.
        $cleanedDoc = new \DOMDocument();
        $cleanedNode = $cleanedDoc->createElement('div');
        $nodeCount = $cleanedNodes->length;
        for ($i = 0; $i < $nodeCount; ++$i) {
            $node = $cleanedNodes->item($i);
            $cnode = $cleanedDoc->importNode($node, true);
            $cleanedNode->appendChild($cnode);
        }

        // And now we'll create yet another document to collect our trimmed
        // nodes. Again, some wrapping is necessary here...
        $newDoc = new \DOMDocument();
        $newDoc->loadHTML('<html><body><div></div></body></html>');
        // <html> -> <body> -> the empty target <div>
        $newNode = $newDoc->documentElement->firstChild->firstChild;

        // Passed as a variable rather than a literal; presumably the helper
        // takes the remaining length by reference - confirm against
        // _collectNodesUpToLength().
        $length = $desiredLength;
        _collectNodesUpToLength($cleanedNode, $newNode, $length, $ellipseStr);

        // Convert spaces inside text nodes to &nbsp;
        // This will actually insert the unicode non-breaking space, so we'll have
        // to massage our output at the HTML byte-string level later.
        // NOTE(review): this only targets the "two levels deep" container (see
        // below) and dereferences firstChild without a null check - confirm
        // that domSpacesToNBSP() copes with both situations.
        if ($nbsp) {
            domSpacesToNBSP($newNode->firstChild->firstChild);
        }

        // This is some terrible shotgun hacking; for some reason, the above code
        // will sometimes put our desired nodes two levels deep, but in other
        // cases, it'll descend one less level. The proper solution would be
        // to sort out why this is, but for now, just detecting which of the
        // two happened seems to work well enough.
        if (isset($newNode->firstChild->firstChild->childNodes)) {
            $nodes = $newNode->firstChild->firstChild->childNodes;
        } elseif (isset($newNode->firstChild->childNodes)) {
            $nodes = $newNode->firstChild->childNodes;
        } else {
            $nodes = [];
        }

        // And now we convert our target nodes to HTML.
        // Because we don't want any of the wrapper nodes to appear in the
        // output, we'll have to convert them one by one and concatenate the
        // HTML.
        $result = '';
        foreach ($nodes as $node) {
            $result .= Maid::renderFragment($node);
        }

        if ($nbsp) {
            // "\xC2\xA0" is U+00A0 (no-break space) encoded as UTF-8. It is
            // spelled out explicitly because html_entity_decode('&nbsp;')
            // without an encoding argument follows the default_charset ini
            // setting and may yield different bytes than the UTF-8 document
            // we are processing.
            $result = str_replace("\xC2\xA0", '&nbsp;', $result);
        }

        return $result;
    } finally {
        // Restore previous libxml settings, also when processing failed.
        if ($canToggleEntityLoader) {
            libxml_disable_entity_loader($prevEntityLoaderDisabled);
        }
        libxml_use_internal_errors($prevInternalErrors);
    }
}
Esempio n. 4
0
 /**
  * Get the decoded version of a value of the current object.
  *
  * How the stored value is decoded depends on the field's type:
  *  - markdown: pre-parsed, rendered to HTML, sanitised and wrapped as Twig markup
  *  - html, text, textarea: pre-parsed and wrapped as Twig markup
  *  - imagelist, filelist: JSON-decoded when stored as a string, otherwise returned as is
  *  - image: the 'file' entry when the value is an array that has one, otherwise the value
  *  - anything else: the stored value, untouched
  *
  * @param string $name name of the value to get
  *
  * @return mixed The decoded value or null when no value available
  */
 public function getDecodedValue($name)
 {
     // Nothing stored under this name (or it is null): nothing to decode.
     if (!isset($this->values[$name])) {
         return null;
     }

     $type = $this->fieldtype($name);
     $info = $this->fieldinfo($name);
     $twigAllowed = !empty($info['allowtwig']);
     $raw = $this->values[$name];

     switch ($type) {
         case 'markdown':
             $source = $this->preParse($raw, $twigAllowed);
             // Parse the field as Markdown, which yields HTML
             $html = $this->app['markdown']->text($source);

             // Whitelists come from the configuration, with built-in
             // defaults for anything that is missing or empty there.
             $cleanerConfig = $this->app['config']->get('general/htmlcleaner');
             if (empty($cleanerConfig['allowed_tags'])) {
                 $tags = ['div', 'p', 'br', 'hr', 's', 'u', 'strong', 'em', 'i', 'b', 'li', 'ul', 'ol', 'blockquote', 'pre', 'code', 'tt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'dd', 'dl', 'dt', 'table', 'tbody', 'thead', 'tfoot', 'th', 'td', 'tr', 'a', 'img'];
             } else {
                 $tags = $cleanerConfig['allowed_tags'];
             }
             if (empty($cleanerConfig['allowed_attributes'])) {
                 $attributes = ['id', 'class', 'name', 'value', 'href', 'src'];
             } else {
                 $attributes = $cleanerConfig['allowed_attributes'];
             }

             // Sanitize/clean the HTML.
             $cleaner = new Maid([
                 'output-format'   => 'html',
                 'allowed-tags'    => $tags,
                 'allowed-attribs' => $attributes,
             ]);

             return new \Twig_Markup($cleaner->clean($html), 'UTF-8');

         case 'html':
         case 'text':
         case 'textarea':
             return new \Twig_Markup($this->preParse($raw, $twigAllowed), 'UTF-8');

         case 'imagelist':
         case 'filelist':
             // A string is parsed as JSON; anything else is returned as stored.
             return is_string($raw) ? json_decode($raw) : $raw;

         case 'image':
             // Prefer the 'file' entry when the value is an array carrying one.
             return (is_array($raw) && isset($raw['file'])) ? $raw['file'] : $raw;

         default:
             return $raw;
     }
 }
Esempio n. 5
0
 /**
  * Formats the given string as Markdown in HTML.
  *
  * The rendered HTML is sanitised afterwards. The tag and attribute
  * whitelists are read from the 'general/htmlcleaner' configuration, falling
  * back to built-in defaults when an entry is missing or empty.
  *
  * @param string $content
  *
  * @return string Markdown output
  */
 public function markdown($content)
 {
     // Parse the field as Markdown, return HTML
     $output = $this->app['markdown']->text($content);

     $config = $this->app['config']->get('general/htmlcleaner');
     // The default list contains 'dt' (definition term), not 'dh': there is
     // no <dh> element in HTML, and without 'dt' the terms of definition
     // lists (dl/dt/dd) are not whitelisted.
     $allowed_tags = !empty($config['allowed_tags']) ? $config['allowed_tags'] : ['div', 'p', 'br', 'hr', 's', 'u', 'strong', 'em', 'i', 'b', 'li', 'ul', 'ol', 'blockquote', 'pre', 'code', 'tt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'dd', 'dl', 'dt', 'table', 'tbody', 'thead', 'tfoot', 'th', 'td', 'tr', 'a', 'img'];
     $allowed_attributes = !empty($config['allowed_attributes']) ? $config['allowed_attributes'] : ['id', 'class', 'name', 'value', 'href', 'src'];

     // Sanitize/clean the HTML.
     $maid = new Maid(['output-format' => 'html', 'allowed-tags' => $allowed_tags, 'allowed-attribs' => $allowed_attributes]);
     $output = $maid->clean($output);

     return $output;
 }
Esempio n. 6
0
 /**
  * {@inheritdoc}
  */
 public function sanitise($value, $isWysiwyg = false)
 {
     // WYSIWYG input gets its own tag whitelist.
     if ($isWysiwyg) {
         $tags = $this->getWyswigAllowedTags();
     } else {
         $tags = $this->getAllowedTags();
     }

     $options = [
         'output-format'   => 'html',
         'allowed-tags'    => $tags,
         'allowed-attribs' => $this->getAllowedAttributes(),
     ];
     $cleaner = new Maid($options);

     return $cleaner->clean($value);
 }