/**
 * Sanitizes an HTML message body and wraps it into two marker DIVs.
 *
 * The text is pre-filtered with HtmlUtils::ClearTags(), ClearOn() and
 * ClearBodyAndHtmlTag(), parsed into a DOM, and then every element is processed:
 *  - elements from a fixed list of unwanted tags (script, style, iframe, ...) are removed;
 *  - legacy BODY attributes (text, *margin) are converted to inline styles;
 *  - id/class/contenteditable/... and a fixed list of on* attributes are removed;
 *  - href values without an http(s)/ftp/skype/mailto scheme are moved to
 *    "data-x-broken-href" and replaced with "javascript:false";
 *  - src values are moved to data-x-src* attributes (see the parameters below);
 *  - background/bgcolor attributes are converted to inline styles and the
 *    resulting style attribute is passed through HtmlUtils::ClearStyle().
 *
 * @param string $sHtml Source HTML. Null is treated as an empty string.
 * @param bool $bHasExternals = false Output. Reset to false, then set to true when an http(s) src is found.
 *        It is also handed to HtmlUtils::ClearStyle(), which may update it (not visible here).
 * @param array $aFoundCIDs = array() Output. Receives the part after "cid:" of every cid: src.
 * @param array $aContentLocationUrls = array() src values that must be treated as Content-Location references.
 * @param array $aFoundedContentLocationUrls = array() Output. Receives every src found in $aContentLocationUrls.
 * @param bool $bDoNotReplaceExternalUrl = false When true, http(s) src values are kept in "src"
 *        instead of being moved to "data-x-src".
 * @param bool $bFindLinksInHtml = false When true, HtmlUtils::FindLinksInDOM() is applied to the DOM first.
 * @param callback|null $fAdditionalExternalFilter = null Called with every replaced external src; a non-empty
 *        result is stored in "data-x-additional-src". Non-callable values are ignored.
 *
 * @return string Sanitized HTML, or an empty string when the trimmed input is empty.
 */
public static function ClearHtml($sHtml, &$bHasExternals = false, &$aFoundCIDs = array(),
	$aContentLocationUrls = array(), &$aFoundedContentLocationUrls = array(),
	$bDoNotReplaceExternalUrl = false, $bFindLinksInHtml = false, $fAdditionalExternalFilter = null)
{
	$sResult = '';

	$sHtml = null === $sHtml ? '' : (string) $sHtml;
	$sHtml = \trim($sHtml);
	if (0 === \strlen($sHtml))
	{
		return '';
	}

	if ($fAdditionalExternalFilter && !\is_callable($fAdditionalExternalFilter))
	{
		$fAdditionalExternalFilter = null;
	}

	$bHasExternals = false;

	$sHtml = \MailSo\Base\HtmlUtils::ClearTags($sHtml);
	$sHtml = \MailSo\Base\HtmlUtils::ClearOn($sHtml);

	$sHtmlAttrs = $sBodyAttrs = '';
	$sHtml = \MailSo\Base\HtmlUtils::ClearBodyAndHtmlTag($sHtml, $sHtmlAttrs, $sBodyAttrs);

	// Dom Part
	$oDom = \MailSo\Base\HtmlUtils::GetDomFromText($sHtml, $sHtmlAttrs, $sBodyAttrs);
	unset($sHtml);

	if ($oDom)
	{
		if ($bFindLinksInHtml)
		{
			\MailSo\Base\HtmlUtils::FindLinksInDOM($oDom);
		}

		$aForbiddenTags = array('svg', 'head', 'link', 'base', 'meta', 'title', 'style', 'script',
			'bgsound', 'keygen', 'source', 'object', 'embed', 'applet', 'mocha',
			'iframe', 'frame', 'frameset', 'video', 'audio');

		// getElementsByTagName() returns a live list: removing nodes while iterating it
		// re-indexes the list and silently skips elements. Iterate over a snapshot instead
		// so that every forbidden element is really removed.
		$aSnapshot = \iterator_to_array($oDom->getElementsByTagName('*'), false);
		foreach ($aSnapshot as $oElement)
		{
			if (\in_array(\strtolower($oElement->tagName), $aForbiddenTags, true) &&
				isset($oElement->parentNode))
			{
				@$oElement->parentNode->removeChild($oElement);
			}
		}
		unset($aSnapshot);

		$aTargetTags = array('a', 'form', 'area');
		$aTabIndexTags = array('a', 'form', 'area', 'input', 'button', 'textarea');
		$aDropAttributes = array('id', 'class', 'contenteditable', 'designmode', 'formaction',
			'data-bind', 'xmlns', 'srcset');
		$aDropEvents = array('load', 'blur', 'error', 'focus', 'formchange', 'change', 'click', 'dblclick',
			'keydown', 'keypress', 'keyup', 'mousedown', 'mouseenter', 'mouseleave', 'mousemove',
			'mouseout', 'mouseover', 'mouseup', 'move', 'resize', 'resizeend', 'resizestart',
			'scroll', 'select', 'submit', 'upload');

		// No nodes are added or removed below, so the live list can be iterated directly.
		foreach ($oDom->getElementsByTagName('*') as $oElement)
		{
			$sTagNameLower = \strtolower($oElement->tagName);

			// convert body attributes to styles
			if ('body' === $sTagNameLower)
			{
				// '' means "not seen yet"; a found attribute is stored as array(original name, value).
				$aAttrs = array(
					'text' => '',
					'topmargin' => '',
					'leftmargin' => '',
					'bottommargin' => '',
					'rightmargin' => ''
				);

				if (isset($oElement->attributes))
				{
					foreach ($oElement->attributes as $sAttributeName => $oAttributeNode)
					{
						if ($oAttributeNode && isset($oAttributeNode->nodeValue))
						{
							$sAttributeNameLower = \strtolower($sAttributeName);
							if (isset($aAttrs[$sAttributeNameLower]) && '' === $aAttrs[$sAttributeNameLower])
							{
								$aAttrs[$sAttributeNameLower] = array($sAttributeName, \trim($oAttributeNode->nodeValue));
							}
						}
					}
				}

				$aStyles = array();
				foreach ($aAttrs as $sIndex => $aItem)
				{
					if (\is_array($aItem))
					{
						$oElement->removeAttribute($aItem[0]);

						switch ($sIndex)
						{
							case 'text':
								$aStyles[] = 'color: ' . $aItem[1];
								break;
							case 'topmargin':
								$aStyles[] = 'margin-top: ' . (int) $aItem[1] . 'px';
								break;
							case 'leftmargin':
								$aStyles[] = 'margin-left: ' . (int) $aItem[1] . 'px';
								break;
							case 'bottommargin':
								$aStyles[] = 'margin-bottom: ' . (int) $aItem[1] . 'px';
								break;
							case 'rightmargin':
								$aStyles[] = 'margin-right: ' . (int) $aItem[1] . 'px';
								break;
						}
					}
				}

				if (0 < \count($aStyles))
				{
					$sStyles = $oElement->hasAttribute('style') ? $oElement->getAttribute('style') : '';
					$oElement->setAttribute('style',
						(empty($sStyles) ? '' : $sStyles . '; ') . \implode('; ', $aStyles));
				}
			}

			// Frames are removed above; this neutralizes any that are still present.
			if ('iframe' === $sTagNameLower || 'frame' === $sTagNameLower)
			{
				$oElement->setAttribute('src', 'javascript:false');
			}

			if (\in_array($sTagNameLower, $aTargetTags, true))
			{
				$oElement->setAttribute('target', '_blank');
			}

			if (\in_array($sTagNameLower, $aTabIndexTags, true))
			{
				$oElement->setAttribute('tabindex', '-1');
			}

			foreach ($aDropAttributes as $sAttr)
			{
				@$oElement->removeAttribute($sAttr);
			}

			foreach ($aDropEvents as $sAttr)
			{
				@$oElement->removeAttribute('on' . $sAttr);
			}

			if ($oElement->hasAttribute('href'))
			{
				$sHref = \trim($oElement->getAttribute('href'));
				if (!\preg_match('/^(http[s]?|ftp|skype|mailto):/i', $sHref))
				{
					$oElement->setAttribute('data-x-broken-href', $sHref);
					$oElement->setAttribute('href', 'javascript:false');
				}
				else if ('a' === $sTagNameLower)
				{
					$oElement->setAttribute('rel', 'external');
				}
			}

			if ($oElement->hasAttribute('src'))
			{
				$sSrc = \trim($oElement->getAttribute('src'));
				$oElement->removeAttribute('src');

				if (\in_array($sSrc, $aContentLocationUrls))
				{
					$oElement->setAttribute('data-x-src-location', $sSrc);
					$aFoundedContentLocationUrls[] = $sSrc;
				}
				else if ('cid:' === \strtolower(\substr($sSrc, 0, 4)))
				{
					$sCid = \substr($sSrc, 4);
					$oElement->setAttribute('data-x-src-cid', $sCid);
					$aFoundCIDs[] = $sCid;
				}
				// Anchored: the value has to START with http(s)://, not merely contain it.
				else if (\preg_match('/^http[s]?:\\/\\//i', $sSrc))
				{
					if ($bDoNotReplaceExternalUrl)
					{
						$oElement->setAttribute('src', $sSrc);
					}
					else
					{
						$oElement->setAttribute('data-x-src', $sSrc);
						if ($fAdditionalExternalFilter)
						{
							$sCallResult = \call_user_func($fAdditionalExternalFilter, $sSrc);
							// Non-scalar results (null, arrays, objects) are treated as "no result".
							if (\is_scalar($sCallResult) && 0 < \strlen((string) $sCallResult))
							{
								$oElement->setAttribute('data-x-additional-src', $sCallResult);
							}
						}
					}

					$bHasExternals = true;
				}
				else if ('data:image/' === \strtolower(\substr($sSrc, 0, 11)))
				{
					$oElement->setAttribute('src', $sSrc);
				}
				else
				{
					$oElement->setAttribute('data-x-broken-src', $sSrc);
				}
			}

			$sBackground = $oElement->hasAttribute('background')
				? \trim($oElement->getAttribute('background')) : '';

			$sBackgroundColor = $oElement->hasAttribute('bgcolor')
				? \trim($oElement->getAttribute('bgcolor')) : '';

			if (!empty($sBackground) || !empty($sBackgroundColor))
			{
				$aStyles = array();
				$sStyles = $oElement->hasAttribute('style') ? $oElement->getAttribute('style') : '';

				if (!empty($sBackground))
				{
					$aStyles[] = 'background-image: url(\'' . $sBackground . '\')';
					$oElement->removeAttribute('background');
				}

				if (!empty($sBackgroundColor))
				{
					$aStyles[] = 'background-color: ' . $sBackgroundColor;
					$oElement->removeAttribute('bgcolor');
				}

				$oElement->setAttribute('style',
					(empty($sStyles) ? '' : $sStyles . '; ') . \implode('; ', $aStyles));
			}

			// Runs last so that styles generated above are filtered as well.
			if ($oElement->hasAttribute('style'))
			{
				$oElement->setAttribute('style',
					\MailSo\Base\HtmlUtils::ClearStyle($oElement->getAttribute('style'), $oElement, $bHasExternals,
						$aFoundCIDs, $aContentLocationUrls, $aFoundedContentLocationUrls,
						$bDoNotReplaceExternalUrl, $fAdditionalExternalFilter));
			}
		}

		$sResult = $oDom->saveHTML();
	}

	unset($oDom);

	$sResult = \MailSo\Base\HtmlUtils::ClearTags($sResult);

	$sHtmlAttrs = $sBodyAttrs = '';
	$sResult = \MailSo\Base\HtmlUtils::ClearBodyAndHtmlTag($sResult, $sHtmlAttrs, $sBodyAttrs);

	// The former <body> / <html> tags are represented by marker DIVs carrying their attributes.
	$sResult = '<div data-x-div-type="body" ' . $sBodyAttrs . '>' . $sResult . '</div>';
	$sResult = '<div data-x-div-type="html" ' . $sHtmlAttrs . '>' . $sResult . '</div>';

	$sResult = \str_replace(\MailSo\Base\HtmlUtils::$KOS, ':', $sResult);

	return \trim($sResult);
}
/**
 * Sanitizes an HTML message body and wraps it into two marker DIVs.
 *
 * The text is pre-filtered with HtmlUtils::FixSchemas(), ClearTags(), ClearOn() and
 * ClearBodyAndHtmlTag(), parsed into a DOM, and then:
 *  - comment nodes are removed;
 *  - elements from a fixed list of unwanted tags (script, style, iframe, map, ...) are removed;
 *  - legacy BODY attributes (link, text, *margin) are converted to inline styles;
 *  - id/class/contenteditable/... and a fixed list of on* attributes are removed;
 *  - href values that neither use an http(s)/ftp/skype/mailto scheme nor start with "//"
 *    are moved to "data-x-broken-href" and replaced with "javascript:false";
 *  - optionally, images that look like tracking pixels are hidden;
 *  - src values are moved to data-x-src* attributes (see the parameters below);
 *  - background/bgcolor attributes are converted to inline styles and the
 *    resulting style attribute is passed through HtmlUtils::ClearStyle().
 *
 * @param string $sHtml Source HTML. Null is treated as an empty string.
 * @param bool $bHasExternals = false Output. Reset to false, then set to true when an external src is found.
 *        It is also handed to HtmlUtils::ClearStyle(), which may update it (not visible here).
 * @param array $aFoundCIDs = array() Output. Receives the part after "cid:" of every cid: src.
 * @param array $aContentLocationUrls = array() src values that must be treated as Content-Location references.
 * @param array $aFoundedContentLocationUrls = array() Output. Receives every src found in $aContentLocationUrls.
 * @param bool $bDoNotReplaceExternalUrl = false When true, external src values are kept in "src"
 *        instead of being moved to "data-x-src".
 * @param bool $bFindLinksInHtml = false When true, HtmlUtils::FindLinksInDOM() is applied to the DOM.
 * @param callback|null $fAdditionalExternalFilter = null Called with every replaced external src; a non-empty
 *        result is stored in "data-x-additional-src". Non-callable values are ignored.
 * @param callback|null|false $fAdditionalDomReader = false Called with the parsed DOM; a truthy return value
 *        replaces the DOM used for the rest of the processing. Non-callable values are ignored.
 * @param bool $bTryToDetectHiddenImages = false When true, IMG elements with an empty alt that are
 *        0/1 pixel high, hidden by style or served from a known tracking URL lose their src
 *        (kept in "data-x-hidden-src") and get "display:none".
 *
 * @return string Sanitized HTML, or an empty string when the trimmed input is empty.
 */
public static function ClearHtml($sHtml, &$bHasExternals = false, &$aFoundCIDs = array(),
	$aContentLocationUrls = array(), &$aFoundedContentLocationUrls = array(),
	$bDoNotReplaceExternalUrl = false, $bFindLinksInHtml = false,
	$fAdditionalExternalFilter = null, $fAdditionalDomReader = false, $bTryToDetectHiddenImages = false)
{
	$sResult = '';

	$sHtml = null === $sHtml ? '' : (string) $sHtml;
	$sHtml = \trim($sHtml);
	if (0 === \strlen($sHtml))
	{
		return '';
	}

	if ($fAdditionalExternalFilter && !\is_callable($fAdditionalExternalFilter))
	{
		$fAdditionalExternalFilter = null;
	}

	if ($fAdditionalDomReader && !\is_callable($fAdditionalDomReader))
	{
		$fAdditionalDomReader = null;
	}

	$bHasExternals = false;

	$sHtml = \MailSo\Base\HtmlUtils::FixSchemas($sHtml);
	$sHtml = \MailSo\Base\HtmlUtils::ClearTags($sHtml, false);
	$sHtml = \MailSo\Base\HtmlUtils::ClearOn($sHtml);

	$sHtmlAttrs = $sBodyAttrs = '';
	$sHtml = \MailSo\Base\HtmlUtils::ClearBodyAndHtmlTag($sHtml, $sHtmlAttrs, $sBodyAttrs);

	// Dom Part
	$oDom = \MailSo\Base\HtmlUtils::GetDomFromText($sHtml, $sHtmlAttrs, $sBodyAttrs);
	unset($sHtml);

	if ($oDom)
	{
		if ($fAdditionalDomReader)
		{
			$oResDom = \call_user_func($fAdditionalDomReader, $oDom);
			if ($oResDom)
			{
				$oDom = $oResDom;
			}

			unset($oResDom);
		}

		if ($bFindLinksInHtml)
		{
			\MailSo\Base\HtmlUtils::FindLinksInDOM($oDom);
		}

		$oXpath = new \DOMXpath($oDom);
		$oComments = $oXpath->query('//comment()');
		if ($oComments)
		{
			foreach ($oComments as $oComment)
			{
				if (isset($oComment->parentNode))
				{
					@$oComment->parentNode->removeChild($oComment);
				}
			}
		}

		unset($oXpath, $oComments);

		$aForbiddenTags = array('svg', 'head', 'link', 'base', 'meta', 'title', 'style', 'x-script', 'script',
			'bgsound', 'keygen', 'source', 'object', 'embed', 'applet', 'mocha',
			'iframe', 'frame', 'frameset', 'video', 'audio', 'area', 'map');

		// getElementsByTagName() returns a live list: removing nodes while iterating it
		// re-indexes the list and silently skips elements. Iterate over a snapshot instead
		// so that every forbidden element is really removed.
		$aSnapshot = \iterator_to_array($oDom->getElementsByTagName('*'), false);
		foreach ($aSnapshot as $oElement)
		{
			if ($oElement)
			{
				$sTagNameLower = \strtolower($oElement->tagName);
				if ('' !== $sTagNameLower && \in_array($sTagNameLower, $aForbiddenTags, true) &&
					isset($oElement->parentNode))
				{
					@$oElement->parentNode->removeChild($oElement);
				}
			}
		}
		unset($aSnapshot);

		$aTargetTags = array('a', 'form', 'area');
		$aTabIndexTags = array('a', 'form', 'area', 'input', 'button', 'textarea');
		// "data-x-skip-style" is an internal marker (set below); strip any copy coming from the input.
		$aDropAttributes = array('id', 'class', 'contenteditable', 'designmode', 'formaction',
			'data-bind', 'data-reactid', 'xmlns', 'srcset', 'data-x-skip-style');
		$aDropEvents = array('load', 'blur', 'error', 'focus', 'formchange', 'change', 'click', 'dblclick',
			'keydown', 'keypress', 'keyup', 'mousedown', 'mouseenter', 'mouseleave', 'mousemove',
			'mouseout', 'mouseover', 'mouseup', 'move', 'resize', 'resizeend', 'resizestart',
			'scroll', 'select', 'submit', 'upload');
		// URL fragments that mark an image as a tracking beacon.
		$aHiddenImageHosts = array('email.microsoftemail.com/open', 'github.com/notifications/beacon/',
			'mandrillapp.com/track/open', 'list-manage.com/track/open');

		// Filled from <body link="..."> and applied to the A elements visited afterwards.
		$sLinkColor = '';

		// No nodes are added or removed below, so the live list can be iterated directly.
		foreach ($oDom->getElementsByTagName('*') as $oElement)
		{
			$sTagNameLower = \strtolower($oElement->tagName);

			// convert body attributes to styles
			if ('body' === $sTagNameLower)
			{
				// '' means "not seen yet"; a found attribute is stored as array(original name, value).
				$aAttrs = array(
					'link' => '',
					'text' => '',
					'topmargin' => '',
					'leftmargin' => '',
					'bottommargin' => '',
					'rightmargin' => ''
				);

				if (isset($oElement->attributes))
				{
					foreach ($oElement->attributes as $sAttributeName => $oAttributeNode)
					{
						if ($oAttributeNode && isset($oAttributeNode->nodeValue))
						{
							$sAttributeNameLower = \strtolower($sAttributeName);
							if (isset($aAttrs[$sAttributeNameLower]) && '' === $aAttrs[$sAttributeNameLower])
							{
								$aAttrs[$sAttributeNameLower] = array($sAttributeName, \trim($oAttributeNode->nodeValue));
							}
						}
					}
				}

				$aStyles = array();
				foreach ($aAttrs as $sIndex => $aItem)
				{
					if (\is_array($aItem))
					{
						$oElement->removeAttribute($aItem[0]);

						switch ($sIndex)
						{
							case 'link':
								// Only "#" followed by 3 to 6 hex digits is accepted as a link color.
								$sLinkColor = \trim($aItem[1]);
								if (!\preg_match('/^#[abcdef0-9]{3,6}$/i', $sLinkColor))
								{
									$sLinkColor = '';
								}
								break;
							case 'text':
								$aStyles[] = 'color: ' . $aItem[1];
								break;
							case 'topmargin':
								$aStyles[] = 'margin-top: ' . (int) $aItem[1] . 'px';
								break;
							case 'leftmargin':
								$aStyles[] = 'margin-left: ' . (int) $aItem[1] . 'px';
								break;
							case 'bottommargin':
								$aStyles[] = 'margin-bottom: ' . (int) $aItem[1] . 'px';
								break;
							case 'rightmargin':
								$aStyles[] = 'margin-right: ' . (int) $aItem[1] . 'px';
								break;
						}
					}
				}

				if (0 < \count($aStyles))
				{
					$sStyles = $oElement->hasAttribute('style')
						? \trim(\trim(\trim($oElement->getAttribute('style')), ';')) : '';

					$oElement->setAttribute('style',
						(empty($sStyles) ? '' : $sStyles . '; ') . \implode('; ', $aStyles));
				}
			}

			// Frames are removed above; this neutralizes any that are still present.
			if ('iframe' === $sTagNameLower || 'frame' === $sTagNameLower)
			{
				$oElement->setAttribute('src', 'javascript:false');
			}

			if ('a' === $sTagNameLower && !empty($sLinkColor))
			{
				$sStyles = $oElement->hasAttribute('style')
					? \trim(\trim(\trim($oElement->getAttribute('style')), ';')) : '';

				// The body link color goes first so that the element's own styles can override it.
				$oElement->setAttribute('style',
					'color: ' . $sLinkColor . \trim(empty($sStyles) ? '' : '; ' . $sStyles));
			}

			if (\in_array($sTagNameLower, $aTargetTags, true))
			{
				$oElement->setAttribute('target', '_blank');
			}

			if (\in_array($sTagNameLower, $aTabIndexTags, true))
			{
				$oElement->setAttribute('tabindex', '-1');
			}

			foreach ($aDropAttributes as $sAttr)
			{
				@$oElement->removeAttribute($sAttr);
			}

			foreach ($aDropEvents as $sAttr)
			{
				@$oElement->removeAttribute('on' . $sAttr);
			}

			if ($oElement->hasAttribute('href'))
			{
				$sHref = \trim($oElement->getAttribute('href'));
				if (!\preg_match('/^(http[s]?|ftp|skype|mailto):/i', $sHref) && '//' !== \substr($sHref, 0, 2))
				{
					$oElement->setAttribute('data-x-broken-href', $sHref);
					$oElement->setAttribute('href', 'javascript:false');
				}

				if ('a' === $sTagNameLower)
				{
					$oElement->setAttribute('rel', 'external nofollow');
				}
			}

			if ($bTryToDetectHiddenImages && 'img' === $sTagNameLower)
			{
				$sAlt = $oElement->hasAttribute('alt')
					? \trim($oElement->getAttribute('alt')) : '';

				if ($oElement->hasAttribute('src') && '' === $sAlt)
				{
					$sH = $oElement->hasAttribute('height')
						? \trim($oElement->getAttribute('height')) : '';

					// Whitespace is stripped so that the pattern below can match "display: none" etc.
					$sStyles = $oElement->hasAttribute('style')
						? \preg_replace('/[\\s]+/', '', \trim(\trim(\trim($oElement->getAttribute('style')), ';'))) : '';

					$sSrc = \trim($oElement->getAttribute('src'));

					// Loose in_array() is kept on purpose: numeric variants such as "01" still match.
					$bC = \in_array($sH, array('1', '0', '1px', '0px')) ||
						\preg_match('/(display:none|visibility:hidden|height:0|height:[01][a-z][a-z])/i', $sStyles);

					if (!$bC)
					{
						$sSrcLower = \strtolower($sSrc);
						foreach ($aHiddenImageHosts as $sLine)
						{
							if (false !== \strpos($sSrcLower, $sLine))
							{
								$bC = true;
								break;
							}
						}
					}

					if ($bC)
					{
						// With src removed, the generic src handling below is skipped for this image;
						// the marker keeps the forced style away from ClearStyle().
						$oElement->setAttribute('style', 'display:none');
						$oElement->setAttribute('data-x-skip-style', 'true');
						$oElement->setAttribute('data-x-hidden-src', $sSrc);
						$oElement->removeAttribute('src');
					}
				}
			}

			if ($oElement->hasAttribute('src'))
			{
				$sSrc = \trim($oElement->getAttribute('src'));
				$oElement->removeAttribute('src');

				if (\in_array($sSrc, $aContentLocationUrls))
				{
					$oElement->setAttribute('data-x-src-location', $sSrc);
					$aFoundedContentLocationUrls[] = $sSrc;
				}
				else if ('cid:' === \strtolower(\substr($sSrc, 0, 4)))
				{
					$sCid = \substr($sSrc, 4);
					$oElement->setAttribute('data-x-src-cid', $sCid);
					$aFoundCIDs[] = $sCid;
				}
				else if (\preg_match('/^http[s]?:\\/\\//i', $sSrc) || '//' === \substr($sSrc, 0, 2))
				{
					if ($bDoNotReplaceExternalUrl)
					{
						$oElement->setAttribute('src', $sSrc);
					}
					else
					{
						$oElement->setAttribute('data-x-src', $sSrc);
						if ($fAdditionalExternalFilter)
						{
							$sCallResult = \call_user_func($fAdditionalExternalFilter, $sSrc);
							// Non-scalar results (null, arrays, objects) are treated as "no result".
							if (\is_scalar($sCallResult) && 0 < \strlen((string) $sCallResult))
							{
								$oElement->setAttribute('data-x-additional-src', $sCallResult);
							}
						}
					}

					$bHasExternals = true;
				}
				else if ('data:image/' === \strtolower(\substr($sSrc, 0, 11)))
				{
					$oElement->setAttribute('src', $sSrc);
				}
				else
				{
					$oElement->setAttribute('data-x-broken-src', $sSrc);
				}
			}

			$sBackground = $oElement->hasAttribute('background')
				? \trim($oElement->getAttribute('background')) : '';

			$sBackgroundColor = $oElement->hasAttribute('bgcolor')
				? \trim($oElement->getAttribute('bgcolor')) : '';

			if (!empty($sBackground) || !empty($sBackgroundColor))
			{
				$aStyles = array();
				$sStyles = $oElement->hasAttribute('style')
					? \trim(\trim(\trim($oElement->getAttribute('style')), ';')) : '';

				if (!empty($sBackground))
				{
					$aStyles[] = 'background-image: url(\'' . $sBackground . '\')';
					$oElement->removeAttribute('background');
				}

				if (!empty($sBackgroundColor))
				{
					$aStyles[] = 'background-color: ' . $sBackgroundColor;
					$oElement->removeAttribute('bgcolor');
				}

				$oElement->setAttribute('style',
					(empty($sStyles) ? '' : $sStyles . '; ') . \implode('; ', $aStyles));
			}

			// Runs last so that styles generated above are filtered as well.
			if ($oElement->hasAttribute('style') && !$oElement->hasAttribute('data-x-skip-style'))
			{
				$oElement->setAttribute('style',
					\MailSo\Base\HtmlUtils::ClearStyle($oElement->getAttribute('style'), $oElement, $bHasExternals,
						$aFoundCIDs, $aContentLocationUrls, $aFoundedContentLocationUrls,
						$bDoNotReplaceExternalUrl, $fAdditionalExternalFilter));
			}

			// The marker is internal only and must not reach the output.
			$oElement->removeAttribute('data-x-skip-style');
		}

		$sResult = $oDom->saveHTML();
	}

	unset($oDom);

	$sResult = \MailSo\Base\HtmlUtils::ClearTags($sResult);

	$sHtmlAttrs = $sBodyAttrs = '';
	$sResult = \MailSo\Base\HtmlUtils::ClearBodyAndHtmlTag($sResult, $sHtmlAttrs, $sBodyAttrs);

	// The former <body> / <html> tags are represented by marker DIVs carrying their attributes.
	$sResult = '<div data-x-div-type="body" ' . $sBodyAttrs . '>' . $sResult . '</div>';
	$sResult = '<div data-x-div-type="html" ' . $sHtmlAttrs . '>' . $sResult . '</div>';

	$sResult = \str_replace(\MailSo\Base\HtmlUtils::$KOS, ':', $sResult);
	$sResult = \MailSo\Base\Utils::StripSpaces($sResult);

	return \trim($sResult);
}
/**
 * Sanitizes an HTML message body.
 *
 * The text is pre-filtered with HtmlUtils::ClearTags(), ClearOn() and
 * ClearBodyAndHtmlTag(), parsed into a DOM, and then for every element:
 *  - iframe/frame sources are replaced with "javascript:false";
 *  - links and forms get target="_blank", focusable elements get tabindex="-1";
 *  - id/class/contenteditable/designmode/data-bind/xmlns attributes are removed;
 *  - src values are moved to data-x-src* attributes (see the parameters below);
 *  - background/bgcolor attributes are converted to inline styles and the
 *    resulting style attribute is passed through HtmlUtils::ClearStyle().
 *
 * @param string $sHtml Source HTML. Null is treated as an empty string.
 * @param bool $bHasExternals = false Output. Reset to false, then set to true when an http(s) src is found.
 *        It is also handed to HtmlUtils::ClearStyle(), which may update it (not visible here).
 * @param array $aFoundCIDs = array() Output. Receives the part after "cid:" of every cid: src.
 * @param array $aContentLocationUrls = array() src values that must be treated as Content-Location references.
 * @param array $aFoundedContentLocationUrls = array() Output. Receives every src found in $aContentLocationUrls.
 *
 * @return string Sanitized HTML, or an empty string when the trimmed input is empty.
 */
public static function ClearHtml($sHtml, &$bHasExternals = false, &$aFoundCIDs = array(),
	$aContentLocationUrls = array(), &$aFoundedContentLocationUrls = array())
{
	// Initialized up front: it is read below even when no DOM could be built.
	$sResult = '';

	$sHtml = null === $sHtml ? '' : (string) $sHtml;
	$sHtml = \trim($sHtml);
	if (0 === \strlen($sHtml))
	{
		return '';
	}

	$bHasExternals = false;

	$sHtml = \MailSo\Base\HtmlUtils::ClearTags($sHtml);
	$sHtml = \MailSo\Base\HtmlUtils::ClearOn($sHtml);
	$sHtml = \MailSo\Base\HtmlUtils::ClearBodyAndHtmlTag($sHtml);

	// Dom Part
	$oDom = \MailSo\Base\HtmlUtils::GetDomFromText($sHtml);
	unset($sHtml);

	if ($oDom)
	{
		$aTargetTags = array('a', 'form', 'area');
		$aTabIndexTags = array('a', 'form', 'area', 'input', 'button', 'textarea');
		$aDropAttributes = array('id', 'class', 'contenteditable', 'designmode', 'data-bind', 'xmlns');

		foreach ($oDom->getElementsByTagName('*') as $oElement)
		{
			$sTagNameLower = \strtolower($oElement->tagName);

			if ('iframe' === $sTagNameLower || 'frame' === $sTagNameLower)
			{
				$oElement->setAttribute('src', 'javascript:false');
			}

			if (\in_array($sTagNameLower, $aTargetTags, true))
			{
				$oElement->setAttribute('target', '_blank');
			}

			if (\in_array($sTagNameLower, $aTabIndexTags, true))
			{
				$oElement->setAttribute('tabindex', '-1');
			}

			foreach ($aDropAttributes as $sAttr)
			{
				@$oElement->removeAttribute($sAttr);
			}

			if ($oElement->hasAttribute('src'))
			{
				$sSrc = \trim($oElement->getAttribute('src'));
				$oElement->removeAttribute('src');

				if (\in_array($sSrc, $aContentLocationUrls))
				{
					$oElement->setAttribute('data-x-src-location', $sSrc);
					$aFoundedContentLocationUrls[] = $sSrc;
				}
				else if ('cid:' === \strtolower(\substr($sSrc, 0, 4)))
				{
					$sCid = \substr($sSrc, 4);
					$oElement->setAttribute('data-x-src-cid', $sCid);
					$aFoundCIDs[] = $sCid;
				}
				// Anchored: the value has to START with http(s)://, not merely contain it.
				else if (\preg_match('/^http[s]?:\\/\\//i', $sSrc))
				{
					$oElement->setAttribute('data-x-src', $sSrc);
					$bHasExternals = true;
				}
				else if ('data:image/' === \strtolower(\substr($sSrc, 0, 11)))
				{
					$oElement->setAttribute('src', $sSrc);
				}
				else
				{
					$oElement->setAttribute('data-x-broken-src', $sSrc);
				}
			}

			$sBackground = $oElement->hasAttribute('background')
				? \trim($oElement->getAttribute('background')) : '';

			$sBackgroundColor = $oElement->hasAttribute('bgcolor')
				? \trim($oElement->getAttribute('bgcolor')) : '';

			if (!empty($sBackground) || !empty($sBackgroundColor))
			{
				$aStyles = array();
				$sStyles = $oElement->hasAttribute('style') ? $oElement->getAttribute('style') : '';

				if (!empty($sBackground))
				{
					$aStyles[] = 'background-image: url(\'' . $sBackground . '\')';
					$oElement->removeAttribute('background');
				}

				if (!empty($sBackgroundColor))
				{
					$aStyles[] = 'background-color: ' . $sBackgroundColor;
					$oElement->removeAttribute('bgcolor');
				}

				$oElement->setAttribute('style',
					(empty($sStyles) ? '' : $sStyles . '; ') . \implode('; ', $aStyles));
			}

			// Runs last so that styles generated above are filtered as well.
			if ($oElement->hasAttribute('style'))
			{
				$oElement->setAttribute('style',
					\MailSo\Base\HtmlUtils::ClearStyle($oElement->getAttribute('style'), $oElement, $bHasExternals,
						$aFoundCIDs, $aContentLocationUrls, $aFoundedContentLocationUrls));
			}
		}

		$sResult = $oDom->saveHTML();
	}

	unset($oDom);

	$sResult = \MailSo\Base\HtmlUtils::ClearTags($sResult);
	$sResult = \MailSo\Base\HtmlUtils::ClearBodyAndHtmlTag($sResult, true);

	return \trim($sResult);
}