コード例 #1
0
ファイル: HtmlUtils.php プロジェクト: nsdown/rainloop-webmail
 /**
  * Sanitizes an HTML string and reports the resources it references.
  *
  * The text is pre-filtered (FixSchemas, ClearTags, ClearOn, ClearBodyAndHtmlTag),
  * parsed into a DOM, stripped of comments and forbidden elements, and then every
  * remaining element has its attributes rewritten: legacy body/background
  * attributes become inline styles, event handlers and identifying attributes are
  * removed, unsupported href schemes are neutralized, and "src" values are moved
  * to data-x-* attributes according to their kind (content-location, cid,
  * external, data:image, broken). The serialized result is wrapped in two
  * <div data-x-div-type="..."> containers standing in for <html> and <body>.
  *
  * @param string $sHtml Source HTML; null is treated as an empty string.
  * @param bool $bHasExternals = false Out (by reference): reset to false on every call, set to true when an http(s) or protocol-relative "src" is found. Also handed to ClearStyle(), which may update it (not visible here).
  * @param array $aFoundCIDs = array() Out (by reference): receives the part after "cid:" of every cid "src" found.
  * @param array $aContentLocationUrls = array() "src" values to treat as content-location references.
  * @param array $aFoundedContentLocationUrls = array() Out (by reference): receives every "src" that matched $aContentLocationUrls.
  * @param bool $bDoNotReplaceExternalUrl = false When true, external "src" values are kept in "src" instead of being moved to "data-x-src".
  * @param bool $bFindLinksInHtml = false When true, FindLinksInDOM() is run on the parsed document.
  * @param callback|null $fAdditionalExternalFilter = null Called with each external "src"; a non-empty result is stored in "data-x-additional-src". Ignored when not callable.
  * @param callback|null|false $fAdditionalDomReader = false Called with the parsed DOM; a truthy return value replaces the DOM. Ignored when not callable.
  * @param bool $bTryToDetectHiddenImages = false When true, images without "alt" text that look hidden or match a known tracker URL are hidden and their "src" is moved to "data-x-hidden-src".
  *
  * @return string
  */
 public static function ClearHtml($sHtml, &$bHasExternals = false, &$aFoundCIDs = array(), $aContentLocationUrls = array(), &$aFoundedContentLocationUrls = array(), $bDoNotReplaceExternalUrl = false, $bFindLinksInHtml = false, $fAdditionalExternalFilter = null, $fAdditionalDomReader = false, $bTryToDetectHiddenImages = false)
 {
     // Reset the out-flag before any early return so that an empty input
     // never leaves a stale value in the caller's variable.
     $bHasExternals = false;
     $sResult = '';
     $sHtml = null === $sHtml ? '' : (string) $sHtml;
     $sHtml = \trim($sHtml);
     if (0 === \strlen($sHtml)) {
         return '';
     }
     // Silently drop hooks that cannot be called.
     if ($fAdditionalExternalFilter && !\is_callable($fAdditionalExternalFilter)) {
         $fAdditionalExternalFilter = null;
     }
     if ($fAdditionalDomReader && !\is_callable($fAdditionalDomReader)) {
         $fAdditionalDomReader = null;
     }
     // Text-level pre-filtering before the DOM is built.
     $sHtml = \MailSo\Base\HtmlUtils::FixSchemas($sHtml);
     $sHtml = \MailSo\Base\HtmlUtils::ClearTags($sHtml, false);
     $sHtml = \MailSo\Base\HtmlUtils::ClearOn($sHtml);
     $sHtmlAttrs = $sBodyAttrs = '';
     $sHtml = \MailSo\Base\HtmlUtils::ClearBodyAndHtmlTag($sHtml, $sHtmlAttrs, $sBodyAttrs);
     // Dom Part
     $oDom = \MailSo\Base\HtmlUtils::GetDomFromText($sHtml, $sHtmlAttrs, $sBodyAttrs);
     unset($sHtml);
     if ($oDom) {
         if ($fAdditionalDomReader) {
             $oResDom = \call_user_func($fAdditionalDomReader, $oDom);
             if ($oResDom) {
                 $oDom = $oResDom;
             }
             unset($oResDom);
         }
         if ($bFindLinksInHtml) {
             \MailSo\Base\HtmlUtils::FindLinksInDOM($oDom);
         }
         // Remove every comment node. An XPath result is a snapshot, so
         // removing nodes while iterating it is safe.
         $oXpath = new \DOMXpath($oDom);
         $oComments = $oXpath->query('//comment()');
         if ($oComments) {
             foreach ($oComments as $oComment) {
                 if (isset($oComment->parentNode)) {
                     @$oComment->parentNode->removeChild($oComment);
                 }
             }
         }
         unset($oXpath, $oComments);
         // Remove forbidden elements. getElementsByTagName() returns a LIVE
         // list: removing a node while iterating it shifts the remaining
         // entries and the iterator skips the next element (e.g. the second
         // of two adjacent <script> tags). Collect first, remove afterwards.
         $aForbiddenTags = array('svg', 'head', 'link', 'base', 'meta', 'title', 'style', 'x-script', 'script', 'bgsound', 'keygen', 'source', 'object', 'embed', 'applet', 'mocha', 'iframe', 'frame', 'frameset', 'video', 'audio', 'area', 'map');
         $aToRemove = array();
         foreach ($oDom->getElementsByTagName('*') as $oElement) {
             if ($oElement) {
                 $sTagNameLower = \strtolower($oElement->tagName);
                 if ('' !== $sTagNameLower && \in_array($sTagNameLower, $aForbiddenTags, true)) {
                     $aToRemove[] = $oElement;
                 }
             }
         }
         foreach ($aToRemove as $oElement) {
             // A node nested inside an already removed subtree still has a
             // parent, so detaching it again is harmless.
             if (isset($oElement->parentNode)) {
                 @$oElement->parentNode->removeChild($oElement);
             }
         }
         unset($aToRemove);
         // Loop-invariant lookup tables for the per-element pass below.
         $aBlankTargetTags = array('a', 'form', 'area');
         $aNoTabIndexTags = array('a', 'form', 'area', 'input', 'button', 'textarea');
         $aStripAttrs = array('id', 'class', 'contenteditable', 'designmode', 'formaction', 'data-bind', 'data-reactid', 'xmlns', 'srcset', 'data-x-skip-style');
         $aEventNames = array('load', 'blur', 'error', 'focus', 'formchange', 'change', 'click', 'dblclick', 'keydown', 'keypress', 'keyup', 'mousedown', 'mouseenter', 'mouseleave', 'mousemove', 'mouseout', 'mouseover', 'mouseup', 'move', 'resize', 'resizeend', 'resizestart', 'scroll', 'select', 'submit', 'upload');
         $aTrackerUrls = array('email.microsoftemail.com/open', 'github.com/notifications/beacon/', 'mandrillapp.com/track/open', 'list-manage.com/track/open');
         // Set from <body link="..."> and applied to every <a> visited afterwards.
         $sLinkColor = '';
         // Only attributes are modified in this pass, so iterating the live
         // list directly is safe here.
         $aNodes = $oDom->getElementsByTagName('*');
         foreach ($aNodes as $oElement) {
             $sTagNameLower = \strtolower($oElement->tagName);
             // convert body attributes to styles
             if ('body' === $sTagNameLower) {
                 // '' means "not seen yet"; once seen it becomes array(original name, value).
                 $aAttrs = array('link' => '', 'text' => '', 'topmargin' => '', 'leftmargin' => '', 'bottommargin' => '', 'rightmargin' => '');
                 if (isset($oElement->attributes)) {
                     foreach ($oElement->attributes as $sAttributeName => $oAttributeNode) {
                         if ($oAttributeNode && isset($oAttributeNode->nodeValue)) {
                             $sAttributeNameLower = \strtolower($sAttributeName);
                             if (isset($aAttrs[$sAttributeNameLower]) && '' === $aAttrs[$sAttributeNameLower]) {
                                 $aAttrs[$sAttributeNameLower] = array($sAttributeName, \trim($oAttributeNode->nodeValue));
                             }
                         }
                     }
                 }
                 // Attributes are removed only after the attribute map has
                 // been fully iterated.
                 $aStyles = array();
                 foreach ($aAttrs as $sIndex => $aItem) {
                     if (\is_array($aItem)) {
                         $oElement->removeAttribute($aItem[0]);
                         switch ($sIndex) {
                             case 'link':
                                 // Only a plain hex colour is accepted as link colour.
                                 $sLinkColor = \trim($aItem[1]);
                                 if (!\preg_match('/^#[abcdef0-9]{3,6}$/i', $sLinkColor)) {
                                     $sLinkColor = '';
                                 }
                                 break;
                             case 'text':
                                 $aStyles[] = 'color: ' . $aItem[1];
                                 break;
                             case 'topmargin':
                                 $aStyles[] = 'margin-top: ' . (int) $aItem[1] . 'px';
                                 break;
                             case 'leftmargin':
                                 $aStyles[] = 'margin-left: ' . (int) $aItem[1] . 'px';
                                 break;
                             case 'bottommargin':
                                 $aStyles[] = 'margin-bottom: ' . (int) $aItem[1] . 'px';
                                 break;
                             case 'rightmargin':
                                 $aStyles[] = 'margin-right: ' . (int) $aItem[1] . 'px';
                                 break;
                         }
                     }
                 }
                 if (0 < \count($aStyles)) {
                     $sStyles = $oElement->hasAttribute('style') ? \trim(\trim(\trim($oElement->getAttribute('style')), ';')) : '';
                     $oElement->setAttribute('style', (empty($sStyles) ? '' : $sStyles . '; ') . \implode('; ', $aStyles));
                 }
             }
             // Defensive: frames are normally removed by the forbidden-tag pass above.
             if ('iframe' === $sTagNameLower || 'frame' === $sTagNameLower) {
                 $oElement->setAttribute('src', 'javascript:false');
             }
             // The body link colour goes first so the element's own style can override it.
             if ('a' === $sTagNameLower && !empty($sLinkColor)) {
                 $sStyles = $oElement->hasAttribute('style') ? \trim(\trim(\trim($oElement->getAttribute('style')), ';')) : '';
                 $oElement->setAttribute('style', 'color: ' . $sLinkColor . \trim(empty($sStyles) ? '' : '; ' . $sStyles));
             }
             if (\in_array($sTagNameLower, $aBlankTargetTags, true)) {
                 $oElement->setAttribute('target', '_blank');
             }
             if (\in_array($sTagNameLower, $aNoTabIndexTags, true)) {
                 $oElement->setAttribute('tabindex', '-1');
             }
             foreach ($aStripAttrs as $sAttr) {
                 @$oElement->removeAttribute($sAttr);
             }
             foreach ($aEventNames as $sAttr) {
                 @$oElement->removeAttribute('on' . $sAttr);
             }
             if ($oElement->hasAttribute('href')) {
                 $sHref = \trim($oElement->getAttribute('href'));
                 // Anything but the listed schemes or a protocol-relative URL
                 // is parked in data-x-broken-href and neutralized.
                 if (!\preg_match('/^(http[s]?|ftp|skype|mailto):/i', $sHref) && '//' !== \substr($sHref, 0, 2)) {
                     $oElement->setAttribute('data-x-broken-href', $sHref);
                     $oElement->setAttribute('href', 'javascript:false');
                 }
                 if ('a' === $sTagNameLower) {
                     $oElement->setAttribute('rel', 'external nofollow');
                 }
             }
             if ($bTryToDetectHiddenImages && 'img' === $sTagNameLower) {
                 $sAlt = $oElement->hasAttribute('alt') ? \trim($oElement->getAttribute('alt')) : '';
                 if ($oElement->hasAttribute('src') && '' === $sAlt) {
                     $sH = $oElement->hasAttribute('height') ? \trim($oElement->getAttribute('height')) : '';
                     // Whitespace is stripped so the pattern below can match "display: none" etc.
                     $sStyles = $oElement->hasAttribute('style') ? \preg_replace('/[\\s]+/', '', \trim(\trim(\trim($oElement->getAttribute('style')), ';'))) : '';
                     $sSrc = \trim($oElement->getAttribute('src'));
                     // Hidden by size/style ...
                     $bC = \in_array($sH, array('1', '0', '1px', '0px')) || \preg_match('/(display:none|visibility:hidden|height:0|height:[01][a-z][a-z])/i', $sStyles);
                     if (!$bC) {
                         // ... or by a known tracking URL fragment.
                         $sSrcLower = \strtolower($sSrc);
                         foreach ($aTrackerUrls as $sLine) {
                             if (false !== \strpos($sSrcLower, $sLine)) {
                                 $bC = true;
                                 break;
                             }
                         }
                     }
                     if ($bC) {
                         $oElement->setAttribute('style', 'display:none');
                         // Marker consumed (and removed) at the end of this iteration
                         // so the style set above is not passed through ClearStyle().
                         $oElement->setAttribute('data-x-skip-style', 'true');
                         $oElement->setAttribute('data-x-hidden-src', $sSrc);
                         $oElement->removeAttribute('src');
                     }
                 }
             }
             if ($oElement->hasAttribute('src')) {
                 $sSrc = \trim($oElement->getAttribute('src'));
                 // "src" is always dropped first and only restored for the
                 // cases explicitly allowed below.
                 $oElement->removeAttribute('src');
                 if (\in_array($sSrc, $aContentLocationUrls)) {
                     $oElement->setAttribute('data-x-src-location', $sSrc);
                     $aFoundedContentLocationUrls[] = $sSrc;
                 } else {
                     if ('cid:' === \strtolower(\substr($sSrc, 0, 4))) {
                         $oElement->setAttribute('data-x-src-cid', \substr($sSrc, 4));
                         $aFoundCIDs[] = \substr($sSrc, 4);
                     } else {
                         if (\preg_match('/^http[s]?:\\/\\//i', $sSrc) || '//' === \substr($sSrc, 0, 2)) {
                             if ($bDoNotReplaceExternalUrl) {
                                 $oElement->setAttribute('src', $sSrc);
                             } else {
                                 $oElement->setAttribute('data-x-src', $sSrc);
                                 if ($fAdditionalExternalFilter) {
                                     $sCallResult = \call_user_func($fAdditionalExternalFilter, $sSrc);
                                     if (0 < \strlen($sCallResult)) {
                                         $oElement->setAttribute('data-x-additional-src', $sCallResult);
                                     }
                                 }
                             }
                             $bHasExternals = true;
                         } else {
                             if ('data:image/' === \strtolower(\substr($sSrc, 0, 11))) {
                                 $oElement->setAttribute('src', $sSrc);
                             } else {
                                 $oElement->setAttribute('data-x-broken-src', $sSrc);
                             }
                         }
                     }
                 }
             }
             // Convert legacy background/bgcolor attributes to inline styles.
             $sBackground = $oElement->hasAttribute('background') ? \trim($oElement->getAttribute('background')) : '';
             $sBackgroundColor = $oElement->hasAttribute('bgcolor') ? \trim($oElement->getAttribute('bgcolor')) : '';
             if (!empty($sBackground) || !empty($sBackgroundColor)) {
                 $aStyles = array();
                 $sStyles = $oElement->hasAttribute('style') ? \trim(\trim(\trim($oElement->getAttribute('style')), ';')) : '';
                 if (!empty($sBackground)) {
                     $aStyles[] = 'background-image: url(\'' . $sBackground . '\')';
                     $oElement->removeAttribute('background');
                 }
                 if (!empty($sBackgroundColor)) {
                     $aStyles[] = 'background-color: ' . $sBackgroundColor;
                     $oElement->removeAttribute('bgcolor');
                 }
                 $oElement->setAttribute('style', (empty($sStyles) ? '' : $sStyles . '; ') . \implode('; ', $aStyles));
             }
             // Every inline style goes through ClearStyle(), except the one
             // set by the hidden-image detection above.
             if ($oElement->hasAttribute('style') && !$oElement->hasAttribute('data-x-skip-style')) {
                 $oElement->setAttribute('style', \MailSo\Base\HtmlUtils::ClearStyle($oElement->getAttribute('style'), $oElement, $bHasExternals, $aFoundCIDs, $aContentLocationUrls, $aFoundedContentLocationUrls, $bDoNotReplaceExternalUrl, $fAdditionalExternalFilter));
             }
             $oElement->removeAttribute('data-x-skip-style');
         }
         $sResult = $oDom->saveHTML();
     }
     unset($oDom);
     // Second text-level pass over the serialized DOM.
     $sResult = \MailSo\Base\HtmlUtils::ClearTags($sResult);
     $sHtmlAttrs = $sBodyAttrs = '';
     $sResult = \MailSo\Base\HtmlUtils::ClearBodyAndHtmlTag($sResult, $sHtmlAttrs, $sBodyAttrs);
     // <html>/<body> are replaced by marker <div>s that carry their attributes.
     $sResult = '<div data-x-div-type="body" ' . $sBodyAttrs . '>' . $sResult . '</div>';
     $sResult = '<div data-x-div-type="html" ' . $sHtmlAttrs . '>' . $sResult . '</div>';
     // NOTE(review): $KOS looks like a placeholder for ':' introduced by an
     // earlier helper (presumably FixSchemas) - confirm against its definition.
     $sResult = \str_replace(\MailSo\Base\HtmlUtils::$KOS, ':', $sResult);
     $sResult = \MailSo\Base\Utils::StripSpaces($sResult);
     return \trim($sResult);
 }