Example #1
0
 /**
  * Sanitizer hook that drops every <iframe> rejected by iframe_whitelisted().
  *
  * @param DOMDocument $doc                   Document being sanitized; modified in place.
  * @param mixed       $site_url              Not used by this hook.
  * @param array       $allowed_elements      Passed through unchanged.
  * @param array       $disallowed_attributes Passed through unchanged.
  *
  * @return array Triple of ($doc, $allowed_elements, $disallowed_attributes).
  */
 function hook_sanitize($doc, $site_url, $allowed_elements, $disallowed_attributes)
 {
     $finder = new DOMXpath($doc);

     foreach ($finder->query('//iframe') as $frame) {
         if (iframe_whitelisted($frame)) {
             // Whitelisted frames stay in the document untouched.
             continue;
         }

         $frame->parentNode->removeChild($frame);
     }

     return array($doc, $allowed_elements, $disallowed_attributes);
 }
Example #2
0
/**
 * Sanitize an HTML fragment and return the resulting HTML.
 *
 * Steps, in order:
 *  - parse the fragment as UTF-8 HTML;
 *  - when $site_url is set: rewrite link/image URLs through rewrite_relative_url(),
 *    point images at image.php when a cached copy exists, and optionally replace
 *    images with plain links;
 *  - force target="_blank" on links;
 *  - sandbox iframes that iframe_whitelisted() rejects, upgrade whitelisted ones
 *    to https when the current request is https;
 *  - run every HOOK_SANITIZE plugin, which may replace the document and the
 *    allowed-element / disallowed-attribute lists;
 *  - pass the document through strip_harmful_tags();
 *  - wrap occurrences of each highlight word in <span class="highlight">.
 *
 * @param string      $str                 HTML fragment to sanitize.
 * @param bool        $force_remove_images Replace images with links regardless of user preference
 *                                         (only takes effect when $site_url is set).
 * @param mixed       $owner               User id handed to get_pref("STRIP_IMAGES", ...);
 *                                         defaults to $_SESSION["uid"] when falsy.
 * @param mixed       $site_url            Base URL for rewrite_relative_url(); falsy disables
 *                                         URL rewriting and image replacement.
 * @param array|false $highlight_words     Words to highlight (case-insensitive), or falsy for none.
 * @param mixed       $article_id          Passed through to the plugin hooks.
 *
 * @return string Sanitized HTML as produced by DOMDocument::saveHTML(), or '' when the
 *                trimmed input is empty.
 */
function sanitize($str, $force_remove_images = false, $owner = false, $site_url = false, $highlight_words = false, $article_id = false)
{
    if (!$owner) {
        // Guarded read: the session may not carry a uid (e.g. CLI / update daemon).
        $owner = isset($_SESSION["uid"]) ? $_SESSION["uid"] : false;
    }

    $res = trim($str);
    // Strict comparison so that content consisting of "0" is not treated as empty.
    if ($res === '') {
        return '';
    }

    // Prepended so libxml decodes the fragment as UTF-8.
    $charset_hack = '<head>
			<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
		</head>';

    libxml_use_internal_errors(true);

    $doc = new DOMDocument();
    $doc->loadHTML($charset_hack . $res);
    $xpath = new DOMXPath($doc);

    $entries = $xpath->query('(//a[@href]|//img[@src])');
    foreach ($entries as $entry) {
        // NOTE(review): image stripping only happens when $site_url is set, same as
        // before this change; confirm whether $force_remove_images should apply without it.
        if ($site_url) {
            if ($entry->hasAttribute('href')) {
                $entry->setAttribute('href', rewrite_relative_url($site_url, $entry->getAttribute('href')));
                $entry->setAttribute('rel', 'noreferrer');
            }

            if ($entry->hasAttribute('src')) {
                $src = rewrite_relative_url($site_url, $entry->getAttribute('src'));
                $src_hash = sha1($src);
                $cached_filename = CACHE_DIR . '/images/' . $src_hash . '.png';
                if (file_exists($cached_filename)) {
                    $src = SELF_URL_PATH . '/image.php?hash=' . $src_hash;
                }
                $entry->setAttribute('src', $src);
            }

            if ($entry->nodeName == 'img') {
                // Parenthesised to make the original && / || precedence explicit.
                if (($owner && get_pref("STRIP_IMAGES", $owner)) || $force_remove_images || !empty($_SESSION["bw_limit"])) {
                    // Replace the image with a paragraph holding a link to its source.
                    $p = $doc->createElement('p');
                    $a = $doc->createElement('a');
                    $a->setAttribute('href', $entry->getAttribute('src'));
                    $a->appendChild($doc->createTextNode($entry->getAttribute('src')));
                    $a->setAttribute('target', '_blank');
                    $p->appendChild($a);
                    $entry->parentNode->replaceChild($p, $entry);
                }
            }
        }

        if (strtolower($entry->nodeName) == "a") {
            $entry->setAttribute("target", "_blank");
        }
    }

    $entries = $xpath->query('//iframe');
    foreach ($entries as $entry) {
        if (!iframe_whitelisted($entry)) {
            $entry->setAttribute('sandbox', 'allow-scripts');
        } elseif (isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == "on") {
            // Served over https: upgrade the whitelisted frame URL as well.
            $entry->setAttribute("src", str_replace("http://", "https://", $entry->getAttribute("src")));
        }
    }

    $allowed_elements = array('a', 'address', 'audio', 'article', 'aside', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'body', 'br', 'caption', 'cite', 'center', 'code', 'col', 'colgroup', 'data', 'dd', 'del', 'details', 'div', 'dl', 'font', 'dt', 'em', 'footer', 'figure', 'figcaption', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'html', 'i', 'img', 'ins', 'kbd', 'li', 'main', 'mark', 'nav', 'noscript', 'ol', 'p', 'pre', 'q', 'ruby', 'rp', 'rt', 's', 'samp', 'section', 'small', 'source', 'span', 'strike', 'strong', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'time', 'tr', 'track', 'tt', 'u', 'ul', 'var', 'wbr', 'video');

    if (!empty($_SESSION['hasSandbox'])) {
        $allowed_elements[] = 'iframe';
    }

    $disallowed_attributes = array('id', 'style', 'class');

    foreach (PluginHost::getInstance()->get_hooks(PluginHost::HOOK_SANITIZE) as $plugin) {
        $retval = $plugin->hook_sanitize($doc, $site_url, $allowed_elements, $disallowed_attributes, $article_id);
        if (is_array($retval)) {
            // Array form: the hook may also adjust the element / attribute lists.
            $doc = $retval[0];
            $allowed_elements = $retval[1];
            $disallowed_attributes = $retval[2];
        } else {
            // Scalar form: the hook returned only the document.
            $doc = $retval;
        }
    }

    // Remove the doctype, but only if that is really what the first child is;
    // otherwise we would delete the document's root element.
    if ($doc->firstChild instanceof DOMDocumentType) {
        $doc->removeChild($doc->firstChild);
    }

    $doc = strip_harmful_tags($doc, $allowed_elements, $disallowed_attributes);

    if ($highlight_words) {
        // A hook or strip_harmful_tags() may have handed back a different document,
        // so query the one we are about to modify.
        $xpath = new DOMXPath($doc);

        foreach ($highlight_words as $word) {
            $word_length = mb_strlen($word);
            if ($word_length == 0) {
                // An empty needle matches at offset 0 forever on PHP 8.
                continue;
            }

            // http://stackoverflow.com/questions/4081372/highlight-keywords-in-a-paragraph
            // Re-queried per word because the previous word's pass changed the tree.
            $elements = $xpath->query("//*/text()");
            foreach ($elements as $child) {
                $text = $child->textContent;
                $pos = mb_stripos($text, $word);
                if ($pos === false) {
                    // Nothing to highlight: leave the text node untouched.
                    continue;
                }

                $fragment = $doc->createDocumentFragment();
                do {
                    if ($pos > 0) {
                        $fragment->appendChild($doc->createTextNode(mb_substr($text, 0, $pos)));
                    }

                    // Keep the text's own casing inside the highlight.
                    $matched = mb_substr($text, $pos, $word_length);
                    $highlight = $doc->createElement('span');
                    $highlight->appendChild($doc->createTextNode($matched));
                    $highlight->setAttribute('class', 'highlight');
                    $fragment->appendChild($highlight);

                    $text = mb_substr($text, $pos + $word_length);
                } while (($pos = mb_stripos($text, $word)) !== false);

                // Strict comparison so a trailing "0" is not dropped.
                if ($text !== '') {
                    $fragment->appendChild($doc->createTextNode($text));
                }

                $child->parentNode->replaceChild($fragment, $child);
            }
        }
    }

    return $doc->saveHTML();
}