Example #1
0
/**
 * Finds the next tag in $body at or after $offset and parses its name,
 * type and attributes.
 *
 * Relies on the helpers sq_findnxstr(), sq_skipspace() and sq_findnxreg(),
 * which are defined outside this block. From the way their results are used
 * here, sq_findnxstr() is expected to return strlen($body) when the needle
 * is missing, and sq_findnxreg() to return false or an array of
 * (match position, text before the match, matched text) -- confirm against
 * their definitions.
 *
 * @param  $body   String where to look for the next tag.
 * @param  $offset Start looking from here.
 * @return         false if no more tags exist in the body, or
 *                 an array with the following members:
 *                 - string with the lowercased name of the tag
 *                 - array mapping lowercased attribute names to their
 *                   values, or false when the tag ends right after its
 *                   name. Values keep their surrounding quotes; unquoted
 *                   values are wrapped in double quotes (with '"' turned
 *                   into "&quot;") and value-less attributes get '"yes"'.
 *                 - integer with tag type (1 opening, 2 closing,
 *                   3 xhtml-style content-less)
 *                 - integer where the tag starts (starting "<")
 *                 - integer where the tag ends (ending ">"), or the body
 *                   length when the tag is never closed
 *                 first three members will be false, if the tag is invalid.
 *                 Comments and SGML declarations ("<!...") are reported
 *                 the same way as invalid tags.
 */
function sq_getnxtag($body, $offset)
{
    $bodylen = strlen($body);
    if ($offset > $bodylen) {
        return false;
    }
    $lt = sq_findnxstr($body, $offset, "<");
    if ($lt == $bodylen) {
        return false;
    }
    /**
     * We are here:
     * blah blah <tag attribute="value">
     * \---------^
     */
    $pos = sq_skipspace($body, $lt + 1);
    if ($pos >= $bodylen) {
        return array(false, false, false, $lt, $bodylen);
    }
    /**
     * There are 3 kinds of tags:
     * 1. Opening tag, e.g.:
     *    <a href="blah">
     * 2. Closing tag, e.g.:
     *    </a>
     * 3. XHTML-style content-less tag, e.g.:
     *    <img src="blah" />
     */
    $tagtype = false;
    switch (substr($body, $pos, 1)) {
        case '/':
            $tagtype = 2;
            $pos++;
            break;
        case '!':
            /**
             * A comment or an SGML declaration. Either way it is reported
             * as an invalid tag spanning up to its terminator.
             */
            if (substr($body, $pos + 1, 2) == "--") {
                $gt = strpos($body, "-->", $pos);
                if ($gt === false) {
                    $gt = $bodylen;
                } else {
                    /**
                     * Point at the ">" of the closing "-->".
                     */
                    $gt += 2;
                }
                return array(false, false, false, $lt, $gt);
            }
            $gt = sq_findnxstr($body, $pos, ">");
            return array(false, false, false, $lt, $gt);
        default:
            /**
             * Assume tagtype 1 for now. If it's type 3, we'll switch values
             * later.
             */
            $tagtype = 1;
            break;
    }
    /**
     * Look for the next character that is not a word character, "-" or
     * "_"; it marks the end of the tag name.
     */
    $regary = sq_findnxreg($body, $pos, "[^\\w\\-_]");
    if ($regary == false) {
        return array(false, false, false, $lt, $bodylen);
    }
    list($pos, $tagname, $match) = $regary;
    $tagname = strtolower($tagname);
    /**
     * $match can be either of these:
     * '>'  indicating the end of the tag entirely.
     * '\s' indicating the end of the tag name.
     * '/'  indicating that this is type-3 xhtml tag.
     *
     * Whatever else we find there indicates an invalid tag.
     */
    switch ($match) {
        case '/':
            /**
             * This is an xhtml-style tag with a closing / at the
             * end, like so: <img src="blah" />. Check if it's followed
             * by the closing bracket. If not, then this tag is invalid
             */
            if (substr($body, $pos, 2) == "/>") {
                $pos++;
                $tagtype = 3;
            } else {
                $gt = sq_findnxstr($body, $pos, ">");
                return array(false, false, false, $lt, $gt);
            }
            /**
             * Deliberate fall-through: the tag ends here.
             */
        case '>':
            return array($tagname, false, $tagtype, $lt, $pos);
        default:
            /**
             * Check if it's whitespace
             */
            if (!preg_match('/\\s/', $match)) {
                /**
                 * This is an invalid tag! Look for the next closing ">".
                 */
                $gt = sq_findnxstr($body, $lt, ">");
                return array(false, false, false, $lt, $gt);
            }
            break;
    }
    /**
     * At this point we're here:
     * <tagname  attribute='blah'>
     * \-------^
     *
     * At this point we loop in order to find all attributes.
     */
    $attary = array();
    while ($pos <= $bodylen) {
        $pos = sq_skipspace($body, $pos);
        if ($pos == $bodylen) {
            /**
             * Non-closed tag.
             */
            return array(false, false, false, $lt, $pos);
        }
        /**
         * See if we arrived at a ">" or "/>", which means that we reached
         * the end of the tag. The match is anchored at $pos with \G and an
         * offset, so the remainder of the body is not copied on every
         * iteration.
         */
        $matches = array();
        if (preg_match("%\\G(\\s*)(>|/>)%s", $body, $matches, 0, $pos)) {
            /**
             * Yep. So we did.
             */
            $pos += strlen($matches[1]);
            if ($matches[2] == "/>") {
                $tagtype = 3;
                $pos++;
            }
            return array($tagname, $attary, $tagtype, $lt, $pos);
        }
        /**
         * There are several types of attributes, with optional
         * [:space:] between members.
         * Type 1:
         *   attrname[:space:]=[:space:]'CDATA'
         * Type 2:
         *   attrname[:space:]=[:space:]"CDATA"
         * Type 3:
         *   attr[:space:]=[:space:]CDATA
         * Type 4:
         *   attrname
         *
         * We leave types 1 and 2 the same, type 3 we check for
         * '"' and convert to "&quot;" if needed, then wrap in
         * double quotes. Type 4 we convert into:
         * attrname="yes".
         */
        $regary = sq_findnxreg($body, $pos, "[^:\\w\\-_]");
        if ($regary == false) {
            /**
             * Looks like body ended before the end of tag.
             */
            return array(false, false, false, $lt, $bodylen);
        }
        list($pos, $attname, $match) = $regary;
        $attname = strtolower($attname);
        /**
         * We arrived at the end of attribute name. Several things possible
         * here:
         * '>'  means the end of the tag and this is attribute type 4
         * '/'  if followed by '>' means the same thing as above
         * '\s' means a lot of things -- look what it's followed by.
         *      anything else means the attribute is invalid.
         */
        switch ($match) {
            case '/':
                /**
                 * This is an xhtml-style tag with a closing / at the
                 * end, like so: <img src="blah" />. Check if it's followed
                 * by the closing bracket. If not, then this tag is invalid
                 */
                if (substr($body, $pos, 2) == "/>") {
                    $pos++;
                    $tagtype = 3;
                } else {
                    $gt = sq_findnxstr($body, $pos, ">");
                    return array(false, false, false, $lt, $gt);
                }
                /**
                 * Deliberate fall-through: the tag ends here.
                 */
            case '>':
                $attary[$attname] = '"yes"';
                return array($tagname, $attary, $tagtype, $lt, $pos);
            default:
                /**
                 * Skip whitespace and see what we arrive at.
                 */
                $pos = sq_skipspace($body, $pos);
                $char = substr($body, $pos, 1);
                /**
                 * Two things are valid here:
                 * '=' means this is attribute type 1 2 or 3.
                 * \w means this was attribute type 4.
                 * anything else we ignore and re-loop. End of tag and
                 * invalid stuff will be caught by our checks at the beginning
                 * of the loop.
                 */
                if ($char == "=") {
                    $pos++;
                    $pos = sq_skipspace($body, $pos);
                    /**
                     * Here are 3 possibilities:
                     * "'"  attribute type 1
                     * '"'  attribute type 2
                     * everything else is the content of attribute type 3
                     */
                    $quot = substr($body, $pos, 1);
                    if ($quot == "'") {
                        $regary = sq_findnxreg($body, $pos + 1, "\\'");
                        if ($regary == false) {
                            return array(false, false, false, $lt, $bodylen);
                        }
                        list($pos, $attval) = $regary;
                        /**
                         * Step over the closing quote.
                         */
                        $pos++;
                        $attary[$attname] = "'" . $attval . "'";
                    } elseif ($quot == '"') {
                        $regary = sq_findnxreg($body, $pos + 1, '\\"');
                        if ($regary == false) {
                            return array(false, false, false, $lt, $bodylen);
                        }
                        list($pos, $attval) = $regary;
                        /**
                         * Step over the closing quote.
                         */
                        $pos++;
                        $attary[$attname] = '"' . $attval . '"';
                    } else {
                        /**
                         * These are hateful. Look for \s, or >.
                         */
                        $regary = sq_findnxreg($body, $pos, "[\\s>]");
                        if ($regary == false) {
                            return array(false, false, false, $lt, $bodylen);
                        }
                        list($pos, $attval) = $regary;
                        /**
                         * If it's ">" it will be caught at the top.
                         */
                        $attval = str_replace('"', "&quot;", $attval);
                        $attary[$attname] = '"' . $attval . '"';
                    }
                } elseif (preg_match("|[\\w/>]|", $char)) {
                    /**
                     * That was attribute type 4.
                     */
                    $attary[$attname] = '"yes"';
                } else {
                    /**
                     * An illegal character. Find next '>' and return.
                     */
                    $gt = sq_findnxstr($body, $pos, ">");
                    return array(false, false, false, $lt, $gt);
                }
                break;
        }
    }
    /**
     * The fact that we got here indicates that the tag end was never
     * found. Return invalid tag indication so it gets stripped.
     */
    return array(false, false, false, $lt, $bodylen);
}
Example #2
0
/**
 * This function edits the style definition found in $body, starting at
 * $pos and running up to the next closing </style> tag, to make it
 * friendly and usable in squirrelmail.
 *
 * Relies on sq_findnxreg() and sq_cid2http(), which are defined outside
 * this block. From the way its result is used here, sq_findnxreg() is
 * expected to return false or an array of (match position, text before
 * the match, matched text) -- confirm against its definition.
 *
 * @param  $body     the string that contains the style definition
 * @param  $pos      offset in $body where the style content starts
 * @param  $message  the message object (passed through to sq_cid2http)
 * @param  $id       the message id (passed through to sq_cid2http)
 * @param  $mailbox  the message mailbox (passed through to sq_cid2http)
 * @return           an array with two members:
 *                   - string with the edited style content, or FALSE if
 *                     no closing </style> tag was found
 *                   - integer offset just past the closing </style> tag,
 *                     or the length of $body if none was found
 */
function sq_fixstyle($body, $pos, $message, $id, $mailbox)
{
    global $view_unsafe_images;
    $ret = sq_findnxreg($body, $pos, '</\\s*style\\s*>');
    if ($ret == FALSE) {
        return array(FALSE, strlen($body));
    }
    $newpos = $ret[0] + strlen($ret[2]);
    $content = $ret[1];
    /**
     * First look for general BODY style declaration, which would be
     * like so:
     * body {background: blah-blah}
     * and change it to .bodyclass so we can just assign it to a <div>
     */
    $content = preg_replace("|body(\\s*\\{.*?\\})|si", ".bodyclass\\1", $content);
    $secremoveimg = '../images/' . _("sec_remove_eng.png");
    /**
     * Fix url('blah') declarations that use a *script: scheme.
     */
    $content = preg_replace("|url\\s*\\(\\s*([\\'\"])\\s*\\S+script\\s*:.*?([\\'\"])\\s*\\)|si", "url(\\1{$secremoveimg}\\2)", $content);
    /**
     * Fix url('https*://.*) declarations but only if $view_unsafe_images
     * is false.
     */
    if (!$view_unsafe_images) {
        $content = preg_replace("|url\\s*\\(\\s*([\\'\"])\\s*https*:.*?([\\'\"])\\s*\\)|si", "url(\\1{$secremoveimg}\\2)", $content);
    }
    /**
     * Fix urls that refer to cid:
     */
    while (preg_match("|url\\s*\\(\\s*([\\'\"]\\s*cid:.*?[\\'\"])\\s*\\)|si", $content, $matches)) {
        $cidurl = $matches[1];
        $httpurl = sq_cid2http($message, $id, $cidurl, $mailbox);
        /**
         * The cid url comes from the message, so quote it before putting
         * it into a pattern. Unquoted, a metacharacter in it could stop
         * the pattern from matching (and this loop from ever ending) or
         * break the "|" delimiter.
         */
        $cidpattern = preg_quote($cidurl, '|');
        /**
         * Keep "$" and "\" in the new url from being read as
         * backreferences by preg_replace.
         */
        $replacement = 'url(' . strtr($httpurl, array('\\' => '\\\\', '$' => '\\$')) . ')';
        $fixed = preg_replace("|url\\s*\\(\\s*{$cidpattern}\\s*\\)|si", $replacement, $content);
        if ($fixed === null || $fixed === $content) {
            /**
             * The replacement failed or changed nothing; stop instead of
             * looping forever on the same match.
             */
            break;
        }
        $content = $fixed;
    }
    /**
     * Fix stupid css declarations which lead to vulnerabilities
     * in IE.
     */
    $match = array('/expression/i', '/behaviou*r/i', '/binding/i', '/include-source/i');
    $replace = array('idiocy', 'idiocy', 'idiocy', 'idiocy');
    $content = preg_replace($match, $replace, $content);
    return array($content, $newpos);
}