Example #1
0
/**
 * Checks a CSS style sheet (high level).
 *
 * Runs the sheet through _validate_css_sheet() and converts every raw error it reports into the standard error structure via _xhtml_error().
 *
 * @param  string			The data of the style sheet
 * @return map				A map with keys 'level_ranges' (always NULL here), 'tag_ranges', 'value_ranges' and 'errors' (a list; empty when nothing was found)
 */
function check_css($data)
{
    // Make sure the flag exists before the validator runs
    if (!isset($GLOBALS['MAIL_MODE'])) {
        $GLOBALS['MAIL_MODE'] = false;
    }

    // A NULL result from the sheet validator is treated the same as "no errors"
    $_errors = _validate_css_sheet($data);
    if (is_null($_errors)) {
        $_errors = array();
    }

    $errors = array();
    global $POS, $OUT;
    global $CSS_TAG_RANGES, $CSS_VALUE_RANGES;
    $OUT = $data;
    foreach ($_errors as $error) {
        // Reset the global position for every error; the explicit position (if any) is passed as the last argument
        $POS = 0;
        $errors[] = _xhtml_error(
            $error[0],
            array_key_exists(1, $error) ? $error[1] : '',
            array_key_exists(2, $error) ? $error[2] : '',
            array_key_exists(3, $error) ? $error[3] : '',
            false,
            // Guarded like in check_xhtml(): an entry without 'pos' falls back to 0 rather than triggering an undefined index
            array_key_exists('pos', $error) ? $error['pos'] : 0
        );
    }

    return array('level_ranges' => NULL, 'tag_ranges' => $CSS_TAG_RANGES, 'value_ranges' => $CSS_VALUE_RANGES, 'errors' => $errors);
}
Example #2
0
/**
 * Check the specified XHTML, and return the results.
 *
 * Resets the validator's global state, then reads the document tag by tag (via _get_next_tag), maintaining a stack of open tags and collecting errors along the way.
 * After the tag walk, document-level checks and (optionally) a spelling check are run.
 *
 * @param  string			The XHTML to validate
 * @param  boolean		Whether to avoid checking for relational errors (true implies just a quick structural check, aka a 'well formed' check)
 * @param  boolean		Whether what is being validated is an HTML fragment, rather than a whole document
 * @param  boolean		Validate javascript
 * @param  boolean		Validate CSS
 * @param  boolean		Validate WCAG
 * @param  boolean		Validate for compatibility
 * @param  boolean		Validate external files
 * @param  boolean		Bring up messages about manual checks
 * @return map				A map with keys 'level_ranges', 'tag_ranges', 'value_ranges' and 'errors' (a list; empty when nothing was found)
 */
function check_xhtml($out, $well_formed_only = false, $is_fragment = false, $validation_javascript = true, $validation_css = true, $validation_wcag = true, $validation_compat = true, $validation_ext_files = true, $validation_manual = false)
{
    global $XHTML_VALIDATOR_OFF, $WELL_FORMED_ONLY, $VALIDATION_JAVASCRIPT, $VALIDATION_CSS, $VALIDATION_WCAG, $VALIDATION_COMPAT, $VALIDATION_EXT_FILES, $VALIDATION_MANUAL, $UNDER_XMLNS;
    // Tested with is_null() throughout: NULL means checks are active; while non-NULL it is used as a nesting counter and most checks are skipped.
    // NOTE(review): presumably mixed() returns NULL - confirm.
    $XHTML_VALIDATOR_OFF = mixed();
    $WELL_FORMED_ONLY = $well_formed_only;
    // NOTE(review): presumably 'validation2' carries what the relational checks need, hence only loaded for a full validation - confirm
    if (!$WELL_FORMED_ONLY) {
        require_code('validation2');
    }
    // Publish the option flags as globals
    $VALIDATION_JAVASCRIPT = $validation_javascript;
    $VALIDATION_CSS = $validation_css;
    $VALIDATION_WCAG = $validation_wcag;
    $VALIDATION_COMPAT = $validation_compat;
    $VALIDATION_EXT_FILES = $validation_ext_files;
    $VALIDATION_MANUAL = $validation_manual;
    global $IDS_SO_FAR;
    $IDS_SO_FAR = array();
    // Parallel to $TAG_STACK: the value of $POS at the time each open tag was pushed (used later to extract that tag's contents)
    $content_start_stack = array();
    global $BLOCK_CONSTRAIN, $XML_CONSTRAIN, $LAST_TAG_ATTRIBUTES, $FOUND_DOCTYPE, $FOUND_DESCRIPTION, $FOUND_KEYWORDS, $FOUND_CONTENTTYPE, $THE_DOCTYPE, $TAGS_DEPRECATE_ALLOW, $URL_BASE, $PARENT_TAG, $TABS_SEEN, $KEYS_SEEN, $ANCHORS_SEEN, $ATT_STACK, $TAG_STACK, $POS, $LINENO, $LINESTART, $OUT, $T_POS, $PROHIBITIONS, $ONLY_PARENT, $ONLY_CHILDREN, $REQUIRE_ANCESTER, $LEN, $ANCESTER_BLOCK, $ANCESTER_INLINE, $POSSIBLY_EMPTY_TAGS, $MUST_SELFCLOSE_TAGS, $FOR_LABEL_IDS, $FOR_LABEL_IDS_2, $INPUT_TAG_IDS;
    global $TAG_RANGES, $VALUE_RANGES, $LAST_A_TAG, $A_LINKS, $XHTML_FORM_ENCODING;
    global $AREA_LINKS, $LAST_HEADING, $CRAWLED_URLS, $HYPERLINK_URLS, $EMBED_URLS, $THE_LANGUAGE, $PSPELL_LINK;
    global $TAGS_BLOCK, $TAGS_INLINE, $TAGS_NORMAL, $TAGS_BLOCK_DEPRECATED, $TAGS_INLINE_DEPRECATED, $TAGS_NORMAL_DEPRECATED;
    // Reset all per-run global state, so results of a previous validation cannot leak into this one
    $PSPELL_LINK = NULL;
    $THE_LANGUAGE = 'en';
    $THE_DOCTYPE = $is_fragment ? DOCTYPE_XHTML : DOCTYPE_HTML;
    $TAGS_DEPRECATE_ALLOW = true;
    // Fragments start out under the XML constraint; the XML-only errors below are gated on this flag
    $XML_CONSTRAIN = $is_fragment;
    $BLOCK_CONSTRAIN = false;
    $LINENO = 0;
    $LINESTART = 0;
    $HYPERLINK_URLS = array();
    $EMBED_URLS = array();
    $AREA_LINKS = array();
    $LAST_HEADING = 0;
    $FOUND_DOCTYPE = false;
    $FOUND_CONTENTTYPE = false;
    $FOUND_KEYWORDS = false;
    $FOUND_DESCRIPTION = false;
    $CRAWLED_URLS = array();
    $PARENT_TAG = '';
    $XHTML_FORM_ENCODING = '';
    $UNDER_XMLNS = false;
    $KEYS_SEEN = array();
    $TABS_SEEN = array();
    $TAG_RANGES = array();
    $VALUE_RANGES = array();
    $LAST_A_TAG = NULL;
    $ANCHORS_SEEN = array();
    $FOR_LABEL_IDS = array();
    $FOR_LABEL_IDS_2 = array();
    $INPUT_TAG_IDS = array();
    $TAG_STACK = array();
    $ATT_STACK = array();
    $ANCESTER_BLOCK = 0;
    $ANCESTER_INLINE = 0;
    $POS = 0;
    // From here on the document is accessed through the global $OUT; the local copy is dropped
    $OUT = $out;
    unset($out);
    $LEN = strlen($OUT);
    $level_ranges = array();
    // Number of tags currently pushed by this function (kept alongside $TAG_STACK)
    $stack_size = 0;
    // Tags a whole document must contain; entries are removed as they are encountered, leftovers are reported at the end
    $to_find = array('html' => 1, 'head' => 1, 'title' => 1);
    // Tags that may appear at most once within a given parent. The counter map is saved/reset when a tag is opened and restored when it is closed.
    $only_one_of_stack = array();
    $only_one_of_template = array('title' => 1, 'head' => 1, 'body' => 1, 'base' => 1, 'thead' => 1, 'tfoot' => 1);
    $only_one_of = $only_one_of_template;
    $A_LINKS = array();
    // Last tag popped off the stack (used in close-mismatch error messages)
    $previous = '';
    if (!isset($GLOBALS['MAIL_MODE'])) {
        $GLOBALS['MAIL_MODE'] = false;
    }
    $errors = array();
    // Set once a bad root has been reported; suppresses XHTML_CEMPTY_TAG errors afterwards
    $bad_root = false;
    // Main loop: one iteration per tag, until _get_next_tag() returns NULL
    $token = _get_next_tag();
    while (!is_null($token)) {
        //		echo $T_POS.'-'.$POS.' ('.$stack_size.')<br />';
        // A token may come wrapped as array(tag, errors), possibly nested: unwrap it, collecting the attached errors (only while the validator is on)
        while (is_array($token) && count($token) != 0) {
            if (is_null($XHTML_VALIDATOR_OFF)) {
                foreach ($token[1] as $error) {
                    $errors[] = _xhtml_error($error[0], array_key_exists(1, $error) ? $error[1] : '', array_key_exists(2, $error) ? $error[2] : '', array_key_exists(3, $error) ? $error[3] : '', array_key_exists('raw', $error) ? $error['raw'] : false, array_key_exists('pos', $error) ? $error['pos'] : 0);
                }
                // Errors but no tag: give up and return what has been collected so far
                if (is_null($token[0])) {
                    return array('level_ranges' => $level_ranges, 'tag_ranges' => $TAG_RANGES, 'value_ranges' => $VALUE_RANGES, 'errors' => $errors);
                }
            }
            $token = $token[0];
        }
        $basis_token = _get_tag_basis($token);
        // Open, close, or self-closing? Decided by where the first '/' sits in the token:
        //  false => no slash, handled as an opening tag
        //  1 => handled as a closing tag (presumably the token starts with '</' - confirm against _get_next_tag)
        //  anything else => slash somewhere later (presumably a self-closing tag)
        $term = strpos($token, '/');
        // While the validator is switched off, track nesting depth so it can be switched back on by the matching close
        if (!is_null($XHTML_VALIDATOR_OFF)) {
            if ($term === false) {
                $XHTML_VALIDATOR_OFF++;
            } elseif ($term == 1) {
                if ($XHTML_VALIDATOR_OFF == 0) {
                    $XHTML_VALIDATOR_OFF = NULL;
                } else {
                    $XHTML_VALIDATOR_OFF--;
                }
            }
        }
        if ($term !== 1) {
            // Opening (or self-closing) tag
            if (isset($only_one_of[$basis_token])) {
                // Counter already used up in this parent: a second occurrence
                if ($only_one_of[$basis_token] == 0) {
                    $errors[] = _xhtml_error('XHTML_ONLY_ONE_ALLOWED', $basis_token);
                }
                $only_one_of[$basis_token]--;
            }
            //			echo 'Push $basis_token<br />';
            // Record (depth, $T_POS, $POS) for this tag. NOTE(review): presumably $T_POS/$POS are the tag's start/end offsets - confirm
            $level_ranges[] = array($stack_size, $T_POS, $POS);
            if (isset($to_find[$basis_token])) {
                unset($to_find[$basis_token]);
            }
            // Relational checks (skipped for well-formed-only runs and while the validator is off)
            if (!$WELL_FORMED_ONLY && is_null($XHTML_VALIDATOR_OFF)) {
                // A whole document must have 'html' as its outermost tag
                if (!$is_fragment && $stack_size == 0 && $basis_token != 'html') {
                    $errors[] = _xhtml_error('XHTML_BAD_ROOT');
                    $bad_root = true;
                }
                if ($stack_size != 0) {
                    // Parent restricts which children it may contain
                    if (isset($ONLY_CHILDREN[$PARENT_TAG])) {
                        if (!in_array($basis_token, $ONLY_CHILDREN[$PARENT_TAG])) {
                            $errors[] = _xhtml_error('XHTML_BAD_CHILD', $basis_token, $PARENT_TAG);
                        }
                    }
                    /*if (isset($PROHIBITIONS[$PARENT_TAG]))
                    		{
                    			$prohibitions=$PROHIBITIONS[$PARENT_TAG];
                    			if (in_array($basis_token,$prohibitions)) $errors[]=_xhtml_error('XHTML_PROHIBITION',$basis_token,$PARENT_TAG);
                    		}*/
                    // Prohibitions apply to every ancestor on the stack, not just the direct parent
                    foreach ($TAG_STACK as $parent_tag) {
                        if (isset($PROHIBITIONS[$parent_tag])) {
                            $prohibitions = $PROHIBITIONS[$parent_tag];
                            if (in_array($basis_token, $prohibitions)) {
                                $errors[] = _xhtml_error('XHTML_PROHIBITION', $basis_token, $parent_tag);
                            }
                        }
                    }
                }
                // Tag needs a particular ancestor somewhere on the stack (not enforced for fragments)
                if (isset($REQUIRE_ANCESTER[$basis_token]) && !$is_fragment) {
                    if (!in_array($REQUIRE_ANCESTER[$basis_token], $TAG_STACK)) {
                        $errors[] = _xhtml_error('XHTML_MISSING_ANCESTER', $basis_token, $REQUIRE_ANCESTER[$basis_token]);
                    }
                }
                // Tag is only allowed directly under certain parents; at the top level this is only an error for whole documents
                if (isset($ONLY_PARENT[$basis_token])) {
                    if ($stack_size == 0) {
                        if (!$is_fragment) {
                            $errors[] = _xhtml_error('XHTML_BAD_PARENT', $basis_token, '/');
                        }
                    } else {
                        if (!in_array($PARENT_TAG, $ONLY_PARENT[$basis_token])) {
                            $errors[] = _xhtml_error('XHTML_BAD_PARENT', $basis_token, $PARENT_TAG);
                        }
                    }
                }
            }
            // In order to ease validation, we tolerate these in the parser (but of course, mark as errors)
            // i.e. a must-self-close tag written as a plain opening tag is NOT pushed onto the stack; it is only reported under the XML constraint
            if (is_null($XHTML_VALIDATOR_OFF) && !$WELL_FORMED_ONLY && $term === false && isset($MUST_SELFCLOSE_TAGS[$basis_token])) {
                if ($XML_CONSTRAIN) {
                    $errors[] = _xhtml_error('XHTML_NONEMPTY_TAG', $basis_token);
                }
            } else {
                if ($term === false) {
                    // Regular opening tag: push it, and start a fresh "only one of" scope for its children
                    $PARENT_TAG = $basis_token;
                    array_push($TAG_STACK, $basis_token);
                    array_push($ATT_STACK, $LAST_TAG_ATTRIBUTES);
                    array_push($content_start_stack, $POS);
                    array_push($only_one_of_stack, $only_one_of);
                    $only_one_of = $only_one_of_template;
                    ++$stack_size;
                } else {
                    // Token contains a slash but is not a closing tag: reported unless it is a must-self-close tag under the XML constraint
                    // NOTE(review): is_null($XHTML_VALIDATOR_OFF) is tested twice in this condition; the second test is redundant
                    if (is_null($XHTML_VALIDATOR_OFF) && !$WELL_FORMED_ONLY && (!$XML_CONSTRAIN || !isset($MUST_SELFCLOSE_TAGS[$basis_token])) && is_null($XHTML_VALIDATOR_OFF)) {
                        if (!$bad_root) {
                            $errors[] = _xhtml_error('XHTML_CEMPTY_TAG', $basis_token);
                        }
                    }
                }
            }
        } elseif ($term == 1) {
            // Closing tag: pop the stack until the matching open tag has been popped
            // HTML allows implicit closing. We will flag errors when we have to do it. See 1-2-3 note
            do {
                // For case 3 (see note below)
                // Nothing on the stack matches this closing tag: skip it without popping anything
                if (!in_array($basis_token, $TAG_STACK)) {
                    if (is_null($XHTML_VALIDATOR_OFF) && $XML_CONSTRAIN) {
                        $errors[] = _xhtml_error('XML_NO_CLOSE_MATCH', $basis_token, $previous);
                    }
                    break;
                }
                // Pop all the parallel stacks together and restore the parent's state
                $previous = array_pop($TAG_STACK);
                $PARENT_TAG = $TAG_STACK == array() ? '' : $TAG_STACK[count($TAG_STACK) - 1];
                $start_pos = array_pop($content_start_stack);
                array_pop($ATT_STACK);
                $only_one_of = array_pop($only_one_of_stack);
                // NOTE(review): the in_array() guard above means the stack is non-empty here, so this branch appears unreachable - confirm
                if (is_null($previous)) {
                    if (is_null($XHTML_VALIDATOR_OFF) && $XML_CONSTRAIN) {
                        $errors[] = _xhtml_error('XML_MORE_CLOSE_THAN_OPEN', $basis_token);
                    }
                    break;
                }
                if ($basis_token != $previous) {
                    // This is really tricky, and totally XHTML-incompliant. There are three situations:
                    // 1) Overlapping tags. We really can't survive this, and it's very invalid. We could only detect it if we broke support for cases (1) and (2). e.g. <i><b></i></b>
                    // 2) Implicit closing. We close everything implicitly until we find the matching tag. E.g. <i><b></i>
                    // 3) Closing something that was never open. This is tricky - we can't survive it if it was opened somewhere as a parent, as we'd end up closing a whole load of tags by rule (2) - but if it's a lone closing, we can skip it. Good e.g. <b></i></b>. Bad e.g. <div><p></div></p></div>
                    if (is_null($XHTML_VALIDATOR_OFF) && $XML_CONSTRAIN) {
                        $errors[] = _xhtml_error('XML_NO_CLOSE_MATCH', $basis_token, $previous);
                    }
                }
                if (!$WELL_FORMED_ONLY && is_null($XHTML_VALIDATOR_OFF)) {
                    // A must-self-close tag that was opened and closed separately
                    if (isset($MUST_SELFCLOSE_TAGS[$previous]) && $XML_CONSTRAIN) {
                        $errors[] = _xhtml_error('XHTML_NONEMPTY_TAG', $previous);
                    }
                    // Known tag with nothing but whitespace between its open and close, where emptiness is not allowed
                    if (!isset($MUST_SELFCLOSE_TAGS[$previous]) && !isset($POSSIBLY_EMPTY_TAGS[$previous]) && trim(substr($OUT, $start_pos, $T_POS - $start_pos)) == '') {
                        if (isset($TAGS_BLOCK[$previous]) || isset($TAGS_INLINE[$previous]) || isset($TAGS_NORMAL[$previous]) || isset($TAGS_BLOCK_DEPRECATED[$previous]) || isset($TAGS_INLINE_DEPRECATED[$previous]) || isset($TAGS_NORMAL_DEPRECATED[$previous])) {
                            $errors[] = _xhtml_error('XHTML_EMPTY_TAG', $previous);
                        }
                    }
                }
                $stack_size--;
                $level_ranges[] = array($stack_size, $T_POS, $POS);
                //			echo 'Popped $previous<br />';
                // NOTE(review): is_null($XHTML_VALIDATOR_OFF) is tested twice in this condition; the second test is redundant
                if (is_null($XHTML_VALIDATOR_OFF) && !$WELL_FORMED_ONLY && is_null($XHTML_VALIDATOR_OFF)) {
                    // Checks on the text between <script> and </script>
                    if ($previous == 'script') {
                        $tag_contents = substr($OUT, $start_pos, $T_POS - $start_pos);
                        $c_section = strpos($tag_contents, ']]>');
                        // Non-empty script with neither a closing HTML comment marker nor a CDATA terminator
                        if (trim($tag_contents) != '' && strpos($tag_contents, '//-->') === false && strpos($tag_contents, '// -->') === false && $c_section === false) {
                            $errors[] = _xhtml_error('XHTML_SCRIPT_COMMENTING', $previous);
                        } elseif ($c_section === false && strpos($tag_contents, '<!--') !== false) {
                            // HTML-comment wrapped rather than CDATA wrapped: only an error under the XML constraint
                            if ($XML_CONSTRAIN) {
                                $errors[] = _xhtml_error('XHTML_CDATA');
                            }
                        }
                        // A '</' sequence inside the script contents
                        if (strpos($tag_contents, '</') !== false) {
                            $errors[] = _xhtml_error('XML_JS_TAG_ESCAPE');
                        }
                    }
                }
            } while ($basis_token != $previous);
        }
        /*else
        		{
        			$level_ranges[]=array($stack_size,$T_POS,$POS);
        			// it's monitonic, so ignore
        		}*/
        $token = _get_next_tag();
    }
    // Check we have everything closed
    // If not, return straight away: the document-level and spelling checks below are skipped
    if ($stack_size != 0) {
        if ($XML_CONSTRAIN) {
            $errors[] = _xhtml_error('XML_NO_CLOSE', array_pop($TAG_STACK));
        }
        return array('level_ranges' => $level_ranges, 'tag_ranges' => $TAG_RANGES, 'value_ranges' => $VALUE_RANGES, 'errors' => $errors);
    }
    // Document-level checks: only for a full validation of a whole document
    if (!$well_formed_only) {
        if (!$is_fragment) {
            // Required tags that never showed up
            foreach (array_keys($to_find) as $tag) {
                $errors[] = _xhtml_error('XHTML_MISSING_TAG', $tag);
            }
            // A doctype is required normally, but is an error in mail mode
            if (!$FOUND_DOCTYPE && !$GLOBALS['MAIL_MODE']) {
                $errors[] = _xhtml_error('XHTML_DOCTYPE');
            }
            if ($FOUND_DOCTYPE && $GLOBALS['MAIL_MODE']) {
                $errors[] = _xhtml_error('MAIL_DOCTYPE');
            }
            if (!$FOUND_CONTENTTYPE) {
                $errors[] = _xhtml_error('XHTML_CONTENTTYPE');
            }
            //if (!$FOUND_KEYWORDS) $errors[]=_xhtml_error('XHTML_KEYWORDS');
            //if (!$FOUND_DESCRIPTION) $errors[]=_xhtml_error('XHTML_DESCRIPTION');
        }
        if (!$is_fragment) {
            // Check that all area-links have a corresponding hyperlink
            foreach (array_keys($AREA_LINKS) as $id) {
                if (!in_array($id, $HYPERLINK_URLS)) {
                    $errors[] = _xhtml_error('WCAG_AREA_EQUIV', $id);
                }
            }
            // Check that all labels apply to real input tags
            foreach (array_keys($FOR_LABEL_IDS_2) as $id) {
                if (!isset($INPUT_TAG_IDS[$id])) {
                    $errors[] = _xhtml_error('XHTML_ID_UNBOUND', $id);
                }
            }
        }
    }
    // Main spelling
    // Only when pspell is available and $GLOBALS['SPELLING'] is set; note this is not gated on $well_formed_only
    if (function_exists('pspell_new') && isset($GLOBALS['SPELLING'])) {
        $stripped = $OUT;
        $matches = array();
        // Blank out <style> and <script> blocks with the same number of spaces, so their contents are not spell-checked
        $num_matches = preg_match_all('#\\<style.*\\</style\\>#Umis', $stripped, $matches);
        for ($i = 0; $i < $num_matches; $i++) {
            $stripped = str_replace($matches[0][$i], str_repeat(' ', strlen($matches[0][$i])), $stripped);
        }
        $num_matches = preg_match_all('#\\<script.*\\</script\\>#Umis', $stripped, $matches);
        for ($i = 0; $i < $num_matches; $i++) {
            $stripped = str_replace($matches[0][$i], str_repeat(' ', strlen($matches[0][$i])), $stripped);
        }
        // Reduce to plain text: drop the remaining tags and decode entities
        $stripped = @html_entity_decode(strip_tags($stripped), ENT_QUOTES, get_charset());
        require_code('validation2');
        $new_errors = validate_spelling($stripped);
        // Words already reported, so each misspelling is only listed once
        $misspellings = array();
        global $POS, $LINENO, $LINESTART;
        foreach ($new_errors as $error) {
            if (array_key_exists($error[1], $misspellings)) {
                continue;
            }
            $misspellings[$error[1]] = 1;
            // Point the position globals at the first occurrence of the word in the original document before building the error
            // NOTE(review): strpos()/strrpos() return false when there is no match (word not present verbatim in $OUT, or no preceding newline) - confirm _xhtml_error copes with that
            $POS = strpos($OUT, $error[1]);
            $LINESTART = strrpos(substr($OUT, 0, $POS), chr(10));
            $LINENO = substr_count(substr($OUT, 0, $LINESTART), chr(10)) + 1;
            $errors[] = _xhtml_error($error[0], $error[1]);
        }
    }
    // NOTE(review): $OUT was imported with 'global', so per PHP's unset() semantics this only drops the local binding; the global itself keeps its value - confirm that is intended
    unset($OUT);
    return array('level_ranges' => $level_ranges, 'tag_ranges' => $TAG_RANGES, 'value_ranges' => $VALUE_RANGES, 'errors' => $errors);
}