/**
 * Checks a CSS style sheet (high level).
 *
 * Runs the sheet through _validate_css_sheet() and converts each raw error tuple
 * into the structure produced by _xhtml_error().
 *
 * @param  string The data of the style sheet
 * @return map Validation results: 'level_ranges' (always NULL for CSS), 'tag_ranges', 'value_ranges' and 'errors' (empty list when nothing was found)
 */
function check_css($data)
{
    if (!isset($GLOBALS['MAIL_MODE'])) {
        $GLOBALS['MAIL_MODE'] = false;
    }

    // A NULL result from the sheet validator is treated as "no errors"
    $_errors = _validate_css_sheet($data);
    if (is_null($_errors)) {
        $_errors = array();
    }

    $errors = array();

    global $POS, $OUT;
    global $CSS_TAG_RANGES, $CSS_VALUE_RANGES;

    // _xhtml_error() is called with these globals set, so point them at the sheet being checked
    $OUT = $data;

    foreach ($_errors as $error) {
        $POS = 0;

        // Slots 1-3 are optional message parameters. 'pos' is guarded the same way
        // check_xhtml() guards it, so a tuple lacking a position falls back to 0
        // rather than raising an undefined-index notice.
        $errors[] = _xhtml_error(
            $error[0],
            array_key_exists(1, $error) ? $error[1] : '',
            array_key_exists(2, $error) ? $error[2] : '',
            array_key_exists(3, $error) ? $error[3] : '',
            false,
            array_key_exists('pos', $error) ? $error['pos'] : 0
        );
    }

    return array(
        'level_ranges' => NULL,
        'tag_ranges' => $CSS_TAG_RANGES,
        'value_ranges' => $CSS_VALUE_RANGES,
        'errors' => $errors,
    );
}
/**
 * Check the specified XHTML, and return the results.
 *
 * Walks the tags yielded by _get_next_tag(), maintaining a stack of open tags, and
 * collects structural and (optionally) relational errors. State is shared with the
 * tag-level helpers through globals, which are all reset at the start of each call.
 *
 * @param  string The XHTML to validate
 * @param  boolean Whether to avoid checking for relational errors (true implies just a quick structural check, aka a 'well formed' check)
 * @param  boolean Whether what is being validated is an HTML fragment, rather than a whole document
 * @param  boolean Validate javascript
 * @param  boolean Validate CSS
 * @param  boolean Validate WCAG
 * @param  boolean Validate for compatibility
 * @param  boolean Validate external files
 * @param  boolean Bring up messages about manual checks
 * @return map Validation results: 'level_ranges', 'tag_ranges', 'value_ranges' and 'errors' (empty list when nothing was found)
 */
function check_xhtml($out, $well_formed_only = false, $is_fragment = false, $validation_javascript = true, $validation_css = true, $validation_wcag = true, $validation_compat = true, $validation_ext_files = true, $validation_manual = false)
{
    global $XHTML_VALIDATOR_OFF, $WELL_FORMED_ONLY, $VALIDATION_JAVASCRIPT, $VALIDATION_CSS, $VALIDATION_WCAG, $VALIDATION_COMPAT, $VALIDATION_EXT_FILES, $VALIDATION_MANUAL, $UNDER_XMLNS;

    // The loop below treats NULL as "validation on" and an integer as the nesting depth
    // of a validation-off region. NOTE(review): presumably mixed() returns NULL - confirm.
    $XHTML_VALIDATOR_OFF = mixed();
    $WELL_FORMED_ONLY = $well_formed_only;
    if (!$WELL_FORMED_ONLY) {
        require_code('validation2');
    }
    $VALIDATION_JAVASCRIPT = $validation_javascript;
    $VALIDATION_CSS = $validation_css;
    $VALIDATION_WCAG = $validation_wcag;
    $VALIDATION_COMPAT = $validation_compat;
    $VALIDATION_EXT_FILES = $validation_ext_files;
    $VALIDATION_MANUAL = $validation_manual;

    global $IDS_SO_FAR;
    $IDS_SO_FAR = array();

    // Parallel to $TAG_STACK: the position at which each open tag's content begins
    $content_start_stack = array();

    global $BLOCK_CONSTRAIN, $XML_CONSTRAIN, $LAST_TAG_ATTRIBUTES, $FOUND_DOCTYPE, $FOUND_DESCRIPTION, $FOUND_KEYWORDS, $FOUND_CONTENTTYPE, $THE_DOCTYPE, $TAGS_DEPRECATE_ALLOW, $URL_BASE, $PARENT_TAG, $TABS_SEEN, $KEYS_SEEN, $ANCHORS_SEEN, $ATT_STACK, $TAG_STACK, $POS, $LINENO, $LINESTART, $OUT, $T_POS, $PROHIBITIONS, $ONLY_PARENT, $ONLY_CHILDREN, $REQUIRE_ANCESTER, $LEN, $ANCESTER_BLOCK, $ANCESTER_INLINE, $POSSIBLY_EMPTY_TAGS, $MUST_SELFCLOSE_TAGS, $FOR_LABEL_IDS, $FOR_LABEL_IDS_2, $INPUT_TAG_IDS;
    global $TAG_RANGES, $VALUE_RANGES, $LAST_A_TAG, $A_LINKS, $XHTML_FORM_ENCODING;
    global $AREA_LINKS, $LAST_HEADING, $CRAWLED_URLS, $HYPERLINK_URLS, $EMBED_URLS, $THE_LANGUAGE, $PSPELL_LINK;
    global $TAGS_BLOCK, $TAGS_INLINE, $TAGS_NORMAL, $TAGS_BLOCK_DEPRECATED, $TAGS_INLINE_DEPRECATED, $TAGS_NORMAL_DEPRECATED;

    // Reset all shared parser state for this run
    $PSPELL_LINK = NULL;
    $THE_LANGUAGE = 'en';
    $THE_DOCTYPE = $is_fragment ? DOCTYPE_XHTML : DOCTYPE_HTML;
    $TAGS_DEPRECATE_ALLOW = true;
    $XML_CONSTRAIN = $is_fragment;
    $BLOCK_CONSTRAIN = false;
    $LINENO = 0;
    $LINESTART = 0;
    $HYPERLINK_URLS = array();
    $EMBED_URLS = array();
    $AREA_LINKS = array();
    $LAST_HEADING = 0;
    $FOUND_DOCTYPE = false;
    $FOUND_CONTENTTYPE = false;
    $FOUND_KEYWORDS = false;
    $FOUND_DESCRIPTION = false;
    $CRAWLED_URLS = array();
    $PARENT_TAG = '';
    $XHTML_FORM_ENCODING = '';
    $UNDER_XMLNS = false;
    $KEYS_SEEN = array();
    $TABS_SEEN = array();
    $TAG_RANGES = array();
    $VALUE_RANGES = array();
    $LAST_A_TAG = NULL;
    $ANCHORS_SEEN = array();
    $FOR_LABEL_IDS = array();
    $FOR_LABEL_IDS_2 = array();
    $INPUT_TAG_IDS = array();
    $TAG_STACK = array();
    $ATT_STACK = array();
    $ANCESTER_BLOCK = 0;
    $ANCESTER_INLINE = 0;
    $POS = 0;
    $OUT = $out;
    unset($out);
    $LEN = strlen($OUT);

    $level_ranges = array();
    $stack_size = 0;

    // Tags a whole document must contain; entries are removed as they are encountered
    $to_find = array('html' => 1, 'head' => 1, 'title' => 1);

    // Per-parent allowance for tags that may appear only once; a fresh copy of the
    // template is used for the children of every newly opened tag
    $only_one_of_stack = array();
    $only_one_of_template = array('title' => 1, 'head' => 1, 'body' => 1, 'base' => 1, 'thead' => 1, 'tfoot' => 1);
    $only_one_of = $only_one_of_template;

    $A_LINKS = array();
    $previous = '';
    if (!isset($GLOBALS['MAIL_MODE'])) {
        $GLOBALS['MAIL_MODE'] = false;
    }

    $errors = array();

    $bad_root = false;

    $token = _get_next_tag();
    while (!is_null($token)) {
        // echo $T_POS.'-'.$POS.' ('.$stack_size.')<br />';

        // An array token is a (token, errors) pair, possibly nested; unwrap it, recording the errors
        while (is_array($token) && count($token) != 0) {
            if (is_null($XHTML_VALIDATOR_OFF)) {
                foreach ($token[1] as $error) {
                    $errors[] = _xhtml_error(
                        $error[0],
                        array_key_exists(1, $error) ? $error[1] : '',
                        array_key_exists(2, $error) ? $error[2] : '',
                        array_key_exists(3, $error) ? $error[3] : '',
                        array_key_exists('raw', $error) ? $error['raw'] : false,
                        array_key_exists('pos', $error) ? $error['pos'] : 0
                    );
                }

                // No token came with the errors: stop and report what we have so far
                if (is_null($token[0])) {
                    return array('level_ranges' => $level_ranges, 'tag_ranges' => $TAG_RANGES, 'value_ranges' => $VALUE_RANGES, 'errors' => $errors);
                }
            }
            // NOTE(review): while the validator is off, a NULL $token[0] is not caught above,
            // so a NULL token is passed on to _get_tag_basis()/strpos() below - confirm whether
            // that is intended.
            $token = $token[0];
        }

        $basis_token = _get_tag_basis($token);

        // Open, close, or self-closed? No '/' is handled as an opening tag, a '/' at
        // offset 1 as a closing tag, and a '/' anywhere else as a self-closed tag.
        $term = strpos($token, '/');

        // Inside a validation-off region: track nesting depth, and switch validation
        // back on when a close arrives at depth zero
        if (!is_null($XHTML_VALIDATOR_OFF)) {
            if ($term === false) {
                $XHTML_VALIDATOR_OFF++;
            } elseif ($term == 1) {
                if ($XHTML_VALIDATOR_OFF == 0) {
                    $XHTML_VALIDATOR_OFF = NULL;
                } else {
                    $XHTML_VALIDATOR_OFF--;
                }
            }
        }

        if ($term !== 1) {
            if (isset($only_one_of[$basis_token])) {
                // NOTE(review): the counter keeps decrementing below zero, so only the second
                // occurrence is reported; a third duplicate passes silently - confirm intent.
                if ($only_one_of[$basis_token] == 0) {
                    $errors[] = _xhtml_error('XHTML_ONLY_ONE_ALLOWED', $basis_token);
                }
                $only_one_of[$basis_token]--;
            }

            // echo 'Push $basis_token<br />';
            $level_ranges[] = array($stack_size, $T_POS, $POS);
            if (isset($to_find[$basis_token])) {
                unset($to_find[$basis_token]);
            }

            if (!$WELL_FORMED_ONLY && is_null($XHTML_VALIDATOR_OFF)) {
                if (!$is_fragment && $stack_size == 0 && $basis_token != 'html') {
                    $errors[] = _xhtml_error('XHTML_BAD_ROOT');
                    $bad_root = true;
                }

                if ($stack_size != 0) {
                    if (isset($ONLY_CHILDREN[$PARENT_TAG])) {
                        if (!in_array($basis_token, $ONLY_CHILDREN[$PARENT_TAG])) {
                            $errors[] = _xhtml_error('XHTML_BAD_CHILD', $basis_token, $PARENT_TAG);
                        }
                    }

                    /*if (isset($PROHIBITIONS[$PARENT_TAG]))
                    {
                        $prohibitions=$PROHIBITIONS[$PARENT_TAG];
                        if (in_array($basis_token,$prohibitions)) $errors[]=_xhtml_error('XHTML_PROHIBITION',$basis_token,$PARENT_TAG);
                    }*/

                    // Prohibitions apply against every open ancestor, not just the direct parent
                    foreach ($TAG_STACK as $parent_tag) {
                        if (isset($PROHIBITIONS[$parent_tag])) {
                            $prohibitions = $PROHIBITIONS[$parent_tag];
                            if (in_array($basis_token, $prohibitions)) {
                                $errors[] = _xhtml_error('XHTML_PROHIBITION', $basis_token, $parent_tag);
                            }
                        }
                    }
                }

                if (isset($REQUIRE_ANCESTER[$basis_token]) && !$is_fragment) {
                    if (!in_array($REQUIRE_ANCESTER[$basis_token], $TAG_STACK)) {
                        $errors[] = _xhtml_error('XHTML_MISSING_ANCESTER', $basis_token, $REQUIRE_ANCESTER[$basis_token]);
                    }
                }

                if (isset($ONLY_PARENT[$basis_token])) {
                    if ($stack_size == 0) {
                        if (!$is_fragment) {
                            $errors[] = _xhtml_error('XHTML_BAD_PARENT', $basis_token, '/');
                        }
                    } else {
                        if (!in_array($PARENT_TAG, $ONLY_PARENT[$basis_token])) {
                            $errors[] = _xhtml_error('XHTML_BAD_PARENT', $basis_token, $PARENT_TAG);
                        }
                    }
                }
            }

            // In order to ease validation, we tolerate these in the parser (but of course, mark as errors)
            if (is_null($XHTML_VALIDATOR_OFF) && !$WELL_FORMED_ONLY && $term === false && isset($MUST_SELFCLOSE_TAGS[$basis_token])) {
                // A must-self-close tag written as an opening tag: never pushed onto the stack
                if ($XML_CONSTRAIN) {
                    $errors[] = _xhtml_error('XHTML_NONEMPTY_TAG', $basis_token);
                }
            } else {
                if ($term === false) {
                    // Genuine opening tag: push it, and give its children a fresh only-one-of allowance
                    $PARENT_TAG = $basis_token;
                    array_push($TAG_STACK, $basis_token);
                    array_push($ATT_STACK, $LAST_TAG_ATTRIBUTES);
                    array_push($content_start_stack, $POS);
                    array_push($only_one_of_stack, $only_one_of);
                    $only_one_of = $only_one_of_template;
                    ++$stack_size;
                } else {
                    // Self-closed tag (the original tested is_null($XHTML_VALIDATOR_OFF) twice here)
                    if (is_null($XHTML_VALIDATOR_OFF) && !$WELL_FORMED_ONLY && (!$XML_CONSTRAIN || !isset($MUST_SELFCLOSE_TAGS[$basis_token]))) {
                        if (!$bad_root) {
                            $errors[] = _xhtml_error('XHTML_CEMPTY_TAG', $basis_token);
                        }
                    }
                }
            }
        } elseif ($term == 1) {
            // HTML allows implicit closing. We will flag errors when we have to do it. See 1-2-3 note
            do {
                // For case 3 (see note below)
                if (!in_array($basis_token, $TAG_STACK)) {
                    if (is_null($XHTML_VALIDATOR_OFF) && $XML_CONSTRAIN) {
                        $errors[] = _xhtml_error('XML_NO_CLOSE_MATCH', $basis_token, $previous);
                    }
                    break;
                }

                // Pop one level off every parallel stack
                $previous = array_pop($TAG_STACK);
                $PARENT_TAG = $TAG_STACK == array() ? '' : $TAG_STACK[count($TAG_STACK) - 1];
                $start_pos = array_pop($content_start_stack);
                array_pop($ATT_STACK);
                $only_one_of = array_pop($only_one_of_stack);
                if (is_null($previous)) {
                    if (is_null($XHTML_VALIDATOR_OFF) && $XML_CONSTRAIN) {
                        $errors[] = _xhtml_error('XML_MORE_CLOSE_THAN_OPEN', $basis_token);
                    }
                    break;
                }

                if ($basis_token != $previous) {
                    // This is really tricky, and totally XHTML-incompliant. There are three situations:
                    // 1) Overlapping tags. We really can't survive this, and it's very invalid. We could only detect it if we broke support for cases (1) and (2). e.g. <i><b></i></b>
                    // 2) Implicit closing. We close everything implicitly until we find the matching tag. E.g. <i><b></i>
                    // 3) Closing something that was never open. This is tricky - we can't survive it if it was opened somewhere as a parent, as we'd end up closing a whole load of tags by rule (2) - but if it's a lone closing, we can skip it. Good e.g. <b></i></b>. Bad e.g. <div><p></div></p></div>
                    if (is_null($XHTML_VALIDATOR_OFF) && $XML_CONSTRAIN) {
                        $errors[] = _xhtml_error('XML_NO_CLOSE_MATCH', $basis_token, $previous);
                    }
                }

                if (!$WELL_FORMED_ONLY && is_null($XHTML_VALIDATOR_OFF)) {
                    if (isset($MUST_SELFCLOSE_TAGS[$previous]) && $XML_CONSTRAIN) {
                        $errors[] = _xhtml_error('XHTML_NONEMPTY_TAG', $previous);
                    }

                    // A known tag with only whitespace between its open and close is flagged as empty
                    if (!isset($MUST_SELFCLOSE_TAGS[$previous]) && !isset($POSSIBLY_EMPTY_TAGS[$previous]) && trim(substr($OUT, $start_pos, $T_POS - $start_pos)) == '') {
                        if (isset($TAGS_BLOCK[$previous]) || isset($TAGS_INLINE[$previous]) || isset($TAGS_NORMAL[$previous]) || isset($TAGS_BLOCK_DEPRECATED[$previous]) || isset($TAGS_INLINE_DEPRECATED[$previous]) || isset($TAGS_NORMAL_DEPRECATED[$previous])) {
                            $errors[] = _xhtml_error('XHTML_EMPTY_TAG', $previous);
                        }
                    }
                }
                $stack_size--;
                $level_ranges[] = array($stack_size, $T_POS, $POS);
                // echo 'Popped $previous<br />';

                // Extra checks on inline script bodies (the original tested is_null($XHTML_VALIDATOR_OFF) twice here)
                if (is_null($XHTML_VALIDATOR_OFF) && !$WELL_FORMED_ONLY) {
                    if ($previous == 'script') {
                        $tag_contents = substr($OUT, $start_pos, $T_POS - $start_pos);
                        $c_section = strpos($tag_contents, ']]>');
                        if (trim($tag_contents) != '' && strpos($tag_contents, '//-->') === false && strpos($tag_contents, '// -->') === false && $c_section === false) {
                            $errors[] = _xhtml_error('XHTML_SCRIPT_COMMENTING', $previous);
                        } elseif ($c_section === false && strpos($tag_contents, '<!--') !== false) {
                            if ($XML_CONSTRAIN) {
                                $errors[] = _xhtml_error('XHTML_CDATA');
                            }
                        }
                        if (strpos($tag_contents, '</') !== false) {
                            $errors[] = _xhtml_error('XML_JS_TAG_ESCAPE');
                        }
                    }
                }
            } while ($basis_token != $previous);
        }
        /*else
        {
            $level_ranges[]=array($stack_size,$T_POS,$POS);
            // it's self-closed, so ignore
        }*/

        $token = _get_next_tag();
    }

    // Check we have everything closed
    if ($stack_size != 0) {
        if ($XML_CONSTRAIN) {
            $errors[] = _xhtml_error('XML_NO_CLOSE', array_pop($TAG_STACK));
        }
        return array('level_ranges' => $level_ranges, 'tag_ranges' => $TAG_RANGES, 'value_ranges' => $VALUE_RANGES, 'errors' => $errors);
    }

    if (!$well_formed_only) {
        if (!$is_fragment) {
            // Whatever is still listed in $to_find never appeared in the document
            foreach (array_keys($to_find) as $tag) {
                $errors[] = _xhtml_error('XHTML_MISSING_TAG', $tag);
            }

            if (!$FOUND_DOCTYPE && !$GLOBALS['MAIL_MODE']) {
                $errors[] = _xhtml_error('XHTML_DOCTYPE');
            }
            if ($FOUND_DOCTYPE && $GLOBALS['MAIL_MODE']) {
                $errors[] = _xhtml_error('MAIL_DOCTYPE');
            }
            if (!$FOUND_CONTENTTYPE) {
                $errors[] = _xhtml_error('XHTML_CONTENTTYPE');
            }
            //if (!$FOUND_KEYWORDS) $errors[]=_xhtml_error('XHTML_KEYWORDS');
            //if (!$FOUND_DESCRIPTION) $errors[]=_xhtml_error('XHTML_DESCRIPTION');
        }

        if (!$is_fragment) {
            // Check that all area-links have a corresponding hyperlink
            foreach (array_keys($AREA_LINKS) as $id) {
                if (!in_array($id, $HYPERLINK_URLS)) {
                    $errors[] = _xhtml_error('WCAG_AREA_EQUIV', $id);
                }
            }

            // Check that all labels apply to real input tags
            foreach (array_keys($FOR_LABEL_IDS_2) as $id) {
                if (!isset($INPUT_TAG_IDS[$id])) {
                    $errors[] = _xhtml_error('XHTML_ID_UNBOUND', $id);
                }
            }
        }
    }

    // Main spelling
    if (function_exists('pspell_new') && isset($GLOBALS['SPELLING'])) {
        // Blank out style and script elements with same-length runs of spaces before stripping tags
        $stripped = $OUT;
        $matches = array();
        $num_matches = preg_match_all('#\\<style.*\\</style\\>#Umis', $stripped, $matches);
        for ($i = 0; $i < $num_matches; $i++) {
            $stripped = str_replace($matches[0][$i], str_repeat(' ', strlen($matches[0][$i])), $stripped);
        }
        $num_matches = preg_match_all('#\\<script.*\\</script\\>#Umis', $stripped, $matches);
        for ($i = 0; $i < $num_matches; $i++) {
            $stripped = str_replace($matches[0][$i], str_repeat(' ', strlen($matches[0][$i])), $stripped);
        }
        $stripped = @html_entity_decode(strip_tags($stripped), ENT_QUOTES, get_charset());

        require_code('validation2');
        $new_errors = validate_spelling($stripped);
        $misspellings = array();
        foreach ($new_errors as $error) {
            // Report each distinct misspelling once only
            if (array_key_exists($error[1], $misspellings)) {
                continue;
            }
            $misspellings[$error[1]] = 1;

            // Point the position globals at the first occurrence of the word in the raw markup.
            // strpos()/strrpos() return false when nothing is found; keep the globals as integers.
            $found_at = strpos($OUT, $error[1]);
            $POS = ($found_at === false) ? 0 : $found_at;
            $before = substr($OUT, 0, $POS);
            $last_newline = strrpos($before, chr(10));
            $LINESTART = ($last_newline === false) ? 0 : $last_newline;

            // Line index = number of newlines before the word. The previous formula (newlines
            // before the last newline, plus one) matched this except on the first line, where
            // it gave 1 - the same value it gave for the second line.
            $LINENO = substr_count($before, chr(10));

            $errors[] = _xhtml_error($error[0], $error[1]);
        }
    }

    // Per PHP semantics this only drops the function's reference; the global itself is untouched
    unset($OUT);

    return array('level_ranges' => $level_ranges, 'tag_ranges' => $TAG_RANGES, 'value_ranges' => $VALUE_RANGES, 'errors' => $errors);
}