getHTMLDefinition() public method

Retrieves an object reference to the HTML definition.
public getHTMLDefinition ( boolean $raw = false, boolean $optimized = false ) : HTMLPurifier_HTMLDefinition
$raw boolean Return a copy that has not been set up yet. Must be called before the definition has been set up; otherwise it will not work.
$optimized boolean If true, this method may return null, to indicate that a cached version of the modified definition object is available and no further edits are necessary. Consider using maybeGetRawHTMLDefinition, which is more explicitly named, instead.
return HTMLPurifier_HTMLDefinition
示例#1
0
 /**
  * Stores the config and caches the Output.* directives and the HTML
  * definition that this object reads from it.
  *
  * @param HTMLPurifier_Config $config Source of the directives and of the HTML definition.
  * @param HTMLPurifier_Context $context Not used by this constructor.
  */
 public function __construct($config, $context)
 {
     $this->config = $config;

     // Output directives, read once up front.
     $this->_scriptFix = $config->get('Output.CommentScriptContents');
     $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
     $this->_sortAttr = $config->get('Output.SortAttr');
     $this->_flashCompat = $config->get('Output.FlashCompat');

     // The XHTML flag is taken from the doctype of the active definition.
     $definition = $config->getHTMLDefinition();
     $this->_def = $definition;
     $this->_xhtml = $definition->doctype->xml;
 }
示例#2
0
 /**
  * Rebuilds the children of a list so that every non-whitespace child is
  * either an li or ends up nested inside one.
  *
  * @param array $children Child nodes to validate.
  * @param HTMLPurifier_Config $config Supplies the HTML definition (checked for 'li').
  * @param HTMLPurifier_Context $context Not read by this method.
  * @return array|bool The rebuilt child list, or false when the parent node should be deleted.
  */
 public function validateChildren($children, $config, $context)
 {
     // Flag for subclasses
     $this->whitespace = false;

     // No children at all: delete the parent node.
     if (empty($children)) {
         return false;
     }

     // A list cannot be valid when li itself is not allowed.
     $definition = $config->getHTMLDefinition();
     if (!isset($definition->info['li'])) {
         trigger_error("Cannot allow ul/ol without allowing li", E_USER_WARNING);
         return false;
     }

     $kept = array();
     // Flips to true at the first child that is not whitespace.
     $seen_content = false;
     // The li that following non-li children get tucked into, once there is one.
     $open_li = null;

     foreach ($children as $child) {
         // Whitespace is carried over untouched and never counts as content.
         if (!empty($child->is_whitespace)) {
             $kept[] = $child;
             continue;
         }
         $seen_content = true;

         if ($child->name === 'li') {
             $open_li = $child;
             $kept[] = $child;
             continue;
         }

         // Anything else is appended to the most recent li; a fresh li is
         // created when none has been seen yet.
         // Invariant: the node is expected to be ol/ul.
         // ToDo: this does not distinguish an li that was already in the
         // input from one created here for non-list content; non-list
         // elements should only be appended to the latter.
         if ($open_li === null) {
             $open_li = new HTMLPurifier_Node_Element('li');
             $kept[] = $open_li;
         }
         $open_li->children[] = $child;
         $open_li->empty = false;
         // XXX ToDo (original author): check for this error elsewhere
     }

     // Nothing survived, or only whitespace did: delete the parent node.
     if (empty($kept) || !$seen_content) {
         return false;
     }
     return $kept;
 }
示例#3
0
 /**
  * Renders the printer output: a div with class HTMLPurifier_Printer
  * wrapping the doctype, environment, content-set and info sections.
  *
  * @param HTMLPurifier_Config $config Config whose HTML definition is rendered; stored on the instance.
  * @return string
  */
 public function render($config)
 {
     $this->config =& $config;
     $this->def = $config->getHTMLDefinition();

     // Pieces are produced in output order and joined at the end.
     $pieces = array(
         $this->start('div', array('class' => 'HTMLPurifier_Printer')),
         $this->renderDoctype(),
         $this->renderEnvironment(),
         $this->renderContentSets(),
         $this->renderInfo(),
         $this->end('div'),
     );
     return implode('', $pieces);
 }
 /**
  * Builds (or reuses) a config object and runs the HTML5 definition setup
  * against its raw HTML definition.
  *
  * @param  string|array|HTMLPurifier_Config $config An existing config (reused as is), an
  *         INI source passed to loadIni(), an array passed to loadArray(), or null.
  * @param  HTMLPurifier_ConfigSchema $schema Schema for a newly created config; when it is
  *         not a schema instance, HTMLPurifier_ConfigSchema::makeFromSerial() is used.
  * @return HTMLPurifier_Config
  */
 public static function create($config = null, HTMLPurifier_ConfigSchema $schema = null)
 {
     $schema = $schema instanceof HTMLPurifier_ConfigSchema
         ? $schema
         : HTMLPurifier_ConfigSchema::makeFromSerial();

     $configObj = $config;
     if (!$configObj instanceof HTMLPurifier_Config) {
         // Fresh config: apply the defaults first so that user-supplied
         // values loaded afterwards can override them.
         $configObj = new HTMLPurifier_Config($schema);
         $configObj->set('Core.Encoding', 'UTF-8');
         $configObj->set('HTML.Doctype', 'HTML 4.01 Transitional');

         if (is_string($config)) {
             $configObj->loadIni($config);
         } elseif (is_array($config)) {
             $configObj->loadArray($config);
         }
     }

     // Request the raw (not yet set up) definition and hand it to the
     // HTML5 setup. Per the original author's note, this finalizes the config.
     $def = $configObj->getHTMLDefinition(true);
     HTMLPurifier_HTML5Definition::setup($def);

     return $configObj;
 }
 /**
  * Filters a token stream against the HTML definition: applies tag
  * transforms, drops or escapes tags that are not defined, enforces
  * required attributes, and keeps, textifies or strips comments.
  * Text tokens pass through; any other non-tag token type is dropped.
  *
  * @param HTMLPurifier_Token[] $tokens Tokens to filter.
  * @param HTMLPurifier_Config $config Supplies the HTML definition and the Core.* / HTML.* directives read below.
  * @param HTMLPurifier_Context $context 'CurrentToken' is registered for the duration of the call;
  *        'ErrorCollector' is read when Core.CollectErrors is set.
  * @return array|HTMLPurifier_Token[] The tokens that survived filtering, in input order.
  */
 public function execute($tokens, $config, $context)
 {
     $definition = $config->getHTMLDefinition();
     $generator = new HTMLPurifier_Generator($config, $context);
     $result = array();
     $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
     $remove_invalid_img = $config->get('Core.RemoveInvalidImg');
     // currently only used to determine if comments should be kept
     $trusted = $config->get('HTML.Trusted');
     $comment_lookup = $config->get('HTML.AllowedComments');
     $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
     // comments are only inspected when at least one allow rule is configured
     $check_comments = $comment_lookup !== array() || $comment_regexp !== null;
     $remove_script_contents = $config->get('Core.RemoveScriptContents');
     $hidden_elements = $config->get('Core.HiddenElements');
     // remove script contents compatibility: an explicit true/false for
     // Core.RemoveScriptContents overrides the 'script' entry of the
     // hidden elements lookup; any other value leaves the lookup as is
     if ($remove_script_contents === true) {
         $hidden_elements['script'] = true;
     } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
         unset($hidden_elements['script']);
     }
     $attr_validator = new HTMLPurifier_AttrValidator();
     // removes tokens until it reaches a closing tag with its value
     $remove_until = false;
     // converts comments into text tokens when this is equal to a tag name
     $textify_comments = false;
     // NOTE(review): $token is also the foreach variable below; presumably
     // register() binds it by reference so the context always sees the token
     // being processed -- confirm before renaming either variable.
     $token = false;
     $context->register('CurrentToken', $token);
     // $e stays false unless error collection is enabled
     $e = false;
     if ($config->get('Core.CollectErrors')) {
         $e =& $context->get('ErrorCollector');
     }
     foreach ($tokens as $token) {
         if ($remove_until) {
             // skip everything until a tag named $remove_until shows up;
             // that tag is not skipped here and is processed normally below
             if (empty($token->is_tag) || $token->name !== $remove_until) {
                 continue;
             }
         }
         if (!empty($token->is_tag)) {
             // DEFINITION CALL
             // before any processing, try to transform the element
             if (isset($definition->info_tag_transform[$token->name])) {
                 $original_name = $token->name;
                 // there is a transformation for this tag
                 // DEFINITION CALL
                 $token = $definition->info_tag_transform[$token->name]->transform($token, $config, $context);
                 if ($e) {
                     $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
                 }
             }
             if (isset($definition->info[$token->name])) {
                 // mostly everything's good, but
                 // we need to make sure required attributes are in order
                 // (img is exempt from this check unless Core.RemoveInvalidImg is set)
                 if (($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) && $definition->info[$token->name]->required_attr && ($token->name != 'img' || $remove_invalid_img)) {
                     $attr_validator->validateToken($token, $config, $context);
                     $ok = true;
                     foreach ($definition->info[$token->name]->required_attr as $name) {
                         if (!isset($token->attr[$name])) {
                             $ok = false;
                             break;
                         }
                     }
                     if (!$ok) {
                         if ($e) {
                             // $name still holds the first missing attribute from the loop above
                             $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $name);
                         }
                         continue;
                     }
                     // attributes were validated above; flag the token accordingly
                     $token->armor['ValidateAttributes'] = true;
                 }
                 // track whether we are inside an allowed element that is also
                 // listed as hidden, so that comments inside it become text
                 if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                     $textify_comments = $token->name;
                 } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
                     $textify_comments = false;
                 }
             } elseif ($escape_invalid_tags) {
                 // invalid tag, generate HTML representation and insert in
                 if ($e) {
                     $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                 }
                 $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
             } else {
                 // check if we need to destroy all of the tag's children
                 // CAN BE GENERICIZED
                 if (isset($hidden_elements[$token->name])) {
                     if ($token instanceof HTMLPurifier_Token_Start) {
                         // start removing everything up to the matching tag name
                         $remove_until = $token->name;
                     } elseif ($token instanceof HTMLPurifier_Token_Empty) {
                         // do nothing: we're still looking
                     } else {
                         // any other tag token of a hidden element ends the removal
                         $remove_until = false;
                     }
                     if ($e) {
                         $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                     }
                 } else {
                     if ($e) {
                         $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                     }
                 }
                 // the undefined tag itself is always dropped
                 continue;
             }
         } elseif ($token instanceof HTMLPurifier_Token_Comment) {
             // textify comments in script tags when they are allowed
             if ($textify_comments !== false) {
                 $data = $token->data;
                 $token = new HTMLPurifier_Token_Text($data);
             } elseif ($trusted || $check_comments) {
                 // always cleanup comments
                 $trailing_hyphen = false;
                 if ($e) {
                     // perform check whether or not there's a trailing hyphen
                     // (only needed for error reporting, hence guarded by $e)
                     if (substr($token->data, -1) == '-') {
                         $trailing_hyphen = true;
                     }
                 }
                 $token->data = rtrim($token->data, '-');
                 $found_double_hyphen = false;
                 // collapse repeatedly until no '--' sequence is left
                 while (strpos($token->data, '--') !== false) {
                     $found_double_hyphen = true;
                     $token->data = str_replace('--', '-', $token->data);
                 }
                 // keep the comment when trusted, when its trimmed text is in the
                 // allowed lookup, or when it matches the allowed regexp
                 if ($trusted || !empty($comment_lookup[trim($token->data)]) || $comment_regexp !== null && preg_match($comment_regexp, trim($token->data))) {
                     // OK good
                     if ($e) {
                         if ($trailing_hyphen) {
                             $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
                         }
                         if ($found_double_hyphen) {
                             $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                         }
                     }
                 } else {
                     if ($e) {
                         $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                     }
                     continue;
                 }
             } else {
                 // strip comments
                 if ($e) {
                     $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                 }
                 continue;
             }
         } elseif ($token instanceof HTMLPurifier_Token_Text) {
             // text tokens are kept unchanged
         } else {
             // any other kind of token is dropped
             continue;
         }
         $result[] = $token;
     }
     if ($remove_until && $e) {
         // we removed tokens until the end, throw error
         $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
     }
     $context->destroy('CurrentToken');
     return $result;
 }
示例#6
0
 /**
  * Validates the attributes of a token, mutating the token as necessary so
  * that it is left with only attributes that passed validation.
  *
  * Tokens that are neither start nor empty tags are left untouched. The
  * token's attribute array is replaced in one step at the very end, after
  * all transforms and validations have run.
  *
  * Context side effects: 'IDAccumulator' is registered if it is missing and
  * stays registered; 'CurrentToken' is registered only if it is missing and
  * is destroyed again before returning; 'CurrentAttr' is registered while
  * the attributes are being iterated.
  *
  * @param HTMLPurifier_Token $token Token to validate.
  * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
  * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context
  * @return void
  */
 public function validateToken($token, $config, $context)
 {
     $definition = $config->getHTMLDefinition();
     $e =& $context->get('ErrorCollector', true);
     // initialize IDAccumulator if necessary
     $ok =& $context->get('IDAccumulator', true);
     if (!$ok) {
         $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
         $context->register('IDAccumulator', $id_accumulator);
     }
     // initialize CurrentToken if necessary
     $current_token =& $context->get('CurrentToken', true);
     if (!$current_token) {
         $context->register('CurrentToken', $token);
     }
     if (!$token instanceof HTMLPurifier_Token_Start && !$token instanceof HTMLPurifier_Token_Empty) {
         // Nothing to validate. If we registered CurrentToken ourselves just
         // above, remove it again so that no stale token is left behind in
         // the context (mirrors the cleanup at the end of this method).
         if (!$current_token) {
             $context->destroy('CurrentToken');
         }
         return;
     }
     // create alias to global definition array, see also $defs
     // DEFINITION CALL
     $d_defs = $definition->info_global_attr;
     // don't update token until the very end, to ensure an atomic update
     $attr = $token->attr;
     // do global transformations (pre)
     // nothing currently utilizes this
     foreach ($definition->info_attr_transform_pre as $transform) {
         // $o keeps the attributes as they were before this transform, so
         // that a change can be reported to the error collector
         $attr = $transform->transform($o = $attr, $config, $context);
         if ($e) {
             if ($attr != $o) {
                 $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
             }
         }
     }
     // do local transformations only applicable to this element (pre)
     // ex. <p align="right"> to <p style="text-align:right;">
     foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
         $attr = $transform->transform($o = $attr, $config, $context);
         if ($e) {
             if ($attr != $o) {
                 $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
             }
         }
     }
     // create alias to this element's attribute definition array, see
     // also $d_defs (global attribute definition array)
     // DEFINITION CALL
     $defs = $definition->info[$token->name]->attr;
     $attr_key = false;
     $context->register('CurrentAttr', $attr_key);
     // iterate through all the attribute keypairs
     // Watch out for name collisions: $key has previously been used
     foreach ($attr as $attr_key => $value) {
         // call the definition
         if (isset($defs[$attr_key])) {
             // there is a local definition defined
             if ($defs[$attr_key] === false) {
                 // We've explicitly been told not to allow this attribute.
                 // This is usually when there's a global definition
                 // that must be overridden.
                 // Theoretically speaking, we could have a
                 // AttrDef_DenyAll, but this is faster!
                 $result = false;
             } else {
                 // validate according to the element's definition
                 $result = $defs[$attr_key]->validate($value, $config, $context);
             }
         } elseif (isset($d_defs[$attr_key])) {
             // there is a global definition defined, validate according
             // to the global definition
             $result = $d_defs[$attr_key]->validate($value, $config, $context);
         } else {
             // system never heard of the attribute? DELETE!
             $result = false;
         }
         // put the results into effect
         if ($result === false || $result === null) {
             // this is a generic error message that should be replaced
             // with more specific ones when possible
             if ($e) {
                 $e->send(E_ERROR, 'AttrValidator: Attribute removed');
             }
             // remove the attribute
             unset($attr[$attr_key]);
         } elseif (is_string($result)) {
             // generally, if a substitution is happening, there
             // was some sort of implicit correction going on. We'll
             // delegate it to the attribute classes to say exactly what.
             // simple substitution
             $attr[$attr_key] = $result;
         } else {
             // nothing happens
         }
         // we'd also want slightly more complicated substitution
         // involving an array as the return value,
         // although we're not sure how colliding attributes would
         // resolve (certain ones would be completely overridden,
         // others would prepend themselves).
     }
     $context->destroy('CurrentAttr');
     // post transforms
     // global (error reporting untested)
     foreach ($definition->info_attr_transform_post as $transform) {
         $attr = $transform->transform($o = $attr, $config, $context);
         if ($e) {
             if ($attr != $o) {
                 $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
             }
         }
     }
     // local (error reporting untested)
     foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
         $attr = $transform->transform($o = $attr, $config, $context);
         if ($e) {
             if ($attr != $o) {
                 $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
             }
         }
     }
     // single write-back of the fully processed attributes
     $token->attr = $attr;
     // destroy CurrentToken if we made it ourselves
     if (!$current_token) {
         $context->destroy('CurrentToken');
     }
 }
示例#7
0
 /**
  * Checks whether the HTML environment provides everything listed in
  * $this->needed. For example, if p tags are not allowed, the
  * Auto-Paragraphing injector should not be enabled.
  *
  * Entries of $this->needed are either `index => element` (element only)
  * or `element => array of attribute names`.
  *
  * @param HTMLPurifier_Config $config
  * @return bool|string Boolean false if success, string of missing needed element/attribute if failure
  */
 public function checkNeeded($config)
 {
     $definition = $config->getHTMLDefinition();

     foreach ($this->needed as $key => $entry) {
         // An integer key means the entry itself is the element name.
         $element = is_int($key) ? $entry : $key;

         if (!isset($definition->info[$element])) {
             return $element;
         }

         // Only array entries carry attribute requirements.
         if (is_array($entry)) {
             foreach ($entry as $attribute) {
                 if (!isset($definition->info[$element]->attr[$attribute])) {
                     return "{$element}.{$attribute}";
                 }
             }
         }
     }

     return false;
 }
示例#8
0
 /**
  * One-time lazy setup of the real and fake element lookups; later calls
  * are no-ops once $this->init has been set.
  *
  * @param HTMLPurifier_Config $config Supplies the HTML definition.
  */
 private function init($config)
 {
     if ($this->init) {
         return;
     }

     $definition = $config->getHTMLDefinition();

     // Keep the originally configured elements as the "real" set.
     $this->real_elements = $this->elements;

     // The "fake" set is the definition's 'Flow' content set plus #PCDATA.
     $flow = $definition->info_content_sets['Flow'];
     $flow['#PCDATA'] = true;
     $this->fake_elements = $flow;

     $this->init = true;
 }
 /**
  * @param HTMLPurifier_Token[] $tokens
  * @param HTMLPurifier_Config $config
  * @param HTMLPurifier_Context $context
  * @return HTMLPurifier_Token[]
  * @throws HTMLPurifier_Exception
  */
 public function execute($tokens, $config, $context)
 {
     $definition = $config->getHTMLDefinition();
     // local variables
     $generator = new HTMLPurifier_Generator($config, $context);
     $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
     // used for autoclose early abortion
     $global_parent_allowed_elements = $definition->info_parent_def->child->getAllowedElements($config);
     $e = $context->get('ErrorCollector', true);
     $i = false;
     // injector index
     list($zipper, $token) = HTMLPurifier_Zipper::fromArray($tokens);
     if ($token === NULL) {
         return array();
     }
     $reprocess = false;
     // whether or not to reprocess the same token
     $stack = array();
     // member variables
     $this->stack =& $stack;
     $this->tokens =& $tokens;
     $this->token =& $token;
     $this->zipper =& $zipper;
     $this->config = $config;
     $this->context = $context;
     // context variables
     $context->register('CurrentNesting', $stack);
     $context->register('InputZipper', $zipper);
     $context->register('CurrentToken', $token);
     // -- begin INJECTOR --
     $this->injectors = array();
     $injectors = $config->getBatch('AutoFormat');
     $def_injectors = $definition->info_injector;
     $custom_injectors = $injectors['Custom'];
     unset($injectors['Custom']);
     // special case
     foreach ($injectors as $injector => $b) {
         // XXX: Fix with a legitimate lookup table of enabled filters
         if (strpos($injector, '.') !== false) {
             continue;
         }
         $injector = "HTMLPurifier_Injector_{$injector}";
         if (!$b) {
             continue;
         }
         $this->injectors[] = new $injector();
     }
     foreach ($def_injectors as $injector) {
         // assumed to be objects
         $this->injectors[] = $injector;
     }
     foreach ($custom_injectors as $injector) {
         if (!$injector) {
             continue;
         }
         if (is_string($injector)) {
             $injector = "HTMLPurifier_Injector_{$injector}";
             $injector = new $injector();
         }
         $this->injectors[] = $injector;
     }
     // give the injectors references to the definition and context
     // variables for performance reasons
     foreach ($this->injectors as $ix => $injector) {
         $error = $injector->prepare($config, $context);
         if (!$error) {
             continue;
         }
         array_splice($this->injectors, $ix, 1);
         // rm the injector
         trigger_error("Cannot enable {$injector->name} injector because {$error} is not allowed", E_USER_WARNING);
     }
     // -- end INJECTOR --
     // a note on reprocessing:
     //      In order to reduce code duplication, whenever some code needs
     //      to make HTML changes in order to make things "correct", the
     //      new HTML gets sent through the purifier, regardless of its
     //      status. This means that if we add a start token, because it
     //      was totally necessary, we don't have to update nesting; we just
     //      punt ($reprocess = true; continue;) and it does that for us.
     // isset is in loop because $tokens size changes during loop exec
     for (;; $reprocess ? $reprocess = false : ($token = $zipper->next($token))) {
         // check for a rewind
         if (is_int($i)) {
             // possibility: disable rewinding if the current token has a
             // rewind set on it already. This would offer protection from
             // infinite loop, but might hinder some advanced rewinding.
             $rewind_offset = $this->injectors[$i]->getRewindOffset();
             if (is_int($rewind_offset)) {
                 for ($j = 0; $j < $rewind_offset; $j++) {
                     if (empty($zipper->front)) {
                         break;
                     }
                     $token = $zipper->prev($token);
                     // indicate that other injectors should not process this token,
                     // but we need to reprocess it
                     unset($token->skip[$i]);
                     $token->rewind = $i;
                     if ($token instanceof HTMLPurifier_Token_Start) {
                         array_pop($this->stack);
                     } elseif ($token instanceof HTMLPurifier_Token_End) {
                         $this->stack[] = $token->start;
                     }
                 }
             }
             $i = false;
         }
         // handle case of document end
         if ($token === NULL) {
             // kill processing if stack is empty
             if (empty($this->stack)) {
                 break;
             }
             // peek
             $top_nesting = array_pop($this->stack);
             $this->stack[] = $top_nesting;
             // send error [TagClosedSuppress]
             if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
                 $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
             }
             // append, don't splice, since this is the end
             $token = new HTMLPurifier_Token_End($top_nesting->name);
             // punt!
             $reprocess = true;
             continue;
         }
         //echo '<br>'; printZipper($zipper, $token);//printTokens($this->stack);
         //flush();
         // quick-check: if it's not a tag, no need to process
         if (empty($token->is_tag)) {
             if ($token instanceof HTMLPurifier_Token_Text) {
                 foreach ($this->injectors as $i => $injector) {
                     if (isset($token->skip[$i])) {
                         continue;
                     }
                     if ($token->rewind !== null && $token->rewind !== $i) {
                         continue;
                     }
                     // XXX fuckup
                     $r = $token;
                     $injector->handleText($r);
                     $token = $this->processToken($r, $i);
                     $reprocess = true;
                     break;
                 }
             }
             // another possibility is a comment
             continue;
         }
         if (isset($definition->info[$token->name])) {
             $type = $definition->info[$token->name]->child->type;
         } else {
             $type = false;
             // Type is unknown, treat accordingly
         }
         // quick tag checks: anything that's *not* an end tag
         $ok = false;
         if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
             // claims to be a start tag but is empty
             $token = new HTMLPurifier_Token_Empty($token->name, $token->attr, $token->line, $token->col, $token->armor);
             $ok = true;
         } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
             // claims to be empty but really is a start tag
             // NB: this assignment is required
             $old_token = $token;
             $token = new HTMLPurifier_Token_End($token->name);
             $token = $this->insertBefore(new HTMLPurifier_Token_Start($old_token->name, $old_token->attr, $old_token->line, $old_token->col, $old_token->armor));
             // punt (since we had to modify the input stream in a non-trivial way)
             $reprocess = true;
             continue;
         } elseif ($token instanceof HTMLPurifier_Token_Empty) {
             // real empty token
             $ok = true;
         } elseif ($token instanceof HTMLPurifier_Token_Start) {
             // start tag
             // ...unless they also have to close their parent
             if (!empty($this->stack)) {
                 // Performance note: you might think that it's rather
                 // inefficient, recalculating the autoclose information
                 // for every tag that a token closes (since when we
                 // do an autoclose, we push a new token into the
                 // stream and then /process/ that, before
                 // re-processing this token.)  But this is
                 // necessary, because an injector can make an
                 // arbitrary transformations to the autoclosing
                 // tokens we introduce, so things may have changed
                 // in the meantime.  Also, doing the inefficient thing is
                 // "easy" to reason about (for certain perverse definitions
                 // of "easy")
                 $parent = array_pop($this->stack);
                 $this->stack[] = $parent;
                 $parent_def = null;
                 $parent_elements = null;
                 $autoclose = false;
                 if (isset($definition->info[$parent->name])) {
                     $parent_def = $definition->info[$parent->name];
                     $parent_elements = $parent_def->child->getAllowedElements($config);
                     $autoclose = !isset($parent_elements[$token->name]);
                 }
                 if ($autoclose && $definition->info[$token->name]->wrap) {
                     // Check if an element can be wrapped by another
                     // element to make it valid in a context (for
                     // example, <ul><ul> needs a <li> in between)
                     $wrapname = $definition->info[$token->name]->wrap;
                     $wrapdef = $definition->info[$wrapname];
                     $elements = $wrapdef->child->getAllowedElements($config);
                     if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
                         $newtoken = new HTMLPurifier_Token_Start($wrapname);
                         $token = $this->insertBefore($newtoken);
                         $reprocess = true;
                         continue;
                     }
                 }
                 $carryover = false;
                 if ($autoclose && $parent_def->formatting) {
                     $carryover = true;
                 }
                 if ($autoclose) {
                     // check if this autoclose is doomed to fail
                     // (this rechecks $parent, which his harmless)
                     $autoclose_ok = isset($global_parent_allowed_elements[$token->name]);
                     if (!$autoclose_ok) {
                         foreach ($this->stack as $ancestor) {
                             $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config);
                             if (isset($elements[$token->name])) {
                                 $autoclose_ok = true;
                                 break;
                             }
                             if ($definition->info[$token->name]->wrap) {
                                 $wrapname = $definition->info[$token->name]->wrap;
                                 $wrapdef = $definition->info[$wrapname];
                                 $wrap_elements = $wrapdef->child->getAllowedElements($config);
                                 if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) {
                                     $autoclose_ok = true;
                                     break;
                                 }
                             }
                         }
                     }
                     if ($autoclose_ok) {
                         // errors need to be updated
                         $new_token = new HTMLPurifier_Token_End($parent->name);
                         $new_token->start = $parent;
                         // [TagClosedSuppress]
                         if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
                             if (!$carryover) {
                                 $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                             } else {
                                 $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
                             }
                         }
                         if ($carryover) {
                             $element = clone $parent;
                             // [TagClosedAuto]
                             $element->armor['MakeWellFormed_TagClosedError'] = true;
                             $element->carryover = true;
                             $token = $this->processToken(array($new_token, $token, $element));
                         } else {
                             $token = $this->insertBefore($new_token);
                         }
                     } else {
                         $token = $this->remove();
                     }
                     $reprocess = true;
                     continue;
                 }
             }
             $ok = true;
         }
         if ($ok) {
             foreach ($this->injectors as $i => $injector) {
                 if (isset($token->skip[$i])) {
                     continue;
                 }
                 if ($token->rewind !== null && $token->rewind !== $i) {
                     continue;
                 }
                 $r = $token;
                 $injector->handleElement($r);
                 $token = $this->processToken($r, $i);
                 $reprocess = true;
                 break;
             }
             if (!$reprocess) {
                 // ah, nothing interesting happened; do normal processing
                 if ($token instanceof HTMLPurifier_Token_Start) {
                     $this->stack[] = $token;
                 } elseif ($token instanceof HTMLPurifier_Token_End) {
                     throw new HTMLPurifier_Exception('Improper handling of end tag in start code; possible error in MakeWellFormed');
                 }
             }
             continue;
         }
         // sanity check: we should be dealing with a closing tag
         if (!$token instanceof HTMLPurifier_Token_End) {
             throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
         }
         // make sure that we have something open
         if (empty($this->stack)) {
             if ($escape_invalid_tags) {
                 if ($e) {
                     $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                 }
                 $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
             } else {
                 if ($e) {
                     $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                 }
                 $token = $this->remove();
             }
             $reprocess = true;
             continue;
         }
         // first, check for the simplest case: everything closes neatly.
         // Eventually, everything passes through here; if there are problems
         // we modify the input stream accordingly and then punt, so that
         // the tokens get processed again.
         $current_parent = array_pop($this->stack);
         if ($current_parent->name == $token->name) {
             $token->start = $current_parent;
             foreach ($this->injectors as $i => $injector) {
                 if (isset($token->skip[$i])) {
                     continue;
                 }
                 if ($token->rewind !== null && $token->rewind !== $i) {
                     continue;
                 }
                 $r = $token;
                 $injector->handleEnd($r);
                 $token = $this->processToken($r, $i);
                 $this->stack[] = $current_parent;
                 $reprocess = true;
                 break;
             }
             continue;
         }
         // okay, so we're trying to close the wrong tag
         // undo the pop previous pop
         $this->stack[] = $current_parent;
         // scroll back the entire nest, trying to find our tag.
         // (feature could be to specify how far you'd like to go)
         $size = count($this->stack);
         // -2 because -1 is the last element, but we already checked that
         $skipped_tags = false;
         for ($j = $size - 2; $j >= 0; $j--) {
             if ($this->stack[$j]->name == $token->name) {
                 $skipped_tags = array_slice($this->stack, $j);
                 break;
             }
         }
         // we didn't find the tag, so remove
         if ($skipped_tags === false) {
             if ($escape_invalid_tags) {
                 if ($e) {
                     $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                 }
                 $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
             } else {
                 if ($e) {
                     $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                 }
                 $token = $this->remove();
             }
             $reprocess = true;
             continue;
         }
         // do errors, in REVERSE $j order: a,b,c with </a></b></c>
         $c = count($skipped_tags);
         if ($e) {
             for ($j = $c - 1; $j > 0; $j--) {
                 // notice we exclude $j == 0, i.e. the current ending tag, from
                 // the errors... [TagClosedSuppress]
                 if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
                     $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
                 }
             }
         }
         // insert tags, in FORWARD $j order: c,b,a with </a></b></c>
         $replace = array($token);
         for ($j = 1; $j < $c; $j++) {
             // ...as well as from the insertions
             $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
             $new_token->start = $skipped_tags[$j];
             array_unshift($replace, $new_token);
             if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
                 // [TagClosedAuto]
                 $element = clone $skipped_tags[$j];
                 $element->carryover = true;
                 $element->armor['MakeWellFormed_TagClosedError'] = true;
                 $replace[] = $element;
             }
         }
         $token = $this->processToken($replace);
         $reprocess = true;
         continue;
     }
     $context->destroy('CurrentToken');
     $context->destroy('CurrentNesting');
     $context->destroy('InputZipper');
     unset($this->injectors, $this->stack, $this->tokens);
     return $zipper->toArray($token);
 }
示例#10
0
 /**
  * Extends the purifier's HTML definition with additional elements and attributes.
  *
  * The raw definition is requested with the "optimized" flag, so the call may
  * yield nothing (a cached definition is then used as-is); in that case this
  * method leaves everything untouched.
  *
  * @param HTMLPurifier_Config $config HTMLPurifier_Config instance
  * @return void
  */
 private function __addHtmlDef(HTMLPurifier_Config $config)
 {
     $definition = $config->getHTMLDefinition(true, true);
     if (!$definition) {
         // No editable definition was handed back, so there is nothing to extend.
         return;
     }

     // Argument lists for addElement(), listed in registration order.
     $elementSpecs = array(
         // http://developers.whatwg.org/sections.html
         array('article', 'Block', 'Flow', 'Common'),
         array('section', 'Block', 'Flow', 'Common'),
         array('nav', 'Block', 'Flow', 'Common'),
         array('aside', 'Block', 'Flow', 'Common'),
         array('hgroup', 'Block', 'Required: h1 | h2 | h3 | h4 | h5 | h6', 'Common'),
         array('header', 'Block', 'Flow', 'Common'),
         array('footer', 'Block', 'Flow', 'Common'),
         // http://developers.whatwg.org/grouping-content.html
         array('figure', 'Block', 'Optional: (figcaption, Flow) | (Flow, figcaption) | Flow', 'Common'),
         array('figcaption', 'Inline', 'Flow', 'Common'),
         // http://developers.whatwg.org/the-video-element.html#the-video-element
         array(
             'video',
             'Block',
             'Optional: (source, Flow) | (Flow, source) | Flow',
             'Common',
             array(
                 'src' => 'URI',
                 'type' => 'Text',
                 'width' => 'Length',
                 'height' => 'Length',
                 'poster' => 'URI',
                 'preload' => 'Enum#auto,metadata,none',
                 'controls' => 'Bool',
             ),
         ),
         array('source', 'Block', 'Flow', 'Common', array('src' => 'URI', 'type' => 'Text')),
         // http://developers.whatwg.org/text-level-semantics.html
         array('s', 'Inline', 'Inline', 'Common'),
         array('mark', 'Inline', 'Inline', 'Common'),
         array('wbr', 'Inline', 'Empty', 'Core'),
         array('ruby', 'Block', 'Flow', 'Common'),
         array('rt', 'Block', 'Flow', 'Common'),
         array('rp', 'Block', 'Flow', 'Common'),
         // Additional tags permitted by NetCommons
         array('embed', 'Block', 'Flow', 'Common'),
         array('noembed', 'Block', 'Flow', 'Common'),
     );
     foreach ($elementSpecs as $elementArgs) {
         // Forward each list unchanged so the argument count matches a direct call.
         call_user_func_array(array($definition, 'addElement'), $elementArgs);
     }

     // Additional attributes permitted by NetCommons: element => (attribute => type).
     $attributeSpecs = array(
         'hr' => array('color' => 'Text'),
         'tbody' => array('bgcolor' => 'Text', 'char' => 'Text'),
         'tr' => array('colspan' => 'Text', 'rowspan' => 'Text'),
         'td' => array('char' => 'Text', 'axis' => 'Text', 'headers' => 'Text'),
         'th' => array('char' => 'Text', 'axis' => 'Text', 'headers' => 'Text'),
         'caption' => array('valign' => 'Text'),
         'pre' => array('cols' => 'Text', 'wrap' => 'Text'),
         'object' => array(
             'border' => 'Text',
             'code' => 'Text',
             'usemap' => 'Text',
             'align' => 'Text',
             'hspace' => 'Text',
             'vspace' => 'Text',
         ),
         'iframe' => array(
             'hspace' => 'Text',
             'vspace' => 'Text',
             'allowfullscreen' => 'Bool',
             'allowtransparency' => 'Bool',
             'border' => 'Text',
             'bordercolor' => 'Text',
         ),
         // Every embed attribute is registered with the 'Text' type.
         'embed' => array_fill_keys(
             array(
                 'src',
                 'height',
                 'width',
                 'hspace',
                 'vspace',
                 'units',
                 'border',
                 'frameborder',
                 'play',
                 'loop',
                 'quality',
                 'pluginspage',
                 'type',
                 'allowscriptaccess',
                 'allowfullscreen',
                 'flashvars',
             ),
             'Text'
         ),
     );
     foreach ($attributeSpecs as $elementName => $attributeTypes) {
         foreach ($attributeTypes as $attributeName => $attributeType) {
             $definition->addAttribute($elementName, $attributeName, $attributeType);
         }
     }

     // Set the attributes used by all elements.
     $definition->addElement('*', 'Block', 'Flow', 'Common');
     // NOTE(review): the values are plain `true` flags; confirm that this is the
     // shape HTML Purifier expects for info_global_attr.
     $definition->info_global_attr = array_fill_keys(
         array('class', 'id', 'title', 'cite', 'background', 'style', 'align', 'dir', 'lang', 'language'),
         true
     );

     // When a module manager is attached, have it add the 'Ruby' module as well.
     if ($definition->manager) {
         $definition->manager->addModule('Ruby');
     }
 }