Builds an IDAccumulator, also initializing the default blacklist
public static build ( HTMLPurifier_Config $config, HTMLPurifier_Context $context ) : HTMLPurifier_IDAccumulator | ||
$config | HTMLPurifier_Config | Instance of HTMLPurifier_Config |
$context | HTMLPurifier_Context | Instance of HTMLPurifier_Context |
return | HTMLPurifier_IDAccumulator | Fully initialized HTMLPurifier_IDAccumulator |
/** * Validates the attributes of a token, mutating it as necessary. * that has valid tokens * @param HTMLPurifier_Token $token Token to validate. * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context */ public function validateToken($token, $config, $context) { $definition = $config->getHTMLDefinition(); $e =& $context->get('ErrorCollector', true); // initialize IDAccumulator if necessary $ok =& $context->get('IDAccumulator', true); if (!$ok) { $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); $context->register('IDAccumulator', $id_accumulator); } // initialize CurrentToken if necessary $current_token =& $context->get('CurrentToken', true); if (!$current_token) { $context->register('CurrentToken', $token); } if (!$token instanceof HTMLPurifier_Token_Start && !$token instanceof HTMLPurifier_Token_Empty) { return; } // create alias to global definition array, see also $defs // DEFINITION CALL $d_defs = $definition->info_global_attr; // don't update token until the very end, to ensure an atomic update $attr = $token->attr; // do global transformations (pre) // nothing currently utilizes this foreach ($definition->info_attr_transform_pre as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } // do local transformations only applicable to this element (pre) // ex. <p align="right"> to <p style="text-align:right;"> foreach ($definition->info[$token->name]->attr_transform_pre as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } // create alias to this element's attribute definition array, see // also $d_defs (global attribute definition array) // DEFINITION CALL $defs = $definition->info[$token->name]->attr; $attr_key = false; $context->register('CurrentAttr', $attr_key); // iterate through all the attribute keypairs // Watch out for name collisions: $key has previously been used foreach ($attr as $attr_key => $value) { // call the definition if (isset($defs[$attr_key])) { // there is a local definition defined if ($defs[$attr_key] === false) { // We've explicitly been told not to allow this element. // This is usually when there's a global definition // that must be overridden. // Theoretically speaking, we could have a // AttrDef_DenyAll, but this is faster! $result = false; } else { // validate according to the element's definition $result = $defs[$attr_key]->validate($value, $config, $context); } } elseif (isset($d_defs[$attr_key])) { // there is a global definition defined, validate according // to the global definition $result = $d_defs[$attr_key]->validate($value, $config, $context); } else { // system never heard of the attribute? DELETE! $result = false; } // put the results into effect if ($result === false || $result === null) { // this is a generic error message that should replaced // with more specific ones when possible if ($e) { $e->send(E_ERROR, 'AttrValidator: Attribute removed'); } // remove the attribute unset($attr[$attr_key]); } elseif (is_string($result)) { // generally, if a substitution is happening, there // was some sort of implicit correction going on. We'll // delegate it to the attribute classes to say exactly what. // simple substitution $attr[$attr_key] = $result; } else { // nothing happens } // we'd also want slightly more complicated substitution // involving an array as the return value, // although we're not sure how colliding attributes would // resolve (certain ones would be completely overriden, // others would prepend themselves). } $context->destroy('CurrentAttr'); // post transforms // global (error reporting untested) foreach ($definition->info_attr_transform_post as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } // local (error reporting untested) foreach ($definition->info[$token->name]->attr_transform_post as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } $token->attr = $attr; // destroy CurrentToken if we made it ourselves if (!$current_token) { $context->destroy('CurrentToken'); } }
/** * Filters an HTML snippet/document to be XSS-free and standards-compliant. * * @param $html String of HTML to purify * @param $config HTMLPurifier_Config object for this operation, if omitted, * defaults to the config object specified during this * object's construction. The parameter can also be any type * that HTMLPurifier_Config::create() supports. * @return Purified HTML */ public function purify($html, $config = null) { // :TODO: make the config merge in, instead of replace $config = $config ? HTMLPurifier_Config::create($config) : $this->config; // implementation is partially environment dependant, partially // configuration dependant $lexer = HTMLPurifier_Lexer::create($config); $context = new HTMLPurifier_Context(); // setup HTML generator $this->generator = new HTMLPurifier_Generator($config, $context); $context->register('Generator', $this->generator); // set up global context variables if ($config->get('Core.CollectErrors')) { // may get moved out if other facilities use it $language_factory = HTMLPurifier_LanguageFactory::instance(); $language = $language_factory->create($config, $context); $context->register('Locale', $language); $error_collector = new HTMLPurifier_ErrorCollector($context); $context->register('ErrorCollector', $error_collector); } // setup id_accumulator context, necessary due to the fact that // AttrValidator can be called from many places $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); $context->register('IDAccumulator', $id_accumulator); $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); // setup filters $filter_flags = $config->getBatch('Filter'); $custom_filters = $filter_flags['Custom']; unset($filter_flags['Custom']); $filters = array(); foreach ($filter_flags as $filter => $flag) { if (!$flag) { continue; } if (strpos($filter, '.') !== false) { continue; } $class = "HTMLPurifier_Filter_{$filter}"; $filters[] = new $class(); } foreach ($custom_filters as $filter) { // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat $filters[] = $filter; } $filters = array_merge($filters, $this->filters); // maybe prepare(), but later for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) { $html = $filters[$i]->preFilter($html, $config, $context); } // purified HTML $html = $this->generator->generateFromTokens($this->strategy->execute($lexer->tokenizeHTML($html, $config, $context), $config, $context)); for ($i = $filter_size - 1; $i >= 0; $i--) { $html = $filters[$i]->postFilter($html, $config, $context); } $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context); $this->context =& $context; return $html; }
/** * Filters an HTML snippet/document to be XSS-free and standards-compliant. * * @param $html String of HTML to purify * @param $config HTMLPurifier_Config object for this operation, if omitted, * defaults to the config object specified during this * object's construction. The parameter can also be any type * that HTMLPurifier_Config::create() supports. * @return Purified HTML */ function purify($html, $config = null) { $config = $config ? HTMLPurifier_Config::create($config) : $this->config; // implementation is partially environment dependant, partially // configuration dependant $lexer = HTMLPurifier_Lexer::create($config); $context = new HTMLPurifier_Context(); // our friendly neighborhood generator, all primed with configuration too! $this->generator->generateFromTokens(array(), $config, $context); $context->register('Generator', $this->generator); // set up global context variables if ($config->get('Core', 'CollectErrors')) { // may get moved out if other facilities use it $language_factory = HTMLPurifier_LanguageFactory::instance(); $language = $language_factory->create($config, $context); $context->register('Locale', $language); $error_collector = new HTMLPurifier_ErrorCollector($context); $context->register('ErrorCollector', $error_collector); } // setup id_accumulator context, necessary due to the fact that // AttrValidator can be called from many places $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); $context->register('IDAccumulator', $id_accumulator); $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); for ($i = 0, $size = count($this->filters); $i < $size; $i++) { $html = $this->filters[$i]->preFilter($html, $config, $context); } // purified HTML $html = $this->generator->generateFromTokens($this->strategy->execute($lexer->tokenizeHTML($html, $config, $context), $config, $context), $config, $context); for ($i = $size - 1; $i >= 0; $i--) { $html = $this->filters[$i]->postFilter($html, $config, $context); } $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context); $this->context =& $context; return $html; }
function testBuild() { $this->config->set('Attr.IDBlacklist', array('foo')); $accumulator = HTMLPurifier_IDAccumulator::build($this->config, $this->context); $this->assertTrue(isset($accumulator->ids['foo'])); }
public function validateToken($token, $config, $context) { $definition = $config->getHTMLDefinition(); $e =& $context->get('ErrorCollector', true); $ok =& $context->get('IDAccumulator', true); if (!$ok) { $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); $context->register('IDAccumulator', $id_accumulator); } $current_token =& $context->get('CurrentToken', true); if (!$current_token) { $context->register('CurrentToken', $token); } if (!$token instanceof HTMLPurifier_Token_Start && !$token instanceof HTMLPurifier_Token_Empty) { return; } $d_defs = $definition->info_global_attr; $attr = $token->attr; foreach ($definition->info_attr_transform_pre as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } foreach ($definition->info[$token->name]->attr_transform_pre as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } $defs = $definition->info[$token->name]->attr; $attr_key = false; $context->register('CurrentAttr', $attr_key); foreach ($attr as $attr_key => $value) { if (isset($defs[$attr_key])) { if ($defs[$attr_key] === false) { $result = false; } else { $result = $defs[$attr_key]->validate($value, $config, $context); } } elseif (isset($d_defs[$attr_key])) { $result = $d_defs[$attr_key]->validate($value, $config, $context); } else { $result = false; } if ($result === false || $result === null) { if ($e) { $e->send(E_ERROR, 'AttrValidator: Attribute removed'); } unset($attr[$attr_key]); } elseif (is_string($result)) { $attr[$attr_key] = $result; } else { } } $context->destroy('CurrentAttr'); foreach ($definition->info_attr_transform_post as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } foreach ($definition->info[$token->name]->attr_transform_post as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } $token->attr = $attr; if (!$current_token) { $context->destroy('CurrentToken'); } }