Convert a string to UTF-8 based on configuration.
public static convertToUTF8 ( string $str, HTMLPurifier_Config $config, HTMLPurifier_Context $context ) : string
$str | string | The string to convert |
$config | HTMLPurifier_Config | |
$context | HTMLPurifier_Context | |
return | string |
/**
 * Filters an HTML snippet/document to be XSS-free and standards-compliant.
 *
 * @param $html   String of HTML to purify
 * @param $config HTMLPurifier_Config object for this operation; when omitted,
 *                the config object specified during this object's
 *                construction is used. Any type that
 *                HTMLPurifier_Config::create() supports may be passed.
 * @return Purified HTML
 */
public function purify($html, $config = null)
{
    // :TODO: make the config merge in, instead of replace
    if ($config) {
        $config = HTMLPurifier_Config::create($config);
    } else {
        $config = $this->config;
    }

    // the lexer implementation depends partly on the environment and
    // partly on the configuration
    $lexer = HTMLPurifier_Lexer::create($config);

    $context = new HTMLPurifier_Context();

    // build the HTML generator and publish it through the context
    $this->generator = new HTMLPurifier_Generator($config, $context);
    $context->register('Generator', $this->generator);

    // global context variables that are only needed when collecting errors
    if ($config->get('Core.CollectErrors')) {
        // may get moved out if other facilities use it
        $locale = HTMLPurifier_LanguageFactory::instance()->create($config, $context);
        $context->register('Locale', $locale);

        // constructed only after the locale has been registered
        $collector = new HTMLPurifier_ErrorCollector($context);
        $context->register('ErrorCollector', $collector);
    }

    // the id accumulator goes into the context because AttrValidator
    // can be called from many places
    $accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
    $context->register('IDAccumulator', $accumulator);

    $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

    // collect filters: flag-enabled ones, then custom ones, then the
    // ones already attached to this object
    $flags = $config->getBatch('Filter');
    $custom = $flags['Custom'];
    unset($flags['Custom']);

    $filters = array();
    foreach ($flags as $filter => $enabled) {
        // skip disabled flags and any key containing a dot
        if (!$enabled || strpos($filter, '.') !== false) {
            continue;
        }
        $class = "HTMLPurifier_Filter_{$filter}";
        $filters[] = new $class();
    }
    foreach ($custom as $filter) {
        // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
        $filters[] = $filter;
    }
    $filters = array_merge($filters, $this->filters);
    // maybe prepare(), but later

    // pre-filters run in order
    $total = count($filters);
    for ($idx = 0; $idx < $total; $idx++) {
        $html = $filters[$idx]->preFilter($html, $config, $context);
    }

    // purified HTML: tokenize, run the strategy, regenerate markup
    $generator = $this->generator;
    $tokens = $lexer->tokenizeHTML($html, $config, $context);
    $tokens = $this->strategy->execute($tokens, $config, $context);
    $html = $generator->generateFromTokens($tokens);

    // post-filters run in reverse order
    for ($idx = $total - 1; $idx >= 0; $idx--) {
        $html = $filters[$idx]->postFilter($html, $config, $context);
    }

    $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);

    // keep the context of this run on the object
    $this->context =& $context;

    return $html;
}
/**
 * Runs $html through the purification pipeline and returns the result.
 *
 * Steps, in order: resolve the config, create the lexer and a fresh
 * context, register the generator (plus locale and error collector when
 * Core.CollectErrors is set) and the id accumulator, convert to UTF-8,
 * apply pre-filters, tokenize / execute the strategy / regenerate, apply
 * post-filters in reverse, and convert back from UTF-8.
 *
 * @param $html   String of HTML to process
 * @param $config Optional configuration; when falsy, $this->config is used,
 *                otherwise the value is passed to HTMLPurifier_Config::create()
 * @return The processed HTML string
 */
public function purify($html, $config = null)
{
    if ($config) {
        $config = HTMLPurifier_Config::create($config);
    } else {
        $config = $this->config;
    }

    $lexer = HTMLPurifier_Lexer::create($config);
    $context = new HTMLPurifier_Context();

    $this->generator = new HTMLPurifier_Generator($config, $context);
    $context->register('Generator', $this->generator);

    if ($config->get('Core.CollectErrors')) {
        $locale = HTMLPurifier_LanguageFactory::instance()->create($config, $context);
        $context->register('Locale', $locale);

        // constructed only after the locale has been registered
        $collector = new HTMLPurifier_ErrorCollector($context);
        $context->register('ErrorCollector', $collector);
    }

    $accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
    $context->register('IDAccumulator', $accumulator);

    $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

    // flag-enabled filters first, then custom ones, then $this->filters
    $flags = $config->getBatch('Filter');
    $custom = $flags['Custom'];
    unset($flags['Custom']);

    $filters = array();
    foreach ($flags as $filter => $enabled) {
        // skip disabled flags and any key containing a dot
        if (!$enabled || strpos($filter, '.') !== false) {
            continue;
        }
        $class = "HTMLPurifier_Filter_{$filter}";
        $filters[] = new $class();
    }
    foreach ($custom as $filter) {
        $filters[] = $filter;
    }
    $filters = array_merge($filters, $this->filters);

    // forward pass: pre-filters
    $total = count($filters);
    $idx = 0;
    while ($idx < $total) {
        $html = $filters[$idx]->preFilter($html, $config, $context);
        $idx++;
    }

    $generator = $this->generator;
    $tokens = $lexer->tokenizeHTML($html, $config, $context);
    $tokens = $this->strategy->execute($tokens, $config, $context);
    $html = $generator->generateFromTokens($tokens);

    // backward pass: post-filters
    $idx = $total - 1;
    while ($idx >= 0) {
        $html = $filters[$idx]->postFilter($html, $config, $context);
        $idx--;
    }

    $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);

    // keep the context of this run on the object
    $this->context =& $context;

    return $html;
}
/**
 * Filters an HTML snippet/document to be XSS-free and standards-compliant.
 *
 * @param $html   String of HTML to purify
 * @param $config HTMLPurifier_Config object for this operation; when omitted,
 *                the config object specified during this object's
 *                construction is used. Any type that
 *                HTMLPurifier_Config::create() supports may be passed.
 * @return Purified HTML
 */
function purify($html, $config = null)
{
    if ($config) {
        $config = HTMLPurifier_Config::create($config);
    } else {
        $config = $this->config;
    }

    // the lexer implementation depends partly on the environment and
    // partly on the configuration
    $lexer = HTMLPurifier_Lexer::create($config);

    $context = new HTMLPurifier_Context();

    // hand the existing generator an empty token list together with this
    // run's config and context, then publish it through the context
    // NOTE(review): presumably this call exists to prime the generator with
    // the configuration - confirm against the generator implementation
    $this->generator->generateFromTokens(array(), $config, $context);
    $context->register('Generator', $this->generator);

    // global context variables that are only needed when collecting errors
    if ($config->get('Core', 'CollectErrors')) {
        // may get moved out if other facilities use it
        $factory = HTMLPurifier_LanguageFactory::instance();
        $locale = $factory->create($config, $context);
        $context->register('Locale', $locale);

        // constructed only after the locale has been registered
        $collector = new HTMLPurifier_ErrorCollector($context);
        $context->register('ErrorCollector', $collector);
    }

    $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

    // pre-filters run in registration order
    $total = count($this->filters);
    $idx = 0;
    while ($idx < $total) {
        $html = $this->filters[$idx]->preFilter($html, $config, $context);
        $idx++;
    }

    // purified HTML: tokenize, run the strategy, regenerate markup
    $html = $this->generator->generateFromTokens(
        $this->strategy->execute(
            $lexer->tokenizeHTML($html, $config, $context),
            $config,
            $context
        ),
        $config,
        $context
    );

    // post-filters run in reverse order
    $idx = $total - 1;
    while ($idx >= 0) {
        $html = $this->filters[$idx]->postFilter($html, $config, $context);
        $idx--;
    }

    $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);

    // keep the context of this run on the object
    $this->context =& $context;

    return $html;
}
/**
 * Attempts to convert a string to UTF-8 and clean any non-valid UTF-8 characters.
 *
 * A string that already passes isUTF8() is only cleaned. Any other string is
 * converted from its detected encoding first and cleaned afterwards, so that
 * the UTF-8 cleaner never runs on bytes that are still in the source encoding.
 *
 * @param $string
 *
 * @return bool|string The cleaned UTF-8 string, or false when the mbstring
 *                     fallback conversion reports failure.
 */
public static function convertToUTF8($string)
{
    // Don't wrap in a class_exists in case the server already has its own version of HTMLPurifier and they have
    // open_basedir restrictions
    require_once Craft::getPathOfAlias('system.vendors.htmlpurifier') . '/HTMLPurifier.standalone.php';

    // If it's already a UTF8 string, just clean and return it
    if (static::isUTF8($string)) {
        return \HTMLPurifier_Encoder::cleanUTF8($string);
    }

    // Detect the source encoding once, from the untouched input. Detecting
    // it again after the string has been modified could give another answer.
    $encoding = static::getEncoding($string);

    // Convert it to UTF8 if possible
    if (static::checkForIconv()) {
        // Tell HTMLPurifier what the actual string encoding is
        $config = \HTMLPurifier_Config::createDefault();
        $config->set('Core.Encoding', $encoding);

        $string = \HTMLPurifier_Encoder::convertToUTF8($string, $config, null);
    } else {
        $string = mb_convert_encoding($string, 'utf-8', $encoding);
    }

    // Pass a conversion failure through instead of feeding it to the cleaner
    if ($string === false) {
        return false;
    }

    // Clean only now that the string is UTF-8: cleaning beforehand would
    // treat the legacy-encoded bytes as malformed UTF-8 and could drop them
    return \HTMLPurifier_Encoder::cleanUTF8($string);
}
/**
 * With Core.Encoding set to Shift_JIS, asserts that a backslash followed by
 * a tilde comes back identical from both convertFromUTF8() and
 * convertToUTF8(). Does nothing when iconv is not available.
 */
public function testShiftJIS()
{
    if (HTMLPurifier_Encoder::iconvAvailable()) {
        $this->config->set('Core.Encoding', 'Shift_JIS');

        // This actually looks like a Yen, but we're going to treat it differently
        $sample = '\\~';

        $encoded = HTMLPurifier_Encoder::convertFromUTF8($sample, $this->config, $this->context);
        $this->assertIdentical($encoded, $sample);

        $decoded = HTMLPurifier_Encoder::convertToUTF8($sample, $this->config, $this->context);
        $this->assertIdentical($decoded, $sample);
    }
}