convertToUTF8() public static method

Convert a string to UTF-8 based on configuration.
public static convertToUTF8 ( string $str, HTMLPurifier_Config $config, HTMLPurifier_Context $context ) : string
$str string The string to convert
$config HTMLPurifier_Config
$context HTMLPurifier_Context
return string
 /**
  * Filters an HTML snippet/document so that it is XSS-free and
  * standards-compliant.
  *
  * @param $html   String of HTML to purify
  * @param $config Optional configuration for this call. Anything accepted by
  *                HTMLPurifier_Config::create() may be given; when omitted
  *                (or otherwise falsy) the config object stored on this
  *                instance is used instead.
  * @return Purified HTML
  */
 public function purify($html, $config = null)
 {
     // :TODO: make the config merge in, instead of replace
     if ($config) {
         $config = HTMLPurifier_Config::create($config);
     } else {
         $config = $this->config;
     }

     // which lexer we get depends partly on the environment and partly
     // on the configuration
     $lexer = HTMLPurifier_Lexer::create($config);

     // fresh context for this run, with a new HTML generator registered on it
     $context = new HTMLPurifier_Context();
     $this->generator = new HTMLPurifier_Generator($config, $context);
     $context->register('Generator', $this->generator);

     // global context variables that only exist when errors are collected
     // (may get moved out if other facilities start using them)
     if ($config->get('Core.CollectErrors')) {
         $languageFactory = HTMLPurifier_LanguageFactory::instance();
         $locale = $languageFactory->create($config, $context);
         $context->register('Locale', $locale);
         $errorCollector = new HTMLPurifier_ErrorCollector($context);
         $context->register('ErrorCollector', $errorCollector);
     }

     // the id accumulator lives in the context because AttrValidator can
     // be called from many places
     $idAccumulator = HTMLPurifier_IDAccumulator::build($config, $context);
     $context->register('IDAccumulator', $idAccumulator);

     $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

     // collect the filters: flag-enabled built-ins first, then the custom
     // ones from the config, then the ones stored on this instance
     $filterFlags = $config->getBatch('Filter');
     $customFilters = $filterFlags['Custom'];
     unset($filterFlags['Custom']);

     $pipeline = array();
     foreach ($filterFlags as $name => $enabled) {
         // skip disabled flags and any key containing a dot
         if (!$enabled || strpos($name, '.') !== false) {
             continue;
         }
         $class = "HTMLPurifier_Filter_{$name}";
         $pipeline[] = new $class();
     }
     foreach ($customFilters as $customFilter) {
         // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
         $pipeline[] = $customFilter;
     }
     $pipeline = array_merge($pipeline, $this->filters);
     $pipelineSize = count($pipeline);

     // maybe prepare(), but later
     for ($index = 0; $index < $pipelineSize; ++$index) {
         $html = $pipeline[$index]->preFilter($html, $config, $context);
     }

     // purified HTML: tokenize, run the strategy, generate markup again
     $tokens = $lexer->tokenizeHTML($html, $config, $context);
     $tokens = $this->strategy->execute($tokens, $config, $context);
     $html = $this->generator->generateFromTokens($tokens);

     // post-filters run in the reverse order of the pre-filters
     for ($index = $pipelineSize - 1; $index >= 0; --$index) {
         $html = $pipeline[$index]->postFilter($html, $config, $context);
     }

     $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);

     // keep the context of this run reachable on the instance
     $this->context =& $context;

     return $html;
 }
 /**
  * Runs $html through the purification pipeline and returns the result.
  *
  * The steps are: convert to UTF-8, apply every filter's preFilter(),
  * tokenize / execute the strategy / regenerate the markup, apply every
  * filter's postFilter() in reverse order, and convert back from UTF-8.
  *
  * @param $html String of HTML to process
  * @param $config Optional configuration for this call; when truthy it is
  *                passed through HTMLPurifier_Config::create(), otherwise
  *                the config stored on this instance is used.
  * @return The processed HTML
  */
 public function purify($html, $config = null)
 {
     if (!$config) {
         $config = $this->config;
     } else {
         $config = HTMLPurifier_Config::create($config);
     }

     $lexer = HTMLPurifier_Lexer::create($config);
     $context = new HTMLPurifier_Context();

     // a new generator per call, registered on the per-call context
     $this->generator = new HTMLPurifier_Generator($config, $context);
     $context->register('Generator', $this->generator);

     // locale and error collector are only set up when error collection is on
     if ($config->get('Core.CollectErrors')) {
         $factory = HTMLPurifier_LanguageFactory::instance();
         $lang = $factory->create($config, $context);
         $context->register('Locale', $lang);
         $collector = new HTMLPurifier_ErrorCollector($context);
         $context->register('ErrorCollector', $collector);
     }

     $accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
     $context->register('IDAccumulator', $accumulator);

     $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

     // split the 'Filter' batch into the custom filter list and the flags
     $flags = $config->getBatch('Filter');
     $custom = $flags['Custom'];
     unset($flags['Custom']);

     // order: flag-enabled filters, custom filters, filters on this instance
     $active = array();
     foreach ($flags as $key => $isOn) {
         if ($isOn && strpos($key, '.') === false) {
             $filterClass = "HTMLPurifier_Filter_{$key}";
             $active[] = new $filterClass();
         }
     }
     foreach ($custom as $entry) {
         $active[] = $entry;
     }
     $active = array_merge($active, $this->filters);

     // forward pass over the filters
     $total = count($active);
     $position = 0;
     while ($position < $total) {
         $html = $active[$position]->preFilter($html, $config, $context);
         $position++;
     }

     $tokenized = $lexer->tokenizeHTML($html, $config, $context);
     $processed = $this->strategy->execute($tokenized, $config, $context);
     $html = $this->generator->generateFromTokens($processed);

     // backward pass over the same filters
     $position = $total;
     while ($position > 0) {
         $position--;
         $html = $active[$position]->postFilter($html, $config, $context);
     }

     $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);

     // expose this call's context on the instance
     $this->context =& $context;

     return $html;
 }
 /**
  * Filters an HTML snippet/document so that it is XSS-free and
  * standards-compliant.
  *
  * @param $html   String of HTML to purify
  * @param $config Optional configuration for this call. Anything accepted by
  *                HTMLPurifier_Config::create() may be given; when omitted
  *                (or otherwise falsy) the config object stored on this
  *                instance is used instead.
  * @return Purified HTML
  */
 function purify($html, $config = null)
 {
     if ($config) {
         $config = HTMLPurifier_Config::create($config);
     } else {
         $config = $this->config;
     }

     // which lexer we get depends partly on the environment and partly
     // on the configuration
     $lexer = HTMLPurifier_Lexer::create($config);
     $context = new HTMLPurifier_Context();

     // run the existing generator once on an empty token list with this
     // config and context (presumably to prime it with the configuration),
     // then make it available through the context
     $this->generator->generateFromTokens(array(), $config, $context);
     $context->register('Generator', $this->generator);

     // global context variables that only exist when errors are collected
     // (may get moved out if other facilities start using them)
     if ($config->get('Core', 'CollectErrors')) {
         $languageFactory = HTMLPurifier_LanguageFactory::instance();
         $locale = $languageFactory->create($config, $context);
         $context->register('Locale', $locale);
         $errorCollector = new HTMLPurifier_ErrorCollector($context);
         $context->register('ErrorCollector', $errorCollector);
     }

     $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

     // pre-filters, in registration order
     $filterCount = count($this->filters);
     for ($index = 0; $index < $filterCount; $index++) {
         $html = $this->filters[$index]->preFilter($html, $config, $context);
     }

     // purified HTML: tokenize, run the strategy, generate markup again
     $tokens = $lexer->tokenizeHTML($html, $config, $context);
     $tokens = $this->strategy->execute($tokens, $config, $context);
     $html = $this->generator->generateFromTokens($tokens, $config, $context);

     // post-filters, in reverse registration order
     for ($index = $filterCount - 1; $index >= 0; $index--) {
         $html = $this->filters[$index]->postFilter($html, $config, $context);
     }

     $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);

     // keep the context of this run reachable on the instance
     $this->context =& $context;

     return $html;
 }
Beispiel #4
0
 /**
  * Attempts to convert a string to UTF-8 and clean any non-valid UTF-8 characters.
  *
  * A string that isUTF8() already recognises is only cleaned. Any other string
  * is cleaned and then converted from its detected encoding: through
  * HTMLPurifier_Encoder when checkForIconv() succeeds, otherwise through
  * mb_convert_encoding().
  *
  * @param string $string The string to convert.
  *
  * @return bool|string The cleaned/converted string. The fallback path returns
  *                     whatever mb_convert_encoding() returns, which PHP
  *                     documents as false on failure.
  */
 public static function convertToUTF8($string)
 {
     // Don't wrap in a class_exists in case the server already has its own version of HTMLPurifier and they have
     // open_basedir restrictions
     require_once Craft::getPathOfAlias('system.vendors.htmlpurifier') . '/HTMLPurifier.standalone.php';

     // If it's already a UTF8 string, just clean and return it
     if (static::isUTF8($string)) {
         return \HTMLPurifier_Encoder::cleanUTF8($string);
     }

     // Detect the source encoding exactly once, from the untouched input. Detecting it again after cleanUTF8()
     // has altered the string could yield a different answer than the one HTMLPurifier was configured with.
     $encoding = static::getEncoding($string);

     // Otherwise set HTMLPurifier to the actual string encoding
     $config = \HTMLPurifier_Config::createDefault();
     $config->set('Core.Encoding', $encoding);

     // Clean it
     // NOTE(review): cleaning happens before the conversion, as in the previous implementation - confirm that
     // stripping non-UTF-8 bytes from a string that is not UTF-8 yet is really the intended order.
     $string = \HTMLPurifier_Encoder::cleanUTF8($string);

     // Convert it to UTF8 if possible
     if (static::checkForIconv()) {
         return \HTMLPurifier_Encoder::convertToUTF8($string, $config, null);
     }

     // No iconv: fall back to mbstring, using the same detected encoding as the iconv path.
     return mb_convert_encoding($string, 'utf-8', $encoding);
 }
Beispiel #5
0
 /**
  * Checks that the two-character string backslash + tilde comes back
  * unchanged from both conversion directions when Core.Encoding is
  * Shift_JIS. Returns early without asserting when iconv is not available.
  */
 public function testShiftJIS()
 {
     $iconvAvailable = HTMLPurifier_Encoder::iconvAvailable();
     if (!$iconvAvailable) {
         return;
     }

     $this->config->set('Core.Encoding', 'Shift_JIS');

     // This actually looks like a Yen, but we're going to treat it differently
     $fromUtf8 = HTMLPurifier_Encoder::convertFromUTF8('\\~', $this->config, $this->context);
     $this->assertIdentical($fromUtf8, '\\~');

     $toUtf8 = HTMLPurifier_Encoder::convertToUTF8('\\~', $this->config, $this->context);
     $this->assertIdentical($toUtf8, '\\~');
 }