/**
 * Truncate an HTML string to a number of "lines" while keeping its tags.
 *
 * The string is tokenized with HTMLPurifier. Every start and end tag is kept,
 * self-contained tags are kept except for <img/>, and text tokens are
 * shortened with self::truncateWords() (when $encoding is false) or
 * self::truncate() (otherwise). Each non-whitespace text token adds
 * ceil(units / $wordsPerLine) to the running line total, where "units" are
 * words (str_word_count) or characters (mb_strlen) of the shortened text.
 *
 * @param string $string The HTML string to truncate.
 * @param int $count Maximum number of lines to keep.
 * @param string $suffix String appended to the result once the line limit has been reached.
 * @param int $wordsPerLine Number of words (or characters, when $encoding is set) that make up
 * one line. It is used as a divisor, so it must not be zero.
 * @param string|bool $encoding Character encoding for character-based truncation, or false
 * for word-based truncation.
 * @return string The truncated HTML.
 */
protected static function truncateHtml($string, $count, $suffix, $wordsPerLine, $encoding)
 {
     $config = \HTMLPurifier_Config::create(null);
     // Only point the serializer cache at the runtime path when an application exists;
     // dereferencing a null \Yii::$app would be a fatal error.
     if (\Yii::$app !== null) {
         $config->set('Cache.SerializerPath', \Yii::$app->getRuntimePath());
     }
     $lexer = \HTMLPurifier_Lexer::create($config);
     $tokens = $lexer->tokenizeHTML($string, $config, null);
     $openTokens = 0;
     $totalCount = 0;
     $truncated = [];
     foreach ($tokens as $token) {
         if ($token instanceof \HTMLPurifier_Token_Start) {
             //Tag begins
             $openTokens++;
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_Text && $totalCount <= $count) {
             //Text: shorten it to the remaining budget, expressed in words/characters
             $remaining = ($count - $totalCount) * $wordsPerLine;
             if (false === $encoding) {
                 $token->data = self::truncateWords($token->data, $remaining, '');
                 $currentWords = str_word_count($token->data);
             } else {
                 $token->data = self::truncate($token->data, $remaining, '', $encoding) . ' ';
                 $currentWords = mb_strlen($token->data, $encoding);
             }
             //Whitespace-only tokens do not consume any lines
             if (!$token->is_whitespace) {
                 //turn into lines
                 $totalCount += intval(ceil($currentWords / $wordsPerLine));
             }
             if (1 === $currentWords) {
                 $token->data = ' ' . $token->data;
             }
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_End) {
             //Tag ends
             $openTokens--;
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_Empty) {
             //Self contained tags, i.e. <br/> etc.; <img/> is filtered out
             if ($token->name !== 'img') {
                 $truncated[] = $token;
             }
         }
         //Stop only once the limit is reached and every opened tag has been closed
         if (0 === $openTokens && $totalCount >= $count) {
             break;
         }
     }
     $context = new \HTMLPurifier_Context();
     $generator = new \HTMLPurifier_Generator($config, $context);
     //Append the suffix only when the limit was actually reached
     return $generator->generateFromTokens($truncated) . ($totalCount >= $count ? $suffix : '');
 }
Example #2
0
 /**
  * Initialise the parent lexer and create the token factory used by this lexer.
  */
 public function __construct()
 {
     // Call the parent constructor through __construct(): a PHP4-style
     // parent::HTMLPurifier_Lexer() call is not a constructor call on PHP 8.
     parent::__construct();
     // setup the factory
     $this->factory = new HTMLPurifier_TokenFactory();
 }
 /**
  * Filters an HTML snippet/document to be XSS-free and standards-compliant.
  *
  * Processing order: convert the input to UTF-8, run every filter's
  * preFilter(), tokenize, run the strategy, regenerate HTML, run every
  * filter's postFilter() in reverse order, then convert back from UTF-8.
  * The context used for the run is stored on $this->context.
  *
  * @param $html String of HTML to purify
  * @param $config HTMLPurifier_Config object for this operation, if omitted,
  *                defaults to the config object specified during this
  *                object's construction. The parameter can also be any type
  *                that HTMLPurifier_Config::create() supports.
  * @return Purified HTML
  */
 public function purify($html, $config = null)
 {
     // :TODO: make the config merge in, instead of replace
     if ($config) {
         $config = HTMLPurifier_Config::create($config);
     } else {
         $config = $this->config;
     }

     // the lexer implementation depends on both environment and configuration
     $lexer = HTMLPurifier_Lexer::create($config);
     $context = new HTMLPurifier_Context();

     // HTML generator, shared through the context
     $this->generator = new HTMLPurifier_Generator($config, $context);
     $context->register('Generator', $this->generator);

     // error collection is optional and needs a locale registered first
     if ($config->get('Core.CollectErrors')) {
         $locale = HTMLPurifier_LanguageFactory::instance()->create($config, $context);
         $context->register('Locale', $locale);
         $collector = new HTMLPurifier_ErrorCollector($context);
         $context->register('ErrorCollector', $collector);
     }

     // the id accumulator lives in the context because AttrValidator
     // can be called from many places
     $accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
     $context->register('IDAccumulator', $accumulator);

     $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

     // collect filters: enabled built-ins, then custom ones, then $this->filters
     $flags = $config->getBatch('Filter');
     $customFilters = $flags['Custom'];
     unset($flags['Custom']);
     $pipeline = array();
     foreach ($flags as $name => $enabled) {
         // skip disabled flags and dotted (sub-directive) names
         if ($enabled && strpos($name, '.') === false) {
             $filterClass = "HTMLPurifier_Filter_{$name}";
             $pipeline[] = new $filterClass();
         }
     }
     foreach ($customFilters as $custom) {
         // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
         $pipeline[] = $custom;
     }
     $pipeline = array_merge($pipeline, $this->filters);
     $total = count($pipeline);

     // maybe prepare(), but later
     for ($pos = 0; $pos < $total; ++$pos) {
         $html = $pipeline[$pos]->preFilter($html, $config, $context);
     }

     // purified HTML: tokenize, apply the strategy, generate
     $tokens = $lexer->tokenizeHTML($html, $config, $context);
     $tokens = $this->strategy->execute($tokens, $config, $context);
     $html = $this->generator->generateFromTokens($tokens);

     // post filters run in the opposite order of the pre filters
     for ($pos = $total - 1; $pos >= 0; --$pos) {
         $html = $pipeline[$pos]->postFilter($html, $config, $context);
     }

     $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
     $this->context =& $context;
     return $html;
 }
Example #4
0
 /**
  * Truncate a string while preserving the HTML.
  *
  * The string is tokenized with HTMLPurifier. Start, end and self-contained
  * tags are always kept; text tokens are shortened with self::truncateWords()
  * (when $encoding is false, counted in words) or self::truncate() (otherwise,
  * counted in characters) until the running total reaches $count.
  *
  * @param string $string The string to truncate
  * @param integer $count Maximum number of words (or characters, when $encoding is set) to keep.
  * @param string $suffix String to append to the end of the truncated string.
  * It is only appended when the limit has been reached.
  * @param string|boolean $encoding Character encoding for character-based truncation,
  * or false for word-based truncation.
  * @return string
  * @since 2.0.1
  */
 protected static function truncateHtml($string, $count, $suffix, $encoding = false)
 {
     $config = \HTMLPurifier_Config::create(null);
     $lexer = \HTMLPurifier_Lexer::create($config);
     $tokens = $lexer->tokenizeHTML($string, $config, null);
     $openTokens = 0;
     $totalCount = 0;
     $truncated = [];
     foreach ($tokens as $token) {
         if ($token instanceof \HTMLPurifier_Token_Start) {
             //Tag begins
             $openTokens++;
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_Text && $totalCount <= $count) {
             //Text: shorten it to whatever budget is left
             if (false === $encoding) {
                 $token->data = self::truncateWords($token->data, $count - $totalCount, '');
                 $currentCount = str_word_count($token->data);
             } else {
                 $token->data = self::truncate($token->data, $count - $totalCount, '', $encoding) . ' ';
                 $currentCount = mb_strlen($token->data, $encoding);
             }
             $totalCount += $currentCount;
             if (1 === $currentCount) {
                 $token->data = ' ' . $token->data;
             }
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_End) {
             //Tag ends
             $openTokens--;
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_Empty) {
             //Self contained tags, i.e. <img/> etc.
             $truncated[] = $token;
         }
         //Stop only once the limit is reached and every opened tag has been closed
         if (0 === $openTokens && $totalCount >= $count) {
             break;
         }
     }
     $context = new \HTMLPurifier_Context();
     $generator = new \HTMLPurifier_Generator($config, $context);
     //Append the suffix only when the limit was actually reached
     return $generator->generateFromTokens($truncated) . ($totalCount >= $count ? $suffix : '');
 }
Example #5
0
 /**
  * Assert that HTMLPurifier_Lexer::extractBody() returns the expected string.
  *
  * @param $text String passed to extractBody()
  * @param $extract Expected result; when exactly true, the input is expected
  *                 to come back unchanged.
  */
 function assertExtractBody($text, $extract = true)
 {
     // true is a shorthand for "output should equal input"
     $expected = $extract === true ? $text : $extract;
     $bodyLexer = new HTMLPurifier_Lexer();
     $actual = $bodyLexer->extractBody($text);
     $this->assertIdentical($expected, $actual);
 }
 /**
  * Filters an HTML snippet/document to be XSS-free and standards-compliant.
  *
  * Processing order: convert the input to UTF-8, run preFilter() of every
  * entry in $this->filters, tokenize, run the strategy, regenerate HTML, run
  * postFilter() in reverse order, then convert back from UTF-8. The context
  * used for the run is stored on $this->context.
  *
  * @param $html String of HTML to purify
  * @param $config HTMLPurifier_Config object for this operation, if omitted,
  *                defaults to the config object specified during this
  *                object's construction. The parameter can also be any type
  *                that HTMLPurifier_Config::create() supports.
  * @return Purified HTML
  */
 function purify($html, $config = null)
 {
     if ($config) {
         $config = HTMLPurifier_Config::create($config);
     } else {
         $config = $this->config;
     }

     // the lexer implementation depends on both environment and configuration
     $lexer = HTMLPurifier_Lexer::create($config);
     $context = new HTMLPurifier_Context();

     // prime the generator with this run's configuration and context
     $this->generator->generateFromTokens(array(), $config, $context);
     $context->register('Generator', $this->generator);

     // error collection is optional and needs a locale registered first
     if ($config->get('Core', 'CollectErrors')) {
         $locale = HTMLPurifier_LanguageFactory::instance()->create($config, $context);
         $context->register('Locale', $locale);
         $collector = new HTMLPurifier_ErrorCollector($context);
         $context->register('ErrorCollector', $collector);
     }

     $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

     $total = count($this->filters);
     for ($pos = 0; $pos < $total; ++$pos) {
         $html = $this->filters[$pos]->preFilter($html, $config, $context);
     }

     // purified HTML: tokenize, apply the strategy, generate
     $tokens = $lexer->tokenizeHTML($html, $config, $context);
     $tokens = $this->strategy->execute($tokens, $config, $context);
     $html = $this->generator->generateFromTokens($tokens, $config, $context);

     // post filters run in the opposite order of the pre filters
     for ($pos = $total - 1; $pos >= 0; --$pos) {
         $html = $this->filters[$pos]->postFilter($html, $config, $context);
     }

     $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
     $this->context =& $context;
     return $html;
 }
Example #7
0
 /**
  * Truncate a string while preserving the HTML.
  *
  * The string is tokenized with HTMLPurifier. Start, end and self-contained
  * tags are always kept; text tokens are shortened with self::truncateWords()
  * (when $encoding is false, counted with self::countWords()) or
  * self::truncate() (otherwise, counted with mb_strlen()) until the running
  * total reaches $count.
  *
  * @param string $string The string to truncate
  * @param int $count Maximum number of words (or characters, when $encoding is set) to keep.
  * @param string $suffix String to append to the end of the truncated string.
  * It is only appended when the limit has been reached.
  * @param string|bool $encoding Character encoding for character-based truncation,
  * or false for word-based truncation.
  * @return string
  * @since 2.0.1
  */
 protected static function truncateHtml($string, $count, $suffix, $encoding = false)
 {
     $config = \HTMLPurifier_Config::create(null);
     // Only point the serializer cache at the runtime path when an application exists;
     // dereferencing a null \Yii::$app would be a fatal error.
     if (\Yii::$app !== null) {
         $config->set('Cache.SerializerPath', \Yii::$app->getRuntimePath());
     }
     $lexer = \HTMLPurifier_Lexer::create($config);
     $tokens = $lexer->tokenizeHTML($string, $config, null);
     $openTokens = 0;
     $totalCount = 0;
     $truncated = [];
     foreach ($tokens as $token) {
         if ($token instanceof \HTMLPurifier_Token_Start) {
             //Tag begins
             $openTokens++;
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_Text && $totalCount <= $count) {
             //Text
             if (false === $encoding) {
                 //Keep the leading whitespace: it is stripped before truncating and restored after
                 preg_match('/^(\\s*)/um', $token->data, $prefixSpace) ?: ($prefixSpace = ['', '']);
                 $token->data = $prefixSpace[1] . self::truncateWords(ltrim($token->data), $count - $totalCount, '');
                 $currentCount = self::countWords($token->data);
             } else {
                 $token->data = self::truncate($token->data, $count - $totalCount, '', $encoding);
                 $currentCount = mb_strlen($token->data, $encoding);
             }
             $totalCount += $currentCount;
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_End) {
             //Tag ends
             $openTokens--;
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_Empty) {
             //Self contained tags, i.e. <img/> etc.
             $truncated[] = $token;
         }
         //Stop only once the limit is reached and every opened tag has been closed
         if (0 === $openTokens && $totalCount >= $count) {
             break;
         }
     }
     $context = new \HTMLPurifier_Context();
     $generator = new \HTMLPurifier_Generator($config, $context);
     return $generator->generateFromTokens($truncated) . ($totalCount >= $count ? $suffix : '');
 }
 /**
  * Run the parent constructor, then attach a fresh token factory.
  */
 public function __construct()
 {
     parent::__construct();
     // the factory is created after the parent has finished its own setup
     $tokenFactory = new HTMLPurifier_TokenFactory();
     $this->factory = $tokenFactory;
 }