/**
 * Truncates an HTML fragment while keeping its markup, measuring text in "lines".
 *
 * The fragment is tokenized with HTML Purifier. Start, end and self-contained
 * tags are copied to the output (except `img`, which is dropped); text tokens
 * are shortened with truncateWords() or truncate() until the budget is spent.
 * Scanning stops at the first point where the budget is spent and no element
 * is open.
 *
 * @param string $string HTML to truncate.
 * @param int $count budget; each non-whitespace text token consumes
 * ceil(length / $wordsPerLine) units of it.
 * @param string $suffix string appended to the generated HTML (always appended).
 * @param int $wordsPerLine multiplier applied to the remaining budget before it is
 * handed to the truncation helper, and divisor used to convert a token's measured
 * length back into budget units.
 * @param string|bool $encoding strictly `false` measures text in words
 * (truncateWords() / str_word_count()); any other value is passed as the encoding
 * to truncate() and mb_strlen().
 * @return string the regenerated HTML followed by $suffix.
 */
protected static function truncateHtml($string, $count, $suffix, $wordsPerLine, $encoding)
{
    $purifierConfig = \HTMLPurifier_Config::create(null);
    $purifierConfig->set('Cache.SerializerPath', \Yii::$app->getRuntimePath());
    $tokens = \HTMLPurifier_Lexer::create($purifierConfig)->tokenizeHTML($string, $purifierConfig, null);

    $depth = 0;   // number of currently open elements
    $used = 0;    // budget consumed so far
    $kept = [];   // tokens that make up the result

    foreach ($tokens as $token) {
        switch (true) {
            case $token instanceof \HTMLPurifier_Token_Start:
                // Tag begins
                ++$depth;
                $kept[] = $token;
                break;

            case $token instanceof \HTMLPurifier_Token_Text && $used <= $count:
                // Text: shorten it to whatever budget is left
                $budget = ($count - $used) * $wordsPerLine;
                if (false === $encoding) {
                    $token->data = self::truncateWords($token->data, $budget, '');
                    $length = str_word_count($token->data);
                } else {
                    $token->data = self::truncate($token->data, $budget, '', $encoding) . ' ';
                    $length = mb_strlen($token->data, $encoding);
                }
                // Whitespace-only tokens do not consume budget; others are rounded up to whole lines
                if (!$token->is_whitespace) {
                    $used += (int) ceil($length / $wordsPerLine);
                }
                if (1 === $length) {
                    $token->data = ' ' . $token->data;
                }
                $kept[] = $token;
                break;

            case $token instanceof \HTMLPurifier_Token_End:
                // Tag ends
                --$depth;
                $kept[] = $token;
                break;

            case $token instanceof \HTMLPurifier_Token_Empty:
                // Self contained tags, i.e. <br/>; img tags are filtered out
                if ($token->name != 'img') {
                    $kept[] = $token;
                }
                break;
        }

        if (0 === $depth && $used >= $count) {
            break;
        }
    }

    $generator = new \HTMLPurifier_Generator($purifierConfig, new \HTMLPurifier_Context());

    return $generator->generateFromTokens($kept) . $suffix;
}
/**
 * Initializes the parent lexer and creates the token factory used by this lexer.
 *
 * The parent is invoked through `parent::__construct()` rather than through its
 * PHP 4-style class-named constructor: class-named methods are no longer treated
 * as constructors as of PHP 8, so the old call fails there.
 */
public function __construct()
{
    parent::__construct();
    // setup the factory
    $this->factory = new HTMLPurifier_TokenFactory();
}
/**
 * Purifies an HTML snippet/document.
 *
 * Steps, in order: resolve the configuration, build the lexer, context and
 * generator, optionally register locale and error collector, register the id
 * accumulator, convert the input to UTF-8, run every filter's preFilter(),
 * tokenize -> strategy -> generate, run every filter's postFilter() in reverse
 * order, and convert the result back from UTF-8.
 *
 * @param $html String of HTML to purify
 * @param $config HTMLPurifier_Config object for this operation; when omitted (or
 *                otherwise falsy) the config object held by this instance is used.
 *                Any value accepted by HTMLPurifier_Config::create() may be given.
 * @return Purified HTML
 */
public function purify($html, $config = null)
{
    // :TODO: make the config merge in, instead of replace
    if ($config) {
        $config = HTMLPurifier_Config::create($config);
    } else {
        $config = $this->config;
    }

    // implementation is partially environment dependant, partially
    // configuration dependant
    $lexer = HTMLPurifier_Lexer::create($config);

    $context = new HTMLPurifier_Context();

    // setup HTML generator
    $this->generator = new HTMLPurifier_Generator($config, $context);
    $context->register('Generator', $this->generator);

    // set up global context variables
    if ($config->get('Core.CollectErrors')) {
        // may get moved out if other facilities use it
        $language_factory = HTMLPurifier_LanguageFactory::instance();
        $language = $language_factory->create($config, $context);
        $context->register('Locale', $language);

        $error_collector = new HTMLPurifier_ErrorCollector($context);
        $context->register('ErrorCollector', $error_collector);
    }

    // setup id_accumulator context, necessary due to the fact that
    // AttrValidator can be called from many places
    $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
    $context->register('IDAccumulator', $id_accumulator);

    $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

    // setup filters: built-in ones enabled by flag, then custom ones, then
    // the ones attached to this instance
    $filter_flags = $config->getBatch('Filter');
    $custom_filters = $filter_flags['Custom'];
    unset($filter_flags['Custom']);

    $filters = array();
    foreach ($filter_flags as $filter => $flag) {
        // only enabled flags whose name contains no dot name a filter class
        if ($flag && strpos($filter, '.') === false) {
            $class = "HTMLPurifier_Filter_{$filter}";
            $filters[] = new $class();
        }
    }
    foreach ($custom_filters as $filter) {
        // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
        $filters[] = $filter;
    }
    $filters = array_merge($filters, $this->filters);
    // maybe prepare(), but later

    $filter_size = count($filters);
    for ($i = 0; $i < $filter_size; ++$i) {
        $html = $filters[$i]->preFilter($html, $config, $context);
    }

    // purified HTML
    $tokens = $lexer->tokenizeHTML($html, $config, $context);
    $tokens = $this->strategy->execute($tokens, $config, $context);
    $html = $this->generator->generateFromTokens($tokens);

    // post filters run in the opposite order of the pre filters
    $i = $filter_size;
    while ($i-- > 0) {
        $html = $filters[$i]->postFilter($html, $config, $context);
    }

    $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
    $this->context =& $context;

    return $html;
}
/**
 * Truncate a string while preserving the HTML.
 *
 * The string is tokenized with HTML Purifier; tags are copied through and text
 * tokens are shortened with truncateWords() or truncate() until $count is
 * reached. Scanning stops at the first point where the count is reached and no
 * element is open.
 *
 * @param string $string The string to truncate
 * @param integer $count budget, counted in words when $encoding is strictly
 * `false` and in characters (mb_strlen()) otherwise
 * @param string $suffix String to append to the end of the generated HTML (always appended).
 * @param string|boolean $encoding `false` for word mode; any other value is passed
 * as the encoding to truncate() and mb_strlen()
 * @return string
 * @since 2.0.1
 */
protected static function truncateHtml($string, $count, $suffix, $encoding = false)
{
    $purifierConfig = \HTMLPurifier_Config::create(null);
    $tokens = \HTMLPurifier_Lexer::create($purifierConfig)->tokenizeHTML($string, $purifierConfig, null);

    $depth = 0;   // number of currently open elements
    $used = 0;    // words/characters emitted so far
    $kept = [];   // tokens that make up the result

    foreach ($tokens as $token) {
        if ($token instanceof \HTMLPurifier_Token_Start) {
            // Tag begins
            ++$depth;
            $kept[] = $token;
        } elseif ($token instanceof \HTMLPurifier_Token_Text && $used <= $count) {
            // Text: shorten it to whatever budget is left
            $remaining = $count - $used;
            if (false === $encoding) {
                $token->data = self::truncateWords($token->data, $remaining, '');
                $length = str_word_count($token->data);
            } else {
                $token->data = self::truncate($token->data, $remaining, '', $encoding) . ' ';
                $length = mb_strlen($token->data, $encoding);
            }
            $used += $length;
            if (1 === $length) {
                $token->data = ' ' . $token->data;
            }
            $kept[] = $token;
        } elseif ($token instanceof \HTMLPurifier_Token_End) {
            // Tag ends
            --$depth;
            $kept[] = $token;
        } elseif ($token instanceof \HTMLPurifier_Token_Empty) {
            // Self contained tags, i.e. <img/> etc.
            $kept[] = $token;
        }

        if (0 === $depth && $used >= $count) {
            break;
        }
    }

    $generator = new \HTMLPurifier_Generator($purifierConfig, new \HTMLPurifier_Context());

    return $generator->generateFromTokens($kept) . $suffix;
}
/**
 * Asserts that HTMLPurifier_Lexer::extractBody() returns the expected value.
 *
 * @param string $text input handed to extractBody()
 * @param mixed $extract expected result; strictly `true` (the default) means the
 * input is expected to come back unchanged
 */
function assertExtractBody($text, $extract = true)
{
    $expected = ($extract === true) ? $text : $extract;

    $lexer = new HTMLPurifier_Lexer();
    $actual = $lexer->extractBody($text);

    $this->assertIdentical($expected, $actual);
}
/**
 * Purifies an HTML snippet/document.
 *
 * Steps, in order: resolve the configuration, build the lexer and context,
 * prime and register the generator, optionally register locale and error
 * collector, convert the input to UTF-8, run every attached filter's
 * preFilter(), tokenize -> strategy -> generate, run the filters' postFilter()
 * in reverse order, and convert the result back from UTF-8.
 *
 * @param $html String of HTML to purify
 * @param $config HTMLPurifier_Config object for this operation; when omitted (or
 *                otherwise falsy) the config object held by this instance is used.
 *                Any value accepted by HTMLPurifier_Config::create() may be given.
 * @return Purified HTML
 */
function purify($html, $config = null)
{
    if ($config) {
        $config = HTMLPurifier_Config::create($config);
    } else {
        $config = $this->config;
    }

    // implementation is partially environment dependant, partially
    // configuration dependant
    $lexer = HTMLPurifier_Lexer::create($config);

    $context = new HTMLPurifier_Context();

    // our friendly neighborhood generator, all primed with configuration too!
    $this->generator->generateFromTokens(array(), $config, $context);
    $context->register('Generator', $this->generator);

    // set up global context variables
    if ($config->get('Core', 'CollectErrors')) {
        // may get moved out if other facilities use it
        $language_factory = HTMLPurifier_LanguageFactory::instance();
        $language = $language_factory->create($config, $context);
        $context->register('Locale', $language);

        $error_collector = new HTMLPurifier_ErrorCollector($context);
        $context->register('ErrorCollector', $error_collector);
    }

    $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

    // the filter count is taken once and reused for both passes
    $size = count($this->filters);
    for ($i = 0; $i < $size; ++$i) {
        $html = $this->filters[$i]->preFilter($html, $config, $context);
    }

    // purified HTML
    $tokens = $lexer->tokenizeHTML($html, $config, $context);
    $tokens = $this->strategy->execute($tokens, $config, $context);
    $html = $this->generator->generateFromTokens($tokens, $config, $context);

    // post filters run in the opposite order of the pre filters
    $i = $size;
    while ($i-- > 0) {
        $html = $this->filters[$i]->postFilter($html, $config, $context);
    }

    $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
    $this->context =& $context;

    return $html;
}
/**
 * Truncate a string while preserving the HTML.
 *
 * The string is tokenized with HTML Purifier. Tags are copied through and text
 * tokens are shortened until $count is reached. As soon as the count is
 * reached, every element that is still open is closed (innermost first) and
 * processing stops, so nothing located after the cut-off point is emitted.
 *
 * @param string $string The string to truncate
 * @param int $count budget, counted in words when $encoding is strictly `false`
 * and in characters (mb_strlen()) otherwise
 * @param string $suffix String to append to the end of the truncated string.
 * It is only appended when the count was actually reached.
 * @param string|bool $encoding `false` for word mode; any other value is passed
 * as the encoding to truncate() and mb_strlen()
 * @return string
 * @since 2.0.1
 */
protected static function truncateHtml($string, $count, $suffix, $encoding = false)
{
    $config = \HTMLPurifier_Config::create(null);
    // Only point the serializer cache at the runtime path when an application
    // instance exists; otherwise dereferencing \Yii::$app would be a fatal error.
    if (\Yii::$app !== null) {
        $config->set('Cache.SerializerPath', \Yii::$app->getRuntimePath());
    }
    $lexer = \HTMLPurifier_Lexer::create($config);
    $tokens = $lexer->tokenizeHTML($string, $config, null);

    $openTokens = []; // stack of names of the currently open elements, indexed by depth
    $depth = 0;
    $totalCount = 0;
    $truncated = [];

    foreach ($tokens as $token) {
        if ($token instanceof \HTMLPurifier_Token_Start) { //Tag begins
            $openTokens[$depth] = $token->name;
            ++$depth;
            $truncated[] = $token;
        } elseif ($token instanceof \HTMLPurifier_Token_Text && $totalCount <= $count) { //Text
            if (false === $encoding) {
                // Keep the leading whitespace: truncateWords() is given the left-trimmed text.
                if (!preg_match('/^(\\s*)/um', $token->data, $prefixSpace)) {
                    $prefixSpace = ['', ''];
                }
                $token->data = $prefixSpace[1] . self::truncateWords(ltrim($token->data), $count - $totalCount, '');
                $currentCount = self::countWords($token->data);
            } else {
                $token->data = self::truncate($token->data, $count - $totalCount, '', $encoding);
                $currentCount = mb_strlen($token->data, $encoding);
            }
            $totalCount += $currentCount;
            $truncated[] = $token;
        } elseif ($token instanceof \HTMLPurifier_Token_End) { //Tag ends
            // Only accept an end tag that matches the innermost open element, so
            // the depth can never become negative on unbalanced input.
            if ($depth > 0 && $token->name === $openTokens[$depth - 1]) {
                --$depth;
                unset($openTokens[$depth]);
                $truncated[] = $token;
            }
        } elseif ($token instanceof \HTMLPurifier_Token_Empty) { //Self contained tags, i.e. <img/> etc.
            $truncated[] = $token;
        }

        if ($totalCount >= $count) {
            // Budget spent: close whatever is still open, innermost first, and stop.
            // Waiting for the outermost element to close on its own would copy all
            // remaining tags (and nearly all remaining text) into the result.
            for ($i = $depth - 1; $i >= 0; --$i) {
                $truncated[] = new \HTMLPurifier_Token_End($openTokens[$i]);
            }
            break;
        }
    }

    $context = new \HTMLPurifier_Context();
    $generator = new \HTMLPurifier_Generator($config, $context);

    return $generator->generateFromTokens($truncated) . ($totalCount >= $count ? $suffix : '');
}
/**
 * Runs the parent constructor, then creates the token factory stored in
 * `$this->factory`.
 */
public function __construct()
{
    parent::__construct();

    // token factory held for the lifetime of this object
    $this->factory = new HTMLPurifier_TokenFactory();
}