예제 #1
0
 /**
  * Truncates HTML to roughly `$count` lines of text while keeping the markup balanced.
  *
  * The input is tokenized with HTML Purifier. Start and end tags are always passed
  * through, text tokens are shortened so that the running line total stays within
  * `$count`, self-contained tags are kept except `<img>`, and any other token kind
  * (e.g. comments) is dropped. Processing stops once the budget is used up and no
  * element is left open.
  *
  * @param string $string HTML to truncate.
  * @param int $count Line budget for the text content.
  * @param string $suffix Appended verbatim after the generated HTML.
  * @param int $wordsPerLine Number of words (or characters, when `$encoding` is given)
  * that count as one line.
  * @param string|bool $encoding `false` to measure text in words; otherwise the
  * character set used to measure text in characters.
  * @return string The truncated HTML followed by `$suffix`.
  */
 protected static function truncateHtml($string, $count, $suffix, $wordsPerLine, $encoding)
 {
     $config = \HTMLPurifier_Config::create(null);
     // Only point the serializer cache at the runtime directory when an
     // application instance exists; otherwise keep HTML Purifier's default.
     if (\Yii::$app !== null) {
         $config->set('Cache.SerializerPath', \Yii::$app->getRuntimePath());
     }
     $lexer = \HTMLPurifier_Lexer::create($config);
     // Hand the lexer a real context object instead of null: a lexer
     // implementation that registers variables on the context would fail on null.
     $tokens = $lexer->tokenizeHTML($string, $config, new \HTMLPurifier_Context());
     $openTokens = 0;
     $totalCount = 0;
     $truncated = [];
     foreach ($tokens as $token) {
         if ($token instanceof \HTMLPurifier_Token_Start) {
             // Tag begins
             $openTokens++;
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_Text && $totalCount <= $count) {
             // Text: cut it down to what is left of the budget, expressed in words/characters
             if (false === $encoding) {
                 $token->data = self::truncateWords($token->data, ($count - $totalCount) * $wordsPerLine, '');
                 $currentWords = str_word_count($token->data);
             } else {
                 $token->data = self::truncate($token->data, ($count - $totalCount) * $wordsPerLine, '', $encoding) . ' ';
                 $currentWords = mb_strlen($token->data, $encoding);
             }
             // Whitespace-only tokens do not consume any of the line budget
             if (!$token->is_whitespace) {
                 // Convert the word/character count into (rounded-up) lines
                 $totalCount += intval(ceil($currentWords / $wordsPerLine));
             }
             if (1 === $currentWords) {
                 $token->data = ' ' . $token->data;
             }
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_End) {
             // Tag ends
             $openTokens--;
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_Empty) {
             // Self-contained tags are kept, except <img>, which is filtered out
             if ($token->name !== 'img') {
                 $truncated[] = $token;
             }
         }
         if (0 === $openTokens && $totalCount >= $count) {
             break;
         }
     }
     $context = new \HTMLPurifier_Context();
     $generator = new \HTMLPurifier_Generator($config, $context);
     return $generator->generateFromTokens($truncated) . $suffix;
 }
 /**
  * Filters a token stream down to what the HTML definition knows about.
  *
  * Tags with a registered tag transform are transformed first. Known tags are
  * kept, except start/empty tags that still lack a required attribute after
  * validation. Unknown tags are either turned into text (Core.EscapeInvalidTags)
  * or dropped; for unknown "hidden" elements everything up to the matching tag
  * name is dropped as well. Comments are textified inside known hidden elements,
  * cleaned up and kept when trusted/allowed, and stripped otherwise. Text tokens
  * always pass through; every other non-tag token is discarded.
  *
  * @param HTMLPurifier_Token[] $tokens
  * @param HTMLPurifier_Config $config
  * @param HTMLPurifier_Context $context
  * @return array|HTMLPurifier_Token[]
  */
 public function execute($tokens, $config, $context)
 {
     $htmlDef = $config->getHTMLDefinition();
     $serializer = new HTMLPurifier_Generator($config, $context);
     $kept = array();
     $escapeForeign = $config->get('Core.EscapeInvalidTags');
     $dropInvalidImg = $config->get('Core.RemoveInvalidImg');
     // HTML.Trusted is consulted here only to decide whether comments survive
     $isTrusted = $config->get('HTML.Trusted');
     $allowedComments = $config->get('HTML.AllowedComments');
     $allowedCommentsRx = $config->get('HTML.AllowedCommentsRegexp');
     $filterComments = !($allowedComments === array() && $allowedCommentsRx === null);
     $scriptContentsFlag = $config->get('Core.RemoveScriptContents');
     $hidden = $config->get('Core.HiddenElements');
     // Core.RemoveScriptContents overrides the 'script' entry of the hidden list
     if ($scriptContentsFlag === true) {
         $hidden['script'] = true;
     } elseif ($scriptContentsFlag === false && isset($hidden['script'])) {
         unset($hidden['script']);
     }
     $validator = new HTMLPurifier_AttrValidator();
     // name of the foreign hidden element whose contents are being discarded
     $skipUntil = false;
     // name of the known hidden element inside which comments become text
     $textifyInside = false;
     // $token is registered by name so the context always sees the current token
     $token = false;
     $context->register('CurrentToken', $token);
     $errors = false;
     if ($config->get('Core.CollectErrors')) {
         $errors =& $context->get('ErrorCollector');
     }
     foreach ($tokens as $token) {
         // while discarding, only a tag carrying the awaited name gets processed
         if ($skipUntil && (empty($token->is_tag) || $token->name !== $skipUntil)) {
             continue;
         }
         if (!empty($token->is_tag)) {
             // DEFINITION CALL: give a registered transform the first go at the tag
             if (isset($htmlDef->info_tag_transform[$token->name])) {
                 $nameBefore = $token->name;
                 $token = $htmlDef->info_tag_transform[$nameBefore]->transform($token, $config, $context);
                 if ($errors) {
                     $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $nameBefore);
                 }
             }
             if (isset($htmlDef->info[$token->name])) {
                 // known element: all that is left to check are required attributes
                 $elementDef = $htmlDef->info[$token->name];
                 $opensElement = $token instanceof HTMLPurifier_Token_Start
                     || $token instanceof HTMLPurifier_Token_Empty;
                 if ($opensElement && $elementDef->required_attr && ($token->name != 'img' || $dropInvalidImg)) {
                     $validator->validateToken($token, $config, $context);
                     $complete = true;
                     foreach ($elementDef->required_attr as $attrName) {
                         if (!isset($token->attr[$attrName])) {
                             // $attrName keeps the offending attribute for the report below
                             $complete = false;
                             break;
                         }
                     }
                     if (!$complete) {
                         if ($errors) {
                             $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $attrName);
                         }
                         continue;
                     }
                     $token->armor['ValidateAttributes'] = true;
                 }
                 if (isset($hidden[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                     $textifyInside = $token->name;
                 } elseif ($token->name === $textifyInside && $token instanceof HTMLPurifier_Token_End) {
                     $textifyInside = false;
                 }
             } elseif ($escapeForeign) {
                 // unknown tag: keep it, but only as its textual HTML representation
                 if ($errors) {
                     $errors->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                 }
                 $token = new HTMLPurifier_Token_Text($serializer->generateFromToken($token));
             } else {
                 // unknown tag gets dropped; a hidden one takes its children with it
                 $isHidden = isset($hidden[$token->name]);
                 if ($isHidden) {
                     if ($token instanceof HTMLPurifier_Token_Start) {
                         $skipUntil = $token->name;
                     } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                         // an end tag finishes the discarding; an empty tag changes nothing
                         $skipUntil = false;
                     }
                 }
                 if ($errors) {
                     $errors->send(
                         E_ERROR,
                         $isHidden
                             ? 'Strategy_RemoveForeignElements: Foreign meta element removed'
                             : 'Strategy_RemoveForeignElements: Foreign element removed'
                     );
                 }
                 continue;
             }
         } elseif ($token instanceof HTMLPurifier_Token_Comment) {
             if ($textifyInside !== false) {
                 // inside a known hidden element the comment's data is kept as text
                 $token = new HTMLPurifier_Token_Text($token->data);
             } elseif (!$isTrusted && !$filterComments) {
                 // comments are neither trusted nor whitelisted: strip
                 if ($errors) {
                     $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                 }
                 continue;
             } else {
                 // normalise hyphens first; the trailing-hyphen check is only for reporting
                 $hadTrailingHyphen = $errors && substr($token->data, -1) == '-';
                 $token->data = rtrim($token->data, '-');
                 $hadDoubleHyphen = strpos($token->data, '--') !== false;
                 while (strpos($token->data, '--') !== false) {
                     $token->data = str_replace('--', '-', $token->data);
                 }
                 $commentKey = trim($token->data);
                 $isAllowed = $isTrusted
                     || !empty($allowedComments[$commentKey])
                     || ($allowedCommentsRx !== null && preg_match($allowedCommentsRx, $commentKey));
                 if (!$isAllowed) {
                     if ($errors) {
                         $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                     }
                     continue;
                 }
                 if ($errors) {
                     if ($hadTrailingHyphen) {
                         $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
                     }
                     if ($hadDoubleHyphen) {
                         $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                     }
                 }
             }
         } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
             // neither tag, comment nor text: nothing to keep
             continue;
         }
         $kept[] = $token;
     }
     if ($skipUntil && $errors) {
         // the element being discarded was never closed, so everything after it is gone
         $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skipUntil);
     }
     $context->destroy('CurrentToken');
     return $kept;
 }
예제 #3
0
 /**
  * Generates output from the given tokens with a generator built from this
  * object's config and context, and asserts that it is identical to the
  * expected string.
  *
  * @param array $tokens Tokens handed to the generator.
  * @param string $expect Expected generator output.
  */
 protected function assertGeneration($tokens, $expect)
 {
     $htmlGenerator = new HTMLPurifier_Generator($this->config, $this->context);
     $actual = $htmlGenerator->generateFromTokens($tokens);
     $this->assertIdentical($expect, $actual);
 }
예제 #4
0
 /**
  * Filters a token stream down to what the HTML definition knows about
  * (variant written against the two-argument configuration getter and the
  * string `type` property of tokens).
  *
  * Tags with a registered tag transform are transformed first. Known tags are
  * kept, except start/empty tags that still lack a required attribute after
  * validation. Unknown tags are either turned into text (Core.EscapeInvalidTags)
  * or dropped; for unknown "hidden" elements everything up to the matching tag
  * name is dropped as well. Comments become text inside known hidden elements
  * and are stripped everywhere else. Text tokens always pass through; every
  * other non-tag token is discarded.
  *
  * @param array $tokens Tokens to filter.
  * @param object $config Configuration; provides the HTML definition and the Core directives.
  * @param object $context Context (by reference); receives 'CurrentToken' for the duration of the call.
  * @return array The tokens that were kept, in their original order.
  */
 function execute($tokens, $config, &$context)
 {
     $htmlDef = $config->getHTMLDefinition();
     $serializer = new HTMLPurifier_Generator();
     $kept = array();
     $escapeForeign = $config->get('Core', 'EscapeInvalidTags');
     $dropInvalidImg = $config->get('Core', 'RemoveInvalidImg');
     $scriptContentsFlag = $config->get('Core', 'RemoveScriptContents');
     $hidden = $config->get('Core', 'HiddenElements');
     // Core.RemoveScriptContents overrides the 'script' entry of the hidden list
     if ($scriptContentsFlag === true) {
         $hidden['script'] = true;
     } elseif ($scriptContentsFlag === false && isset($hidden['script'])) {
         unset($hidden['script']);
     }
     $validator = new HTMLPurifier_AttrValidator();
     // name of the foreign hidden element whose contents are being discarded
     $skipUntil = false;
     // name of the known hidden element inside which comments become text
     $textifyInside = false;
     // $token is registered by name so the context always sees the current token
     $token = false;
     $context->register('CurrentToken', $token);
     $errors = false;
     if ($config->get('Core', 'CollectErrors')) {
         $errors =& $context->get('ErrorCollector');
     }
     foreach ($tokens as $token) {
         // while discarding, only a tag carrying the awaited name gets processed
         if ($skipUntil && (empty($token->is_tag) || $token->name !== $skipUntil)) {
             continue;
         }
         if (!empty($token->is_tag)) {
             // DEFINITION CALL: give a registered transform the first go at the tag
             if (isset($htmlDef->info_tag_transform[$token->name])) {
                 $nameBefore = $token->name;
                 $token = $htmlDef->info_tag_transform[$nameBefore]->transform($token, $config, $context);
                 if ($errors) {
                     $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $nameBefore);
                 }
             }
             if (isset($htmlDef->info[$token->name])) {
                 // known element: all that is left to check are required attributes
                 $elementDef = $htmlDef->info[$token->name];
                 $opensElement = in_array($token->type, array('start', 'empty'), true);
                 if ($opensElement && $elementDef->required_attr && ($token->name != 'img' || $dropInvalidImg)) {
                     $validator->validateToken($token, $config, $context);
                     $complete = true;
                     foreach ($elementDef->required_attr as $attrName) {
                         if (!isset($token->attr[$attrName])) {
                             // $attrName keeps the offending attribute for the report below
                             $complete = false;
                             break;
                         }
                     }
                     if (!$complete) {
                         if ($errors) {
                             $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $attrName);
                         }
                         continue;
                     }
                     $token->armor['ValidateAttributes'] = true;
                 }
                 if (isset($hidden[$token->name]) && $token->type == 'start') {
                     $textifyInside = $token->name;
                 } elseif ($token->name === $textifyInside && $token->type == 'end') {
                     $textifyInside = false;
                 }
             } elseif ($escapeForeign) {
                 // unknown tag: keep it, but only as its textual HTML representation
                 if ($errors) {
                     $errors->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                 }
                 $token = new HTMLPurifier_Token_Text($serializer->generateFromToken($token, $config, $context));
             } else {
                 // unknown tag gets dropped; a hidden one takes its children with it
                 $isHidden = isset($hidden[$token->name]);
                 if ($isHidden) {
                     if ($token->type == 'start') {
                         $skipUntil = $token->name;
                     } elseif ($token->type != 'empty') {
                         // an end tag finishes the discarding; an empty tag changes nothing
                         $skipUntil = false;
                     }
                 }
                 if ($errors) {
                     $errors->send(
                         E_ERROR,
                         $isHidden
                             ? 'Strategy_RemoveForeignElements: Foreign meta element removed'
                             : 'Strategy_RemoveForeignElements: Foreign element removed'
                     );
                 }
                 continue;
             }
         } elseif ($token->type == 'comment') {
             if ($textifyInside === false) {
                 // outside of known hidden elements comments are stripped
                 if ($errors) {
                     $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                 }
                 continue;
             }
             // inside a known hidden element the comment's data is kept as text
             $token = new HTMLPurifier_Token_Text($token->data);
         } elseif ($token->type != 'text') {
             // neither tag, comment nor text: nothing to keep
             continue;
         }
         $kept[] = $token;
     }
     if ($skipUntil && $errors) {
         // the element being discarded was never closed, so everything after it is gone
         $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skipUntil);
     }
     $context->destroy('CurrentToken');
     return $kept;
 }
예제 #5
0
 /**
  * Truncate a string while preserving the HTML.
  *
  * The input is tokenized with HTML Purifier. Start, end and self-contained
  * tags are always passed through; text tokens are shortened so that the
  * running total stays within `$count`; any other token kind (e.g. comments)
  * is dropped. Processing stops once the budget is used up and no element is
  * left open.
  *
  * @param string $string The string to truncate
  * @param int $count Maximum amount of text to keep: counted in words when
  * `$encoding` is `false`, in characters otherwise.
  * @param string $suffix String to append to the end of the truncated string.
  * @param string|bool $encoding `false` to truncate by words; otherwise the
  * character set used to truncate and measure by characters.
  * @return string
  * @since 2.0.1
  */
 protected static function truncateHtml($string, $count, $suffix, $encoding = false)
 {
     $config = \HTMLPurifier_Config::create(null);
     $lexer = \HTMLPurifier_Lexer::create($config);
     // Hand the lexer a real context object instead of null: a lexer
     // implementation that registers variables on the context would fail on null.
     $tokens = $lexer->tokenizeHTML($string, $config, new \HTMLPurifier_Context());
     $openTokens = 0;
     $totalCount = 0;
     $truncated = [];
     foreach ($tokens as $token) {
         if ($token instanceof \HTMLPurifier_Token_Start) {
             // Tag begins
             $openTokens++;
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_Text && $totalCount <= $count) {
             // Text: cut it down to whatever is left of the budget
             if (false === $encoding) {
                 $token->data = self::truncateWords($token->data, $count - $totalCount, '');
                 $currentCount = str_word_count($token->data);
             } else {
                 $token->data = self::truncate($token->data, $count - $totalCount, '', $encoding) . ' ';
                 $currentCount = mb_strlen($token->data, $encoding);
             }
             $totalCount += $currentCount;
             if (1 === $currentCount) {
                 $token->data = ' ' . $token->data;
             }
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_End) {
             // Tag ends
             $openTokens--;
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_Empty) {
             // Self contained tags, i.e. <img/> etc.
             $truncated[] = $token;
         }
         if (0 === $openTokens && $totalCount >= $count) {
             break;
         }
     }
     $context = new \HTMLPurifier_Context();
     $generator = new \HTMLPurifier_Generator($config, $context);
     return $generator->generateFromTokens($truncated) . $suffix;
 }
예제 #6
0
 /**
  * Rewrites the token stream so that start and end tags are properly paired.
  *
  * The stream is walked with an explicit nesting stack. Along the way the method
  * - closes elements that are still open at the end of the document,
  * - converts start tags of empty-typed elements into empty tags and empty tags
  *   of non-empty elements into a start/end pair,
  * - closes the parent element when it does not allow the element being opened,
  *   re-opening formatting parents afterwards ("carryover"),
  * - removes stray end tags, or turns them into text when
  *   Core.EscapeInvalidTags is set,
  * - closes skipped inner elements when an end tag matches an outer element.
  * Enabled injectors are given each text, element and end token before normal
  * processing; whenever the stream is modified the current position is
  * processed again ("punting").
  *
  * @param HTMLPurifier_Token[] $tokens
  * @param HTMLPurifier_Config $config
  * @param HTMLPurifier_Context $context
  * @return HTMLPurifier_Token[] The corrected token stream.
  * @throws HTMLPurifier_Exception When a tag token reaches a branch that cannot handle it.
  */
 public function execute($tokens, $config, $context)
 {
     $definition = $config->getHTMLDefinition();
     // local variables
     $generator = new HTMLPurifier_Generator($config, $context);
     $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
     $e = $context->get('ErrorCollector', true);
     // token index
     $t = false;
     // injector index
     $i = false;
     // the current token
     $token = false;
     // whether or not to reprocess the same token
     $reprocess = false;
     $stack = array();
     // member variables
     $this->stack =& $stack;
     $this->t =& $t;
     $this->tokens =& $tokens;
     $this->config = $config;
     $this->context = $context;
     // context variables
     $context->register('CurrentNesting', $stack);
     $context->register('InputIndex', $t);
     $context->register('InputTokens', $tokens);
     $context->register('CurrentToken', $token);
     // -- begin INJECTOR --
     $this->injectors = array();
     $injectors = $config->getBatch('AutoFormat');
     $def_injectors = $definition->info_injector;
     $custom_injectors = $injectors['Custom'];
     // special case: 'Custom' holds injectors, not an on/off flag
     unset($injectors['Custom']);
     foreach ($injectors as $injector => $b) {
         // XXX: Fix with a legitimate lookup table of enabled filters
         if (strpos($injector, '.') !== false) {
             continue;
         }
         $injector = "HTMLPurifier_Injector_{$injector}";
         if (!$b) {
             continue;
         }
         $this->injectors[] = new $injector();
     }
     foreach ($def_injectors as $injector) {
         // assumed to be objects
         $this->injectors[] = $injector;
     }
     foreach ($custom_injectors as $injector) {
         if (is_string($injector)) {
             $injector = "HTMLPurifier_Injector_{$injector}";
             $injector = new $injector();
         }
         $this->injectors[] = $injector;
     }
     // give the injectors references to the definition and context
     // variables for performance reasons
     // Injectors whose prepare() reports a problem are left out of the active
     // list. The list is rebuilt instead of spliced while iterating: splicing
     // re-indexes the live array, so the indices of the array being iterated
     // would no longer match it after the first removal.
     $prepared_injectors = array();
     foreach ($this->injectors as $injector) {
         $error = $injector->prepare($config, $context);
         if (!$error) {
             $prepared_injectors[] = $injector;
             continue;
         }
         trigger_error("Cannot enable {$injector->name} injector because {$error} is not allowed", E_USER_WARNING);
     }
     $this->injectors = $prepared_injectors;
     // -- end INJECTOR --
     // a note on punting:
     //      In order to reduce code duplication, whenever some code needs
     //      to make HTML changes in order to make things "correct", the
     //      new HTML gets sent through the purifier, regardless of its
     //      status. This means that if we add a start token, because it
     //      was totally necessary, we don't have to update nesting; we just
     //      punt ($reprocess = true; continue;) and it does that for us.
     // isset is in loop because $tokens size changes during loop exec
     for ($t = 0; $t == 0 || isset($tokens[$t - 1]); $reprocess ? $reprocess = false : $t++) {
         // check for a rewind
         if (is_int($i) && $i >= 0) {
             // possibility: disable rewinding if the current token has a
             // rewind set on it already. This would offer protection from
             // infinite loop, but might hinder some advanced rewinding.
             $rewind_to = $this->injectors[$i]->getRewind();
             if (is_int($rewind_to) && $rewind_to < $t) {
                 if ($rewind_to < 0) {
                     $rewind_to = 0;
                 }
                 while ($t > $rewind_to) {
                     $t--;
                     $prev = $tokens[$t];
                     // indicate that other injectors should not process this token,
                     // but we need to reprocess it
                     unset($prev->skip[$i]);
                     $prev->rewind = $i;
                     // undo the effect the token had on the nesting stack
                     if ($prev instanceof HTMLPurifier_Token_Start) {
                         array_pop($this->stack);
                     } elseif ($prev instanceof HTMLPurifier_Token_End) {
                         $this->stack[] = $prev->start;
                     }
                 }
             }
             $i = false;
         }
         // handle case of document end
         if (!isset($tokens[$t])) {
             // kill processing if stack is empty
             if (empty($this->stack)) {
                 break;
             }
             // peek
             $top_nesting = array_pop($this->stack);
             $this->stack[] = $top_nesting;
             // send error
             if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
                 $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
             }
             // append, don't splice, since this is the end
             $tokens[] = new HTMLPurifier_Token_End($top_nesting->name);
             // punt!
             $reprocess = true;
             continue;
         }
         $token = $tokens[$t];
         //echo '<br>'; printTokens($tokens, $t); printTokens($this->stack);
         // quick-check: if it's not a tag, no need to process
         if (empty($token->is_tag)) {
             if ($token instanceof HTMLPurifier_Token_Text) {
                 foreach ($this->injectors as $i => $injector) {
                     if (isset($token->skip[$i])) {
                         continue;
                     }
                     if ($token->rewind !== null && $token->rewind !== $i) {
                         continue;
                     }
                     $injector->handleText($token);
                     $this->processToken($token, $i);
                     $reprocess = true;
                     break;
                 }
             }
             // another possibility is a comment
             continue;
         }
         if (isset($definition->info[$token->name])) {
             $type = $definition->info[$token->name]->child->type;
         } else {
             // Type is unknown, treat accordingly
             $type = false;
         }
         // quick tag checks: anything that's *not* an end tag
         $ok = false;
         if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
             // claims to be a start tag but is empty
             $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
             $ok = true;
         } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
             // claims to be empty but really is a start tag
             $this->swap(new HTMLPurifier_Token_End($token->name));
             $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr));
             // punt (since we had to modify the input stream in a non-trivial way)
             $reprocess = true;
             continue;
         } elseif ($token instanceof HTMLPurifier_Token_Empty) {
             // real empty token
             $ok = true;
         } elseif ($token instanceof HTMLPurifier_Token_Start) {
             // start tag
             // ...unless they also have to close their parent
             if (!empty($this->stack)) {
                 $parent = array_pop($this->stack);
                 $this->stack[] = $parent;
                 if (isset($definition->info[$parent->name])) {
                     $elements = $definition->info[$parent->name]->child->getAllowedElements($config);
                     $autoclose = !isset($elements[$token->name]);
                 } else {
                     $autoclose = false;
                 }
                 $carryover = false;
                 if ($autoclose && $definition->info[$parent->name]->formatting) {
                     $carryover = true;
                 }
                 if ($autoclose) {
                     // errors need to be updated
                     $new_token = new HTMLPurifier_Token_End($parent->name);
                     $new_token->start = $parent;
                     if ($carryover) {
                         // re-open a copy of the formatting parent after the new element's start tag
                         $element = clone $parent;
                         $element->armor['MakeWellFormed_TagClosedError'] = true;
                         $element->carryover = true;
                         $this->processToken(array($new_token, $token, $element));
                     } else {
                         $this->insertBefore($new_token);
                     }
                     if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
                         if (!$carryover) {
                             $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                         } else {
                             $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
                         }
                     }
                     $reprocess = true;
                     continue;
                 }
             }
             $ok = true;
         }
         if ($ok) {
             foreach ($this->injectors as $i => $injector) {
                 if (isset($token->skip[$i])) {
                     continue;
                 }
                 if ($token->rewind !== null && $token->rewind !== $i) {
                     continue;
                 }
                 $injector->handleElement($token);
                 $this->processToken($token, $i);
                 $reprocess = true;
                 break;
             }
             if (!$reprocess) {
                 // ah, nothing interesting happened; do normal processing
                 $this->swap($token);
                 if ($token instanceof HTMLPurifier_Token_Start) {
                     $this->stack[] = $token;
                 } elseif ($token instanceof HTMLPurifier_Token_End) {
                     throw new HTMLPurifier_Exception('Improper handling of end tag in start code; possible error in MakeWellFormed');
                 }
             }
             continue;
         }
         // sanity check: we should be dealing with a closing tag
         if (!$token instanceof HTMLPurifier_Token_End) {
             throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
         }
         // make sure that we have something open
         if (empty($this->stack)) {
             if ($escape_invalid_tags) {
                 if ($e) {
                     $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                 }
                 $this->swap(new HTMLPurifier_Token_Text($generator->generateFromToken($token)));
             } else {
                 $this->remove();
                 if ($e) {
                     $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                 }
             }
             $reprocess = true;
             continue;
         }
         // first, check for the simplest case: everything closes neatly.
         // Eventually, everything passes through here; if there are problems
         // we modify the input stream accordingly and then punt, so that
         // the tokens get processed again.
         $current_parent = array_pop($this->stack);
         if ($current_parent->name == $token->name) {
             $token->start = $current_parent;
             foreach ($this->injectors as $i => $injector) {
                 if (isset($token->skip[$i])) {
                     continue;
                 }
                 if ($token->rewind !== null && $token->rewind !== $i) {
                     continue;
                 }
                 $injector->handleEnd($token);
                 $this->processToken($token, $i);
                 // the end tag will be seen again, so its element goes back on the stack
                 $this->stack[] = $current_parent;
                 $reprocess = true;
                 break;
             }
             continue;
         }
         // okay, so we're trying to close the wrong tag
         // undo the previous pop
         $this->stack[] = $current_parent;
         // scroll back the entire nest, trying to find our tag.
         // (feature could be to specify how far you'd like to go)
         $size = count($this->stack);
         // -2 because -1 is the last element, but we already checked that
         $skipped_tags = false;
         for ($j = $size - 2; $j >= 0; $j--) {
             if ($this->stack[$j]->name == $token->name) {
                 $skipped_tags = array_slice($this->stack, $j);
                 break;
             }
         }
         // we didn't find the tag, so remove
         if ($skipped_tags === false) {
             if ($escape_invalid_tags) {
                 $this->swap(new HTMLPurifier_Token_Text($generator->generateFromToken($token)));
                 if ($e) {
                     $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                 }
             } else {
                 $this->remove();
                 if ($e) {
                     $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                 }
             }
             $reprocess = true;
             continue;
         }
         // do errors, in REVERSE $j order: a,b,c with </a></b></c>
         $c = count($skipped_tags);
         if ($e) {
             for ($j = $c - 1; $j > 0; $j--) {
                 // notice we exclude $j == 0, i.e. the current ending tag, from
                 // the errors...
                 if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
                     $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
                 }
             }
         }
         // insert tags, in FORWARD $j order: c,b,a with </a></b></c>
         $replace = array($token);
         for ($j = 1; $j < $c; $j++) {
             // ...as well as from the insertions
             $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
             $new_token->start = $skipped_tags[$j];
             array_unshift($replace, $new_token);
             if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
                 // formatting elements are re-opened after the end tag being processed
                 $element = clone $skipped_tags[$j];
                 $element->carryover = true;
                 $element->armor['MakeWellFormed_TagClosedError'] = true;
                 $replace[] = $element;
             }
         }
         $this->processToken($replace);
         $reprocess = true;
         continue;
     }
     $context->destroy('CurrentNesting');
     $context->destroy('InputTokens');
     $context->destroy('InputIndex');
     $context->destroy('CurrentToken');
     unset($this->injectors, $this->stack, $this->tokens, $this->t);
     return $tokens;
 }
예제 #7
0
 /**
  * Repairs tag nesting in a token stream and runs the enabled AutoFormat
  * injectors over it.
  *
  * Start tags that their parent does not list as an allowed child close the
  * parent first, end tags that match nothing open are escaped to text or
  * dropped, an end tag that matches an outer element closes everything
  * nested inside it, and whatever is still open at the end of the input is
  * closed.
  *
  * While running, the method publishes 'CurrentNesting', 'InputIndex',
  * 'InputTokens' and 'CurrentToken' in the context and removes them again
  * before returning; the working member variables are unset at the end.
  *
  * @param array $tokens tokens to process
  * @param HTMLPurifier_Config $config supplies the HTML definition, the
  *        Core/EscapeInvalidTags setting and the AutoFormat batch
  * @param HTMLPurifier_Context $context context used for the registrations
  *        above and for looking up the optional 'ErrorCollector'
  * @return array the resulting token list
  */
 public function execute($tokens, $config, $context)
 {
     $definition = $config->getHTMLDefinition();
     // local variables
     $result = array();
     $generator = new HTMLPurifier_Generator();
     $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
     $e = $context->get('ErrorCollector', true);
     // member variables
     $this->currentNesting = array();
     $this->inputIndex = false;
     $this->inputTokens =& $tokens;
     $this->outputTokens =& $result;
     // context variables
     $context->register('CurrentNesting', $this->currentNesting);
     $context->register('InputIndex', $this->inputIndex);
     $context->register('InputTokens', $tokens);
     // -- begin INJECTOR --
     $this->injectors = array();
     $injectors = $config->getBatch('AutoFormat');
     $custom_injectors = $injectors['Custom'];
     unset($injectors['Custom']);
     // special case
     foreach ($injectors as $injector => $b) {
         $injector = "HTMLPurifier_Injector_{$injector}";
         if (!$b) {
             continue;
         }
         $this->injectors[] = new $injector();
     }
     foreach ($custom_injectors as $injector) {
         // custom injectors may be given as a name or as a ready instance
         if (is_string($injector)) {
             $injector = "HTMLPurifier_Injector_{$injector}";
             $injector = new $injector();
         }
         $this->injectors[] = $injector;
     }
     // array index of the injector that resulted in an array
     // substitution. This enables processTokens() to know which
     // injectors are affected by the added tokens and which are
     // not (namely, the ones after the current injector are not
     // affected)
     $this->currentInjector = false;
     // give the injectors references to the definition and context
     // variables for performance reasons. Injectors whose prepare() call
     // reports an error are left out. The list of survivors is rebuilt
     // instead of spliced in place: splicing while iterating shifts the
     // indexes, so a second failing injector would remove its neighbour
     // and stay enabled itself.
     $enabled_injectors = array();
     foreach ($this->injectors as $injector) {
         $error = $injector->prepare($config, $context);
         if (!$error) {
             $enabled_injectors[] = $injector;
             continue;
         }
         // rm the injector
         trigger_error("Cannot enable {$injector->name} injector because {$error} is not allowed", E_USER_WARNING);
     }
     $this->injectors = $enabled_injectors;
     // warning: most foreach loops follow the convention $i => $injector.
     // Don't define these as loop-wide variables, please!
     // -- end INJECTOR --
     $token = false;
     $context->register('CurrentToken', $token);
     // isset is in loop because $tokens size changes during loop exec
     for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) {
         // if all goes well, this token will be passed through unharmed
         $token = $tokens[$this->inputIndex];
         //printTokens($tokens, $this->inputIndex);
         // count down each injector's skip counter by one per token
         foreach ($this->injectors as $injector) {
             if ($injector->skip > 0) {
                 $injector->skip--;
             }
         }
         // quick-check: if it's not a tag, no need to process
         if (empty($token->is_tag)) {
             if ($token->type === 'text') {
                 // injector handler code; duplicated for performance reasons
                 foreach ($this->injectors as $i => $injector) {
                     if (!$injector->skip) {
                         $injector->handleText($token);
                     }
                     // an injector replaced the token with an array of tokens
                     if (is_array($token)) {
                         $this->currentInjector = $i;
                         break;
                     }
                 }
             }
             $this->processToken($token, $config, $context);
             continue;
         }
         $info = $definition->info[$token->name]->child;
         // quick tag checks: anything that's *not* an end tag
         $ok = false;
         if ($info->type == 'empty' && $token->type == 'start') {
             // test if it claims to be a start tag but is empty
             $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
             $ok = true;
         } elseif ($info->type != 'empty' && $token->type == 'empty') {
             // claims to be empty but really is a start tag
             $token = array(new HTMLPurifier_Token_Start($token->name, $token->attr), new HTMLPurifier_Token_End($token->name));
             $ok = true;
         } elseif ($token->type == 'empty') {
             // real empty token
             $ok = true;
         } elseif ($token->type == 'start') {
             // start tag
             // ...unless they also have to close their parent
             if (!empty($this->currentNesting)) {
                 $parent = array_pop($this->currentNesting);
                 $parent_info = $definition->info[$parent->name];
                 // this can be replaced with a more general algorithm:
                 // if the token is not allowed by the parent, auto-close
                 // the parent
                 if (!isset($parent_info->child->elements[$token->name])) {
                     if ($e) {
                         $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                     }
                     // close the parent, then append the token
                     $result[] = new HTMLPurifier_Token_End($parent->name);
                     $result[] = $token;
                     $this->currentNesting[] = $token;
                     continue;
                 }
                 $this->currentNesting[] = $parent;
                 // undo the pop
             }
             $ok = true;
         }
         // injector handler code; duplicated for performance reasons
         if ($ok) {
             foreach ($this->injectors as $i => $injector) {
                 if (!$injector->skip) {
                     $injector->handleElement($token);
                 }
                 // an injector replaced the token with an array of tokens
                 if (is_array($token)) {
                     $this->currentInjector = $i;
                     break;
                 }
             }
             $this->processToken($token, $config, $context);
             continue;
         }
         // sanity check: we should be dealing with a closing tag
         if ($token->type != 'end') {
             continue;
         }
         // make sure that we have something open
         if (empty($this->currentNesting)) {
             if ($escape_invalid_tags) {
                 if ($e) {
                     $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                 }
                 $result[] = new HTMLPurifier_Token_Text($generator->generateFromToken($token, $config, $context));
             } elseif ($e) {
                 $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
             }
             continue;
         }
         // first, check for the simplest case: everything closes neatly
         $current_parent = array_pop($this->currentNesting);
         if ($current_parent->name == $token->name) {
             $result[] = $token;
             foreach ($this->injectors as $i => $injector) {
                 $injector->notifyEnd($token);
             }
             continue;
         }
         // okay, so we're trying to close the wrong tag
         // undo the pop previous pop
         $this->currentNesting[] = $current_parent;
         // scroll back the entire nest, trying to find our tag.
         // (feature could be to specify how far you'd like to go)
         $size = count($this->currentNesting);
         // -2 because -1 is the last element, but we already checked that
         $skipped_tags = false;
         for ($i = $size - 2; $i >= 0; $i--) {
             if ($this->currentNesting[$i]->name == $token->name) {
                 // current nesting is modified
                 $skipped_tags = array_splice($this->currentNesting, $i);
                 break;
             }
         }
         // we still didn't find the tag, so remove
         if ($skipped_tags === false) {
             if ($escape_invalid_tags) {
                 $result[] = new HTMLPurifier_Token_Text($generator->generateFromToken($token, $config, $context));
                 if ($e) {
                     $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                 }
             } elseif ($e) {
                 $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
             }
             continue;
         }
         // okay, we found it, close all the skipped tags
         // note that skipped tags contains the element we need closed
         for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
             // please don't redefine $i!
             // index 0 is the element the end tag actually names, so it is
             // excluded from the "closed by element end" notices
             if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
                 $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
             }
             $result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
             foreach ($this->injectors as $injector) {
                 $injector->notifyEnd($new_token);
             }
         }
     }
     $context->destroy('CurrentNesting');
     $context->destroy('InputTokens');
     $context->destroy('InputIndex');
     $context->destroy('CurrentToken');
     // we're at the end now, fix all still unclosed tags (this is
     // duplicated from the end of the loop with some slight modifications)
     // not using $skipped_tags since it would invariably be all of them
     if (!empty($this->currentNesting)) {
         for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
             // please don't redefine $i!
             if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
                 $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
             }
             $result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
             foreach ($this->injectors as $injector) {
                 $injector->notifyEnd($new_token);
             }
         }
     }
     unset($this->outputTokens, $this->injectors, $this->currentInjector, $this->currentNesting, $this->inputTokens, $this->inputIndex);
     return $result;
 }
예제 #8
0
 /**
  * Filters an element's child tokens down to what this definition allows.
  *
  * Whitespace tokens are always kept. A top-level child whose name is not
  * in $this->elements is rejected together with everything nested inside
  * it; rejected tokens survive only as text, and only when '#PCDATA' is an
  * allowed child (text tokens as they are, other tokens escaped when
  * Core/EscapeInvalidChildren is set).
  *
  * @param array $tokens_of_children child tokens to validate
  * @param HTMLPurifier_Config $config read for Core/EscapeInvalidChildren
  * @param HTMLPurifier_Context $context handed to the generator
  * @return bool|array false when there were no tokens, nothing was kept or
  *         only whitespace was seen (the latter also sets $this->whitespace);
  *         true when the kept tokens compare equal to the input; otherwise
  *         the array of kept tokens
  */
 public function validateChildren($tokens_of_children, $config, $context)
 {
     // Flag for subclasses, reset on every call
     $this->whitespace = false;
     // nothing to validate: signal that the parent node should be deleted
     if (empty($tokens_of_children)) {
         return false;
     }
     // when character data is allowed, rejected tokens may be kept as text
     // instead of being dropped silently
     $text_allowed = isset($this->elements['#PCDATA']);
     $escape_rejected = $config->get('Core', 'EscapeInvalidChildren');
     $gen = new HTMLPurifier_Generator($config, $context);
     // tokens that survive validation
     $kept = array();
     // how deep we currently are inside the children
     $depth = 0;
     // whether the top-level child being walked through was rejected
     $dropping = false;
     // stays true until a non-whitespace token shows up
     $only_whitespace = true;
     foreach ($tokens_of_children as $child) {
         if (!empty($child->is_whitespace)) {
             $kept[] = $child;
             continue;
         }
         $only_whitespace = false;
         // the token is a direct child when nothing was open before it
         $at_top_level = $depth === 0;
         if ($child instanceof HTMLPurifier_Token_Start) {
             $depth++;
         } elseif ($child instanceof HTMLPurifier_Token_End) {
             $depth--;
         }
         // the keep/drop decision is taken per direct child and then
         // applies to every token nested inside that child
         if ($at_top_level) {
             $dropping = !isset($this->elements[$child->name]);
         }
         if (!$dropping || ($text_allowed && $child instanceof HTMLPurifier_Token_Text)) {
             $kept[] = $child;
         } elseif ($text_allowed && $escape_rejected) {
             $kept[] = new HTMLPurifier_Token_Text($gen->generateFromToken($child));
         }
         // anything else is dropped silently
     }
     if (empty($kept)) {
         return false;
     }
     if ($only_whitespace) {
         $this->whitespace = true;
         return false;
     }
     return $tokens_of_children == $kept ? true : $kept;
 }
예제 #9
0
/**
 * Debugging aid: echoes the given tokens inside a <pre> block.
 *
 * Every token is shown as its key in superscript followed by its escaped
 * generated markup; the token whose key is identical to $index is wrapped
 * in [<strong>…</strong>].
 *
 * @param array $tokens tokens to print
 * @param int|null $index key of the token to highlight, if any
 */
function printTokens($tokens, $index = null)
{
    $generator = new HTMLPurifier_Generator(HTMLPurifier_Config::createDefault(), new HTMLPurifier_Context());
    $output = '<pre>';
    foreach ($tokens as $position => $token) {
        $rendered = "<sup>{$position}</sup>" . $generator->escape($generator->generateFromToken($token));
        // strict comparison: only an identical key is highlighted
        $output .= $index === $position ? '[<strong>' . $rendered . '</strong>]' : $rendered;
    }
    echo $output . '</pre>';
}
예제 #10
0
 /**
  * Renders a list of tokens back into an HTML string, using a generator
  * built from this object's config and context.
  *
  * @param array $tokens tokens to render
  * @return string whatever the generator produces for the tokens
  */
 protected function generate($tokens)
 {
     $htmlGenerator = new HTMLPurifier_Generator($this->config, $this->context);
     $html = $htmlGenerator->generateFromTokens($tokens);
     return $html;
 }
예제 #11
0
 /**
  * Truncate a string while preserving the HTML.
  *
  * The string is tokenized, text tokens are shortened until the limit is
  * used up, and the kept tokens are turned back into HTML. Iteration only
  * stops once the limit is reached AND every start tag seen so far has been
  * matched by an end tag, so the returned markup stays balanced.
  *
  * @param string $string The string to truncate
  * @param int $count Maximum amount of text to keep: counted in words when
  * $encoding is false, in characters otherwise
  * @param string $suffix String to append to the end of the truncated string.
  * It is appended whenever the counted text reaches $count.
  * @param string|bool $encoding Character encoding used for character-based
  * truncation, or false to truncate by words
  * @return string
  * @since 2.0.1
  */
 protected static function truncateHtml($string, $count, $suffix, $encoding = false)
 {
     $config = \HTMLPurifier_Config::create(null);
     // Only redirect the serializer cache when an application instance
     // exists; without this guard the helper dies with a fatal error when
     // \Yii::$app is null.
     if (\Yii::$app !== null) {
         $config->set('Cache.SerializerPath', \Yii::$app->getRuntimePath());
     }
     $lexer = \HTMLPurifier_Lexer::create($config);
     // hand the lexer a real context object rather than null
     $tokens = $lexer->tokenizeHTML($string, $config, new \HTMLPurifier_Context());
     // number of start tags not yet matched by an end tag
     $openTokens = 0;
     // words or characters of text kept so far
     $totalCount = 0;
     $truncated = [];
     foreach ($tokens as $token) {
         if ($token instanceof \HTMLPurifier_Token_Start) {
             //Tag begins
             $openTokens++;
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_Text && $totalCount <= $count) {
             //Text
             if (false === $encoding) {
                 // keep the leading whitespace: it is cut off before the
                 // words are truncated and put back in front afterwards
                 preg_match('/^(\\s*)/um', $token->data, $prefixSpace) ?: ($prefixSpace = ['', '']);
                 $token->data = $prefixSpace[1] . self::truncateWords(ltrim($token->data), $count - $totalCount, '');
                 $currentCount = self::countWords($token->data);
             } else {
                 $token->data = self::truncate($token->data, $count - $totalCount, '', $encoding);
                 $currentCount = mb_strlen($token->data, $encoding);
             }
             $totalCount += $currentCount;
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_End) {
             //Tag ends
             $openTokens--;
             $truncated[] = $token;
         } elseif ($token instanceof \HTMLPurifier_Token_Empty) {
             //Self contained tags, i.e. <img/> etc.
             $truncated[] = $token;
         }
         // stop only when the limit is reached and no tag is left open
         if (0 === $openTokens && $totalCount >= $count) {
             break;
         }
     }
     $context = new \HTMLPurifier_Context();
     $generator = new \HTMLPurifier_Generator($config, $context);
     return $generator->generateFromTokens($truncated) . ($totalCount >= $count ? $suffix : '');
 }
 /**
  * Filters a token stream against the HTML definition.
  *
  * Tags are run through any registered tag transform and then kept only if
  * their name exists in $definition->info; other tags are either turned
  * into escaped text (Core.EscapeInvalidTags) or dropped, and for names
  * listed as hidden elements everything up to the matching tag is dropped
  * too. Comments are kept only when HTML.Trusted is set or they match the
  * allowed-comment lookup/regexp; inside a hidden element they become text.
  * Text tokens pass through, any other token kind is discarded.
  *
  * The token being examined is registered in the context as 'CurrentToken'
  * for the duration of the call.
  *
  * @param array $tokens tokens to filter
  * @param HTMLPurifier_Config $config source of the definition and settings
  * @param HTMLPurifier_Context $context used for 'CurrentToken' and, when
  *        Core.CollectErrors is on, for the 'ErrorCollector'
  * @return array the tokens that survived
  */
 public function execute($tokens, $config, $context)
 {
     $definition = $config->getHTMLDefinition();
     $generator = new HTMLPurifier_Generator($config, $context);
     $result = array();
     $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
     $remove_invalid_img = $config->get('Core.RemoveInvalidImg');
     $trusted = $config->get('HTML.Trusted');
     $comment_lookup = $config->get('HTML.AllowedComments');
     $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
     // comments only need checking when some allow-rule is configured
     $check_comments = $comment_lookup !== array() || $comment_regexp !== null;
     $remove_script_contents = $config->get('Core.RemoveScriptContents');
     $hidden_elements = $config->get('Core.HiddenElements');
     // an explicit true/false for Core.RemoveScriptContents overrides the
     // 'script' entry of the hidden-element list; any other value leaves
     // the list as configured
     if ($remove_script_contents === true) {
         $hidden_elements['script'] = true;
     } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
         unset($hidden_elements['script']);
     }
     $attr_validator = new HTMLPurifier_AttrValidator();
     // name of the removed hidden element whose contents are being skipped,
     // or false when nothing is being skipped
     $remove_until = false;
     // name of the allowed hidden element inside which comments are turned
     // into text, or false
     $textify_comments = false;
     $token = false;
     $context->register('CurrentToken', $token);
     // error collector, only fetched when error collection is enabled
     $e = false;
     if ($config->get('Core.CollectErrors')) {
         $e =& $context->get('ErrorCollector');
     }
     foreach ($tokens as $token) {
         // while skipping, ignore everything except tags carrying the name
         // we are waiting for; those fall through to the handling below
         if ($remove_until) {
             if (empty($token->is_tag) || $token->name !== $remove_until) {
                 continue;
             }
         }
         if (!empty($token->is_tag)) {
             // apply a tag transform registered for this name, if any
             if (isset($definition->info_tag_transform[$token->name])) {
                 $original_name = $token->name;
                 $token = $definition->info_tag_transform[$token->name]->transform($token, $config, $context);
                 if ($e) {
                     $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
                 }
             }
             if (isset($definition->info[$token->name])) {
                 // known element: for start/empty tags of elements with
                 // required attributes, validate the attributes now and drop
                 // the tag if a required one is missing. img is only checked
                 // when Core.RemoveInvalidImg is set.
                 if (($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) && $definition->info[$token->name]->required_attr && ($token->name != 'img' || $remove_invalid_img)) {
                     $attr_validator->validateToken($token, $config, $context);
                     $ok = true;
                     foreach ($definition->info[$token->name]->required_attr as $name) {
                         if (!isset($token->attr[$name])) {
                             $ok = false;
                             break;
                         }
                     }
                     if (!$ok) {
                         if ($e) {
                             // $name still holds the attribute the loop broke on
                             $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $name);
                         }
                         continue;
                     }
                     // NOTE(review): presumably tells a later attribute
                     // validation pass that this token is already done - confirm
                     $token->armor['ValidateAttributes'] = true;
                 }
                 // track whether we are inside an allowed hidden element, so
                 // that comments inside it are converted to text
                 if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                     $textify_comments = $token->name;
                 } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
                     $textify_comments = false;
                 }
             } elseif ($escape_invalid_tags) {
                 // unknown element, escaping requested: keep it as text
                 if ($e) {
                     $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                 }
                 $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
             } else {
                 // unknown element, no escaping: the tag is dropped
                 if (isset($hidden_elements[$token->name])) {
                     if ($token instanceof HTMLPurifier_Token_Start) {
                         // start skipping the element's contents
                         $remove_until = $token->name;
                     } elseif ($token instanceof HTMLPurifier_Token_Empty) {
                         // self-contained tag: skipping state is left untouched
                     } else {
                         // neither start nor empty (presumably the end tag):
                         // stop skipping
                         $remove_until = false;
                     }
                     if ($e) {
                         $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                     }
                 } else {
                     if ($e) {
                         $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                     }
                 }
                 continue;
             }
         } elseif ($token instanceof HTMLPurifier_Token_Comment) {
             if ($textify_comments !== false) {
                 // inside an allowed hidden element: keep the comment body as text
                 $data = $token->data;
                 $token = new HTMLPurifier_Token_Text($data);
             } elseif ($trusted || $check_comments) {
                 // the trailing-hyphen flag is only needed for error
                 // reporting, so it is only computed when collecting errors
                 $trailing_hyphen = false;
                 if ($e) {
                     if (substr($token->data, -1) == '-') {
                         $trailing_hyphen = true;
                     }
                 }
                 // strip trailing hyphens and collapse every run of hyphens
                 // to a single one
                 $token->data = rtrim($token->data, '-');
                 $found_double_hyphen = false;
                 while (strpos($token->data, '--') !== false) {
                     $found_double_hyphen = true;
                     $token->data = str_replace('--', '-', $token->data);
                 }
                 // keep the comment when trusted, or when its trimmed text is
                 // in the lookup or matches the regexp; otherwise drop it
                 if ($trusted || !empty($comment_lookup[trim($token->data)]) || $comment_regexp !== null && preg_match($comment_regexp, trim($token->data))) {
                     if ($e) {
                         if ($trailing_hyphen) {
                             $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
                         }
                         if ($found_double_hyphen) {
                             $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                         }
                     }
                 } else {
                     if ($e) {
                         $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                     }
                     continue;
                 }
             } else {
                 // comments are not allowed at all
                 if ($e) {
                     $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                 }
                 continue;
             }
         } elseif ($token instanceof HTMLPurifier_Token_Text) {
             // text passes through unchanged
         } else {
             // any other kind of token is dropped
             continue;
         }
         $result[] = $token;
     }
     // input ended while still skipping the contents of a hidden element
     if ($remove_until && $e) {
         $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
     }
     $context->destroy('CurrentToken');
     return $result;
 }