/**
 * Turns an HTML string into a token list by parsing it with HTML5 and
 * walking the resulting document.
 *
 * The input is first passed through normalize() and wrapHTML(). After
 * parsing, the first <div> inside the first <body> of the first <html>
 * element is handed to tokenizeDOM(), which fills the token array.
 *
 * @param string $html    Markup to tokenize.
 * @param mixed  $config  Forwarded to normalize() and wrapHTML().
 * @param mixed  $context Forwarded to normalize() and wrapHTML(); taken by reference.
 * @return array Tokens collected by tokenizeDOM().
 */
public function tokenizeHTML($html, $config, &$context)
{
    // Pre-process the markup before it reaches the parser.
    $normalized = $this->normalize($html, $config, $context);
    $wrapped = $this->wrapHTML($normalized, $config, $context);

    $builder = new HTML5($wrapped);
    $document = $builder->save();

    // Descend html -> body -> div, one named step at a time.
    $htmlNode = $document->getElementsByTagName('html')->item(0);
    $bodyNode = $htmlNode->getElementsByTagName('body')->item(0);
    $wrapperNode = $bodyNode->getElementsByTagName('div')->item(0);

    $collected = array();
    $this->tokenizeDOM($wrapperNode, $collected);

    return $collected;
}
/**
 * Turns an HTML string into a token list by parsing it with HTML5 and
 * walking the resulting document.
 *
 * If building the document raises a DOMException, the exception is
 * registered in the context under 'PH5PError' and tokenization of the
 * original, unmodified input is delegated to HTMLPurifier_Lexer_DirectLex.
 *
 * @param string $html    Markup to tokenize.
 * @param mixed  $config  Forwarded to normalize(), wrapHTML() and the fallback lexer.
 * @param mixed  $context Forwarded likewise; receives 'PH5PError' on parser failure.
 * @return array Tokens collected by tokenizeDOM(), or the fallback lexer's result.
 */
public function tokenizeHTML($html, $config, $context)
{
    // Work on a copy so the untouched input stays available for the fallback.
    $prepared = $this->normalize($html, $config, $context);
    $prepared = $this->wrapHTML($prepared, $config, $context);

    try {
        $builder = new HTML5($prepared);
        $document = $builder->save();
    } catch (DOMException $parseFailure) {
        $fallback = new HTMLPurifier_Lexer_DirectLex();
        $context->register('PH5PError', $parseFailure);

        return $fallback->tokenizeHTML($html, $config, $context);
    }

    // Descend html -> body -> div, one named step at a time.
    $htmlNode = $document->getElementsByTagName('html')->item(0);
    $bodyNode = $htmlNode->getElementsByTagName('body')->item(0);
    $wrapperNode = $bodyNode->getElementsByTagName('div')->item(0);

    $collected = array();
    $this->tokenizeDOM($wrapperNode, $collected);

    return $collected;
}
/**
 * Tokenizes an HTML string by parsing it with HTML5 and walking the
 * resulting document.
 *
 * The input is passed through normalize() and wrapHTML() before parsing.
 * When the parser throws a DOMException, the exception is stored in the
 * context under 'PH5PError' and the original input is tokenized by
 * HTMLPurifier_Lexer_DirectLex instead.
 *
 * @param string $html    Markup to tokenize.
 * @param mixed  $config  Forwarded to normalize(), wrapHTML() and the fallback lexer.
 * @param mixed  $context Forwarded likewise; receives 'PH5PError' on parser failure.
 * @return array Tokens collected by tokenizeDOM(), or the fallback lexer's result.
 */
public function tokenizeHTML($html, $config, $context)
{
    // Keep $html untouched: the fallback path needs the original markup.
    $new_html = $this->normalize($html, $config, $context);
    $new_html = $this->wrapHTML($new_html, $config, $context);
    try {
        $parser = new HTML5($new_html);
        $doc = $parser->save();
    } catch (DOMException $e) {
        // Uh oh, it failed. Punt to DirectLex.
        $lexer = new HTMLPurifier_Lexer_DirectLex();
        // Save the error, so we can detect it.
        $context->register('PH5PError', $e);
        // Use the original HTML, not the normalized/wrapped copy.
        return $lexer->tokenizeHTML($html, $config, $context);
    }
    $tokens = array();
    // Tokenize from the first <div> inside <html><body>.
    $this->tokenizeDOM(
        $doc->getElementsByTagName('html')->item(0)
            ->getElementsByTagName('body')->item(0)
            ->getElementsByTagName('div')->item(0),
        $tokens
    );
    return $tokens;
}