A pure PHP parser, DirectLex has absolutely no dependencies, making
it a reasonably good default for PHP4. Written with efficiency in mind,
it can be four times faster than HTMLPurifier_Lexer_PEARSax3, although it
pales in comparison to HTMLPurifier_Lexer_DOMLex.
/**
 * Tokenizes $input with a fresh DirectLex lexer and feeds the tokens to the
 * strategy returned by getStrategy().
 *
 * Whatever the strategy's execute() returns is discarded; this method itself
 * returns nothing.
 *
 * @param mixed $input Passed unchanged to the lexer's tokenizeHTML().
 */
protected function invoke($input)
{
    $activeStrategy = $this->getStrategy();
    $directLex = new HTMLPurifier_Lexer_DirectLex();
    $tokenStream = $directLex->tokenizeHTML($input, $this->config, $this->context);
    $activeStrategy->execute($tokenStream, $this->config, $this->context);
}
/**
 * Tokenizes HTML by parsing it with the HTML5 parser and walking the
 * resulting DOM; falls back to DirectLex when parsing throws a DOMException.
 *
 * @param mixed $html    Input markup; normalized and wrapped before parsing,
 *                       but handed to the fallback lexer in its original form.
 * @param mixed $config  Forwarded to normalize(), wrapHTML() and the fallback lexer.
 * @param mixed $context Forwarded likewise; on failure the caught exception is
 *                       registered on it under the key 'PH5PError'.
 * @return array Tokens filled in by tokenizeDOM(), or whatever the fallback
 *               lexer's tokenizeHTML() returns.
 */
public function tokenizeHTML($html, $config, $context)
{
    $prepared = $this->wrapHTML(
        $this->normalize($html, $config, $context),
        $config,
        $context
    );

    try {
        $html5 = new HTML5($prepared);
        $document = $html5->save();
    } catch (DOMException $failure) {
        // Parsing failed: record the exception and let DirectLex handle the
        // untouched input instead.
        $fallback = new HTMLPurifier_Lexer_DirectLex();
        $context->register('PH5PError', $failure);
        return $fallback->tokenizeHTML($html, $config, $context);
    }

    // Walk down to the first <div> inside the first <body> of the first <html>.
    // NOTE(review): presumably this is the wrapper added by wrapHTML() — confirm.
    $htmlNode = $document->getElementsByTagName('html')->item(0);
    $bodyNode = $htmlNode->getElementsByTagName('body')->item(0);
    $wrapperDiv = $bodyNode->getElementsByTagName('div')->item(0);

    $tokens = array();
    $this->tokenizeDOM($wrapperDiv, $tokens);
    return $tokens;
}
/**
 * Tokenizes HTML by parsing it with the HTML5 parser and walking the
 * resulting DOM; falls back to DirectLex when parsing throws a DOMException.
 *
 * @param mixed $html    Input markup; normalized and wrapped before parsing,
 *                       but handed to the fallback lexer in its original form.
 * @param mixed $config  Forwarded to normalize(), wrapHTML() and the fallback lexer.
 * @param mixed $context Forwarded likewise; on failure the caught exception is
 *                       registered on it under the key 'PH5PError'.
 * @return array Tokens filled in by tokenizeDOM(), or whatever the fallback
 *               lexer's tokenizeHTML() returns.
 */
public function tokenizeHTML($html, $config, $context)
{
    $new_html = $this->normalize($html, $config, $context);
    $new_html = $this->wrapHTML($new_html, $config, $context);
    try {
        $parser = new HTML5($new_html);
        $doc = $parser->save();
    } catch (DOMException $e) {
        // Uh oh, it failed. Punt to DirectLex.
        $lexer = new HTMLPurifier_Lexer_DirectLex();
        // Save the error, so we can detect it.
        $context->register('PH5PError', $e);
        // Use the original HTML, not the normalized/wrapped copy.
        return $lexer->tokenizeHTML($html, $config, $context);
    }
    $tokens = array();
    // Tokenize starting from the first <div> inside the first <body> of the
    // first <html> element of the parsed document.
    $this->tokenizeDOM(
        $doc->getElementsByTagName('html')->item(0)
            ->getElementsByTagName('body')->item(0)
            ->getElementsByTagName('div')->item(0),
        $tokens
    );
    return $tokens;
}
<?php

// Tokenizes the sample file ten times with DirectLex; each pass overwrites
// $tokens and nothing is printed.
// NOTE(review): looks like a benchmark/profiling driver — confirm.

// Make the library directory (relative to the working directory) includable.
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');

require_once 'HTMLPurifier/ConfigSchema.php';
require_once 'HTMLPurifier/Config.php';
require_once 'HTMLPurifier/Lexer/DirectLex.php';
require_once 'HTMLPurifier/Context.php';

$input = file_get_contents('samples/Lexer/4.html');

$config = HTMLPurifier_Config::createDefault();
$context = new HTMLPurifier_Context();
$lexer = new HTMLPurifier_Lexer_DirectLex();

$i = 0;
while ($i < 10) {
    $tokens = $lexer->tokenizeHTML($input, $config, $context);
    $i++;
}
/**
 * Runs DirectLex's attribute-string parser on $input.
 *
 * The parsed result is discarded; this method returns nothing.
 *
 * @param mixed $input Passed unchanged to parseAttributeString().
 */
function invokeAttr($input)
{
    $directLex = new HTMLPurifier_Lexer_DirectLex();
    $directLex->parseAttributeString($input, $this->config, $this->context);
}