the most part is allows you to preserve the structure of original template, preserving whitespace and so on with one or two minor exceptions, such as whitespace between attributes and the quotes used for attributes. Compare the source template for this example with the output.
EOD;
$tpl->setVar('para1', $para1);

$para2 = <<<EOD
Notice also how the fourth argument to the open and close handlers is used (see the PHP source) - this allows you to correctly "rebuild" tags like <div /> vs. <div></div>
EOD;
$tpl->setVar('para2', $para2);

// Instantiate the parser
$parser = new Diggin\HTMLSax\HTMLSax();

// Register the template object with the parser; the handler names given
// below are presumably resolved as methods on this object (confirm against
// the HTMLSax documentation).
$parser->set_object($tpl);

// Set a parser option
$parser->set_option('XML_OPTION_STRIP_ESCAPES');

// Set the handlers
$parser->set_element_handler('open', 'close');
$parser->set_data_handler('data');
$parser->set_escape_handler('escape');
$parser->set_pi_handler('pi');
$parser->set_jasp_handler('jasp');

// Read the template up front so an unreadable file fails loudly instead of
// silently handing `false` to the parser.
$templatePath = __DIR__ . '/simpletemplate.tpl';
$templateSource = file_get_contents($templatePath);
if ($templateSource === false) {
    throw new \RuntimeException('Unable to read template file: ' . $templatePath);
}

// Parse the document
$parser->parse($templateSource);

$tpl->display();
{
        $this->xhtml .= $this->inTitle ? 'This is XHTML 1.0' : $data;
    }

    /**
     * Escape handler.
     *
     * Appends the XHTML 1.0 Transitional doctype to the output when the
     * escape data is exactly the HTML 4.0 Transitional doctype string below
     * (case-sensitive match). Any other escape data is not appended.
     *
     * @param object $parser The parser invoking the handler (unused here).
     * @param string $data   The escape content supplied by the parser.
     */
    function escapeHandler(&$parser, $data)
    {
        // Strict comparison: only an identical string may trigger the swap.
        if ($data === 'doctype html public "-//W3C//DTD HTML 4.0 Transitional//EN"') {
            $this->xhtml .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">';
        }
    }

    /**
     * Return the XHTML document accumulated by the handlers.
     */
    function getXHTML()
    {
        return $this->xhtml;
    }
}

// Get the HTML file; fail loudly if it cannot be read rather than passing
// `false` on to the parser.
$docPath = __DIR__ . '/example.html';
$doc = file_get_contents($docPath);
if ($doc === false) {
    throw new \RuntimeException('Unable to read HTML file: ' . $docPath);
}

// Instantiate the handler
$handler = new HTMLtoXHTMLHandler();

// Instantiate the parser
$parser = new Diggin\HTMLSax\HTMLSax();

// Register the handler with the parser
$parser->set_object($handler);

// Set the handlers
$parser->set_element_handler('openHandler', 'closeHandler');
$parser->set_data_handler('dataHandler');
$parser->set_escape_handler('escapeHandler');

// Parse the document
$parser->parse($doc);

echo $handler->getXHTML();
document.write('<b>Hello World!</b>'); ]]> </script> </head> <body> <?php echo ( '<b>This is a processing instruction</b>' ); ?> <a href="http://www.php.net">PHP</a> <% document.write('<i>Hello World!</i>'); %> </body> </html>
EOD;

// Instantiate the handler object whose methods will receive parser events
$handler = new MyHandler();

// Instantiate the parser
$parser = new Diggin\HTMLSax\HTMLSax();

// Register the handler with the parser; the callback names set below are
// presumably looked up as methods on this object (confirm against the
// HTMLSax documentation).
$parser->set_object($handler);

// Set a parser option
// NOTE(review): judging by its name this trims whitespace around data
// nodes - confirm against the HTMLSax documentation.
$parser->set_option('XML_OPTION_TRIM_DATA_NODES');

// Set the handlers: element open/close, character data, escapes,
// processing instructions and JSP/ASP-style blocks.
$parser->set_element_handler('openHandler', 'closeHandler');
$parser->set_data_handler('dataHandler');
$parser->set_escape_handler('escapeHandler');
$parser->set_pi_handler('piHandler');
$parser->set_jasp_handler('jaspHandler');

// Parse the document held in $doc (the heredoc that ends above)
$parser->parse($doc);
<?php
require_once __DIR__ . '/../../vendor/autoload.php';

/**
 * Handler that prints every escape sequence reported by the parser.
 */
class MyHandler
{
    /**
     * Print one escape sequence inside a <pre> block.
     *
     * The data originates from the parsed document, so it is HTML-escaped
     * before output; otherwise any markup it contains would be interpreted
     * by the browser instead of being shown. ENT_SUBSTITUTE keeps
     * htmlspecialchars() from returning an empty string when the data is not
     * valid UTF-8.
     *
     * @param object $parser The parser invoking the handler (unused here).
     * @param string $data   The escape content supplied by the parser.
     */
    public function escape($parser, $data)
    {
        echo '<pre>' . htmlspecialchars($data, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "\n\n\n</pre>";
    }
}

$h = new MyHandler();

// Instantiate the parser
$parser = new Diggin\HTMLSax\HTMLSax();
$parser->set_object($h);
$parser->set_escape_handler('escape');

// The option is switched on by the presence of ?strip_escapes in the URL
if (isset($_GET['strip_escapes'])) {
    $parser->set_option('XML_OPTION_STRIP_ESCAPES');
}
?>
<h1>Parsing Word Documents</h1>
<p>Shows HTMLSax parsing a simple Word generated HTML document and the impact of the option 'XML_OPTION_STRIP_ESCAPES' which can be set like;</p>
<pre>
$parser-&gt;set_option('XML_OPTION_STRIP_ESCAPES');
</pre>
<p>Word generates some strange XML / HTML escape sequences like &lt;![endif]&gt; - now (3.0.0+) handled by HTMLSax correctly.</p>
<p>
<a href="<?php echo htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?>">XML_OPTION_STRIP_ESCAPES = 0</a> :