]]> </script> </head> <body> <?php echo ( '<b>This is a processing instruction</b>' ); ?> <a href="http://www.php.net">PHP</a> <% document.write('<i>Hello World!</i>'); %> </body> </html>
EOD;

// Object that will receive the parser callbacks.
$handler = new MyHandler();

// The SAX parser that drives the example.
$parser = new Diggin\HTMLSax\HTMLSax();

// Attach the handler object before naming any callbacks; the strings passed
// to the set_*_handler() calls below are presumably method names on it.
$parser->set_object($handler);

// Switch on the XML_OPTION_TRIM_DATA_NODES parser option.
$parser->set_option('XML_OPTION_TRIM_DATA_NODES');

// Register one callback per event type: character data, escapes,
// processing instructions, JSP/ASP-style blocks, and opening/closing tags.
$parser->set_data_handler('dataHandler');
$parser->set_escape_handler('escapeHandler');
$parser->set_pi_handler('piHandler');
$parser->set_jasp_handler('jaspHandler');
$parser->set_element_handler('openHandler', 'closeHandler');

// Run the parser over the document held in $doc.
$parser->parse($doc);
the most part is allows you to preserve the structure of original template, preserving whitespace and so on with one or two minor exceptions, such as whitespace between attributes and the quotes used for attributes. Compare the source template for this example with the output.
EOD;
$tpl->setVar('para1', $para1);

// Second paragraph of explanatory text, handed to the template as 'para2'.
$para2 = <<<EOD
Notice also how the fourth argument to the open and close handlers is used (see the PHP source) - this allows you to correctly "rebuild" tags like <div /> vs. <div></div>
EOD;
$tpl->setVar('para2', $para2);

// The SAX parser that will walk the template file.
$parser = new Diggin\HTMLSax\HTMLSax();

// The template object doubles as the handler: attach it before naming callbacks.
$parser->set_object($tpl);

// Switch on the XML_OPTION_STRIP_ESCAPES parser option.
$parser->set_option('XML_OPTION_STRIP_ESCAPES');

// Register one callback per event type: character data, escapes,
// processing instructions, JSP/ASP-style blocks, and opening/closing tags.
$parser->set_data_handler('data');
$parser->set_escape_handler('escape');
$parser->set_pi_handler('pi');
$parser->set_jasp_handler('jasp');
$parser->set_element_handler('open', 'close');

// Feed the template that sits next to this script through the parser,
// then let the template object emit its output.
$parser->parse(file_get_contents(__DIR__ . '/simpletemplate.tpl'));
$tpl->display();
// Create the parser, attach the handler object and register its escape callback.
$parser = new Diggin\HTMLSax\HTMLSax();
$parser->set_object($h);
$parser->set_escape_handler('escape');

// The mere presence of "strip_escapes" in the query string enables the option;
// its value is not inspected.
if (isset($_GET['strip_escapes'])) {
    $parser->set_option('XML_OPTION_STRIP_ESCAPES');
}

// PHP_SELF is built from the request URI and can carry attacker-supplied
// path info (e.g. /page.php/"><script>...), so it must be escaped before
// being written into an HTML attribute.
$selfUrl = htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'UTF-8');

// Notes on the markup below:
//  - the sample escape sequence is written with entities, otherwise the
//    browser treats it as a (bogus) comment and never displays it;
//  - the paragraph is closed before <pre>, since <pre> cannot be nested in <p>.
?>
<h1>Parsing Word Documents</h1>
<p>Shows HTMLSax parsing a simple Word generated HTML document and the impact of the option 'XML_OPTION_STRIP_ESCAPES' which can be set like;</p>
<pre>
$parser->set_option('XML_OPTION_STRIP_ESCAPES');
</pre>
<p>Word generates some strange XML / HTML escape sequences like &lt;![endif]&gt; - now (3.0.0+) handled by HTMLSax correctly.</p>
<p>
<a href="<?php echo $selfUrl; ?>">XML_OPTION_STRIP_ESCAPES = 0</a> :
<a href="<?php echo $selfUrl; ?>?strip_escapes=1">XML_OPTION_STRIP_ESCAPES = 1</a>
</p>
<p>Starting to parse...</p>
<?php
// Parse the Word-generated sample document that sits next to this script.
$parser->parse(file_get_contents(__DIR__ . '/worddoc.htm'));
?>
<p>Parsing completed</p>