/**
 * Tokenizes an HTML string by running it through XML_HTMLSax3.
 *
 * This object is registered as the SAX handler object; the parser invokes
 * openHandler/closeHandler/dataHandler/escapeHandler on it while parsing
 * (presumably those callbacks append to $this->tokens -- they are not
 * visible here).
 *
 * A temporary error handler (muteStrictErrorHandler) is installed for the
 * duration of the parse; the previous handler is kept in
 * $this->parent_handler and is always restored, even when parsing throws.
 *
 * @param string $string  HTML to tokenize
 * @param mixed  $config  forwarded unchanged to normalize()
 * @param mixed  $context forwarded unchanged to normalize()
 *
 * @return array contents of $this->tokens once parsing has finished
 */
public function tokenizeHTML($string, $config, $context)
{
    // Reset per-run state before any handler callback can fire.
    $this->tokens = array();
    $this->last_token_was_empty = false;

    $string = $this->normalize($string, $config, $context);

    $this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler'));

    try {
        $parser = new XML_HTMLSax3();
        $parser->set_object($this);

        $parser->set_element_handler('openHandler', 'closeHandler');
        $parser->set_data_handler('dataHandler');
        $parser->set_escape_handler('escapeHandler');

        // doesn't seem to work correctly for attributes
        $parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);

        $parser->parse($string);
    } finally {
        // Undo set_error_handler() on every exit path so the temporary
        // handler never leaks into the caller when a callback throws.
        restore_error_handler();
    }

    return $this->tokens;
}
/**
 * Runs an HTML string through XML_HTMLSax3 and returns the collected tokens.
 *
 * The string is first passed through normalize(); this object is then
 * registered as the SAX handler object so the parser calls back into its
 * openHandler/closeHandler/dataHandler/escapeHandler methods.
 *
 * @param string $string  HTML to tokenize
 * @param mixed  $config  forwarded unchanged to normalize()
 * @param mixed  $context forwarded (by reference) to normalize()
 *
 * @return array contents of $this->tokens once parsing has finished
 */
function tokenizeHTML($string, $config, &$context)
{
    // Start from an empty token list on every call.
    $this->tokens = array();

    $string = $this->normalize($string, $config, $context);

    $sax = new XML_HTMLSax3();
    $sax->set_object($this);

    // Wire up the callbacks the parser will invoke on this object.
    $sax->set_element_handler('openHandler', 'closeHandler');
    $sax->set_data_handler('dataHandler');
    $sax->set_escape_handler('escapeHandler');

    // doesn't seem to work correctly for attributes
    $sax->set_option('XML_OPTION_ENTITIES_PARSED', 1);

    $sax->parse($string);

    return $this->tokens;
}
/**
 * Feeds template source through XML_HTMLSax3 using a
 * lmbWactHighlightHandler and returns the HTML that handler produced.
 *
 * The handler is built from the tag dictionary of a compiler created by the
 * view's WACT template, plus $this->highlight_page_url, and is given
 * $this->history as its template path history.
 *
 * @param string $template_contents raw template source to parse
 *
 * @return mixed whatever the handler's getHtml() returns
 */
function _processTemplateContent($template_contents)
{
    $tags = $this->view->getWactTemplate()->createCompiler()->getTagDictionary();

    $sax = new XML_HTMLSax3();

    $highlighter = new lmbWactHighlightHandler($tags, $this->highlight_page_url);
    $highlighter->setTemplatePathHistory($this->history);

    // The highlighter, not $this, receives the SAX callbacks.
    $sax->set_object($highlighter);
    $sax->set_element_handler('openHandler', 'closeHandler');
    $sax->set_data_handler('dataHandler');
    $sax->set_escape_handler('escapeHandler');

    $sax->parse($template_contents);

    return $highlighter->getHtml();
}
/**
 * Pre-processes an HTML document, parses it with XML_HTMLSax3 and returns
 * the result of getXHTML().
 *
 * This object is registered as the SAX handler object; the parser calls
 * its _openHandler/_closeHandler/_dataHandler/_escapeHandler methods.
 *
 * @param string $doc HTML document for processing
 *
 * @return mixed whatever $this->getXHTML() returns after parsing
 */
function parse($doc)
{
    // Save all '<' symbols: a '<' that is not followed by a letter, '/',
    // '!', '?' or '%' cannot start markup, so escape it to an entity.
    // (The replacement must be '&lt;'; replacing '<' with '<' does nothing.)
    $doc = preg_replace("/<(?=[^a-zA-Z\\/\\!\\?\\%])/", '&lt;', $doc);

    // Web documents shouldn't contain the \x00 symbol. The needle is
    // written as an escape so it cannot degrade to an empty string.
    $doc = str_replace("\x00", '', $doc);

    // Opera6 bug workaround: "\xC0\xBC" is an overlong UTF-8 encoding of
    // '<'. Spelled as byte escapes so it does not depend on the encoding
    // this source file is saved in.
    $doc = str_replace("\xC0\xBC", '&lt;', $doc);

    // UTF-7 encoding ASCII decode
    $doc = $this->repackUTF7($doc);

    // Instantiate the parser
    $parser = new XML_HTMLSax3();

    // Set up the parser
    $parser->set_object($this);

    $parser->set_element_handler('_openHandler', '_closeHandler');
    $parser->set_data_handler('_dataHandler');
    $parser->set_escape_handler('_escapeHandler');

    $parser->parse($doc);

    return $this->getXHTML();
}
/**
 * Main parsing function
 *
 * Loads the bundled PEAR XML_HTMLSax3 files, pre-processes the document,
 * parses it with this object registered as the SAX handler object and
 * returns the result of getXHTML().
 *
 * @param string $doc HTML document for processing
 *
 * @return string Processed (X)HTML document
 */
public function parse($doc)
{
    $saxDir = LAMPCMS_PATH . DS . 'lib' . DS . 'Pear' . DS . 'XML' . DS;

    require_once $saxDir . 'HTMLSax3.php';
    require_once $saxDir . 'HTMLSax3' . DS . 'States.php';
    require_once $saxDir . 'HTMLSax3' . DS . 'Decorators.php';

    // Save all '<' symbols: a '<' that is not followed by a letter, '/',
    // '!', '?' or '%' cannot start markup, so escape it to an entity.
    // (The replacement must be '&lt;'; replacing '<' with '<' does nothing.)
    /**
     * @todo this will replace
     * "< p>" with "&lt; p>"
     * May not be what we want
     */
    $doc = preg_replace("/<(?=[^a-zA-Z\\/\\!\\?\\%])/", '&lt;', $doc);

    // Web documents shouldn't contain the \x00 symbol. The needle is
    // written as an escape so it cannot degrade to an empty string.
    $doc = str_replace("\x00", '', $doc);

    // Opera6 bug workaround: "\xC0\xBC" is an overlong UTF-8 encoding of
    // '<'. Spelled as byte escapes so it does not depend on the encoding
    // this source file is saved in.
    $doc = str_replace("\xC0\xBC", '&lt;', $doc);

    // UTF-7 encoding ASCII decode
    $doc = $this->repackUTF7($doc);

    // Instantiate the parser
    $parser = new \XML_HTMLSax3();

    // Set up the parser
    $parser->set_object($this);

    $parser->set_element_handler('openHandler', 'closeHandler');
    $parser->set_data_handler('dataHandler');
    $parser->set_escape_handler('escapeHandler');

    $parser->parse($doc);

    return $this->getXHTML();
}
/**
 * Main parsing function
 *
 * Pre-processes the document, parses it with XML_HTMLSax3 (this object is
 * the SAX handler object), captures getXHTML() and then calls clear() so
 * the instance is reset before the captured result is returned.
 *
 * @param string $doc HTML document for processing
 *
 * @return string Processed (X)HTML document
 */
public function parse($doc)
{
    // Save all '<' symbols: a '<' that is not followed by a letter, '/',
    // '!', '?' or '%' cannot start markup, so escape it to an entity.
    // (The replacement must be '&lt;'; replacing '<' with '<' does nothing.)
    $doc = preg_replace("/<(?=[^a-zA-Z\\/\\!\\?\\%])/", '&lt;', $doc);

    // UTF7 pack
    $doc = $this->repackUTF7($doc);

    // Instantiate the parser
    $parser = new XML_HTMLSax3();

    // Set up the parser
    $parser->set_object($this);

    $parser->set_element_handler('openHandler', 'closeHandler');
    $parser->set_data_handler('dataHandler');
    $parser->set_escape_handler('escapeHandler');

    $parser->parse($doc);

    // Grab the output before clear() runs.
    $result = $this->getXHTML();

    $this->clear();

    return $result;
}
/**
 * Parses $data with XML_HTMLSax3, using this object as the SAX handler
 * object (openHandler/closeHandler/dataHandler/escapeHandler).
 *
 * The XML_OPTION_TRIM_DATA_NODES parser option is set to 0 before parsing.
 * Nothing is returned.
 *
 * @param string $data
 * @access public
 */
function parse($data)
{
    $sax = new XML_HTMLSax3();
    $sax->set_object($this);

    // Callbacks the parser will invoke on this object.
    $sax->set_element_handler('openHandler', 'closeHandler');
    $sax->set_data_handler('dataHandler');
    $sax->set_escape_handler('escapeHandler');

    $sax->set_option('XML_OPTION_TRIM_DATA_NODES', 0);

    $sax->parse($data);
}