/**
 * Tokenizes an HTML string by running an XML_HTMLSax3 parser over it.
 *
 * The parser is pointed back at this object and calls its openHandler,
 * closeHandler, dataHandler and escapeHandler methods; those callbacks are
 * presumably what populate $this->tokens (they are not visible here).
 *
 * muteStrictErrorHandler is installed as the PHP error handler for the
 * duration of the parse. It is removed in a finally block so that an
 * exception thrown by the parser or by one of the callbacks cannot leave it
 * installed for the remainder of the request.
 *
 * @param string $string  Markup to tokenize.
 * @param mixed  $config  Forwarded unchanged to normalize().
 * @param mixed  $context Forwarded unchanged to normalize().
 *
 * @return array The value of $this->tokens once parsing has finished.
 */
public function tokenizeHTML($string, $config, $context)
{
    $this->tokens = array();
    $this->last_token_was_empty = false;

    $string = $this->normalize($string, $config, $context);

    // Keep whatever set_error_handler() returns (the previously installed
    // handler) on the instance.
    $this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler'));

    try {
        $parser = new XML_HTMLSax3();
        $parser->set_object($this);

        $parser->set_element_handler('openHandler', 'closeHandler');
        $parser->set_data_handler('dataHandler');
        $parser->set_escape_handler('escapeHandler');

        // doesn't seem to work correctly for attributes
        $parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);

        $parser->parse($string);
    } finally {
        // Always undo the set_error_handler() call above, even on failure.
        restore_error_handler();
    }

    return $this->tokens;
}
/**
 * Turns an HTML string into a token list using the XML_HTMLSax3 parser.
 *
 * The input is first passed through normalize(). The parser is then wired to
 * call this object's openHandler, closeHandler, dataHandler and
 * escapeHandler methods while it walks the markup.
 *
 * @param string $string  Markup to tokenize.
 * @param mixed  $config  Forwarded to normalize().
 * @param mixed  $context Forwarded to normalize(); accepted by reference.
 *
 * @return array The value of $this->tokens after the parse.
 */
function tokenizeHTML($string, $config, &$context)
{
    $this->tokens = array();

    $normalized = $this->normalize($string, $config, $context);

    $sax = new XML_HTMLSax3();
    $sax->set_object($this);

    $sax->set_element_handler('openHandler', 'closeHandler');
    $sax->set_data_handler('dataHandler');
    $sax->set_escape_handler('escapeHandler');

    // doesn't seem to work correctly for attributes
    $sax->set_option('XML_OPTION_ENTITIES_PARSED', 1);

    $sax->parse($normalized);

    return $this->tokens;
}
/**
 * Runs template source through a highlighting SAX handler and returns the
 * HTML that handler produced.
 *
 * The handler is built from the tag dictionary of a compiler obtained from
 * the view's WACT template, plus $this->highlight_page_url, and is given
 * $this->history as its template path history.
 *
 * @param string $template_contents Raw template source to process.
 *
 * @return mixed Whatever the handler's getHtml() returns (presumably an
 *               HTML string).
 */
function _processTemplateContent($template_contents)
{
    $tag_dictionary = $this->view
        ->getWactTemplate()
        ->createCompiler()
        ->getTagDictionary();

    $sax = new XML_HTMLSax3();

    $highlighter = new lmbWactHighlightHandler($tag_dictionary, $this->highlight_page_url);
    $highlighter->setTemplatePathHistory($this->history);

    // Route all parser callbacks to the highlight handler.
    $sax->set_object($highlighter);
    $sax->set_element_handler('openHandler', 'closeHandler');
    $sax->set_data_handler('dataHandler');
    $sax->set_escape_handler('escapeHandler');

    $sax->parse($template_contents);

    return $highlighter->getHtml();
}
/**
 * Pre-cleans an HTML document, feeds it to XML_HTMLSax3 and returns the
 * result of getXHTML().
 *
 * Pre-cleaning steps, in order:
 *  1. Escape every '<' that is followed by a character which cannot begin a
 *     tag, closing tag, declaration, processing instruction or '%' block.
 *  2. Remove NUL bytes.
 *  3. Escape the byte pair 0xC0 0xBC (the overlong UTF-8 encoding of '<').
 *  4. Pass the document through repackUTF7().
 *
 * @param string $doc HTML document to process.
 *
 * @return mixed Whatever getXHTML() returns (presumably the processed
 *               markup as a string).
 */
function parse($doc)
{
    // Save all '<' symbols: a '<' that does not start markup is turned into
    // the entity so the parser treats it as text.
    $doc = preg_replace("/<(?=[^a-zA-Z\\/\\!\\?\\%])/", '&lt;', $doc);

    // Web documents shouldn't contain the \x00 symbol
    $doc = str_replace("\x00", '', $doc);

    // Opera6 bug workaround: neutralise the overlong encoding of '<'
    // instead of letting it through as a tag opener.
    $doc = str_replace("\xC0\xBC", '&lt;', $doc);

    // UTF-7 encoding ASCII decode
    $doc = $this->repackUTF7($doc);

    // Instantiate the parser
    $parser = new XML_HTMLSax3();

    // Set up the parser: callbacks are methods on this object
    $parser->set_object($this);
    $parser->set_element_handler('_openHandler', '_closeHandler');
    $parser->set_data_handler('_dataHandler');
    $parser->set_escape_handler('_escapeHandler');

    $parser->parse($doc);

    return $this->getXHTML();
}
/**
 * Main parsing function
 *
 * Loads the bundled PEAR XML_HTMLSax3 files, pre-cleans the document
 * (escapes stray '<', strips NUL bytes, escapes the overlong UTF-8 encoding
 * of '<', runs repackUTF7()), parses it with this object as the callback
 * target, and returns the result of getXHTML().
 *
 * @param string $doc HTML document for processing
 *
 * @return string Processed (X)HTML document
 */
public function parse($doc)
{
    // Directory holding the bundled PEAR XML package.
    $pearXmlDir = LAMPCMS_PATH . DS . 'lib' . DS . 'Pear' . DS . 'XML' . DS;

    require_once $pearXmlDir . 'HTMLSax3.php';
    require_once $pearXmlDir . 'HTMLSax3' . DS . 'States.php';
    require_once $pearXmlDir . 'HTMLSax3' . DS . 'Decorators.php';

    // Save all '<' symbols
    /**
     * @todo this will replace
     * "< p>" with "&lt; p>"
     * May not be what we want
     */
    $doc = preg_replace("/<(?=[^a-zA-Z\\/\\!\\?\\%])/", '&lt;', $doc);

    // Web documents shouldn't contain the \x00 symbol
    $doc = str_replace("\x00", '', $doc);

    // Opera6 bug workaround: 0xC0 0xBC is the overlong UTF-8 encoding of
    // '<', so it is escaped rather than passed through as a tag opener.
    $doc = str_replace("\xC0\xBC", '&lt;', $doc);

    // UTF-7 encoding ASCII decode
    $doc = $this->repackUTF7($doc);

    // Instantiate the parser
    $parser = new \XML_HTMLSax3();

    // Set up the parser: callbacks are methods on this object
    $parser->set_object($this);
    $parser->set_element_handler('openHandler', 'closeHandler');
    $parser->set_data_handler('dataHandler');
    $parser->set_escape_handler('escapeHandler');

    $parser->parse($doc);

    return $this->getXHTML();
}
/**
 * Main parsing function
 *
 * Escapes stray '<' characters, runs the document through repackUTF7(),
 * parses it with XML_HTMLSax3 using this object as the callback target, and
 * returns the result of getXHTML(). clear() is called after the result has
 * been captured and before returning.
 *
 * @param string $doc HTML document for processing
 *
 * @return string Processed (X)HTML document
 */
public function parse($doc)
{
    // Save all '<' symbols: a '<' that is not followed by a character able
    // to start markup is turned into the entity so it is treated as text.
    $doc = preg_replace("/<(?=[^a-zA-Z\\/\\!\\?\\%])/", '&lt;', $doc);

    // UTF7 pack
    $doc = $this->repackUTF7($doc);

    // Instantiate the parser
    $parser = new XML_HTMLSax3();

    // Set up the parser: callbacks are methods on this object
    $parser->set_object($this);
    $parser->set_element_handler('openHandler', 'closeHandler');
    $parser->set_data_handler('dataHandler');
    $parser->set_escape_handler('escapeHandler');

    $parser->parse($doc);

    // Capture the output before clear() is called.
    $result = $this->getXHTML();
    $this->clear();

    return $result;
}
/**
 * Feeds $data to a fresh XML_HTMLSax3 parser whose callbacks are this
 * object's openHandler, closeHandler, dataHandler and escapeHandler
 * methods. The XML_OPTION_TRIM_DATA_NODES option is set to 0.
 *
 * Nothing is returned; any result is whatever the callbacks record.
 *
 * @param string $data
 * @access public
 */
function parse($data)
{
    $sax = new XML_HTMLSax3();

    // This object receives every parser event.
    $sax->set_object($this);
    $sax->set_element_handler('openHandler', 'closeHandler');
    $sax->set_data_handler('dataHandler');
    $sax->set_escape_handler('escapeHandler');

    $sax->set_option('XML_OPTION_TRIM_DATA_NODES', 0);

    $sax->parse($data);
}
/**
 * Tokenizes the given HTML or XML $data.
 *
 * The result is an array of "tokens", each an associative array with:
 *   - tag:        the name of the tag
 *   - attributes: key/value array of the tag's attributes/properties
 *   - level:      the depth of the tag within the document
 *   - type:       'open', 'complete' (self-closing), 'cdata' (Character
 *                 DATA) or 'close'
 *   - value:      the contents of the tag
 *
 * The same array is also left in the $output property of the Messy object.
 *
 * When $isXml is true, the strip-tag list in effect (stripTagsSafe or
 * stripTags, depending on $this->safe), selfClosing and transform are
 * emptied for the duration of the parse and put back afterwards.
 *
 * @access public
 * @param string $data
 * @param boolean $isXml
 * @return array
 *
 */
function parse($data, $isXml = false)
{
    // Register this object's own methods as the parser callbacks.
    $this->set_object($this);
    $this->set_element_handler('handle_start_tag', 'handle_end_tag');
    $this->set_data_handler('handle_data');
    $this->set_escape_handler('handle_comment');
    $this->set_option('XML_OPTION_TRIM_DATA_NODES', 0);

    $this->output = array();
    $this->level = 0;

    $data = str_replace('<?xml:', '<xml:', $data);

    if (!$isXml) {
        $this->isXml = false;
    } else {
        // Set the HTML-specific rule lists aside; they are restored below.
        $stripProperty = $this->safe ? 'stripTagsSafe' : 'stripTags';
        $savedStrip = $this->$stripProperty;
        $this->$stripProperty = array();

        $savedSelfClosing = $this->selfClosing;
        $this->selfClosing = array();

        $savedTransform = $this->transform;
        $this->transform = array();

        $this->isXml = true;
    }

    // Unwrap editor template spans, keeping their contents.
    $hasTemplateSpan = strpos($data, '<span id="xed-template">') !== false;
    if ($hasTemplateSpan) {
        $data = preg_replace('/<span id="xed-template">(.*?)<\\/span>/', '\\1', $data);
    }

    // Unwrap Apple-style-span wrappers, keeping their contents.
    // NOTE(review): the pattern requires a literal '>' immediately before
    // '</span>' -- confirm this is intended and not a typo.
    $hasAppleSpan = strpos($data, '<span class="Apple-style-span"') !== false;
    if ($hasAppleSpan) {
        $data = preg_replace('/<span class="Apple-style-span"([^>]*?)>(.*?)><\\/span>/', '\\2', $data);
    }

    // Drop '<br />' that follows tag-free text after '<pre>'; repeat until
    // no such occurrence is left.
    $preBreakPattern = '|<pre>([^<]*)<br />|s';
    while (preg_match($preBreakPattern, $data)) {
        $data = preg_replace($preBreakPattern, '<pre>\\1', $data);
    }

    parent::parse($data);

    // this block handles missing closing tags
    foreach (array_reverse($this->levels) as $unclosedTag) {
        $this->level--;
        $closeToken = array(
            'tag' => $unclosedTag,
            'attributes' => array(),
            'level' => $this->level,
            'type' => 'close',
            'value' => '',
        );
        $this->output[] = $closeToken;
    }

    if ($isXml) {
        // $this->safe is consulted again here, as in the save step above.
        $stripProperty = $this->safe ? 'stripTagsSafe' : 'stripTags';
        $this->$stripProperty = $savedStrip;

        $this->selfClosing = $savedSelfClosing;
        $this->transform = $savedTransform;
        $this->isXml = false;
    }

    return $this->output;
}