Beispiel #1
0
    /**
     * Tokenizes an HTML string by feeding it to an XML_HTMLSax3 parser.
     *
     * This object is registered with the parser together with the names of
     * its open/close/data/escape handler methods; the token list kept in
     * $this->tokens is reset before parsing and returned afterwards.
     *
     * While the parser is set up and run, muteStrictErrorHandler() is
     * installed as the PHP error handler. It is always removed again, even
     * when parsing throws, so the caller's error handling is never left
     * replaced by the muting handler.
     *
     * @param string $string  HTML to tokenize
     * @param mixed  $config  forwarded to normalize()
     * @param mixed  $context forwarded to normalize()
     * @return array the contents of $this->tokens after parsing
     */
    public function tokenizeHTML($string, $config, $context) {

        $this->tokens = array();
        $this->last_token_was_empty = false;

        $string = $this->normalize($string, $config, $context);

        // Remember whatever handler was active before ours was installed.
        $this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler'));

        try {
            $parser = new XML_HTMLSax3();
            $parser->set_object($this);
            $parser->set_element_handler('openHandler','closeHandler');
            $parser->set_data_handler('dataHandler');
            $parser->set_escape_handler('escapeHandler');

            // doesn't seem to work correctly for attributes
            $parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);

            $parser->parse($string);
        } finally {
            // Pop our handler no matter how the parse ended.
            restore_error_handler();
        }

        return $this->tokens;

    }
Beispiel #2
0
 /**
  * Runs a string through an XML_HTMLSax3 parser that is wired to this
  * object and returns the resulting token list.
  *
  * @param string $string   markup to tokenize
  * @param mixed  $config   forwarded to normalize()
  * @param mixed  &$context forwarded to normalize(); taken by reference
  * @return array $this->tokens as it stands once parsing has finished
  */
 function tokenizeHTML($string, $config, &$context)
 {
     // Every call starts from an empty token list.
     $this->tokens = array();

     $normalized = $this->normalize($string, $config, $context);

     $sax = new XML_HTMLSax3();
     $sax->set_object($this);
     $sax->set_element_handler('openHandler', 'closeHandler');
     $sax->set_data_handler('dataHandler');
     $sax->set_escape_handler('escapeHandler');

     // doesn't seem to work correctly for attributes
     $sax->set_option('XML_OPTION_ENTITIES_PARSED', 1);

     $sax->parse($normalized);

     return $this->tokens;
 }
 /**
  * Parses template source with XML_HTMLSax3, using a
  * lmbWactHighlightHandler as the callback object, and returns the HTML
  * that handler has produced.
  *
  * The handler is built from the tag dictionary of a compiler created by
  * the view's WACT template, plus $this->highlight_page_url, and is given
  * $this->history as its template path history.
  *
  * @param string $template_contents template source to process
  * @return mixed whatever the handler's getHtml() returns
  */
 function _processTemplateContent($template_contents)
 {
     $dictionary = $this->view->getWactTemplate()->createCompiler()->getTagDictionary();

     $sax = new XML_HTMLSax3();

     $highlighter = new lmbWactHighlightHandler($dictionary, $this->highlight_page_url);
     $highlighter->setTemplatePathHistory($this->history);

     $sax->set_object($highlighter);
     $sax->set_element_handler('openHandler', 'closeHandler');
     $sax->set_data_handler('dataHandler');
     $sax->set_escape_handler('escapeHandler');
     $sax->parse($template_contents);

     return $highlighter->getHtml();
 }
Beispiel #4
0
 /**
  * Pre-cleans an HTML document, parses it with XML_HTMLSax3 using this
  * object's _openHandler/_closeHandler/_dataHandler/_escapeHandler
  * methods, and returns the result of getXHTML().
  *
  * @param string $doc HTML document for processing
  *
  * @return mixed whatever getXHTML() returns after parsing
  */
 function parse($doc)
 {
     // Save all '<' symbols: escape any '<' that is followed by a character
     // other than a letter, '/', '!', '?' or '%'.
     $doc = preg_replace("/<(?=[^a-zA-Z\\/\\!\\?\\%])/", '&lt;', $doc);
     // Web documents shouldn't contain the \x00 symbol. The byte is written
     // as an escape sequence: a raw NUL does not survive in source text and
     // an empty search string makes str_replace() a no-op.
     $doc = str_replace("\x00", '', $doc);
     // Opera6 bug workaround: 0xC0 0xBC is the overlong two-byte UTF-8 form
     // of '<'. Spelled as byte escapes so the needle does not depend on the
     // encoding this file is saved in.
     $doc = str_replace("\xC0\xBC", '&lt;', $doc);
     // UTF-7 encoding ASCII decode
     $doc = $this->repackUTF7($doc);
     // Instantiate the parser
     $parser = new XML_HTMLSax3();
     // Set up the parser
     $parser->set_object($this);
     $parser->set_element_handler('_openHandler', '_closeHandler');
     $parser->set_data_handler('_dataHandler');
     $parser->set_escape_handler('_escapeHandler');
     $parser->parse($doc);
     return $this->getXHTML();
 }
Beispiel #5
0
 /**
  * Main parsing function
  *
  * Loads the bundled XML_HTMLSax3 library files, pre-cleans the document,
  * parses it with this object's openHandler/closeHandler/dataHandler/
  * escapeHandler methods, and returns the result of getXHTML().
  *
  * @param string $doc HTML document for processing
  *
  * @return string Processed (X)HTML document
  */
 public function parse($doc)
 {
     require_once LAMPCMS_PATH . DS . 'lib' . DS . 'Pear' . DS . 'XML' . DS . 'HTMLSax3.php';
     require_once LAMPCMS_PATH . DS . 'lib' . DS . 'Pear' . DS . 'XML' . DS . 'HTMLSax3' . DS . 'States.php';
     require_once LAMPCMS_PATH . DS . 'lib' . DS . 'Pear' . DS . 'XML' . DS . 'HTMLSax3' . DS . 'Decorators.php';
     // Save all '<' symbols: escape any '<' that is followed by a character
     // other than a letter, '/', '!', '?' or '%'.
     /**
      * @todo this will also replace
      * "< p>" with "&lt; p>"
      * May not be what we want
      */
     $doc = preg_replace("/<(?=[^a-zA-Z\\/\\!\\?\\%])/", '&lt;', $doc);
     // Web documents shouldn't contain the \x00 symbol. The byte is written
     // as an escape sequence: a raw NUL does not survive in source text and
     // an empty search string makes str_replace() a no-op.
     $doc = str_replace("\x00", '', $doc);
     // Opera6 bug workaround: 0xC0 0xBC is the overlong two-byte UTF-8 form
     // of '<'. Spelled as byte escapes so the needle does not depend on the
     // encoding this file is saved in.
     $doc = str_replace("\xC0\xBC", '&lt;', $doc);
     // UTF-7 encoding ASCII decode
     $doc = $this->repackUTF7($doc);
     // Instantiate the parser
     $parser = new \XML_HTMLSax3();
     // Set up the parser
     $parser->set_object($this);
     $parser->set_element_handler('openHandler', 'closeHandler');
     $parser->set_data_handler('dataHandler');
     $parser->set_escape_handler('escapeHandler');
     $parser->parse($doc);
     return $this->getXHTML();
 }
Beispiel #6
0
 /**
  * Main parsing function
  *
  * Escapes stray '<' characters, repacks UTF-7, parses the document with
  * XML_HTMLSax3 (this object supplies the handler methods), collects the
  * output of getXHTML() and then calls clear() before returning it.
  *
  * @param string $doc HTML document for processing
  *
  * @return string Processed (X)HTML document
  */
 public function parse($doc)
 {
     // Escape any '<' that is followed by a character other than a letter,
     // '/', '!', '?' or '%'.
     $escaped = preg_replace("/<(?=[^a-zA-Z\\/\\!\\?\\%])/", '&lt;', $doc);

     // UTF7 pack
     $repacked = $this->repackUTF7($escaped);

     // Build the parser and point all of its callbacks at this object.
     $sax = new XML_HTMLSax3();
     $sax->set_object($this);
     $sax->set_element_handler('openHandler', 'closeHandler');
     $sax->set_data_handler('dataHandler');
     $sax->set_escape_handler('escapeHandler');
     $sax->parse($repacked);

     // Grab the output first; clear() is called only after it is captured.
     $xhtml = $this->getXHTML();
     $this->clear();

     return $xhtml;
 }
 /**
  * Parses $data with a fresh XML_HTMLSax3 instance that is given this
  * object and the names of its open/close/data/escape handler methods.
  * The parser's XML_OPTION_TRIM_DATA_NODES option is set to 0.
  *
  * Nothing is returned.
  *
  * @param  string  $data
  * @access public
  */
 function parse($data)
 {
     $sax = new XML_HTMLSax3();

     $sax->set_object($this);
     $sax->set_element_handler('openHandler', 'closeHandler');
     $sax->set_data_handler('dataHandler');
     $sax->set_escape_handler('escapeHandler');

     $sax->set_option('XML_OPTION_TRIM_DATA_NODES', 0);

     $sax->parse($data);
 }
Beispiel #8
0
 /**
  * Parses the given HTML or XML $data into an array of
  * "tokens", which are associative arrays with the following
  * properties: tag (the name of the tag), attributes (a key/value
  * array of tag attributes/properties), level (the depth of this
  * tag within the document), type (either 'open', 'complete'
  * - as in self-closing, 'cdata' - as in Character DATA, or
  * 'close'), and the value of the tag (AKA the contents of it).
  * This is also stored in the $output property of your Messy
  * object.
  *
  * When $isXml is true, the active strip-tag list, the self-closing
  * list and the transform list are emptied for the duration of the
  * parse and restored afterwards.
  *
  * @access	public
  * @param	string	$data
  * @param	boolean	$isXml
  * @return	array
  *
  */
 function parse($data, $isXml = false)
 {
     // This object acts as both the parser and its own handler object.
     $this->set_object($this);
     $this->set_element_handler('handle_start_tag', 'handle_end_tag');
     $this->set_data_handler('handle_data');
     $this->set_escape_handler('handle_comment');
     $this->set_option('XML_OPTION_TRIM_DATA_NODES', 0);
     $this->output = array();
     $this->level = 0;
     // Rewrite '<?xml:' prefixes to '<xml:' before the parser sees them.
     $data = str_replace('<?xml:', '<xml:', $data);
     if ($isXml) {
         // Stash and empty whichever strip list is active for the current
         // safe/unsafe mode; it is put back after parsing.
         if ($this->safe) {
             $strip = $this->stripTagsSafe;
             $this->stripTagsSafe = array();
         } else {
             $strip = $this->stripTags;
             $this->stripTags = array();
         }
         $close = $this->selfClosing;
         $this->selfClosing = array();
         $trans = $this->transform;
         $this->transform = array();
         $this->isXml = true;
     } else {
         $this->isXml = false;
     }
     // Unwrap <span id="xed-template"> wrappers, keeping their contents.
     if (strpos($data, '<span id="xed-template">') !== false) {
         $data = preg_replace('/<span id="xed-template">(.*?)<\\/span>/', '\\1', $data);
     }
     // Unwrap Apple-style-span wrappers, keeping their contents. The
     // pattern previously demanded an extra '>' right before '</span>'
     // and dropped it from the output, which cut the closing '>' off the
     // last inner tag and skipped spans holding plain text.
     if (strpos($data, '<span class="Apple-style-span"') !== false) {
         $data = preg_replace('/<span class="Apple-style-span"([^>]*?)>(.*?)<\\/span>/', '\\2', $data);
     }
     // Remove '<br />' occurrences that follow '<pre>' with no other tag in
     // between; repeated until none are left because each pass removes
     // only one per '<pre>'.
     while (preg_match('|<pre>([^<]*)<br />|s', $data)) {
         $data = preg_replace('|<pre>([^<]*)<br />|s', '<pre>\\1', $data);
     }
     parent::parse($data);
     // this block handles missing closing tags
     // NOTE(review): $this->levels is presumably the stack of still-open
     // tags maintained by the handlers - confirm against the handlers.
     foreach (array_reverse($this->levels) as $tag) {
         $this->level--;
         $this->output[] = array('tag' => $tag, 'attributes' => array(), 'level' => $this->level, 'type' => 'close', 'value' => '');
     }
     if ($isXml) {
         // Put back the lists that were emptied for XML mode.
         if ($this->safe) {
             $this->stripTagsSafe = $strip;
         } else {
             $this->stripTags = $strip;
         }
         $this->selfClosing = $close;
         $this->transform = $trans;
         $this->isXml = false;
     }
     return $this->output;
 }