示例#1
0
 /**
  * Parse the given HTML into a document tree.
  *
  * Every call starts from scratch: the node stack is emptied and a new
  * PHPricot_Document becomes both the document and the current parent.
  * The markup is passed to the parser as-is; this method itself performs
  * no clean-up or repair of the HTML.
  *
  * @param  string $html  Markup to parse
  * @return PHPricot_Document  The document held by this object once parsing has finished
  */
 public function parse($html)
 {
     // Reset all per-parse state before any callback can fire.
     $this->stack = array();
     $root = new PHPricot_Document();
     $this->currentParent = $root;
     $this->document = $root;

     $parser = html_parser_create();
     $this->parser = $parser;

     // Registration function => method on this object that receives the event.
     $callbacks = array(
         'html_parser_data_handler'     => "text",
         'html_parser_starttag_handler' => "startTag",
         'html_parser_endtag_handler'   => "endTag",
         'html_parser_comment_handler'  => "comment",
     );
     foreach ($callbacks as $register => $method) {
         $register($parser, array($this, $method));
     }

     html_parser_parse($parser, $html);
     html_parser_free($parser);

     return $this->document;
 }
示例#2
0
 /**
  *	Parses HTML into a flat list of events, in the order the parser reports them.
  *	Each entry has one of the following formats:
  *		start tag:	array(H_START, tagName, tagAttrs)
  *		end tag:	array(H_END, tagName)
  *		text:		array(H_TEXT, text)
  *		comment:	array(H_COMMENT, comment)
  *	Tag names are UPPERCASED.
  *
  *	The H_START, H_END, H_TEXT and H_COMMENT constants are defined on first
  *	use if they do not exist yet.
  *
  *	@param		html	string		Source html
  *	@return		list
  */
 public function parseHtml($html)
 {
     // Event-type markers. Guarded with defined() so that a constant which
     // already exists is left alone instead of triggering a redefinition notice.
     $eventTypes = array(
         'H_START'   => 0,
         'H_END'     => 1,
         'H_TEXT'    => 2,
         'H_COMMENT' => 3,
     );
     foreach ($eventTypes as $constName => $constValue) {
         if (!defined($constName)) {
             define($constName, $constValue);
         }
     }

     // Events are collected in a local array captured by reference, so nothing
     // is left behind in global scope and nested/repeated calls cannot clobber
     // each other's results.
     $events = array();

     $p = html_parser_create();
     html_parser_starttag_handler($p, function ($tag, $attrs) use (&$events) {
         $events[] = array(H_START, $tag, $attrs);
     });
     html_parser_endtag_handler($p, function ($tag) use (&$events) {
         $events[] = array(H_END, $tag);
     });
     html_parser_data_handler($p, function ($text) use (&$events) {
         $events[] = array(H_TEXT, $text);
     });
     html_parser_comment_handler($p, function ($text) use (&$events) {
         $events[] = array(H_COMMENT, $text);
     });

     // The third argument is passed through unchanged from the original call.
     html_parser_parse($p, $html, 1);
     html_parser_free($p);

     return $events;
 }