HtmlParser::Parse PHP代码示例

示例#1

0

显示文件

文件： BitPdf.php 项目： bitweaver/pdf

 /**
  * Parse an HTML fragment and flush the converted result.
  *
  * The markup is handed to HtmlParser together with $this->html_grammar. The parsed
  * content is then walked by WalkParsedArray(), which is given $src to fill, and the
  * resulting string is passed to flush().
  *
  * @param string $data HTML to insert. Passed by reference: the comment-stripped
  *                     markup is written back to the caller's variable.
  */
 function insert_html(&$data)
 {
     // Strip out HTML comments which don't get parsed right.
     // First pass: complete "<!-- ... -->" comments. The 's' flag lets '.' cross line
     // breaks, so comments spanning several lines or containing a '>' are removed whole
     // (the legacy pattern below handles neither case).
     $stripped = preg_replace('#<!--.*?-->#s', '', $data);
     if ($stripped !== null) {
         // preg_replace() yields null on a PCRE error; keep the input in that case
         $data = $stripped;
     }
     // Second pass: the legacy pattern, kept so any other "<!...>" declaration is still
     // removed exactly as before.
     $data = preg_replace('#<!.*?[^>]>#', '', $data);

     // create parser object, insert html code and parse it
     $htmlparser = new HtmlParser($data, $this->html_grammar, '', 0);
     $htmlparser->Parse();

     // now set it together
     $src = '';
     $dummy = array();
     $this->WalkParsedArray($htmlparser->content, $src, $dummy);

     $this->flush($src);
 }

示例#2

0

显示文件

文件： editlib.php 项目： railfuture/tiki-website

	/**
	 * Convert an HTML fragment to wiki markup.
	 *
	 * Wrapper around zaufi's HTML sucker code just to use the html to wiki bit.
	 * If the compiled grammar cannot be loaded, an error template is displayed
	 * through $smarty and the script terminates.
	 *
	 * @param string $inHtml HTML in. Passed by reference: empty <p></p> pairs are
	 *                       removed from the caller's variable before parsing.
	 * @return string the generated wiki markup
	 */
	function parse_html(&$inHtml)
	{
		global $smarty;

		// include_once rather than include: this method may run more than once per
		// request and the parser file must not be loaded (and redeclared) twice.
		include_once ('lib/htmlparser/htmlparser.inc');

		// Read compiled (serialized) grammar. A missing, unreadable, empty or
		// undecodable file all end up on the same error path.
		$grammarfile = 'lib/htmlparser/htmlgrammar.cmp';
		$serialized = @file_get_contents($grammarfile);
		$grammar = ($serialized === false || $serialized === '') ? false : unserialize($serialized);
		if ($grammar === false) {
			$smarty->assign('msg', tra("Can't parse HTML data - no grammar file"));
			$smarty->display("error.tpl");
			die;
		}

		// process a few ckeditor artifacts
		$inHtml = str_replace('<p></p>', '', $inHtml);	// empty p tags are invisible

		// create parser object, insert html code and parse it
		$htmlparser = new HtmlParser($inHtml, $grammar, '', 0);
		$htmlparser->Parse();
		// Should I try to convert HTML to wiki?
		$out_data = '';
		/*
		 * ['stack'] = array
		 * Special keys introduced to convert to Wiki
		 * - ['wikitags']     = the number of 'wikistack' entries produced by the html tag
		 *
		 * ['wikistack'] = array(), is used to save the wiki markup for the linebreak handling (1 array = 1 html tag)
		 * Each array entry contains the following keys:
		 * - ['begin']        = array() of begin markups (1 style definition = 1 array entry)
		 * - ['end']          = array() of end markups
		 *
		 * wiki_lbr  = true if we must use '%%%' for linebreaks instead of '\n'
		 */
		$p = array('stack' => array(), 'listack' => array(), 'wikistack' => array(),
			'wiki_lbr' => 0, 'first_td' => false, 'first_tr' => false);
		$this->walk_and_parse($htmlparser->content, $out_data, $p, '');
		// Are some tags still open? (This can happen if the HTML is not valid, but that
		// is no reason to produce invalid wiki.) Append their pending strings, innermost first.
		while (count($p['stack'])) {
			$e = end($p['stack']);
			$out_data .= $e['string'];
			array_pop($p['stack']);
		}
		// Unclosed lists are ignored: wiki has no special start/end list syntax.
		// Remaining clean-up:
		// 1) fix linked images
		$out_data = preg_replace(',\[(.*)\|\(img src=(.*)\)\],mU', '{img src=$2 link=$1}', $out_data);
		// 2) fix remaining images (not in links)
		$out_data = preg_replace(',\(img src=(.*)\),mU', '{img src=$1}', $out_data);
		// 3) remove empty lines
		// NOTE(review): with the U (ungreedy) flag "[\n]+" matches a single newline, so this
		// replacement currently changes nothing. Left untouched because callers may rely on
		// the present output; confirm whether blank lines should really be collapsed.
		$out_data = preg_replace(",[\n]+,mU", "\n", $out_data);
		// 4) remove nbsp's
		$out_data = preg_replace(",&#160;,mU", " ", $out_data);

		return $out_data;
	}	// end parse_html

示例#3

0

显示文件

文件： tiki-editpage.php 项目： Kraiany/kraiany_site_docker

/**
 * Convert an HTML fragment to wiki markup.
 *
 * Wrapper around zaufi's HTML sucker code just to use the html to wiki bit.
 * If the compiled grammar cannot be loaded, an error template is displayed
 * through the global $smarty and the script terminates.
 *
 * @param string $inHtml HTML in (declared by reference; this function only reads it)
 * @return string the generated wiki markup
 */
function parse_html(&$inHtml)
{
    // $smarty lives in the global scope; without this declaration the error path
    // below would call a method on an undefined variable.
    global $smarty;

    // Read compiled (serialized) grammar. A missing, unreadable, empty or
    // undecodable file all end up on the same error path.
    $grammarfile = 'lib/htmlparser/htmlgrammar.cmp';
    $serialized = @file_get_contents($grammarfile);
    $grammar = ($serialized === false || $serialized === '') ? false : unserialize($serialized);
    if ($grammar === false) {
        $smarty->assign('msg', tra("Can't parse HTML data - no grammar file"));
        $smarty->display("error.tpl");
        die;
    }
    // create parser object, insert html code and parse it
    $htmlparser = new HtmlParser($inHtml, $grammar, '', 0);
    $htmlparser->Parse();
    // Should I try to convert HTML to wiki?
    $out_data = '';
    $p = array('stack' => array(), 'listack' => array(), 'first_td' => false, 'first_tr' => false);
    walk_and_parse($htmlparser->content, $out_data, $p, '');
    // Are some tags still open? (This can happen if the HTML is not valid, but that
    // is no reason to produce invalid wiki.) Append their pending strings, innermost first.
    while (count($p['stack'])) {
        $e = end($p['stack']);
        $out_data .= $e['string'];
        array_pop($p['stack']);
    }
    // Unclosed lists are ignored: wiki has no special start/end list syntax.
    // Remaining clean-up:
    // 1) fix linked images
    $out_data = preg_replace(',\\[(.*)\\|\\(img src=(.*)\\)\\],mU', '{img src=$2 link=$1}', $out_data);
    // 2) fix remaining images (not in links)
    $out_data = preg_replace(',\\(img src=(.*)\\),mU', '{img src=$1}', $out_data);
    // 3) remove empty lines
    // NOTE(review): with the U (ungreedy) flag "[\n]+" matches a single newline, so this
    // replacement currently changes nothing. Left untouched because callers may rely on
    // the present output; confirm whether blank lines should really be collapsed.
    $out_data = preg_replace(",[\n]+,mU", "\n", $out_data);
    // 4) remove nbsp's
    $out_data = preg_replace(",&#160;,mU", " ", $out_data);
    return $out_data;
}

示例#4

0

显示文件

文件： edit.php 项目： bitweaver/wiki

 // Fetch the remote page; warnings are suppressed with '@'.
 // NOTE(review): failure is detected only through $php_errormsg, which PHP fills in
 // only when track_errors is enabled and which no longer exists in PHP 8 - confirm the
 // target PHP version, or test the return value of file_get_contents() instead.
 $sdta = @file_get_contents($suck_url);
 if (isset($php_errormsg) && strlen($php_errormsg)) {
     $gBitSystem->fatalError(tra("Can't import remote HTML page"));
 }
 // Need to parse HTML?
 if ($parsehtml == 'y') {
     // Read compiled (serialized) grammar
     $grammarfile = UTIL_PKG_PATH . 'htmlparser/htmlgrammar.cmp';
     if (!($fp = @fopen($grammarfile, 'r'))) {
         $gBitSystem->fatalError(tra("Can't parse remote HTML page"));
     }
     $grammar = unserialize(fread($fp, filesize($grammarfile)));
     fclose($fp);
     // create parser object, insert html code and parse it
     $htmlparser = new HtmlParser($sdta, $grammar, '', 0);
     $htmlparser->Parse();
     // Convert the parsed HTML to wiki markup; the output is collected in $parseddata
     $parseddata = '';
     $p = array('stack' => array(), 'listack' => array(), 'first_td' => false);
     walk_and_parse($htmlparser->content, $parseddata, $p);
     // Are some tags still open? (This can happen if the HTML is not valid, but that
     // is no reason to produce invalid wiki.)
     // NOTE(review): the pending strings are appended to $sdta (the raw HTML) while the
     // converted text is being built in $parseddata - this looks like it should target
     // $parseddata; confirm against the code that follows this fragment.
     while (count($p['stack'])) {
         $e = end($p['stack']);
         $sdta .= $e['string'];
         array_pop($p['stack']);
     }
     // Unclosed lists are ignored: wiki has no special start/end list syntax.
     // Remaining clean-up:
     // 1) fix linked images
     // NOTE(review): this pattern expects a space after the opening parenthesis
     // ("( img src="); verify that this matches what walk_and_parse() emits.
     $parseddata = preg_replace(',\\[(.*)\\|\\( img src=(.*)\\)\\],mU', '{img src=$2 link=$1}', $parseddata);

示例#5

0

显示文件

文件： w3sTemplate.class.php 项目： jmp0207/w3studiocms

 /**
  * Returns the available classes for a given slot. Can retrieve only the class
  * name or the full CSS style. This is made with the mode parameter
  *
  * The template of the content's page is parsed to collect the id of the slot and of
  * the elements found above it in the dumped parse tree; every stylesheet linked by
  * the template is then searched for class rules attached to those ids, and finally
  * for class rules that are not attached to any element.
  *
  * Side effect: the process-wide error reporting level is set to E_ERROR and is not
  * restored.
  *
  * @param      object  $content  The content to inspect; getW3sSlot() and getW3sPage() are called on it
  * @param      int     $mode     optional 0 retrieves only the class name [Default]
  *                               1 Retrieve the full css style
  *
  * @return     array  The found classes. In mode 0 the array is keyed by class name and
  *                    always holds the 'w3sNone' => 'None' entry; otherwise it is a plain list
  */
 public static function findStylesheetClasses($content, $mode = 0)
 {
     // This is only a palliative solution for the notices raised by the parser class
     // (call-time pass-by-reference). error_reporting() is used with the real constant:
     // ini_set() would receive the literal string 'E_ERROR', which is not evaluated as a
     // constant there.
     error_reporting(E_ERROR);
     require_once dirname(__FILE__) . '/../tools/parser/htmlparser.inc';
     require_once dirname(__FILE__) . '/../tools/parser/common.inc';
     $slotName = $content->getW3sSlot()->getSlotName();
     $page = $content->getW3sPage();

     // Opens the template and parses its structure
     $templateAttributes = self::retrieveTemplateAttributesFromPage($page);
     $templateFile = self::getTemplateFile($templateAttributes["projectName"], $templateAttributes["templateName"]);
     $p = new HtmlParser($templateFile, unserialize(Read_File("parser/htmlgrammar.cmp")), $templateFile, 1);
     $p->Parse();
     $src = "";
     GetPageSrc($p->content, $src);
     // PrintArray() prints the tree; capture the dump so it can be searched as text
     ob_start();
     PrintArray($p->content);
     $contents = ob_get_clean();

     // Finds the id of Slots: starting from the slot, look up the id stored under the
     // key prefix that precedes the current id, and repeat with the id just found.
     // The loop is capped at 99 rounds so a badly formed template cannot loop forever.
     $elements = array($slotName);
     for ($depth = 1; $depth < 100; $depth++) {
         // Names are quoted so regex metacharacters in an id are matched literally
         $slotPattern = '/(.*)\\[content\\].*\\[pars\\]\\[id\\]\\[value\\]=' . preg_quote($slotName, '/') . '/';
         if (!preg_match($slotPattern, $contents, $res)) {
             break;
         }
         $startKey = preg_quote($res[1], '/');
         if (!preg_match('/' . $startKey . '\\[pars\\]\\[id\\]\\[value\\]=(.*)/', $contents, $res)) {
             // No id recorded under that prefix: nothing further to collect
             break;
         }
         $elements[] = $res[1];
         $slotName = $res[1];
     }

     // Finds all the template's stylesheets
     $templateContents = file_get_contents($templateFile);
     if ($templateContents === false) {
         // Unreadable template: continue with no stylesheets instead of failing
         $templateContents = '';
     }
     $templateContents = str_replace("\r\n", "", $templateContents);
     preg_match_all('/.*?rel=["|\']stylesheet["|\'].*?href\\s*=\\s*["|\'](.*?)["|\'].*?/', $templateContents, $stylesheets);

     // Creates a single stylesheet from the stylesheets retrieved
     $contents = '';
     foreach ($stylesheets[1] as $stylesheet) {
         // Drop the first character of the href (presumably a leading slash - TODO confirm)
         $stylesheet = substr($stylesheet, 1);
         if ($stylesheet === false || $stylesheet === '') {
             continue;
         }
         $currentContent = file_get_contents($stylesheet);
         if ($currentContent === false) {
             // Missing or unreadable stylesheet: skip it
             continue;
         }
         $currentContent = str_replace("\r\n", "", $currentContent);
         $currentContent = preg_replace('/HTML>.*?}+?/', '', $currentContent);
         $contents .= $currentContent;
     }

     // Find classes from xhtml elements
     $result = $mode == 0 ? array('w3sNone' => 'None') : array();
     foreach ($elements as $element) {
         $elementId = preg_quote(trim($element), '/');
         $expression = $mode == 0
             ? '/#' . $elementId . '[a-zA-Z0-9-_:\\s]*\\.(.*?)\\{+?/'
             : '/#' . $elementId . '[a-zA-Z0-9-_:\\s]*(\\..*?\\{.*?\\})+?/';
         preg_match_all($expression, $contents, $classes);
         foreach ($classes[1] as $class) {
             if ($mode == 0) {
                 $result[$class] = $class;
             } else {
                 $result[] = $class;
             }
         }
     }

     // Find classes not associated to xhtml elements
     $expression = $mode == 0 ? '/(^|})\\.(.*?)\\{+?/' : '/(^|})(\\..*?\\{.*?\\})+?/';
     preg_match_all($expression, $contents, $classes);
     foreach ($classes[2] as $class) {
         if ($mode == 0) {
             $result[$class] = $class;
         } else {
             $result[] = $class;
         }
     }
     return $result;
 }

PHP HtmlParser::Parse示例