/**
 * Parses an HTML fragment with the grammar-driven HtmlParser and flushes the
 * rendered result.
 *
 * HTML comments and other "<!...>" constructs are removed before parsing
 * because the parser does not handle them correctly. Since $data is taken by
 * reference, the caller's variable receives the cleaned markup as well.
 *
 * The earlier regex-based conversion (titlebar/hr/headings/img/links) has been
 * superseded by the parser and is no longer kept here as commented-out code.
 *
 * @param string &$data HTML source; modified in place (comments stripped)
 */
function insert_html(&$data) {
    // Remove complete comments first. The "s" modifier lets "." span line
    // breaks, so multi-line comments are stripped too, and anchoring on "-->"
    // keeps a ">" inside the comment text from ending the match early.
    $cleaned = preg_replace('#<!--.*?-->#s', '', $data);
    if ($cleaned !== null) {
        // preg_replace() yields null on a PCRE error; keep the input in that case
        $data = $cleaned;
    }

    // Then drop any remaining "<!...>" constructs, using the pattern this
    // method has always applied.
    $cleaned = preg_replace('#<!.*?[^>]>#', '', $data);
    if ($cleaned !== null) {
        $data = $cleaned;
    }

    // Build the parse tree from the cleaned markup
    $htmlparser = new HtmlParser($data, $this->html_grammar, '', 0);
    $htmlparser->Parse();

    // Walk the tree to assemble the output string, then emit it
    $src = '';
    $dummy = array();
    $this->WalkParsedArray($htmlparser->content, $src, $dummy);
    $this->flush($src);
}
/**
 * Wrapper around zaufi's HTML sucker code, used only for its HTML-to-wiki part.
 *
 * Loads the serialized parser grammar, parses the given HTML and walks the
 * resulting tree to produce wiki markup. When the grammar cannot be loaded,
 * an error page is displayed through Smarty and the script terminates.
 *
 * @param string &$inHtml HTML to convert; empty "<p></p>" pairs are removed
 *                        from it in place before parsing
 * @return string the generated wiki markup
 */
function parse_html(&$inHtml) {
    global $smarty;

    // include_once instead of include: a second call must not load the parser
    // definitions again.
    include_once('lib/htmlparser/htmlparser.inc');

    // Read the compiled (serialized) grammar. A missing, unreadable, empty or
    // corrupt file all end up on the same error path.
    $grammarfile = 'lib/htmlparser/htmlgrammar.cmp';
    $serialized = is_readable($grammarfile) ? file_get_contents($grammarfile) : false;
    $grammar = ($serialized === false) ? false : unserialize($serialized);
    if ($grammar === false) {
        $smarty->assign('msg', tra("Can't parse HTML data - no grammar file"));
        $smarty->display("error.tpl");
        die;
    }

    // Process a few ckeditor artifacts: empty p tags are invisible
    $inHtml = str_replace('<p></p>', '', $inHtml);

    // Create the parser object, insert the html code and parse it
    $htmlparser = new HtmlParser($inHtml, $grammar, '', 0);
    $htmlparser->Parse();

    /*
     * Conversion state handed to walk_and_parse():
     *
     * ['stack'] = array
     *   Special keys introduced to convert to wiki:
     *   - ['wikitags'] = the number of 'wikistack' entries produced by the html tag
     *
     * ['wikistack'] = array(), used to save the wiki markup for the linebreak
     *   handling (1 array = 1 html tag). Each entry contains the keys:
     *   - ['begin'] = array() of begin markups (1 style definition = 1 array entry)
     *   - ['end']   = array() of end markups
     *
     * wiki_lbr = true if we must use '%%%' for linebreaks instead of '\n'
     */
    $p = array(
        'stack' => array(),
        'listack' => array(),
        'wikistack' => array(),
        'wiki_lbr' => 0,
        'first_td' => false,
        'first_tr' => false,
    );

    $out_data = '';
    $this->walk_and_parse($htmlparser->content, $out_data, $p, '');

    // Tags can still be open when the HTML was not valid; that is no reason to
    // produce invalid wiki, so append their pending strings, innermost first.
    while (count($p['stack'])) {
        $e = array_pop($p['stack']);
        $out_data .= $e['string'];
    }
    // Unclosed lists are ignored: wiki has no special list start/end syntax.

    // 1) fix linked images
    $out_data = preg_replace(',\[(.*)\|\(img src=(.*)\)\],mU', '{img src=$2 link=$1}', $out_data);
    // 2) fix remaining images (not in links)
    $out_data = preg_replace(',\(img src=(.*)\),mU', '{img src=$1}', $out_data);
    // 3) remove empty lines
    // NOTE(review): under the U (ungreedy) modifier "[\n]+" consumes a single
    // newline per match, so runs of newlines are not actually collapsed. Left
    // unchanged because the generated markup may depend on blank lines --
    // confirm before dropping the U modifier.
    $out_data = preg_replace(",[\n]+,mU", "\n", $out_data);
    // 4) remove nbsp's: the previous pattern was a bare whitespace character,
    // which left "&nbsp;" entities untouched. Match the entity and the UTF-8
    // encoded U+00A0 explicitly.
    $out_data = preg_replace(',&nbsp;|\xC2\xA0,', ' ', $out_data);

    return $out_data;
} // end parse_html
/**
 * Wrapper around zaufi's HTML sucker code, used only for its HTML-to-wiki part.
 *
 * Loads the serialized parser grammar, parses the given HTML and walks the
 * resulting tree with walk_and_parse() to produce wiki markup. When the
 * grammar cannot be loaded, an error page is displayed through Smarty and the
 * script terminates.
 *
 * @param string &$inHtml HTML to convert
 * @return string the generated wiki markup
 */
function parse_html(&$inHtml) {
    // $smarty is needed on the error path below; without this declaration the
    // variable is undefined inside the function.
    global $smarty;

    // Read the compiled (serialized) grammar. A missing, unreadable, empty or
    // corrupt file all end up on the same error path.
    $grammarfile = 'lib/htmlparser/htmlgrammar.cmp';
    $serialized = is_readable($grammarfile) ? file_get_contents($grammarfile) : false;
    $grammar = ($serialized === false) ? false : unserialize($serialized);
    if ($grammar === false) {
        $smarty->assign('msg', tra("Can't parse HTML data - no grammar file"));
        $smarty->display("error.tpl");
        die;
    }

    // Create the parser object, insert the html code and parse it
    $htmlparser = new HtmlParser($inHtml, $grammar, '', 0);
    $htmlparser->Parse();

    // Conversion state shared with walk_and_parse()
    $p = array(
        'stack' => array(),
        'listack' => array(),
        'first_td' => false,
        'first_tr' => false,
    );

    $out_data = '';
    walk_and_parse($htmlparser->content, $out_data, $p, '');

    // Tags can still be open when the HTML was not valid; that is no reason to
    // produce invalid wiki, so append their pending strings, innermost first.
    while (count($p['stack'])) {
        $e = array_pop($p['stack']);
        $out_data .= $e['string'];
    }
    // Unclosed lists are ignored: wiki has no special list start/end syntax.

    // 1) fix linked images
    $out_data = preg_replace(',\\[(.*)\\|\\(img src=(.*)\\)\\],mU', '{img src=$2 link=$1}', $out_data);
    // 2) fix remaining images (not in links)
    $out_data = preg_replace(',\\(img src=(.*)\\),mU', '{img src=$1}', $out_data);
    // 3) remove empty lines
    // NOTE(review): under the U (ungreedy) modifier "[\n]+" consumes a single
    // newline per match, so runs of newlines are not actually collapsed. Left
    // unchanged because the generated markup may depend on blank lines --
    // confirm before dropping the U modifier.
    $out_data = preg_replace(",[\n]+,mU", "\n", $out_data);
    // 4) remove nbsp's: the previous pattern was a bare whitespace character,
    // which left "&nbsp;" entities untouched. Match the entity and the UTF-8
    // encoded U+00A0 explicitly.
    $out_data = preg_replace(',&nbsp;|\xC2\xA0,', ' ', $out_data);

    return $out_data;
}
$sdta = @file_get_contents($suck_url); if (isset($php_errormsg) && strlen($php_errormsg)) { $gBitSystem->fatalError(tra("Can't import remote HTML page")); } // Need to parse HTML? if ($parsehtml == 'y') { // Read compiled( serialized ) grammar $grammarfile = UTIL_PKG_PATH . 'htmlparser/htmlgrammar.cmp'; if (!($fp = @fopen($grammarfile, 'r'))) { $gBitSystem->fatalError(tra("Can't parse remote HTML page")); } $grammar = unserialize(fread($fp, filesize($grammarfile))); fclose($fp); // create parser object, insert html code and parse it $htmlparser = new HtmlParser($sdta, $grammar, '', 0); $htmlparser->Parse(); // Should I try to convert HTML to wiki? $parseddata = ''; $p = array('stack' => array(), 'listack' => array(), 'first_td' => false); walk_and_parse($htmlparser->content, $parseddata, $p); // Is some tags still opened?( It can be if HTML not valid, but this is not reason // to produce invalid wiki : ) while (count($p['stack'])) { $e = end($p['stack']); $sdta .= $e['string']; array_pop($p['stack']); } // Unclosed lists r ignored... wiki have no special start/end lists syntax.... // OK. Things remains to do: // 1 ) fix linked images $parseddata = preg_replace(',\\[(.*)\\|\\( img src=(.*)\\)\\],mU', '{img src=$2 link=$1}', $parseddata);
/**
 * Returns the CSS classes available for the slot a content belongs to.
 *
 * The page template is parsed to collect the id of the slot and of the
 * elements that enclose it; the stylesheets linked from the template are then
 * scanned for class rules attached to those ids, plus class rules that are not
 * attached to any element.
 *
 * @param object $content The content; its slot name and page are read through
 *                        getW3sSlot()->getSlotName() and getW3sPage()
 * @param int    $mode    optional 0 retrieves only the class name [Default]
 *                                 1 retrieves the full css style
 *
 * @return array The found classes. In mode 0 the array is keyed by class name
 *               and always starts with 'w3sNone' => 'None'; in mode 1 it is a
 *               plain list of rules.
 */
public static function findStylesheetClasses($content, $mode = 0) {
    // This is only a palliative solution: the parser class relies on call-time
    // pass-by-reference, so everything below fatal errors is muted while it is
    // in use. error_reporting() is called with the real constant (ini_set()
    // does not resolve a constant name passed as a string) and the previous
    // level is put back before returning.
    $previousReporting = error_reporting(E_ERROR);

    require_once dirname(__FILE__) . '/../tools/parser/htmlparser.inc';
    require_once dirname(__FILE__) . '/../tools/parser/common.inc';

    $slotName = $content->getW3sSlot()->getSlotName();
    $page = $content->getW3sPage();

    // Opens the template and parses its structure
    $templateAttributes = self::retrieveTemplateAttributesFromPage($page);
    $templateFile = self::getTemplateFile($templateAttributes["projectName"], $templateAttributes["templateName"]);
    $p = new HtmlParser($templateFile, unserialize(Read_File("parser/htmlgrammar.cmp")), $templateFile, 1);
    $p->Parse();

    // NOTE(review): $src is not used afterwards; the call is kept in case
    // GetPageSrc() has side effects on the parsed content -- confirm.
    $src = "";
    GetPageSrc($p->content, $src);

    // Capture the textual dump of the parsed tree; ids are looked up in it
    ob_start();
    PrintArray($p->content);
    $contents = ob_get_clean();

    // Finds the ids of the slot and of the elements that enclose it. The loop
    // is capped at 99 rounds so a malformed template cannot cause an endless
    // search.
    $elements = array($slotName);
    for ($i = 1; $i < 100; $i++) {
        // Path prefix of the element whose content holds the current id
        $childPattern = '/(.*)\\[content\\].*\\[pars\\]\\[id\\]\\[value\\]=' . preg_quote($slotName, '/') . '/';
        if (!preg_match($childPattern, $contents, $res)) {
            break;
        }

        // Id of that enclosing element; stop when it has none instead of
        // continuing with an empty name
        $parentPattern = '/' . preg_quote($res[1], '/') . '\\[pars\\]\\[id\\]\\[value\\]=(.*)/';
        if (!preg_match($parentPattern, $contents, $res)) {
            break;
        }

        $slotName = $res[1];
        $elements[] = $slotName;
    }

    // Finds all the template's stylesheets
    $templateContents = file_get_contents($templateFile);
    if ($templateContents === false) {
        $templateContents = '';
    }
    $templateContents = str_replace("\r\n", "", $templateContents);
    preg_match_all('/.*?rel=["|\']stylesheet["|\'].*?href\\s*=\\s*["|\'](.*?)["|\'].*?/', $templateContents, $stylesheets);

    // Creates a single stylesheet from the stylesheets retrieved
    $contents = '';
    foreach ($stylesheets[1] as $stylesheet) {
        // The first character of the href is dropped before the file is opened
        // NOTE(review): presumably a leading "/" -- confirm against the templates.
        $stylesheet = substr($stylesheet, 1);
        if ($stylesheet === false || $stylesheet === '') {
            continue;
        }

        // A stylesheet that cannot be read is skipped rather than aborting
        $currentContent = file_get_contents($stylesheet);
        if ($currentContent === false) {
            continue;
        }

        $currentContent = str_replace("\r\n", "", $currentContent);
        $currentContent = preg_replace('/HTML>.*?}+?/', '', $currentContent);
        $contents .= $currentContent;
    }

    // Find classes from xhtml elements
    $result = $mode == 0 ? array('w3sNone' => 'None') : array();
    foreach ($elements as $element) {
        // Quote the id so characters special to PCRE are matched literally
        $id = preg_quote(trim($element), '/');
        $expression = $mode == 0
            ? '/#' . $id . '[a-zA-Z0-9-_:\\s]*\\.(.*?)\\{+?/'
            : '/#' . $id . '[a-zA-Z0-9-_:\\s]*(\\..*?\\{.*?\\})+?/';
        preg_match_all($expression, $contents, $classes);
        foreach ($classes[1] as $class) {
            if ($mode == 0) {
                $result[$class] = $class;
            } else {
                $result[] = $class;
            }
        }
    }

    // Find classes not associated to xhtml elements
    $expression = $mode == 0 ? '/(^|})\\.(.*?)\\{+?/' : '/(^|})(\\..*?\\{.*?\\})+?/';
    preg_match_all($expression, $contents, $classes);
    foreach ($classes[2] as $class) {
        if ($mode == 0) {
            $result[$class] = $class;
        } else {
            $result[] = $class;
        }
    }

    error_reporting($previousReporting);

    return $result;
}