/**
 * Cleans up the crud that gets inserted into HTML by programs such as
 * Word, CKeditor or Skype.
 *
 * The input is first passed through sanitise_html_essential() and
 * html_fixImageResizes() (both defined elsewhere). A series of textual
 * clean-up rules is then applied repeatedly until the markup stops changing,
 * after which legacy %TABPAGE% / %TABEND% markers are expanded into tab
 * markup.
 *
 * @param string $original_html raw HTML to clean
 *
 * @return string the cleaned HTML
 */
function sanitise_html($original_html) {
    $original_html = sanitise_html_essential($original_html);
    $original_html = html_fixImageResizes($original_html);

    // Line breaks are swapped for literal tokens for the duration of the
    // clean-up, so "\s" and "." in the patterns below never see a real
    // newline; they are restored just before returning.
    $original_html = str_replace("\n", '{{CARRIAGERETURN}}', $original_html);
    $original_html = str_replace("\r", '{{LINERETURN}}', $original_html);

    // Each rule can expose new matches for the others (e.g. un-nesting one
    // span reveals another), so iterate until a full pass changes nothing.
    do {
        $html = $original_html;

        // { clean white-space
        $html = str_replace('{{LINERETURN}}{{CARRIAGERETURN}}', "{{CARRIAGERETURN}}", $html);
        $html = str_replace('>{{CARRIAGERETURN}}', '>', $html);
        $html = str_replace('{{CARRIAGERETURN}}{{CARRIAGERETURN}}', '{{CARRIAGERETURN}}', $html);
        $html = preg_replace("/<p>\\s*/", '<p>', $html);
        $html = preg_replace("#\\s*<br( ?/)?>\\s*#", '<br />', $html);
        $html = preg_replace("#\\s*<li>\\s*#", '<li>', $html);
        $html = str_replace(">\t", '>', $html);
        $html = preg_replace('#<p([^>]*)>\\s*\\ \\s*</p>#', '<p\\1></p>', $html);
        // }

        // { remove empty elements and parameters
        $html = preg_replace('/<!--[^>]*-->/', '', $html);
        // }

        // { combine nested elements
        $html = preg_replace('#<span style="([^"]*?);?">(\\s*)<span style="([^"]*)">([^<]*|<img[^>]*>)</span>(\\s*)</span>#', '\\2<span style="\\1;\\3">\\4</span>\\5', $html);
        $html = preg_replace('#<a href="([^"]*)">(\\s*)<span style="([^"]*)">([^<]*|<img[^>]*>)</span>(\\s*)</a>#', '\\2<a href="\\1" style="\\3">\\4</a>\\5', $html);
        $html = preg_replace('#<strong>(\\s*)<span style="([^"]*)">([^<]*)</span>(\\s*)</strong>#', '<strong style="\\2">\\1\\3\\4</strong>', $html);
        $html = preg_replace('#<b>(\\s*)<span style="([^"]*)">([^<]*)</span>(\\s*)</b>#', '<b style="\\2">\\1\\3\\4</b>', $html);
        $html = preg_replace('#<li>(\\s*)<span style="([^"]*)">([^<]*)</span>(\\s*)</li>#', '<li style="\\2">\\1\\3\\4</li>', $html);
        $html = preg_replace('#<p>(\\s*)<span style="([^"]*)">([^<]*)</span>(\\s*)</p>#', '<p style="\\2">\\1\\3\\4</p>', $html);
        $html = preg_replace('#<span style="([^"]*)">(\\s*)<strong>([^<]*)</strong>(\\s*)</span>#', '\\2<strong style="\\1">\\3</strong>\\4', $html);
        $html = preg_replace('#<span style="([^"]*?);?">(\\s*)<strong style="([^"]*)">([^<]*)</strong>(\\s*)</span>#', '\\2<strong style="\\1;\\3">\\4</strong>\\5', $html);
        $html = preg_replace("/<p>\\s*(<img[^>]*>)\\s*<\\/p>/", '\\1', $html);
        $html = preg_replace('/<span( style="font-[^:]*:[^"]*")?>\\s*(<img[^>]*>)\\s*<\\/span>/', '\\2', $html);
        $html = preg_replace("/<strong>\\s*(<img[^>]*>)\\s*<\\/strong>/", '\\1', $html);
        // }

        // { remove unnecessary elements
        $html = preg_replace('#<meta [^>]*>(.*?)</meta>#', '\\1', $html);
        // }

        // { strip repeated CSS inline elements (TODO: make this more efficient...)
        $html = str_replace('font-size: large;font-size: large', 'font-size: large', $html);
        // }

        // { strip useless CSS
        // Text-only styling on an element with no content has no visible
        // effect, so drop one such declaration per pass from empty <span>/<p>.
        $sillystuff = ' style="([^"]*)(color:[^;"]*|font-size:[^;"]*|font-family:[^;"]*|line-height:[^;"]*);([^"]*)"';
        $html = preg_replace('#\\s*<span' . $sillystuff . '>\\s*</span>\\s*#', '<span style="\\1\\3"></span>', $html);
        $html = str_replace('<span style=""></span>', '<span></span>', $html);
        $html = preg_replace('#\\s*<p' . $sillystuff . '>\\s*</p>\\s*#', '<p style="\\1\\3"></p>', $html);
        $html = str_replace('<p style=""></p>', '<p></p>', $html);
        // }

        // Strict comparison: loose != compares numeric strings by value
        // ("1e1" == "10"), which could report "unchanged" for differing text.
        $has_changed = $html !== $original_html;
        $original_html = $html;
    } while ($has_changed);

    // { old-style tabs
    // strpos() returns 0 for a match at the very start of the string, so the
    // result must be compared against false rather than tested for truthiness.
    if (strpos($html, '%TABPAGE%') !== false) {
        // Random id shared by the tab container and all of its panels.
        $rand = md5((string) mt_rand());

        // Unwrap markers that the editor placed inside their own paragraph.
        $test = preg_replace('/<p>[^<]*(%TAB[^%]*%)[^<]*<\\/p>/', '\\1', $html);
        $test = str_replace('%TABEND%', '</div></div><script>$(function(){$("#' . $rand . '").tabs();});</script>', $test);

        // $parts[0] is everything before the first marker; each later part
        // is one tab, expected to open with an <h2> holding the tab title.
        $parts = preg_split('/%TAB[^%]*%/', $test);
        $total = count($parts);
        $headings = array();
        $slugs = array();
        for ($i = 1; $i < $total; ++$i) {
            $heading = preg_replace('/<[^>]*>/', '', preg_replace('/^[^<]*<h2[^>]*>(.*?)<\\/h2>.*/', '\\1', $parts[$i]));
            $slug = strtolower(preg_replace('/[^a-zA-Z0-9]/', '', $heading));
            $headings[] = $heading;
            $slugs[] = $slug;

            // Every panel after the first must close the previous panel.
            $replacement = ($i > 1 ? '</div>' : '') . '<div id="' . $rand . '-' . $slug . '">';
            $parts[$i] = preg_replace('/^[^<]*<h2[^>]*>(.*?)<\\/h2>/', $replacement, $parts[$i]);
        }

        $menu = '<div id="' . $rand . '" class="tabs"><ul>';
        foreach ($headings as $idx => $h) {
            $menu .= '<li><a href="#' . $rand . '-' . $slugs[$idx] . '">' . htmlspecialchars($h) . '</a></li>';
        }
        $parts[0] .= $menu . '</ul>';
        $html = join('', $parts);
    }
    // }

    $html = str_replace('{{CARRIAGERETURN}}', "\n", $html);
    $html = str_replace('{{LINERETURN}}', "\r", $html);
    return $html;
}
$original_body = isset($_REQUEST['body']) ? $_REQUEST['body'] : ''; $body = $original_body; $body = sanitise_html($body); $name = $_REQUEST['name']; // { check that name is not duplicate of existing page if (dbOne('select id from pages where name="' . addslashes($name) . '" and parent=' . $pid . ' and id!="' . $id . '"', 'id')) { $i = 2; while (dbOne('select id from pages where name="' . addslashes($name . $i) . '" and parent=' . $pid . ' and id!="' . $id . '"', 'id')) { $i++; } $msgs .= '<em>' . __('A page named "%1" already exists. Page name amended to "%2"', $name, $name . $i) . '</em>'; $name = $name . $i; } // } // } $q = 'update pages set importance="' . $importance . '"' . ',template="' . addslashes($template) . '",edate=now()' . ',type="' . addslashes($_POST['type']) . '"' . ',associated_date="' . addslashes($associated_date) . '"' . ',keywords="' . addslashes($keywords) . '"' . ',description="' . addslashes($description) . '"' . ',name="' . addslashes($name) . '",title="' . addslashes($_POST['title']) . '"' . ',original_body="' . addslashes(sanitise_html_essential($original_body)) . '"' . ',body="' . addslashes($body) . '",parent=' . $pid . ',special=' . $special; $q .= ' where id=' . $id; dbQuery($q); // { page_vars dbQuery('delete from page_vars where page_id="' . $id . '"'); $pagevars = isset($_REQUEST['page_vars']) ? $_REQUEST['page_vars'] : array(); if (@$_REQUEST['short_url']) { dbQuery('insert into short_urls set cdate=now(),page_id=' . $id . ',short_url="' . addslashes($_REQUEST['short_url']) . '"'); $pagevars['_short_url'] = 1; } else { dbQuery('delete from short_urls where page_id=' . $id); unset($pagevars['_short_url']); } if (is_array($pagevars)) { foreach ($pagevars as $k => $v) { if (is_array($v)) {