function autoclose_tag_cleanup(&$sample_html, $offset, $tags_raw, $nested, $close) { $tags = mk_open_tag_regexp($tags_raw); skip_to($sample_html, $offset, $tags_raw); while (preg_match("#^(.*?)({$tags})#is", substr($sample_html, $offset), $matches)) { // convert tag name found to lower case $tag = strtolower($matches[3]); // calculate position of the tag found $tag_start = $offset + strlen($matches[1]); $tag_end = $tag_start + strlen($matches[2]); if ($tag == $close) { return $tag_end; } // REQ: PHP 4.0.5 if (isset($nested[$tag])) { $offset = $nested[$tag]($sample_html, $tag_end); } else { $to_be_inserted = "<" . $close . ">"; $sample_html = substr_replace($sample_html, $to_be_inserted, $tag_start, 0); return $tag_start + strlen($to_be_inserted); } skip_to($sample_html, $offset, $tags_raw); } return $offset; }
function process_p($sample_html) { $open_regexp = implode("|", array("p", "dl", "div", "noscript", "blockquote", "form", "hr", "table", "fieldset", "address", "ul", "ol", "li", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "frameset", "noframes")); $close_regexp = implode("|", array("dl", "div", "noscript", "blockquote", "form", "hr", "table", "fieldset", "address", "ul", "ol", "li", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "frameset", "noframes", "body")); $open = mk_open_tag_regexp("(" . $open_regexp . ")"); $close = mk_close_tag_regexp("(" . $close_regexp . ")"); $offset = 0; while (preg_match("#^(.*?)(<\\s*p(\\s+[^>]*?)?>)(.*?)({$open}|{$close})#is", substr($sample_html, $offset), $matches)) { if (!preg_match("#<\\s*/\\s*p\\s*>#is", $matches[3])) { $cutpos = $offset + strlen($matches[1]) + strlen($matches[2]) + strlen($matches[4]); $sample_html = substr_replace($sample_html, "</p>", $cutpos, 0); $offset = $cutpos + 4; } else { $offset += strlen($matches[1]) + 1; } } return $sample_html; }
function process_body($html) { $open = mk_open_tag_regexp("body"); $close = mk_close_tag_regexp("body"); $ohtml = mk_open_tag_regexp("html"); $chtml = mk_close_tag_regexp("html"); $chead = mk_close_tag_regexp("head"); if (!preg_match("#{$open}#is", $html)) { if (preg_match("#{$chead}#is", $html)) { $html = preg_replace("#({$chead})#is", "\\1<body>", $html); } else { $html = preg_replace("#({$ohtml})#is", "\\1<body>", $html); } } if (!preg_match("#{$close}#is", $html)) { $html = preg_replace("#({$chtml})#is", "</body>\\1", $html); } // Now check is there any data between </head> and <body>. $html = preg_replace("#({$chead})(.+)({$open})#is", "\\1\\3\\2", $html); // Check if there's any data between </body> and </html> $html = preg_replace("#({$close})(.+)({$chtml})#is", "\\2\\1\\3", $html); return $html; }