/**
 * Adds a missing "</p>" after paragraphs that are left open.
 *
 * The markup is scanned from left to right. For every opening <p ...> tag the
 * text up to the next tag matched by the open/close patterns built below is
 * examined; when that text contains no "</p>", one is inserted immediately
 * before that next tag. Paragraphs that are already closed are left untouched.
 *
 * NOTE(review): mk_open_tag_regexp() and mk_close_tag_regexp() are defined
 * outside this block; they are assumed to return regex fragments matching an
 * opening / closing tag for the given tag-name pattern - confirm against
 * their definitions.
 *
 * @param string $sample_html Markup to repair.
 * @return string The markup with the missing "</p>" tags inserted.
 */
function process_p($sample_html) {
    // Tags whose opening form terminates a still-open paragraph.
    $open_regexp = implode("|", array(
        "p", "dl", "div", "noscript", "blockquote", "form", "hr", "table", "fieldset", "address",
        "ul", "ol", "li",
        "h1", "h2", "h3", "h4", "h5", "h6",
        "pre", "frameset", "noframes"
    ));
    // Tags whose closing form terminates a still-open paragraph.
    $close_regexp = implode("|", array(
        "dl", "div", "noscript", "blockquote", "form", "hr", "table", "fieldset", "address",
        "ul", "ol", "li",
        "h1", "h2", "h3", "h4", "h5", "h6",
        "pre", "frameset", "noframes", "body"
    ));

    $open = mk_open_tag_regexp("(" . $open_regexp . ")");
    $close = mk_close_tag_regexp("(" . $close_regexp . ")");

    // Capture groups:
    //   1 - everything before the <p ...> tag
    //   2 - the <p ...> tag itself
    //   3 - the attributes of that tag (may be empty)
    //   4 - the content between the <p ...> tag and the terminating tag
    //   5 - the terminating tag
    $paragraph_regexp = "#^(.*?)(<\\s*p(\\s+[^>]*?)?>)(.*?)({$open}|{$close})#is";
    $closing_p_regexp = "#<\\s*/\\s*p\\s*>#is";
    $closing_p = "</p>";

    $offset = 0;
    while (preg_match($paragraph_regexp, substr($sample_html, $offset), $matches)) {
        // Look for an existing "</p>" in the paragraph content (group 4).
        // Group 3 holds only the tag attributes and can never contain one,
        // so testing it would add a duplicate "</p>" to closed paragraphs.
        if (!preg_match($closing_p_regexp, $matches[4])) {
            // Position right after the paragraph content, i.e. just before
            // the terminating tag.
            $cutpos = $offset + strlen($matches[1]) + strlen($matches[2]) + strlen($matches[4]);
            $sample_html = substr_replace($sample_html, $closing_p, $cutpos, 0);
            $offset = $cutpos + strlen($closing_p);
        } else {
            // Already closed: step one character past the start of this
            // <p ...> tag so the next search finds the following paragraph.
            $offset += strlen($matches[1]) + 1;
        }
    }

    return $sample_html;
}
/**
 * Makes sure the markup has opening and closing body tags and that no content
 * is left between </head> and <body> or between </body> and </html>.
 *
 * Steps, in order:
 *  - when no opening body tag is found, "<body>" is appended to every match
 *    of the closing head tag, or, if there is none, to every match of the
 *    opening html tag;
 *  - when no closing body tag is found, "</body>" is put in front of every
 *    match of the closing html tag;
 *  - content found between the closing head tag and the opening body tag is
 *    moved to just after the opening body tag;
 *  - content found between the closing body tag and the closing html tag is
 *    moved to just before the closing body tag.
 *
 * NOTE(review): mk_open_tag_regexp() and mk_close_tag_regexp() are defined
 * outside this block; they are assumed to return regex fragments matching an
 * opening / closing tag with the given name - confirm against their
 * definitions.
 *
 * @param string $html Markup to normalize.
 * @return string The result of the replacements described above.
 */
function process_body($html) {
    $body_open = mk_open_tag_regexp("body");
    $body_close = mk_close_tag_regexp("body");
    $html_open = mk_open_tag_regexp("html");
    $html_close = mk_close_tag_regexp("html");
    $head_close = mk_close_tag_regexp("head");

    // Missing opening body tag: attach "<body>" to </head> when present,
    // otherwise to the opening html tag.
    if (!preg_match('#' . $body_open . '#is', $html)) {
        $anchor = preg_match('#' . $head_close . '#is', $html) ? $head_close : $html_open;
        $html = preg_replace('#(' . $anchor . ')#is', "\\1<body>", $html);
    }

    // Missing closing body tag: place "</body>" right before </html>.
    if (!preg_match('#' . $body_close . '#is', $html)) {
        $html = preg_replace('#(' . $html_close . ')#is', "</body>\\1", $html);
    }

    // Anything sitting between </head> and <body> is moved inside the body,
    // directly after the opening tag.
    $between_head_and_body = '#(' . $head_close . ')(.+)(' . $body_open . ')#is';
    $html = preg_replace($between_head_and_body, "\\1\\3\\2", $html);

    // Anything sitting between </body> and </html> is moved inside the body,
    // directly before the closing tag.
    $between_body_and_html = '#(' . $body_close . ')(.+)(' . $html_close . ')#is';
    $html = preg_replace($between_body_and_html, "\\2\\1\\3", $html);

    return $html;
}