Example #1
0
/**
 * Inserts a missing closing paragraph tag for <p> elements that are left open.
 *
 * For every opening <p ...> tag, the text up to the next tag matched by the
 * block-level open/close lists below is examined. When that text does not
 * already contain a closing paragraph tag, "</p>" is inserted immediately
 * before the next tag; otherwise the paragraph is left untouched.
 *
 * NOTE(review): mk_open_tag_regexp() / mk_close_tag_regexp() are defined
 * elsewhere; presumably they return PCRE fragments matching an opening /
 * closing tag with the given name pattern - confirm against their definition.
 *
 * @param string $sample_html HTML markup to repair.
 * @return string The markup with the missing "</p>" tags inserted.
 */
function process_p($sample_html)
{
    // Opening tags that end a still-open paragraph (a new <p> included).
    $open_regexp = implode("|", array("p", "dl", "div", "noscript", "blockquote", "form", "hr", "table", "fieldset", "address", "ul", "ol", "li", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "frameset", "noframes"));
    // Closing tags that end a still-open paragraph ("p" itself is deliberately absent).
    $close_regexp = implode("|", array("dl", "div", "noscript", "blockquote", "form", "hr", "table", "fieldset", "address", "ul", "ol", "li", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "frameset", "noframes", "body"));
    $open = mk_open_tag_regexp("(" . $open_regexp . ")");
    $close = mk_close_tag_regexp("(" . $close_regexp . ")");

    // Byte offset into $sample_html from which the next search starts.
    $offset = 0;
    // Capture groups: [1] text before the <p> tag, [2] the <p ...> tag itself,
    // [3] its attributes, [4] paragraph content up to the next block-level tag.
    while (preg_match("#^(.*?)(<\\s*p(\\s+[^>]*?)?>)(.*?)({$open}|{$close})#is", substr($sample_html, $offset), $matches)) {
        // Look for an existing closing tag in the paragraph CONTENT (group 4).
        // Group 3 holds only the tag attributes and can never contain ">",
        // so testing it would report every paragraph as unclosed.
        if (!preg_match("#<\\s*/\\s*p\\s*>#is", $matches[4])) {
            // Unclosed paragraph: close it right before the tag that terminated it.
            $cutpos = $offset + strlen($matches[1]) + strlen($matches[2]) + strlen($matches[4]);
            $sample_html = substr_replace($sample_html, "</p>", $cutpos, 0);
            // Resume after the inserted "</p>" (4 bytes).
            $offset = $cutpos + 4;
        } else {
            // Already closed: step one byte past the "<" of this <p> tag so
            // the next iteration finds the following paragraph instead.
            $offset += strlen($matches[1]) + 1;
        }
    }

    return $sample_html;
}
/**
 * Makes sure the document carries opening and closing body tags and that no
 * content is stranded outside of them.
 *
 * Steps, in order:
 *  1. If no opening body tag is found, "<body>" is appended after every match
 *     of the closing head tag, or after the opening html tag when there is no
 *     closing head tag.
 *  2. If no closing body tag is found, "</body>" is placed before every match
 *     of the closing html tag.
 *  3. Text found between the closing head tag and the opening body tag is
 *     moved to just after the opening body tag.
 *  4. Text found between the closing body tag and the closing html tag is
 *     moved to just before the closing body tag.
 *
 * NOTE(review): mk_open_tag_regexp() / mk_close_tag_regexp() are defined
 * elsewhere; the numbered backreferences below assume the close-tag fragments
 * contain no capture groups of their own - confirm against their definition.
 *
 * @param string $html HTML markup to normalise.
 * @return string|null Result of the final preg_replace() call.
 */
function process_body($html)
{
    $body_open = mk_open_tag_regexp("body");
    $body_close = mk_close_tag_regexp("body");
    $html_open = mk_open_tag_regexp("html");
    $html_close = mk_close_tag_regexp("html");
    $head_close = mk_close_tag_regexp("head");

    // Missing opening body tag: pick the tag it should follow, then insert it.
    if (!preg_match("#{$body_open}#is", $html)) {
        $anchor = preg_match("#{$head_close}#is", $html) ? $head_close : $html_open;
        $html = preg_replace("#({$anchor})#is", "\\1<body>", $html);
    }

    // Missing closing body tag: put it right in front of the closing html tag.
    if (!preg_match("#{$body_close}#is", $html)) {
        $html = preg_replace("#({$html_close})#is", "</body>\\1", $html);
    }

    // Pull anything sitting between the closing head tag and the opening body tag into the body.
    $html = preg_replace("#({$head_close})(.+)({$body_open})#is", "\\1\\3\\2", $html);

    // Pull anything sitting between the closing body tag and the closing html tag back into the body.
    return preg_replace("#({$body_close})(.+)({$html_close})#is", "\\2\\1\\3", $html);
}