/**
 * Prepares XHTML content for text difference comparison.
 *
 * The content is split into "lines" at runs of block-level tags
 * (p, div, h1-h6, td, li). Throughout, unwanted text is overwritten with
 * spaces rather than removed, so that every byte offset in the processed
 * string still refers to the same place in the original content.
 *
 * @param string $content XHTML content [NO SLASHES]
 * @return array Array of ouwiki_line objects, keyed consecutively from 1;
 *   lines for which is_empty() returns true are left out
 */
function ouwiki_diff_html_to_lines($content)
{
    // PHP preg_* functions don't provide proper information as to the
    // start/end position of matches, so at every point we replace things
    // with spaces rather than getting rid of them, in order to preserve
    // positions within the original content.

    // Get rid of all script, style, object tags (that might contain non-text
    // outside tags). Anonymous functions are used for the callbacks because
    // create_function() was removed in PHP 8.0.
    // NOTE(review): as written, the pattern only matches tags that have a
    // space after the tag name (i.e. with attributes) and, lacking the "s"
    // modifier, only elements that do not span a newline - confirm whether
    // that is intended.
    $content = preg_replace_callback(
        '^(<script .*?</script>)|(<object .*?</object>)|(<style .*?</style>)^i',
        function ($matches) {
            // Blank every non-newline byte so that the length is unchanged.
            return preg_replace('/./', ' ', $matches[0]);
        },
        $content
    );

    // Get rid of all ` symbols as we are going to use these for a marker later.
    $content = str_replace('`', ' ', $content);

    // Put line breaks on block tags. Mark each line break with ` symbol.
    // Only attribute-less opening tags and closing tags are recognised.
    $blocktags = array('p', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'td', 'li');
    $alternatives = array();
    foreach ($blocktags as $blocktag) {
        $alternatives[] = "<{$blocktag}>|<\\/{$blocktag}>";
    }
    $taglist = implode('|', $alternatives);
    $content = preg_replace_callback(
        '/((' . $taglist . ')\\s*)+/i',
        function ($matches) {
            // The first byte of the run becomes the ` marker; the remainder
            // is blanked (newlines are left alone), keeping the length equal.
            return '`' . preg_replace('/./', ' ', substr($matches[0], 1));
        },
        $content
    );

    // Now go through splitting each line at the ` markers.
    $lines = array();
    $index = 1;
    $pos = 0;
    $length = strlen($content);
    while ($pos < $length) {
        $nextline = strpos($content, '`', $pos);
        if ($nextline === false) {
            // No more line breaks? Take content to end
            $nextline = $length;
        }
        $linestr = substr($content, $pos, $nextline - $pos);
        // $pos is the byte offset of this line within the content.
        $line = new ouwiki_line($linestr, $pos);
        if (!$line->is_empty()) {
            $lines[$index++] = $line;
        }
        // Skip over the ` marker itself.
        $pos = $nextline + 1;
    }
    return $lines;
}