/**
 * Finds the words that differ between two files. The files are compared line by line
 * first; line ranges reported as changed are then compared again word by word.
 * @param array $lines1 Array of ouwiki_line
 * @param array $lines2 Array of ouwiki_line
 * @return array (deleted,added); deleted and added are arrays of ouwiki_word taken
 *   from $lines1 and $lines2 respectively
 */
function ouwiki_diff_words($lines1, $lines2)
{
    $deleted = array();
    $added = array();

    // Line-level comparison runs on the plain-string form of each side
    $strings1 = ouwiki_line::get_as_strings($lines1);
    $strings2 = ouwiki_line::get_as_strings($lines2);
    $linediff = ouwiki_diff($strings1, $strings2);

    // A line reported as deleted contributes every one of its words...
    foreach ($linediff->deletes as $linenum) {
        $deleted = array_merge($deleted, $lines1[$linenum]->words);
    }
    // ...and likewise for a line reported as added
    foreach ($linediff->adds as $linenum) {
        $added = array_merge($added, $lines2[$linenum]->words);
    }

    // Changed line ranges are narrowed down to the individual words that differ
    foreach ($linediff->changes as $linerange) {
        // Collect the words on the file1 side of the range, keyed from 1, along
        // with the matching plain strings under the same keys
        $words1 = array();
        $text1 = array();
        $pos = 1;
        $end = $linerange->file1start + $linerange->file1count;
        for ($line = $linerange->file1start; $line < $end; $line++) {
            foreach ($lines1[$line]->words as $word) {
                $words1[$pos] = $word;
                $text1[$pos] = $word->word;
                $pos++;
            }
        }

        // Same again for the file2 side
        $words2 = array();
        $text2 = array();
        $pos = 1;
        $end = $linerange->file2start + $linerange->file2count;
        for ($line = $linerange->file2start; $line < $end; $line++) {
            foreach ($lines2[$line]->words as $word) {
                $words2[$pos] = $word;
                $text2[$pos] = $word->word;
                $pos++;
            }
        }

        // Word-level comparison; the positions it reports index into the 1-based
        // word arrays built above
        $worddiff = ouwiki_diff($text1, $text2);
        foreach ($worddiff->deletes as $pos) {
            $deleted[] = $words1[$pos];
        }
        foreach ($worddiff->adds as $pos) {
            $added[] = $words2[$pos];
        }
        // A changed range of words counts as deleted on one side and added on the other
        foreach ($worddiff->changes as $wordrange) {
            $end = $wordrange->file1start + $wordrange->file1count;
            for ($pos = $wordrange->file1start; $pos < $end; $pos++) {
                $deleted[] = $words1[$pos];
            }
            $end = $wordrange->file2start + $wordrange->file2count;
            for ($pos = $wordrange->file2start; $pos < $end; $pos++) {
                $added[] = $words2[$pos];
            }
        }
    }

    return array($deleted, $added);
}
 /**
  * Checks that ouwiki_diff_html_to_lines() turns the html1 and html2 properties into
  * the expected lines, compared via their plain-string form (keyed from 1).
  */
 function test_splitter()
 {
     $expected1 = array(
         1 => "This is a long paragraph split over several lines and including bold and italic and span tags.",
         2 => "This is a second paragraph.",
         3 => "This div contain's some greengrocer's apostrophe's.",
         4 => "A list",
         5 => "With multiple items",
         6 => "Some of them have multiple line breaks"
     );
     $expected2 = array(
         1 => "This is a long paragraph split over several lines and including bold and italic and span tags.",
         2 => "This is a second paragraph which I have added some text to.",
         3 => "This div contain's some greengrocer's apostrophe's.",
         4 => "A",
         5 => "Some of them have multiple line breaks"
     );

     // First fixture
     $actual1 = ouwiki_line::get_as_strings(ouwiki_diff_html_to_lines($this->html1));
     $this->assertEqual($actual1, $expected1);

     // Second fixture
     $actual2 = ouwiki_line::get_as_strings(ouwiki_diff_html_to_lines($this->html2));
     $this->assertEqual($actual2, $expected2);
 }