/**
 * Compares two files based initially on lines and then on words within the lines that
 * differ.
 *
 * Lines present on only one side contribute every one of their words to the
 * result. Ranges of lines reported as changed are flattened into word lists and
 * diffed a second time, word by word.
 *
 * @param array $lines1 Array of ouwiki_line
 * @param array $lines2 Array of ouwiki_line
 * @return array (deleted,added); deleted and added are arrays of ouwiki_word with
 *   position numbers from $lines1 and $lines2 respectively
 */
function ouwiki_diff_words($lines1, $lines2) {
    // Prepare arrays
    $deleted = array();
    $added = array();

    // Get line difference
    $linediff = ouwiki_diff(
            ouwiki_line::get_as_strings($lines1),
            ouwiki_line::get_as_strings($lines2));

    // Handle lines that were entirely deleted. Words are appended one at a time
    // instead of via array_merge(), which builds a fresh copy of the whole
    // accumulated list for every line.
    foreach ($linediff->deletes as $deletedline) {
        foreach ($lines1[$deletedline]->words as $word) {
            $deleted[] = $word;
        }
    }

    // And ones that were entirely added
    foreach ($linediff->adds as $addedline) {
        foreach ($lines2[$addedline]->words as $word) {
            $added[] = $word;
        }
    }

    // Changes get diffed at the individual-word level
    foreach ($linediff->changes as $linerange) {
        // Build the list of all words on each side of the range, together with
        // the matching plain strings. Both lists are keyed from 1; the indexes
        // returned by the word-level diff are used below to look words up in them.
        $file1words = array();
        $file1strings = array();
        $position = 0;
        $file1end = $linerange->file1start + $linerange->file1count;
        for ($index = $linerange->file1start; $index < $file1end; $index++) {
            foreach ($lines1[$index]->words as $word) {
                $position++;
                $file1words[$position] = $word;
                $file1strings[$position] = $word->word;
            }
        }

        $file2words = array();
        $file2strings = array();
        $position = 0;
        $file2end = $linerange->file2start + $linerange->file2count;
        for ($index = $linerange->file2start; $index < $file2end; $index++) {
            foreach ($lines2[$index]->words as $word) {
                $position++;
                $file2words[$position] = $word;
                $file2strings[$position] = $word->word;
            }
        }

        // Run diff on strings
        $worddiff = ouwiki_diff($file1strings, $file2strings);
        foreach ($worddiff->adds as $index) {
            $added[] = $file2words[$index];
        }
        foreach ($worddiff->deletes as $index) {
            $deleted[] = $file1words[$index];
        }

        // Word-level change ranges: everything on side 1 counts as deleted and
        // everything on side 2 as added. A separate loop variable is used so the
        // line-level range above is not overwritten.
        foreach ($worddiff->changes as $wordrange) {
            $deletedend = $wordrange->file1start + $wordrange->file1count;
            for ($index = $wordrange->file1start; $index < $deletedend; $index++) {
                $deleted[] = $file1words[$index];
            }
            $addedend = $wordrange->file2start + $wordrange->file2count;
            for ($index = $wordrange->file2start; $index < $addedend; $index++) {
                $added[] = $file2words[$index];
            }
        }
    }

    return array($deleted, $added);
}
/**
 * Checks that splitting the html1 and html2 fixtures with
 * ouwiki_diff_html_to_lines() and converting the result through
 * ouwiki_line::get_as_strings() gives the expected arrays of strings,
 * keyed from 1.
 */
function test_splitter() {
    // First fixture: six lines expected.
    $actualfirst = ouwiki_line::get_as_strings(ouwiki_diff_html_to_lines($this->html1));
    $expectedfirst = array(
        1 => "This is a long paragraph split over several lines and including bold and italic and span tags.",
        2 => "This is a second paragraph.",
        3 => "This div contain's some greengrocer's apostrophe's.",
        4 => "A list",
        5 => "With multiple items",
        6 => "Some of them have multiple line breaks"
    );
    $this->assertEqual($actualfirst, $expectedfirst);

    // Second fixture: five lines expected (second paragraph is longer, the
    // list entries differ).
    $actualsecond = ouwiki_line::get_as_strings(ouwiki_diff_html_to_lines($this->html2));
    $expectedsecond = array(
        1 => "This is a long paragraph split over several lines and including bold and italic and span tags.",
        2 => "This is a second paragraph which I have added some text to.",
        3 => "This div contain's some greengrocer's apostrophe's.",
        4 => "A",
        5 => "Some of them have multiple line breaks"
    );
    $this->assertEqual($actualsecond, $expectedsecond);
}