/**
 * Cleanup of semantically trivial equalities, checked against a table of edit scripts.
 *
 * Every case loads an edit script into the diff under test, runs cleanupSemantic()
 * and compares the resulting changes with the expected edit script.
 */
public function testCleanupSemantic()
{
    // Each entry is array(changes before cleanup, changes expected after cleanup).
    $cases = array(
        // Null case.
        array(
            array(),
            array(),
        ),
        // No elimination #1.
        array(
            array(
                array(Diff::DELETE, "ab"),
                array(Diff::INSERT, "cd"),
                array(Diff::EQUAL, "12"),
                array(Diff::DELETE, "e"),
            ),
            array(
                array(Diff::DELETE, "ab"),
                array(Diff::INSERT, "cd"),
                array(Diff::EQUAL, "12"),
                array(Diff::DELETE, "e"),
            ),
        ),
        // No elimination #2.
        array(
            array(
                array(Diff::DELETE, "abc"),
                array(Diff::INSERT, "ABC"),
                array(Diff::EQUAL, "1234"),
                array(Diff::DELETE, "wxyz"),
            ),
            array(
                array(Diff::DELETE, "abc"),
                array(Diff::INSERT, "ABC"),
                array(Diff::EQUAL, "1234"),
                array(Diff::DELETE, "wxyz"),
            ),
        ),
        // Simple elimination.
        array(
            array(
                array(Diff::DELETE, "a"),
                array(Diff::EQUAL, "b"),
                array(Diff::DELETE, "c"),
            ),
            array(
                array(Diff::DELETE, "abc"),
                array(Diff::INSERT, "b"),
            ),
        ),
        // Backpass elimination.
        array(
            array(
                array(Diff::DELETE, "ab"),
                array(Diff::EQUAL, "cd"),
                array(Diff::DELETE, "e"),
                array(Diff::EQUAL, "f"),
                array(Diff::INSERT, "g"),
            ),
            array(
                array(Diff::DELETE, "abcdef"),
                array(Diff::INSERT, "cdfg"),
            ),
        ),
        // Multiple eliminations.
        array(
            array(
                array(Diff::INSERT, "1"),
                array(Diff::EQUAL, "A"),
                array(Diff::DELETE, "B"),
                array(Diff::INSERT, "2"),
                array(Diff::EQUAL, "_"),
                array(Diff::INSERT, "1"),
                array(Diff::EQUAL, "A"),
                array(Diff::DELETE, "B"),
                array(Diff::INSERT, "2"),
            ),
            array(
                array(Diff::DELETE, "AB_AB"),
                array(Diff::INSERT, "1A2_1A2"),
            ),
        ),
        // Word boundaries.
        array(
            array(
                array(Diff::EQUAL, "The c"),
                array(Diff::DELETE, "ow and the c"),
                array(Diff::EQUAL, "at."),
            ),
            array(
                array(Diff::EQUAL, "The "),
                array(Diff::DELETE, "cow and the "),
                array(Diff::EQUAL, "cat."),
            ),
        ),
        // No overlap elimination.
        array(
            array(
                array(Diff::DELETE, "abcxx"),
                array(Diff::INSERT, "xxdef"),
            ),
            array(
                array(Diff::DELETE, "abcxx"),
                array(Diff::INSERT, "xxdef"),
            ),
        ),
        // Overlap elimination.
        array(
            array(
                array(Diff::DELETE, "abcxxx"),
                array(Diff::INSERT, "xxxdef"),
            ),
            array(
                array(Diff::DELETE, "abc"),
                array(Diff::EQUAL, "xxx"),
                array(Diff::INSERT, "def"),
            ),
        ),
        // Reverse overlap elimination.
        array(
            array(
                array(Diff::DELETE, "xxxabc"),
                array(Diff::INSERT, "defxxx"),
            ),
            array(
                array(Diff::INSERT, "def"),
                array(Diff::EQUAL, "xxx"),
                array(Diff::DELETE, "abc"),
            ),
        ),
        // Two overlap eliminations.
        array(
            array(
                array(Diff::DELETE, "abcd1212"),
                array(Diff::INSERT, "1212efghi"),
                array(Diff::EQUAL, "----"),
                array(Diff::DELETE, "A3"),
                array(Diff::INSERT, "3BC"),
            ),
            array(
                array(Diff::DELETE, "abcd"),
                array(Diff::EQUAL, "1212"),
                array(Diff::INSERT, "efghi"),
                array(Diff::EQUAL, "----"),
                array(Diff::DELETE, "A"),
                array(Diff::EQUAL, "3"),
                array(Diff::INSERT, "BC"),
            ),
        ),
    );

    // Cases run in declaration order, so the first failing case stops the test
    // exactly as a sequence of individual assertions would.
    foreach ($cases as $case) {
        $before = $case[0];
        $expected = $case[1];

        $this->d->setChanges($before);
        $this->d->cleanupSemantic();
        $this->assertEquals($expected, $this->d->getChanges());
    }
}
/**
 * Line-mode speedup: diff the two texts line by line first, then re-diff each
 * replaced region character by character for greater accuracy.
 * The result is not guaranteed to be a minimal diff.
 *
 * @param string $text1    Old string to be diffed.
 * @param string $text2    New string to be diffed.
 * @param int    $deadline Time when the diff should be complete by.
 *
 * @return array Array of changes.
 */
protected function lineMode($text1, $text2, $deadline)
{
    // First pass: diff the texts on a line-by-line basis.
    list($lineChars1, $lineChars2, $lineArray) = $this->getToolkit()->linesToChars($text1, $text2);
    $lineDiff = new Diff();
    $lineDiff->main($lineChars1, $lineChars2, false, $deadline);

    // Convert the line-level result back to the original text.
    $diffs = $lineDiff->getChanges();
    $this->getToolkit()->charsToLines($diffs, $lineArray);
    $lineDiff->setChanges($diffs);

    // Eliminate freak matches (e.g. blank lines).
    $lineDiff->cleanupSemantic();
    $diffs = $lineDiff->getChanges();

    // Second pass: re-diff every delete+insert run character by character.
    // A trailing empty equality makes the loop flush the final run as well.
    array_push($diffs, array(self::EQUAL, ''));

    $countDelete = 0;
    $countInsert = 0;
    $textDelete = '';
    $textInsert = '';

    // count() is re-evaluated on purpose: the splice below changes the array length.
    for ($pointer = 0; $pointer < count($diffs); $pointer++) {
        $operation = $diffs[$pointer][0];

        if ($operation == self::DELETE) {
            $countDelete++;
            $textDelete .= $diffs[$pointer][1];
            continue;
        }

        if ($operation == self::INSERT) {
            $countInsert++;
            $textInsert .= $diffs[$pointer][1];
            continue;
        }

        if ($operation != self::EQUAL) {
            continue;
        }

        // Reached an equality: if the preceding run mixes deletions and insertions,
        // replace those records with a character-level diff of the accumulated texts.
        if ($countDelete > 0 && $countInsert > 0) {
            $runLength = $countDelete + $countInsert;
            $runStart = $pointer - $runLength;

            $subDiff = new Diff();
            $subDiff->main($textDelete, $textInsert, false, $deadline);
            $replacement = $subDiff->getChanges();

            array_splice($diffs, $runStart, $runLength, $replacement);

            // Point back at the equality, which now sits right after the spliced-in records.
            $pointer = $runStart + count($replacement);
        }

        // Start accumulating a fresh run.
        $countDelete = 0;
        $countInsert = 0;
        $textDelete = '';
        $textInsert = '';
    }

    // Remove the dummy entry at the end.
    array_pop($diffs);

    return $diffs;
}
/**
 * Eliminate semantically trivial equalities to reduce the number of edits.
 * Delegates to the wrapped diff object and writes the result back into $diffs.
 *
 * @param array $diffs Array of diff arrays; overwritten with the cleaned-up changes.
 */
public function diff_cleanupSemantic(&$diffs)
{
    $engine = $this->diff;

    $engine->setChanges($diffs);
    $engine->cleanupSemantic();

    $diffs = $engine->getChanges();
}