/**
 * Core diff routine: compares two strings fragment by fragment, where the
 * fragments are obtained by splitting both strings on the given delimiters.
 *
 * The approach is divide-and-conquer: locate a long run of fragments common
 * to both segments, emit it as a copy, then diff whatever lies to the left
 * and to the right of that run. Instead of recursing, the pending
 * sub-segments are kept on an explicit job stack.
 *
 * @param string $from_text  Text to diff from.
 * @param string $to_text    Text to diff to.
 * @param mixed  $delimiters Delimiters handed to extractFragments(); an empty
 *                           value selects character-level diffing instead.
 * @return array Edit operations ordered by their offset in $from_text (for
 *               empty delimiters, whatever doCharDiff() returns).
 */
private static function doFragmentDiff($from_text, $to_text, $delimiters) {
    // No delimiters: defer to the dedicated character-level code path.
    if (empty($delimiters)) {
        return FineDiff::doCharDiff($from_text, $to_text);
    }

    $from_len = strlen($from_text);
    $to_len = strlen($to_text);

    // Fragments are addressed by offset: the fragment that follows the one
    // at offset $i is looked up at $i + strlen(fragment at $i).
    $from_parts = FineDiff::extractFragments($from_text, $delimiters);
    $to_parts = FineDiff::extractFragments($to_text, $delimiters);

    // Operations keyed by (from offset * 4 + rank) so that the final ksort()
    // yields them in text order; at an equal offset delete/replace (+0)
    // sorts before insert (+1), which sorts before copy (+2).
    $ops = array();

    // Stack of segments still to be diffed:
    // array(from start, from end, to start, to end), ends exclusive.
    $pending = array(array(0, $from_len, 0, $to_len));

    // fragment => every offset at which it occurs in $to_parts
    // (shared by all jobs).
    $positions_in_to = array();

    while (!empty($pending)) {
        list($from_lo, $from_hi, $to_lo, $to_hi) = array_pop($pending);

        $from_span = $from_hi - $from_lo;
        $to_span = $to_hi - $to_lo;

        // Trivial segments: one side (or both) is empty.
        if (!$from_span) {
            if ($to_span) {
                $ops[$from_lo * 4 + 1] = new FineDiffInsertOp(substr($to_text, $to_lo, $to_span));
            }
            continue;
        }
        if (!$to_span) {
            $ops[$from_lo * 4] = new FineDiffDeleteOp($from_span);
            continue;
        }

        // Search for the longest run of identical fragments in both segments.
        $best_len = 0;
        // fragment => offsets in $to_parts restricted to the current "to" segment
        $positions_in_segment = array();
        $from_pos = $from_lo;

        while ($from_pos < $from_hi) {
            $part = $from_parts[$from_pos];
            $part_len = strlen($part);

            // Performance boost: reuse previously computed offset lists.
            if (isset($positions_in_segment[$part])) {
                $candidates = $positions_in_segment[$part];
            } else {
                if (!isset($positions_in_to[$part])) {
                    $positions_in_to[$part] = array_keys($to_parts, $part, true);
                }
                $everywhere = $positions_in_to[$part];

                if ($to_lo <= 0 && $to_hi >= $to_len) {
                    // The segment spans the whole "to" text: nothing to filter.
                    $candidates = $everywhere;
                } else {
                    // Keep only the offsets that fall inside the current segment.
                    // The early break relies on the offsets being in ascending
                    // order (presumably how extractFragments() builds its
                    // array -- confirm there).
                    $candidates = array();
                    foreach ($everywhere as $pos) {
                        if ($pos >= $to_lo) {
                            if ($pos >= $to_hi) {
                                break;
                            }
                            $candidates[] = $pos;
                        }
                    }
                    $positions_in_segment[$part] = $candidates;
                }
            }

            // Extend a match from each candidate offset as far as it goes.
            foreach ($candidates as $to_pos) {
                $run = $part_len;
                while (
                    $from_pos + $run < $from_hi
                    && $to_pos + $run < $to_hi
                    && $from_parts[$from_pos + $run] === $to_parts[$to_pos + $run]
                ) {
                    $run += strlen($from_parts[$from_pos + $run]);
                }
                if ($run > $best_len) {
                    $best_len = $run;
                    $best_from = $from_pos;
                    $best_to = $to_pos;
                }
            }

            $from_pos += $part_len;

            // Stop scanning when either
            //  - the best match already covers at least half of the "from"
            //    segment (heuristic -- TODO: Really?), or
            //  - what remains of the segment cannot hold a longer match.
            if ($best_len >= $from_span / 2 || $from_pos + $best_len >= $from_hi) {
                break;
            }
        }

        if ($best_len) {
            // Emit the copy and queue the parts before and after it.
            $ops[$best_from * 4 + 2] = new FineDiffCopyOp($best_len);
            array_push(
                $pending,
                array($from_lo, $best_from, $to_lo, $best_to),
                array($best_from + $best_len, $from_hi, $best_to + $best_len, $to_hi)
            );
        } else {
            // Nothing in common: replace the whole segment.
            $ops[$from_lo * 4] = new FineDiffReplaceOp($from_span, substr($to_text, $to_lo, $to_span));
        }
    }

    ksort($ops, SORT_NUMERIC);
    return array_values($ops);
}