Example #1
0
 /**
  * Rebuild a change object from its dictionary (array) form.
  *
  * Reads the keys `hunks`, `metadata`, `oldPath`, `currentPath`, `type`,
  * `fileType`, `commitHash`, `awayPaths`, `oldProperties` and
  * `newProperties` from the dictionary.
  *
  * @param array $dict Dictionary describing the change.
  * @return ArcanistDiffChange The populated change object.
  */
 public static function newFromDictionary(array $dict)
 {
     // Rehydrate every hunk first; each one has its own dictionary form.
     $hunk_list = array();
     foreach ($dict['hunks'] as $hunk_dict) {
         $hunk_list[] = ArcanistDiffHunk::newFromDictionary($hunk_dict);
     }

     $change = new ArcanistDiffChange();

     // These fields are copied through exactly as they appear.
     $verbatim = array(
         'metadata',
         'oldPath',
         'currentPath',
         'type',
         'fileType',
         'commitHash',
     );
     foreach ($verbatim as $field) {
         $change->{$field} = $dict[$field];
     }

     // TODO: The backend is shipping down some bogus data for these fields,
     // e.g. diff 199453, so they go through nonempty() with an empty array as
     // the fallback. Should probably clean this up.
     $defaulted = array(
         'awayPaths',
         'oldProperties',
         'newProperties',
     );
     foreach ($defaulted as $field) {
         $change->{$field} = nonempty($dict[$field], array());
     }

     $change->hunks = $hunk_list;

     return $change;
 }
 /**
  * Split one hunk into a list of smaller hunks with limited context.
  *
  * The corpus of the base hunk is walked line by line. Each run of changed
  * lines (and any changes close enough that their context would touch) is
  * emitted as its own hunk, starting at most three lines before the first
  * change. Offsets of the emitted hunks are derived from the offsets of the
  * base hunk.
  *
  * @param ArcanistDiffHunk $base_hunk Hunk to split.
  * @return array List of ArcanistDiffHunk objects in corpus order; empty when
  *               the corpus contains no '+' or '-' line.
  */
 private function breakHunkIntoSmallHunks(ArcanistDiffHunk $base_hunk)
 {
     $max_context = 3;
     // Once more than this many unchanged lines follow the last change, the
     // next change can no longer be merged into the current hunk.
     $merge_limit = ($max_context + 1) * 2;

     $rows = phutil_split_lines($base_hunk->getCorpus());
     $row_count = count($rows);
     $old_at = $base_hunk->getOldOffset();
     $new_at = $base_hunk->getNewOffset();

     $pieces = array();
     $cursor = 0;
     while ($cursor < $row_count) {
         // Advance to the next changed line. This deliberately steps over both
         // ' ' (unchanged) and '\' (no newline at end of file) lines: if the
         // latter were not skipped, a file without a terminal newline that is
         // unchanged near its end could yield a trailing hunk with no real
         // change in it, and 'patch' refuses to apply such a hunk.
         $pos = $cursor;
         while ($pos < $row_count) {
             $marker = $rows[$pos][0];
             if ($marker == '+' || $marker == '-') {
                 break;
             }
             ++$pos;
         }
         if ($pos >= $row_count) {
             // Nothing but unchanged lines remain.
             break;
         }

         $first_row = max($pos - $max_context, 0);

         // NOTE: Two constraints pull against each other here. Hunks must not
         // overlap, or 'git apply' rejects the patch after 1.7.3.4. Hunks must
         // not carry too much trailing context, or 'patch' rejects it. So the
         // hunks produced have to be disjoint without excess context.
         $removed = 0;
         $added = 0;
         $uncounted = 0;
         $last_changed = $pos;
         $split_at = null;
         for (; $pos < $row_count; ++$pos) {
             $marker = $rows[$pos][0];

             if ($marker != ' ') {
                 // Any non-context line cancels a pending split point.
                 $split_at = null;
                 $last_changed = $pos;
                 switch ($marker) {
                     case '\\':
                         // A "\ No newline at end of file" line contributes to
                         // neither hunk length.
                         ++$uncounted;
                         break;
                     case '-':
                         ++$removed;
                         break;
                     case '+':
                         ++$added;
                         break;
                 }
                 continue;
             }

             $gap = $pos - $last_changed;
             if ($split_at === null && $gap > $max_context) {
                 // No change for $max_context lines, so the hunk could end
                 // here. Keep scanning in case another change is close enough
                 // that the two hunks have to be merged.
                 $split_at = $pos;
             }
             if ($gap > $merge_limit) {
                 // The next change is definitely too far away to merge, so
                 // stop scanning; the hunk ends at $split_at.
                 break;
             }
         }

         if ($split_at !== null) {
             $pos = $split_at;
         }

         $row_total = min($pos, $row_count) - $first_row;
         $counted = $row_total - $uncounted;

         $piece = new ArcanistDiffHunk();
         $piece->setOldOffset($old_at + $first_row - $cursor);
         $piece->setNewOffset($new_at + $first_row - $cursor);
         // Added lines do not exist on the old side, removed lines do not
         // exist on the new side.
         $piece->setOldLength($counted - $added);
         $piece->setNewLength($counted - $removed);
         $piece->setCorpus(implode('', array_slice($rows, $first_row, $row_total)));
         $pieces[] = $piece;

         // Move both running offsets past everything consumed this round.
         $consumed = $pos - $cursor;
         $old_at += $consumed - $added;
         $new_at += $consumed - $removed;
         $cursor = $pos;
     }

     return $pieces;
 }
 /**
  * Parse consecutive unified-diff hunks at the parser cursor into a change.
  *
  * Existing hunks on the change are dropped first. Each "@@ -a,b +c,d @@"
  * header is then read together with its body; text hunks are added to the
  * change, while a hunk whose body is judged binary marks the whole change as
  * a binary file instead. Parsing continues for as long as the current line
  * starts another hunk header.
  *
  * @param ArcanistDiffChange $change Change that receives the parsed hunks.
  * @return mixed The result of parsePropertyHunk() when a "Property changes
  *               on:" section is found after an empty line; otherwise
  *               nothing.
  * @throws Exception When the hunk body is still not UTF-8 after converting
  *                   it from the configured encoding.
  */
 protected function parseChangeset(ArcanistDiffChange $change)
 {
     // If a diff includes two sets of changes to the same file, let the
     // second one win. In particular, this occurs when adding subdirectories
     // in Subversion that contain files: the file text will be present in
     // both the directory diff and the file diff. See T5555. Dropping the
     // hunks lets whichever one shows up later win instead of showing changes
     // twice.
     $change->dropHunks();

     do {
         $hunk = new ArcanistDiffHunk();
         $line = $this->getLineTrimmed();
         $body = array();

         // A length is omitted from the header when only one line changed.
         // The optional trailing group absorbs the function-context guess
         // that git appends after the closing "@@".
         $header = null;
         $is_header = preg_match('/^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? @@(?: .*?)?$/U', $line, $header);
         if (!$is_header) {
             // It's possible we hit the style of an svn1.7 property change.
             // This is a 4-line Index block, followed by an empty line,
             // followed by a "Property changes on:" section similar to svn1.6.
             if ($line == '') {
                 $line = $this->nextNonemptyLine();
                 if (!preg_match('/^Property changes on:/', $line)) {
                     $this->didFailParse(pht('Confused by empty line'));
                 }
                 // Step past the "Property changes on:" line before handing
                 // over to the property parser.
                 $this->nextLine();
                 return $this->parsePropertyHunk($change);
             }
             $this->didFailParse(pht("Expected hunk header '%s'.", '@@ -NN,NN +NN,NN @@'));
         }

         $hunk->setOldOffset($header[1]);
         $hunk->setNewOffset($header[3]);

         // A missing length in the header implies exactly one line.
         $old_remaining = idx($header, 2);
         $old_remaining = strlen($old_remaining) ? $old_remaining : 1;
         $new_remaining = idx($header, 4);
         $new_remaining = strlen($new_remaining) ? $new_remaining : 1;
         $hunk->setOldLength($old_remaining);
         $hunk->setNewLength($new_remaining);

         $added = 0;
         $removed = 0;
         // Set when the loop stops on a line that belongs to whatever comes
         // next, so the cursor must not be advanced past it afterwards.
         $stopped_on_next = false;
         while (($line = $this->nextLine()) !== null) {
             // Normally, empty lines do not occur in diffs, because unchanged
             // lines have an initial space. However, in Git, with the option
             // `diff.suppress-blank-empty` set, unchanged blank lines are
             // emitted as completely empty. Treat such a line as a ' '
             // (i.e., unchanged empty line) line.
             $marker = strlen(rtrim($line, "\r\n")) ? $line[0] : ' ';

             switch ($marker) {
                 case ' ':
                     if (!($old_remaining || $new_remaining)) {
                         break 2;
                     }
                     --$old_remaining;
                     --$new_remaining;
                     $body[] = $line;
                     break;
                 case '+':
                     ++$added;
                     --$new_remaining;
                     $body[] = $line;
                     break;
                 case '-':
                     if (!$old_remaining) {
                         // In this case, we've hit "---" from a new file. So
                         // don't advance the line cursor.
                         $stopped_on_next = true;
                         break 2;
                     }
                     ++$removed;
                     --$old_remaining;
                     $body[] = $line;
                     break;
                 case '\\':
                     if (!preg_match('@\\ No newline at end of file@', $line)) {
                         $this->didFailParse(pht("Expected '\\ No newline at end of file'."));
                     }
                     $body[] = $line;
                     if ($new_remaining) {
                         // New-side lines are still expected, so the marker is
                         // recorded against the old side and parsing goes on.
                         $hunk->setIsMissingOldNewline(true);
                         break;
                     }
                     // No new-side lines are left: record the marker against
                     // the new side and finish this hunk.
                     $hunk->setIsMissingNewNewline(true);
                     break 2;
                 default:
                     // We hit something, likely another hunk.
                     $stopped_on_next = true;
                     break 2;
             }
         }

         if ($old_remaining || $new_remaining) {
             $this->didFailParse(pht('Found the wrong number of hunk lines.'));
         }

         $text = implode('', $body);

         $binary = false;
         if ($this->detectBinaryFiles && !phutil_is_utf8($text)) {
             $binary = true;
             $encoding = $this->tryEncoding;
             if ($encoding) {
                 // With a fallback encoding configured, only keep the binary
                 // verdict if the heuristic agrees; otherwise convert.
                 $binary = ArcanistDiffUtils::isHeuristicBinaryFile($text);
                 if (!$binary) {
                     $text = phutil_utf8_convert($text, 'UTF-8', $encoding);
                     if (!phutil_is_utf8($text)) {
                         throw new Exception(
                             pht(
                                 "Failed to convert a hunk from '%s' to UTF-8. " .
                                 "Check that the specified encoding is correct.",
                                 $encoding));
                     }
                 }
             }
         }

         if ($binary) {
             // SVN happily treats binary files which aren't marked with the
             // right mime type as text files. Detect that junk here and mark
             // the file binary. We'll catch stuff with unicode too, but that's
             // verboten anyway. If there are too many false positives with
             // this we might need to make it threshold-triggered instead of
             // triggering on any unprintable byte.
             $change->setFileType(ArcanistDiffChangeType::FILE_BINARY);
         } else {
             $hunk->setCorpus($text);
             $hunk->setAddLines($added);
             $hunk->setDelLines($removed);
             $change->addHunk($hunk);
         }

         if (!$stopped_on_next) {
             $line = $this->nextNonemptyLine();
         }
     } while (preg_match('/^@@ /', $line));
 }
Example #4
0
 /**
  * Parse consecutive unified-diff hunks at the parser cursor into a change.
  *
  * Each "@@ -a,b +c,d @@" header is read together with its body. Text hunks
  * are added to the change; a hunk whose body is judged binary marks the whole
  * change as a binary file instead. Parsing continues for as long as the
  * current line starts another hunk header.
  *
  * NOTE(review): lines returned by nextLine() appear to carry no trailing
  * newline here (the body is joined with "\n"); confirm against the line
  * reader.
  *
  * @param ArcanistDiffChange $change Change that receives the parsed hunks.
  * @return mixed The result of parsePropertyHunk() when a "Property changes
  *               on:" section is found after an empty line; otherwise
  *               nothing.
  * @throws Exception When the hunk body is still not UTF-8 after converting
  *                   it from the configured encoding.
  */
 protected function parseChangeset(ArcanistDiffChange $change)
 {
     do {
         $hunk = new ArcanistDiffHunk();
         $line = $this->getLine();
         $real = array();

         // In the case where only one line is changed, the length is omitted.
         // The final group is for git, which appends a guess at the function
         // context to the diff.
         $matches = null;
         $ok = preg_match('/^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? @@(?: .*?)?$/U', $line, $matches);
         if (!$ok) {
             // It's possible we hit the style of an svn1.7 property change.
             // This is a 4-line Index block, followed by an empty line,
             // followed by a "Property changes on:" section similar to svn1.6.
             if ($line == '') {
                 $line = $this->nextNonemptyLine();
                 $ok = preg_match('/^Property changes on:/', $line);
                 if (!$ok) {
                     $this->didFailParse("Confused by empty line");
                 }
                 // Step past the "Property changes on:" line before handing
                 // over to the property parser.
                 $this->nextLine();
                 return $this->parsePropertyHunk($change);
             }
             $this->didFailParse("Expected hunk header '@@ -NN,NN +NN,NN @@'.");
         }

         $hunk->setOldOffset($matches[1]);
         $hunk->setNewOffset($matches[3]);

         // Cover for the cases where length wasn't present (implying one line).
         $old_len = idx($matches, 2);
         if (!strlen($old_len)) {
             $old_len = 1;
         }
         $new_len = idx($matches, 4);
         if (!strlen($new_len)) {
             $new_len = 1;
         }
         $hunk->setOldLength($old_len);
         $hunk->setNewLength($new_len);

         $add = 0;
         $del = 0;
         // Set when the loop consumed a line that is not part of the next
         // block, so the cursor has to move on to the next non-empty line.
         $advance = false;
         while (($line = $this->nextLine()) !== null) {
             if (strlen($line)) {
                 $char = $line[0];
             } else if ($old_len > 0 && $new_len > 0) {
                 // Normally unchanged lines start with a space, but Git with
                 // `diff.suppress-blank-empty` emits unchanged blank lines as
                 // completely empty. While the header still promises lines on
                 // both sides, an empty line can only be such a context line;
                 // ending the hunk here would always fail the length check
                 // below. It is stored as ' ' because the corpus is joined with
                 // "\n" and an empty element would be indistinguishable from a
                 // line break.
                 $char = ' ';
                 $line = ' ';
             } else {
                 // An empty line once the hunk is complete separates it from
                 // whatever follows.
                 $char = '~';
             }

             switch ($char) {
                 case '\\':
                     if (!preg_match('@\\ No newline at end of file@', $line)) {
                         $this->didFailParse("Expected '\\ No newline at end of file'.");
                     }
                     $real[] = $line;
                     if ($new_len) {
                         // New-side lines are still expected, so the marker is
                         // recorded against the old side and parsing goes on.
                         $hunk->setIsMissingOldNewline(true);
                         break;
                     }
                     // No new-side lines are left: record the marker against
                     // the new side and finish this hunk.
                     $hunk->setIsMissingNewNewline(true);
                     $advance = true;
                     break 2;
                 case '+':
                     if (!$new_len) {
                         break 2;
                     }
                     ++$add;
                     --$new_len;
                     $real[] = $line;
                     break;
                 case '-':
                     if (!$old_len) {
                         break 2;
                     }
                     ++$del;
                     --$old_len;
                     $real[] = $line;
                     break;
                 case ' ':
                     if (!$old_len && !$new_len) {
                         break 2;
                     }
                     --$old_len;
                     --$new_len;
                     $real[] = $line;
                     break;
                 case '~':
                     $advance = true;
                     break 2;
                 default:
                     // Anything else belongs to the next block; leave the
                     // cursor on it.
                     break 2;
             }
         }

         if ($old_len != 0 || $new_len != 0) {
             $this->didFailParse("Found the wrong number of hunk lines.");
         }

         $corpus = implode("\n", $real);

         $is_binary = false;
         if ($this->detectBinaryFiles) {
             $is_binary = !phutil_is_utf8($corpus);
             if ($is_binary && $this->tryEncoding) {
                 $is_binary = ArcanistDiffUtils::isHeuristicBinaryFile($corpus);
                 if (!$is_binary) {
                     // NOTE: This feature is HIGHLY EXPERIMENTAL and will cause
                     // a lot of issues. Use it at your own risk.
                     $corpus = mb_convert_encoding($corpus, 'UTF-8', $this->tryEncoding);
                     if (!phutil_is_utf8($corpus)) {
                         // The conversion runs from the configured encoding to
                         // UTF-8, so the message names it as the source.
                         throw new Exception(
                             "Failed to convert a hunk from '" . $this->tryEncoding . "' to UTF-8. " .
                             "Check that the specified encoding is correct.");
                     }
                 }
             }
         }

         if ($is_binary) {
             // SVN happily treats binary files which aren't marked with the
             // right mime type as text files. Detect that junk here and mark
             // the file binary. We'll catch stuff with unicode too, but that's
             // verboten anyway. If there are too many false positives with
             // this we might need to make it threshold-triggered instead of
             // triggering on any unprintable byte.
             $change->setFileType(ArcanistDiffChangeType::FILE_BINARY);
         } else {
             $hunk->setCorpus($corpus);
             $hunk->setAddLines($add);
             $hunk->setDelLines($del);
             $change->addHunk($hunk);
         }

         if ($advance) {
             $line = $this->nextNonemptyLine();
         }
     } while (preg_match('/^@@ /', $line));
 }