protected function parseChangeset(ArcanistDiffChange $change) { // If a diff includes two sets of changes to the same file, let the // second one win. In particular, this occurs when adding subdirectories // in Subversion that contain files: the file text will be present in // both the directory diff and the file diff. See T5555. Dropping the // hunks lets whichever one shows up later win instead of showing changes // twice. $change->dropHunks(); $all_changes = array(); do { $hunk = new ArcanistDiffHunk(); $line = $this->getLineTrimmed(); $real = array(); // In the case where only one line is changed, the length is omitted. // The final group is for git, which appends a guess at the function // context to the diff. $matches = null; $ok = preg_match('/^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? @@(?: .*?)?$/U', $line, $matches); if (!$ok) { // It's possible we hit the style of an svn1.7 property change. // This is a 4-line Index block, followed by an empty line, followed // by a "Property changes on:" section similar to svn1.6. if ($line == '') { $line = $this->nextNonemptyLine(); $ok = preg_match('/^Property changes on:/', $line); if (!$ok) { $this->didFailParse(pht('Confused by empty line')); } $line = $this->nextLine(); return $this->parsePropertyHunk($change); } $this->didFailParse(pht("Expected hunk header '%s'.", '@@ -NN,NN +NN,NN @@')); } $hunk->setOldOffset($matches[1]); $hunk->setNewOffset($matches[3]); // Cover for the cases where length wasn't present (implying one line). $old_len = idx($matches, 2); if (!strlen($old_len)) { $old_len = 1; } $new_len = idx($matches, 4); if (!strlen($new_len)) { $new_len = 1; } $hunk->setOldLength($old_len); $hunk->setNewLength($new_len); $add = 0; $del = 0; $hit_next_hunk = false; while (($line = $this->nextLine()) !== null) { if (strlen(rtrim($line, "\r\n"))) { $char = $line[0]; } else { // Normally, we do not encouter empty lines in diffs, because // unchanged lines have an initial space. However, in Git, with // the option `diff.suppress-blank-empty` set, unchanged blank lines // emit as completely empty. If we encounter a completely empty line, // treat it as a ' ' (i.e., unchanged empty line) line. $char = ' '; } switch ($char) { case '\\': if (!preg_match('@\\ No newline at end of file@', $line)) { $this->didFailParse(pht("Expected '\\ No newline at end of file'.")); } if ($new_len) { $real[] = $line; $hunk->setIsMissingOldNewline(true); } else { $real[] = $line; $hunk->setIsMissingNewNewline(true); } if (!$new_len) { break 2; } break; case '+': ++$add; --$new_len; $real[] = $line; break; case '-': if (!$old_len) { // In this case, we've hit "---" from a new file. So don't // advance the line cursor. $hit_next_hunk = true; break 2; } ++$del; --$old_len; $real[] = $line; break; case ' ': if (!$old_len && !$new_len) { break 2; } --$old_len; --$new_len; $real[] = $line; break; default: // We hit something, likely another hunk. $hit_next_hunk = true; break 2; } } if ($old_len || $new_len) { $this->didFailParse(pht('Found the wrong number of hunk lines.')); } $corpus = implode('', $real); $is_binary = false; if ($this->detectBinaryFiles) { $is_binary = !phutil_is_utf8($corpus); $try_encoding = $this->tryEncoding; if ($is_binary && $try_encoding) { $is_binary = ArcanistDiffUtils::isHeuristicBinaryFile($corpus); if (!$is_binary) { $corpus = phutil_utf8_convert($corpus, 'UTF-8', $try_encoding); if (!phutil_is_utf8($corpus)) { throw new Exception(pht("Failed to convert a hunk from '%s' to UTF-8. " . "Check that the specified encoding is correct.", $try_encoding)); } } } } if ($is_binary) { // SVN happily treats binary files which aren't marked with the right // mime type as text files. Detect that junk here and mark the file // binary. We'll catch stuff with unicode too, but that's verboten // anyway. If there are too many false positives with this we might // need to make it threshold-triggered instead of triggering on any // unprintable byte. $change->setFileType(ArcanistDiffChangeType::FILE_BINARY); } else { $hunk->setCorpus($corpus); $hunk->setAddLines($add); $hunk->setDelLines($del); $change->addHunk($hunk); } if (!$hit_next_hunk) { $line = $this->nextNonemptyLine(); } } while (preg_match('/^@@ /', $line)); }
protected function parseChangeset(ArcanistDiffChange $change) { $all_changes = array(); do { $hunk = new ArcanistDiffHunk(); $line = $this->getLine(); $real = array(); // In the case where only one line is changed, the length is omitted. // The final group is for git, which appends a guess at the function // context to the diff. $matches = null; $ok = preg_match('/^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? @@(?: .*?)?$/U', $line, $matches); if (!$ok) { // It's possible we hit the style of an svn1.7 property change. // This is a 4-line Index block, followed by an empty line, followed // by a "Property changes on:" section similar to svn1.6. if ($line == '') { $line = $this->nextNonemptyLine(); $ok = preg_match('/^Property changes on:/', $line); if (!$ok) { $this->didFailParse("Confused by empty line"); } $line = $this->nextLine(); return $this->parsePropertyHunk($change); } $this->didFailParse("Expected hunk header '@@ -NN,NN +NN,NN @@'."); } $hunk->setOldOffset($matches[1]); $hunk->setNewOffset($matches[3]); // Cover for the cases where length wasn't present (implying one line). $old_len = idx($matches, 2); if (!strlen($old_len)) { $old_len = 1; } $new_len = idx($matches, 4); if (!strlen($new_len)) { $new_len = 1; } $hunk->setOldLength($old_len); $hunk->setNewLength($new_len); $add = 0; $del = 0; $advance = false; while (($line = $this->nextLine()) !== null) { if (strlen($line)) { $char = $line[0]; } else { $char = '~'; } switch ($char) { case '\\': if (!preg_match('@\\ No newline at end of file@', $line)) { $this->didFailParse("Expected '\\ No newline at end of file'."); } if ($new_len) { $real[] = $line; $hunk->setIsMissingOldNewline(true); } else { $real[] = $line; $hunk->setIsMissingNewNewline(true); } if (!$new_len) { $advance = true; break 2; } break; case '+': if (!$new_len) { break 2; } ++$add; --$new_len; $real[] = $line; break; case '-': if (!$old_len) { break 2; } ++$del; --$old_len; $real[] = $line; break; case ' ': if (!$old_len && !$new_len) { break 2; } --$old_len; --$new_len; $real[] = $line; break; case '~': $advance = true; break 2; default: break 2; } } if ($old_len != 0 || $new_len != 0) { $this->didFailParse("Found the wrong number of hunk lines."); } $corpus = implode("\n", $real); $is_binary = false; if ($this->detectBinaryFiles) { $is_binary = !phutil_is_utf8($corpus); if ($is_binary && $this->tryEncoding) { $is_binary = ArcanistDiffUtils::isHeuristicBinaryFile($corpus); if (!$is_binary) { // NOTE: This feature is HIGHLY EXPERIMENTAL and will cause a lot // of issues. Use it at your own risk. $corpus = mb_convert_encoding($corpus, 'UTF-8', $this->tryEncoding); if (!phutil_is_utf8($corpus)) { throw new Exception('Failed converting hunk to ' . $this->tryEncoding); } } } } if ($is_binary) { // SVN happily treats binary files which aren't marked with the right // mime type as text files. Detect that junk here and mark the file // binary. We'll catch stuff with unicode too, but that's verboten // anyway. If there are too many false positives with this we might // need to make it threshold-triggered instead of triggering on any // unprintable byte. $change->setFileType(ArcanistDiffChangeType::FILE_BINARY); } else { $hunk->setCorpus($corpus); $hunk->setAddLines($add); $hunk->setDelLines($del); $change->addHunk($hunk); } if ($advance) { $line = $this->nextNonemptyLine(); } } while (preg_match('/^@@ /', $line)); }