예제 #1
0
 /**
  * Parse a run of consecutive unified-diff hunks ("@@ -a,b +c,d @@ ...")
  * beginning at the parser's current line and attach them to a change.
  *
  * Any hunks already on the change are dropped first. Each parsed hunk is
  * added to the change with its offsets, lengths, add/delete counts and
  * corpus, unless binary detection flags the corpus, in which case the change
  * is marked as a binary file and the hunk is not added.
  *
  * If the current line is empty rather than a hunk header, the next nonempty
  * line must begin with "Property changes on:"; parsing is then handed off to
  * parsePropertyHunk().
  *
  * @param ArcanistDiffChange $change Change which receives the parsed hunks.
  * @return mixed Whatever parsePropertyHunk() returns when the property-change
  *   path is taken; otherwise nothing.
  * @throws Exception When a non-UTF-8 hunk cannot be converted to UTF-8 using
  *   the configured encoding. Parse failures are reported via didFailParse(),
  *   which presumably also throws (its definition is not visible here).
  */
 protected function parseChangeset(ArcanistDiffChange $change)
 {
     // If a diff includes two sets of changes to the same file, let the
     // second one win. In particular, this occurs when adding subdirectories
     // in Subversion that contain files: the file text will be present in
     // both the directory diff and the file diff. See T5555. Dropping the
     // hunks lets whichever one shows up later win instead of showing changes
     // twice.
     $change->dropHunks();

     do {
         $hunk = new ArcanistDiffHunk();
         $line = $this->getLineTrimmed();
         $real = array();

         // In the case where only one line is changed, the length is omitted.
         // The final group is for git, which appends a guess at the function
         // context to the diff.
         $matches = null;
         $ok = preg_match('/^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? @@(?: .*?)?$/U', $line, $matches);
         if (!$ok) {
             // It's possible we hit the style of an svn1.7 property change.
             // This is a 4-line Index block, followed by an empty line, followed
             // by a "Property changes on:" section similar to svn1.6.
             if ($line == '') {
                 $line = $this->nextNonemptyLine();
                 $ok = preg_match('/^Property changes on:/', $line);
                 if (!$ok) {
                     $this->didFailParse(pht('Confused by empty line'));
                 }
                 $line = $this->nextLine();
                 return $this->parsePropertyHunk($change);
             }
             $this->didFailParse(pht("Expected hunk header '%s'.", '@@ -NN,NN +NN,NN @@'));
         }

         $hunk->setOldOffset($matches[1]);
         $hunk->setNewOffset($matches[3]);

         // Cover for the cases where length wasn't present (implying one line).
         // A trailing optional group that did not participate in the match is
         // absent from $matches, so default to '' rather than null: passing
         // null to strlen() is deprecated as of PHP 8.1.
         $old_len = idx($matches, 2, '');
         if (!strlen($old_len)) {
             $old_len = 1;
         }
         $new_len = idx($matches, 4, '');
         if (!strlen($new_len)) {
             $new_len = 1;
         }
         $hunk->setOldLength($old_len);
         $hunk->setNewLength($new_len);

         // From here on, $old_len and $new_len count down the old-side and
         // new-side lines still expected in this hunk.
         $add = 0;
         $del = 0;
         $hit_next_hunk = false;
         while (($line = $this->nextLine()) !== null) {
             if (strlen(rtrim($line, "\r\n"))) {
                 $char = $line[0];
             } else {
                 // Normally, we do not encounter empty lines in diffs, because
                 // unchanged lines have an initial space. However, in Git, with
                 // the option `diff.suppress-blank-empty` set, unchanged blank lines
                 // emit as completely empty. If we encounter a completely empty line,
                 // treat it as a ' ' (i.e., unchanged empty line) line.
                 $char = ' ';
             }

             switch ($char) {
                 case '\\':
                     if (!preg_match('@\\ No newline at end of file@', $line)) {
                         $this->didFailParse(pht("Expected '\\ No newline at end of file'."));
                     }
                     $real[] = $line;
                     if ($new_len) {
                         // New-side lines are still pending, so the marker is
                         // attributed to the old side and the hunk continues.
                         $hunk->setIsMissingOldNewline(true);
                     } else {
                         // No new-side lines remain: attribute the marker to
                         // the new side and finish this hunk.
                         $hunk->setIsMissingNewNewline(true);
                         break 2;
                     }
                     break;
                 case '+':
                     ++$add;
                     --$new_len;
                     $real[] = $line;
                     break;
                 case '-':
                     if (!$old_len) {
                         // In this case, we've hit "---" from a new file. So don't
                         // advance the line cursor.
                         $hit_next_hunk = true;
                         break 2;
                     }
                     ++$del;
                     --$old_len;
                     $real[] = $line;
                     break;
                 case ' ':
                     if (!$old_len && !$new_len) {
                         break 2;
                     }
                     --$old_len;
                     --$new_len;
                     $real[] = $line;
                     break;
                 default:
                     // We hit something, likely another hunk.
                     $hit_next_hunk = true;
                     break 2;
             }
         }

         // Both counters must have reached exactly zero; anything else
         // (including a negative count) means the body disagreed with the
         // header.
         if ($old_len || $new_len) {
             $this->didFailParse(pht('Found the wrong number of hunk lines.'));
         }

         $corpus = implode('', $real);

         $is_binary = false;
         if ($this->detectBinaryFiles) {
             $is_binary = !phutil_is_utf8($corpus);
             $try_encoding = $this->tryEncoding;
             if ($is_binary && $try_encoding) {
                 // Not valid UTF-8, but an alternate encoding is configured:
                 // unless the heuristic says the data is binary, convert it.
                 $is_binary = ArcanistDiffUtils::isHeuristicBinaryFile($corpus);
                 if (!$is_binary) {
                     $corpus = phutil_utf8_convert($corpus, 'UTF-8', $try_encoding);
                     if (!phutil_is_utf8($corpus)) {
                         throw new Exception(pht("Failed to convert a hunk from '%s' to UTF-8. " . "Check that the specified encoding is correct.", $try_encoding));
                     }
                 }
             }
         }

         if ($is_binary) {
             // SVN happily treats binary files which aren't marked with the right
             // mime type as text files. Detect that junk here and mark the file
             // binary. We'll catch stuff with unicode too, but that's verboten
             // anyway. If there are too many false positives with this we might
             // need to make it threshold-triggered instead of triggering on any
             // unprintable byte.
             $change->setFileType(ArcanistDiffChangeType::FILE_BINARY);
         } else {
             $hunk->setCorpus($corpus);
             $hunk->setAddLines($add);
             $hunk->setDelLines($del);
             $change->addHunk($hunk);
         }

         // If the loop stopped on a line belonging to whatever comes next, that
         // line is still current; otherwise move on to the next nonempty line.
         if (!$hit_next_hunk) {
             $line = $this->nextNonemptyLine();
         }

         // nextLine() yields null at end of input (presumably
         // nextNonemptyLine() does too); never hand null to preg_match().
     } while ($line !== null && preg_match('/^@@ /', $line));
 }
예제 #2
0
 /**
  * Parse a run of consecutive unified-diff hunks ("@@ -a,b +c,d @@ ...")
  * beginning at the parser's current line and attach them to a change.
  *
  * Each parsed hunk is added to the change with its offsets, lengths,
  * add/delete counts and corpus, unless binary detection flags the corpus, in
  * which case the change is marked as a binary file and the hunk is not added.
  *
  * If the current line is empty rather than a hunk header, the next nonempty
  * line must begin with "Property changes on:"; parsing is then handed off to
  * parsePropertyHunk().
  *
  * @param ArcanistDiffChange $change Change which receives the parsed hunks.
  * @return mixed Whatever parsePropertyHunk() returns when the property-change
  *   path is taken; otherwise nothing.
  * @throws Exception When a non-UTF-8 hunk cannot be converted to UTF-8 using
  *   the configured encoding. Parse failures are reported via didFailParse(),
  *   which presumably also throws (its definition is not visible here).
  */
 protected function parseChangeset(ArcanistDiffChange $change)
 {
     do {
         $hunk = new ArcanistDiffHunk();
         $line = $this->getLine();
         $real = array();

         // In the case where only one line is changed, the length is omitted.
         // The final group is for git, which appends a guess at the function
         // context to the diff.
         $matches = null;
         $ok = preg_match('/^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? @@(?: .*?)?$/U', $line, $matches);
         if (!$ok) {
             // It's possible we hit the style of an svn1.7 property change.
             // This is a 4-line Index block, followed by an empty line, followed
             // by a "Property changes on:" section similar to svn1.6.
             if ($line == '') {
                 $line = $this->nextNonemptyLine();
                 $ok = preg_match('/^Property changes on:/', $line);
                 if (!$ok) {
                     $this->didFailParse("Confused by empty line");
                 }
                 $line = $this->nextLine();
                 return $this->parsePropertyHunk($change);
             }
             $this->didFailParse("Expected hunk header '@@ -NN,NN +NN,NN @@'.");
         }

         $hunk->setOldOffset($matches[1]);
         $hunk->setNewOffset($matches[3]);

         // Cover for the cases where length wasn't present (implying one line).
         // A trailing optional group that did not participate in the match is
         // absent from $matches, so default to '' rather than null: passing
         // null to strlen() is deprecated as of PHP 8.1.
         $old_len = idx($matches, 2, '');
         if (!strlen($old_len)) {
             $old_len = 1;
         }
         $new_len = idx($matches, 4, '');
         if (!strlen($new_len)) {
             $new_len = 1;
         }
         $hunk->setOldLength($old_len);
         $hunk->setNewLength($new_len);

         // From here on, $old_len and $new_len count down the old-side and
         // new-side lines still expected in this hunk.
         $add = 0;
         $del = 0;

         // Set when the line that ended the hunk has been consumed (or is
         // empty), so the cursor must move forward before the next header
         // check.
         $advance = false;
         while (($line = $this->nextLine()) !== null) {
             if (strlen($line)) {
                 $char = $line[0];
             } else {
                 // '~' is an internal sentinel for a completely empty line.
                 $char = '~';
             }

             switch ($char) {
                 case '\\':
                     if (!preg_match('@\\ No newline at end of file@', $line)) {
                         $this->didFailParse("Expected '\\ No newline at end of file'.");
                     }
                     $real[] = $line;
                     if ($new_len) {
                         // New-side lines are still pending, so the marker is
                         // attributed to the old side and the hunk continues.
                         $hunk->setIsMissingOldNewline(true);
                     } else {
                         // No new-side lines remain: attribute the marker to
                         // the new side and finish this hunk.
                         $hunk->setIsMissingNewNewline(true);
                         $advance = true;
                         break 2;
                     }
                     break;
                 case '+':
                     if (!$new_len) {
                         // No new-side lines are expected, so this line is
                         // not part of the hunk; leave it as the current line.
                         break 2;
                     }
                     ++$add;
                     --$new_len;
                     $real[] = $line;
                     break;
                 case '-':
                     if (!$old_len) {
                         // No old-side lines are expected, so this line is
                         // not part of the hunk; leave it as the current line.
                         break 2;
                     }
                     ++$del;
                     --$old_len;
                     $real[] = $line;
                     break;
                 case ' ':
                     if (!$old_len && !$new_len) {
                         break 2;
                     }
                     --$old_len;
                     --$new_len;
                     $real[] = $line;
                     break;
                 case '~':
                     // An empty line ends the hunk; skip past it afterwards.
                     $advance = true;
                     break 2;
                 default:
                     // Anything else ends the hunk and stays the current line.
                     break 2;
             }
         }

         // Both counters must have reached exactly zero; anything else means
         // the body disagreed with the header.
         if ($old_len != 0 || $new_len != 0) {
             $this->didFailParse("Found the wrong number of hunk lines.");
         }

         $corpus = implode("\n", $real);

         $is_binary = false;
         if ($this->detectBinaryFiles) {
             $is_binary = !phutil_is_utf8($corpus);
             if ($is_binary && $this->tryEncoding) {
                 $is_binary = ArcanistDiffUtils::isHeuristicBinaryFile($corpus);
                 if (!$is_binary) {
                     // NOTE: This feature is HIGHLY EXPERIMENTAL and will cause a lot
                     // of issues. Use it at your own risk.
                     $corpus = mb_convert_encoding($corpus, 'UTF-8', $this->tryEncoding);
                     if (!phutil_is_utf8($corpus)) {
                         // The conversion goes FROM the configured encoding
                         // TO UTF-8; say so in the message.
                         throw new Exception(sprintf(
                             "Failed to convert a hunk from '%s' to UTF-8. Check that the specified encoding is correct.",
                             $this->tryEncoding
                         ));
                     }
                 }
             }
         }

         if ($is_binary) {
             // SVN happily treats binary files which aren't marked with the right
             // mime type as text files. Detect that junk here and mark the file
             // binary. We'll catch stuff with unicode too, but that's verboten
             // anyway. If there are too many false positives with this we might
             // need to make it threshold-triggered instead of triggering on any
             // unprintable byte.
             $change->setFileType(ArcanistDiffChangeType::FILE_BINARY);
         } else {
             $hunk->setCorpus($corpus);
             $hunk->setAddLines($add);
             $hunk->setDelLines($del);
             $change->addHunk($hunk);
         }

         if ($advance) {
             $line = $this->nextNonemptyLine();
         }

         // nextLine() yields null at end of input (presumably
         // nextNonemptyLine() does too); never hand null to preg_match().
     } while ($line !== null && preg_match('/^@@ /', $line));
 }