Skip to content

Commit 509a624

Browse files
committed
filter-repo: fix issue with pruning of empty commits
In order to build the correct tree for a commit, git-fast-import always takes a list of file changes for a merge commit relative to the first parent. When the entire first-parent history of a merge commit is pruned away and the merge had paths with no difference relative to the first parent but which differed relative to later parents, then we really need to generate a new list of file changes in order to have one of those other parents become the new first parent.

An example might help clarify... Let's say that there is a merge commit, and:

* it resolved differences in pathA between its two parents by taking the version of pathA from the first parent.
* pathB was added in the history of the second parent (it is not present in the first parent) and is NOT included in the merge commit (either being deleted, or via rename treated as deleted and added as something else)

For this merge commit, neither pathA nor pathB differ from the first parent, and thus wouldn't appear in the list of file changes shown by fast-export. However, when our filtering rules determine that the first parent (and all its parents) should be pruned away, then the second parent has to become the new first parent of the merge commit. But to end up with the right files in the merge commit despite using a different parent, we need a list of file changes that specifies the changes for both pathA and pathB.

Signed-off-by: Elijah Newren <[email protected]>
1 parent cdec483 commit 509a624

File tree

2 files changed

+71
-1
lines changed

2 files changed

+71
-1
lines changed

git-filter-repo

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ class _GitElement(object):
438438

439439
def __bytes__(self):
440440
"""
441-
Convert GitElement to string; used for debugging
441+
Convert GitElement to bytestring; used for debugging
442442
"""
443443
old_dumped = self.dumped
444444
writeme = io.BytesIO()
@@ -503,6 +503,8 @@ class Blob(_GitElementWithId):
503503
Write this blob element to a file.
504504
"""
505505
self.dumped = 1
506+
HASH_TO_ID[self.original_id] = self.id
507+
ID_TO_HASH[self.id] = self.original_id
506508

507509
file_.write(b'blob\n')
508510
file_.write(b'mark :%d\n' % self.id)
@@ -665,6 +667,8 @@ class Commit(_GitElementWithId):
665667
Write this commit element to a file.
666668
"""
667669
self.dumped = 1
670+
HASH_TO_ID[self.original_id] = self.id
671+
ID_TO_HASH[self.id] = self.original_id
668672

669673
# Make output to fast-import slightly easier for humans to read if the
670674
# message has no trailing newline of its own; cosmetic, but a nice touch...
@@ -754,6 +758,8 @@ class Tag(_GitElementWithId):
754758
"""
755759

756760
self.dumped = 1
761+
HASH_TO_ID[self.original_id] = self.id
762+
ID_TO_HASH[self.id] = self.original_id
757763

758764
file_.write(b'tag %s\n' % self.ref)
759765
if (write_marks and self.id):
@@ -1426,6 +1432,8 @@ def record_id_rename(old_id, new_id):
14261432
# Internal globals
14271433
_IDS = _IDs()
14281434
_SKIPPED_COMMITS = set()
1435+
HASH_TO_ID = {}
1436+
ID_TO_HASH = {}
14291437

14301438
class GitUtils(object):
14311439
@staticmethod
@@ -1513,6 +1521,31 @@ class GitUtils(object):
15131521
blob_size_progress.finish()
15141522
return unpacked_size, packed_size
15151523

1524+
@staticmethod
def get_file_changes(repo, parent_hash, commit_hash):
    """
    Return a FileChanges list with the differences between parent_hash
    and commit_hash

    Runs `git diff-tree -r parent_hash commit_hash` with `repo` as the
    working directory and converts each raw-format output line into a
    FileChange:
      * status D becomes a b'D' (delete) FileChange for the path
      * status A, M or T (added, modified, type changed) becomes a b'M'
        FileChange carrying the new mode and an identifier for the new
        content; the identifier is the id recorded in HASH_TO_ID for the
        new hash when there is one, otherwise the hash itself

    Raises subprocess.CalledProcessError if git exits non-zero, and
    SystemExit for any status letter other than the ones listed above.
    """
    file_changes = []

    cmd = ["git", "diff-tree", "-r", parent_hash, commit_hash]
    output = subprocess.check_output(cmd, cwd=repo)
    for line in output.splitlines():
        # Raw diff line: ":<oldmode> <mode> <oldhash> <newhash> <status>\t<path>"
        fileinfo, path = line.split(b'\t', 1)
        # A leading double quote means git quoted the path; undo that.
        if path.startswith(b'"'):
            path = PathQuoting.dequote(path)
        # The old mode and old hash are not needed: file changes only
        # describe the resulting state of each path.
        _oldmode, mode, _oldhash, newhash, changetype = fileinfo.split()
        if changetype == b'D':
            file_changes.append(FileChange(b'D', path))
        elif changetype in (b'A', b'M', b'T'):
            # A type change (e.g. regular file <-> symlink) is just a new
            # mode plus new content for the path, so it is emitted as a
            # modify like A and M instead of aborting the whole run.
            identifier = HASH_TO_ID.get(newhash, newhash)
            file_changes.append(FileChange(b'M', path, identifier, mode))
        else: # pragma: no cover
            raise SystemExit("Unknown change type for line {}".format(line))

    return file_changes
1548+
15161549
class FilteringOptions(object):
15171550
class AppendFilter(argparse.Action):
15181551
def __call__(self, parser, namespace, values, option_string=None):
@@ -3203,9 +3236,17 @@ class RepoFilter(object):
32033236
self._orig_graph.add_commit_and_parents(commit.old_id, orig_parents)
32043237

32053238
# Prune parents (due to pruning of empty commits) if relevant
3239+
old_1st_parent = parents[0] if parents else None
32063240
parents, new_1st_parent = self._trim_extra_parents(orig_parents, parents)
32073241
commit.parents = parents
32083242

3243+
# If parents were pruned, then we need our file changes to be relative
3244+
# to the new first parent
3245+
if parents and old_1st_parent != parents[0]:
3246+
commit.file_changes = GitUtils.get_file_changes(self._repo_working_dir,
3247+
ID_TO_HASH[parents[0]],
3248+
commit.original_id)
3249+
32093250
# Call the user-defined callback, if any
32103251
if self._commit_callback:
32113252
self._commit_callback(commit, self.callback_metadata(aux_info))

t/t9390-filter-repo.sh

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1309,4 +1309,33 @@ test_expect_success '--state-branch with expanding paths and refs' '
13091309
)
13101310
'
13111311

1312+
test_expect_success 'degenerate merge with non-matching filenames' '
	test_create_repo degenerate_merge_differing_filenames &&
	(
		cd degenerate_merge_differing_filenames &&

		# Branch A: a single file whose name needs quoting, outside pkg/list
		touch "foo \"quote\" bar" &&
		git add "foo \"quote\" bar" &&
		git commit -m "Add foo \"quote\" bar" &&
		git branch A &&

		# Branch B: unrelated history that adds pkg/list/whatever.t
		git checkout --orphan B &&
		git reset --hard &&
		mkdir -p pkg/list &&
		test_commit pkg/list/whatever &&

		# Merge B into A, dropping whatever.t and adding wanted instead
		git checkout A &&
		git merge --allow-unrelated-histories --no-commit B &&
		>pkg/list/wanted &&
		git add pkg/list/wanted &&
		git rm -f pkg/list/whatever.t &&
		git commit &&

		# Keeping only pkg/list prunes the first-parent history of the
		# merge; the file removed by the merge must not come back
		git filter-repo --force --path pkg/list &&
		! test_path_is_file pkg/list/whatever.t &&
		git ls-files >files &&
		! grep pkg/list/whatever.t files
	)
'
1340+
13121341
test_done

0 commit comments

Comments
 (0)