forked from github/platform-samples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgit-purge-files
executable file
·149 lines (117 loc) · 4.96 KB
/
git-purge-files
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/perl
#
# Purge files from Git repositories
#
use 5.010;
use strict;
use warnings;
use version;
use Getopt::Std;
use File::Temp qw/ tempdir /;
# Print the manual page to STDERR and terminate the script with exit code 1.
#
# The help text is a non-interpolating heredoc (<<'END') on purpose: the
# examples contain regex fragments such as `$"` and `\.` which an
# interpolating heredoc would mangle (`$"` is Perl's list separator variable
# and `\.` collapses to `.`).
sub usage() {
    print STDERR <<'END';
NAME
git-purge-files - Purge files from Git repositories
SYNOPSIS
git-purge-files [-c] [-d] [-h] [<path-regex>] ...
DESCRIPTION
This command purges files from a Git history by rewriting all
commits. Please note that this changes all commit hashes in the
history and therefore all branches and tags.
You want to run this script on a case sensitive file-system (e.g.
ext4 on Linux). Otherwise the resulting Git repository will not
contain changes that modify the casing of file paths.
OPTIONS
<path-regex>...
A list of regular expressions that defines what files should
be purged from the history. Use a `/` to anchor a path to the
root of the repository.
-c
Run in checking mode. The script will run the underlaying
`git fast-export | git fast-import` command without any
modifications to the data stream. Afterwards the input
repository is compared against the output repository.
For large repositories we recommend to run this script in
checking mode (-c) mode first in order to determine if it can
run in the much faster diff mode (-d) mode.
ATTENTION: Although we run a check here, the repository
under test is rewritten and potentially modified!
-d
Enable diff mode. This makes the underlaying `git fast-export`
output only the file differences between two commits. This
mode is quicker but more error prone. It is not recommended
in production usage.
See examples for potential problems here:
https://public-inbox.org/git/CABPp-BFLJ48BZ97Y9mr4i3q7HMqjq18cXMgSYdxqD1cMzH8Spg@mail.gmail.com/
-h
This help.
EXAMPLES
o Remove the file "test.bin" from all directories:
$ git-purge-files "/test.bin$"
o Remove all "*.bin" files from all directories:
$ git-purge-files "\.bin$"
o Remove all files in the "/foo" directory:
$ git-purge-files "^/foo/"
END
    exit(1);
}
# Command line switches filled in by Getopt::Std::getopts():
#   -h  show help, -d  diff mode, -c  checking mode.
our($opt_h, $opt_d, $opt_c);
getopts("hdc") or usage();
usage() if $opt_h;

# Extract the dotted version number (e.g. "2.39.3") from `git --version`.
# Abort early if Git cannot be executed instead of failing later with an
# obscure error from the export/import pipeline.
my ($git_version) = (`git --version` // '') =~ /([0-9]+([.][0-9]+)+)/;
die "Could not determine the Git version. Is Git installed and in your PATH?\n"
    unless defined $git_version;

# Options for `git fast-export`. File contents are not exported (--no-data);
# only the references to them are, which is all we need to drop paths.
my $export_opts = "--all --no-data --progress=1000 --signed-tags=warn-strip --tag-of-filtered-object=rewrite --use-done-feature";
# --reencode is only passed for Git 2.23.0 and newer; here it asks
# fast-export to leave commit message encodings untouched.
$export_opts .= " --reencode=no" if (version->parse($git_version) >= version->parse('2.23.0'));
# Unless diff mode (-d) was requested, emit the full tree for every commit.
$export_opts .= " --full-tree" if (not $opt_d);

# Options for `git fast-import`: require the "done" marker (--done) and
# allow existing branches to be overwritten (--force).
my $import_opts = "--done --force --quiet";
# Decode a path as printed by `git fast-export`.
#
# fast-export wraps paths that contain spaces or special/non-ASCII bytes in
# double quotes and escapes them C-style (e.g. "my file.bin", "\303\244.txt").
# Plain paths are returned unchanged. The result is a byte string.
sub _unquote_fast_export_path {
    my ($path) = @_;
    return $path unless $path =~ /\A"(.*)"\z/s;

    my $body = $1;
    my %char_for = (
        'a' => "\a", 'b' => "\b", 'f' => "\f", 'n' => "\n",
        'r' => "\r", 't' => "\t", 'v' => "\013",
    );
    # Three octal digits encode a single byte; any other escaped character
    # is either a named escape from the table above or stands for itself
    # (this covers \" and \\).
    $body =~ s{\\([0-7]{3}|.)}{
        my $esc = $1;
        length($esc) == 3 ? chr(oct($esc)) : ($char_for{$esc} // $esc)
    }gse;
    return $body;
}

if ($opt_c) {
    say "Checking 'git fast-export | git fast-import' pipeline... ";

    # Print the changed files, author, committer, branches, and commit message
    # for every commit of the Git repository. We intentionally do not output
    # and compare any hashes here as commit and tree hashes can change due to
    # slightly different object serialization methods in older Git clients.
    # E.g. directories have been encoded as 40000 instead of 04000 for a brief
    # period in ~2009 and "git fast-export | git fast-import" would fix that
    # which would lead to different hashes.
    my $git_log = "git log --all --numstat --full-history --format='%nauthor: %an <%ae> %at%ncommitter: %cn <%ce> %ct%nbranch: %S%nbody: %B%n%n---' --no-renames";

    # The temporary directory is deliberately kept so that the user can
    # inspect both log dumps after a failed check.
    my $tmp = tempdir('git-purge-files-XXXXX', TMPDIR => 1);

    # system() returns 0 on success, so the first failing step short-circuits
    # the chain and sends us into the failure branch.
    if (
        system("$git_log > $tmp/expected") or
        system("git fast-export $export_opts | git fast-import $import_opts") or
        system("$git_log > $tmp/result") or
        system("diff $tmp/expected $tmp/result")
    ) {
        say "";
        say "Failure! Rewriting the repository with `git-purge-files` might alter the history.";
        say "Inspect the following files to review the difference:";
        say " - $tmp/expected";
        say " - $tmp/result";
        say "Try to omit the `-d` option!" if ($opt_d);
        exit 1;
    } else {
        say "Success!";
        exit 0;
    }
} else {
    say "Purging files...\n";

    # Nothing to purge without at least one path regex.
    exit 0 if (@ARGV == 0);

    # Combine all patterns into one alternation and compile it once up front
    # so that an invalid pattern is reported before any history is rewritten.
    my $path_regex = join( "|", @ARGV );
    my $path_matcher = eval { qr/$path_regex/ }
        or die "Invalid path regex '$path_regex': $@";

    my $start_time = time;

    open( my $pipe_in, '-|', "git fast-export $export_opts" )
        or die "Cannot run git fast-export: $!";
    open( my $pipe_out, '|-', "git fast-import $import_opts" )
        or die "Cannot run git fast-import: $!";

    # The stream carries raw bytes with explicit byte counts; make sure no
    # I/O layer (e.g. CRLF translation) alters them.
    binmode($pipe_in);
    binmode($pipe_out);

    LINE: while ( my $cmd = <$pipe_in> ) {
        my $data = "";

        if ( $cmd =~ /^data ([0-9]+)$/ ) {
            # Pass data blocks (commit/tag messages) through untouched. They
            # must be read by byte count as they may contain lines that look
            # like stream commands.
            my $expected_bytes = $1;
            my $read_bytes = read($pipe_in, $data, $expected_bytes);
            die "Truncated data block in git fast-export stream\n"
                unless defined $read_bytes && $read_bytes == $expected_bytes;
        }
        elsif ( $cmd =~ /^M [0-9]{6} (?:[0-9a-f]{40}|[0-9a-f]{64}) (.+)$/ ) {
            # File modification: "M <mode> <blob id> <path>". The blob id has
            # 40 hex digits for SHA-1 and 64 for SHA-256 repositories.
            my $pathname = _unquote_fast_export_path($1);

            # Drop the command (and thereby the file) if the path matches.
            # The leading "/" lets users anchor patterns to the repo root.
            next LINE if ("/" . $pathname) =~ $path_matcher;
        }

        print {$pipe_out} $cmd . $data
            or die "Cannot write to git fast-import: $!";
    }

    # close() on a pipe waits for the child and reports a non-zero exit
    # status; without it a failed export/import would go unnoticed.
    close($pipe_in)
        or die "git fast-export failed (exit status " . ($? >> 8) . ")\n";
    close($pipe_out)
        or die "git fast-import failed (exit status " . ($? >> 8) . ")\n";

    my $duration = time - $start_time;
    say "Done! Execution time: $duration s";
}