Skip to content

Commit 302aff0

Browse files
derrickstolee authored and gitster committed
backfill: accept revision arguments
The existing implementation of 'git backfill' only includes downloading missing blobs reachable from HEAD. Advanced uses may desire more general commit limiting options, such as '--all' for all references, specifying a commit range via negative references, or specifying a recency of use such as with '--since=<date>'.

All of these options are available if we use setup_revisions() to parse the unknown arguments with the revision machinery. This opens up a large number of possibilities, only a small set of which are tested here.

For documentation, we avoid duplicating the option documentation and instead link to the documentation of 'git rev-list'.

Note that these arguments currently allow specifying a pathspec, which modifies the commit history checks but does not limit the paths used in the backfill logic. This will be updated in a future change.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 9b474a6 commit 302aff0

3 files changed

Lines changed: 173 additions & 7 deletions

File tree

Documentation/git-backfill.adoc

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,9 +63,12 @@ OPTIONS
6363
current sparse-checkout. If the sparse-checkout feature is enabled,
6464
then `--sparse` is assumed and can be disabled with `--no-sparse`.
6565

66+
You may also specify the commit limiting options from linkgit:git-rev-list[1].
67+
6668
SEE ALSO
6769
--------
68-
linkgit:git-clone[1].
70+
linkgit:git-clone[1],
71+
linkgit:git-rev-list[1]
6972

7073
GIT
7174
---

builtin/backfill.c

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@ struct backfill_context {
3535
struct oid_array current_batch;
3636
size_t min_batch_size;
3737
int sparse;
38+
struct rev_info revs;
3839
};
3940

4041
static void backfill_context_clear(struct backfill_context *ctx)
@@ -79,7 +80,6 @@ static int fill_missing_blobs(const char *path UNUSED,
7980

8081
static int do_backfill(struct backfill_context *ctx)
8182
{
82-
struct rev_info revs;
8383
struct path_walk_info info = PATH_WALK_INFO_INIT;
8484
int ret;
8585

@@ -91,13 +91,14 @@ static int do_backfill(struct backfill_context *ctx)
9191
}
9292
}
9393

94-
repo_init_revisions(ctx->repo, &revs, "");
95-
handle_revision_arg("HEAD", &revs, 0, 0);
94+
/* Walk from HEAD if otherwise unspecified. */
95+
if (!ctx->revs.pending.nr)
96+
add_head_to_pending(&ctx->revs);
9697

9798
info.blobs = 1;
9899
info.tags = info.commits = info.trees = 0;
99100

100-
info.revs = &revs;
101+
info.revs = &ctx->revs;
101102
info.path_fn = fill_missing_blobs;
102103
info.path_fn_data = ctx;
103104

@@ -108,7 +109,6 @@ static int do_backfill(struct backfill_context *ctx)
108109
download_batch(ctx);
109110

110111
path_walk_info_clear(&info);
111-
release_revisions(&revs);
112112
return ret;
113113
}
114114

@@ -120,6 +120,7 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit
120120
.current_batch = OID_ARRAY_INIT,
121121
.min_batch_size = 50000,
122122
.sparse = 0,
123+
.revs = REV_INFO_INIT,
123124
};
124125
struct option options[] = {
125126
OPT_UNSIGNED(0, "min-batch-size", &ctx.min_batch_size,
@@ -134,7 +135,12 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit
134135
builtin_backfill_usage, options);
135136

136137
argc = parse_options(argc, argv, prefix, options, builtin_backfill_usage,
137-
0);
138+
PARSE_OPT_KEEP_UNKNOWN_OPT |
139+
PARSE_OPT_KEEP_ARGV0 |
140+
PARSE_OPT_KEEP_DASHDASH);
141+
142+
repo_init_revisions(repo, &ctx.revs, prefix);
143+
argc = setup_revisions(argc, argv, &ctx.revs, NULL);
138144

139145
repo_config(repo, git_default_config, NULL);
140146

@@ -143,5 +149,6 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit
143149

144150
result = do_backfill(&ctx);
145151
backfill_context_clear(&ctx);
152+
release_revisions(&ctx.revs);
146153
return result;
147154
}

t/t5620-backfill.sh

Lines changed: 156 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -224,6 +224,162 @@ test_expect_success 'backfill --sparse without cone mode (negative)' '
224224
test_line_count = 12 missing
225225
'
226226

227+
test_expect_success 'backfill with revision range' '
228+
test_when_finished rm -rf backfill-revs &&
229+
git clone --no-checkout --filter=blob:none \
230+
--single-branch --branch=main \
231+
"file://$(pwd)/srv.bare" backfill-revs &&
232+
233+
# No blobs yet
234+
git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
235+
test_line_count = 48 missing &&
236+
237+
git -C backfill-revs backfill HEAD~2..HEAD &&
238+
239+
# 30 objects downloaded.
240+
git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
241+
test_line_count = 18 missing
242+
'
243+
244+
test_expect_success 'backfill with revisions over stdin' '
245+
test_when_finished rm -rf backfill-revs &&
246+
git clone --no-checkout --filter=blob:none \
247+
--single-branch --branch=main \
248+
"file://$(pwd)/srv.bare" backfill-revs &&
249+
250+
# No blobs yet
251+
git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
252+
test_line_count = 48 missing &&
253+
254+
cat >in <<-EOF &&
255+
HEAD
256+
^HEAD~2
257+
EOF
258+
259+
git -C backfill-revs backfill --stdin <in &&
260+
261+
# 30 objects downloaded.
262+
git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
263+
test_line_count = 18 missing
264+
'
265+
266+
test_expect_success 'backfill with prefix pathspec' '
267+
test_when_finished rm -rf backfill-path &&
268+
git clone --bare --filter=blob:none \
269+
--single-branch --branch=main \
270+
"file://$(pwd)/srv.bare" backfill-path &&
271+
272+
# No blobs yet
273+
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
274+
test_line_count = 48 missing &&
275+
276+
# TODO: The pathspec should limit the downloaded blobs to
277+
# only those matching the prefix "d/f", but currently all
278+
# blobs are downloaded.
279+
git -C backfill-path backfill HEAD -- d/f &&
280+
281+
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
282+
test_line_count = 0 missing
283+
'
284+
285+
test_expect_success 'backfill with multiple pathspecs' '
286+
test_when_finished rm -rf backfill-path &&
287+
git clone --bare --filter=blob:none \
288+
--single-branch --branch=main \
289+
"file://$(pwd)/srv.bare" backfill-path &&
290+
291+
# No blobs yet
292+
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
293+
test_line_count = 48 missing &&
294+
295+
# TODO: The pathspecs should limit the downloaded blobs to
296+
# only those matching "d/f" or "a", but currently all blobs
297+
# are downloaded.
298+
git -C backfill-path backfill HEAD -- d/f a &&
299+
300+
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
301+
test_line_count = 0 missing
302+
'
303+
304+
test_expect_success 'backfill with wildcard pathspec' '
305+
test_when_finished rm -rf backfill-path &&
306+
git clone --bare --filter=blob:none \
307+
--single-branch --branch=main \
308+
"file://$(pwd)/srv.bare" backfill-path &&
309+
310+
# No blobs yet
311+
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
312+
test_line_count = 48 missing &&
313+
314+
# TODO: The wildcard pathspec should limit downloaded blobs,
315+
# but currently all blobs are downloaded.
316+
git -C backfill-path backfill HEAD -- "d/file.*.txt" &&
317+
318+
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
319+
test_line_count = 0 missing
320+
'
321+
322+
test_expect_success 'backfill with --all' '
323+
test_when_finished rm -rf backfill-all &&
324+
git clone --no-checkout --filter=blob:none \
325+
"file://$(pwd)/srv-revs.bare" backfill-all &&
326+
327+
# All blobs from all refs are missing
328+
git -C backfill-all rev-list --quiet --objects --all --missing=print >missing &&
329+
test_line_count = 54 missing &&
330+
331+
# Backfill from HEAD gets main blobs only
332+
git -C backfill-all backfill HEAD &&
333+
334+
# Other branch blobs still missing
335+
git -C backfill-all rev-list --quiet --objects --all --missing=print >missing &&
336+
test_line_count = 2 missing &&
337+
338+
# Backfill with --all gets everything
339+
git -C backfill-all backfill --all &&
340+
341+
git -C backfill-all rev-list --quiet --objects --all --missing=print >missing &&
342+
test_line_count = 0 missing
343+
'
344+
345+
test_expect_success 'backfill with --first-parent' '
346+
test_when_finished rm -rf backfill-fp &&
347+
git clone --no-checkout --filter=blob:none \
348+
--single-branch --branch=main \
349+
"file://$(pwd)/srv-revs.bare" backfill-fp &&
350+
351+
git -C backfill-fp rev-list --quiet --objects --missing=print HEAD >missing &&
352+
test_line_count = 52 missing &&
353+
354+
# --first-parent skips the side branch commits, so
355+
# s/file.{1,2}.txt v1 blobs (only in side commit 1) are missed.
356+
git -C backfill-fp backfill --first-parent HEAD &&
357+
358+
git -C backfill-fp rev-list --quiet --objects --missing=print HEAD >missing &&
359+
test_line_count = 2 missing
360+
'
361+
362+
test_expect_success 'backfill with --since' '
363+
test_when_finished rm -rf backfill-since &&
364+
git clone --no-checkout --filter=blob:none \
365+
--single-branch --branch=main \
366+
"file://$(pwd)/srv-revs.bare" backfill-since &&
367+
368+
git -C backfill-since rev-list --quiet --objects --missing=print HEAD >missing &&
369+
test_line_count = 52 missing &&
370+
371+
# Use a cutoff between commits 4 and 5 (between v1 and v2
372+
# iterations). Commits 5-8 still carry v1 of files 2-4 in
373+
# their trees, but v1 of file.1.txt is only in commits 1-4.
374+
SINCE=$(git -C backfill-since log --first-parent --reverse \
375+
--format=%ct HEAD~1 | sed -n 5p) &&
376+
git -C backfill-since backfill --since="@$((SINCE - 1))" HEAD &&
377+
378+
# 6 missing: v1 of file.1.txt in all 6 directories
379+
git -C backfill-since rev-list --quiet --objects --missing=print HEAD >missing &&
380+
test_line_count = 6 missing
381+
'
382+
227383
. "$TEST_DIRECTORY"/lib-httpd.sh
228384
start_httpd
229385

0 commit comments

Comments
 (0)