Skip to content

Commit 3f20c21

Browse files
derrickstolee authored and gitster committed
path-walk: support wildcard pathspecs for blob filtering
Previously, walk_objects_by_path() silently ignored pathspecs containing wildcards or magic by clearing them. This caused all blobs to be downloaded regardless of the given pathspec. Wildcard pathspecs like "d/file.*.txt" are useful for narrowing which blobs to process (e.g., during 'git backfill').

Support wildcard pathspecs by making two changes:

1. Add an 'exact_pathspecs' flag to path_walk_context. When the pathspec has no wildcards or magic, set this flag and use the existing fast-path prefix matching in add_tree_entries(). When wildcards are present, skip that block since prefix matching cannot handle glob patterns.

2. Add a match_pathspec() check in walk_path() to filter out blobs whose full path does not match the pathspec. This provides the actual blob-level filtering for wildcard pathspecs.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 7be1820 commit 3f20c21

2 files changed

Lines changed: 16 additions & 13 deletions

File tree

path-walk.c

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ struct path_walk_context {
6363
*/
6464
struct prio_queue path_stack;
6565
struct strset path_stack_pushed;
66+
67+
unsigned exact_pathspecs:1;
6668
};
6769

6870
static int compare_by_type(const void *one, const void *two, void *cb_data)
@@ -207,7 +209,7 @@ static int add_tree_entries(struct path_walk_context *ctx,
207209
match != MATCHED)
208210
continue;
209211
}
210-
if (ctx->revs->prune_data.nr) {
212+
if (ctx->revs->prune_data.nr && ctx->exact_pathspecs) {
211213
struct pathspec *pd = &ctx->revs->prune_data;
212214
bool found = false;
213215
int did_strip_suffix = strbuf_strip_suffix(&path, "/");
@@ -302,6 +304,13 @@ static int walk_path(struct path_walk_context *ctx,
302304
return 0;
303305
}
304306

307+
if (list->type == OBJ_BLOB &&
308+
ctx->revs->prune_data.nr &&
309+
!match_pathspec(ctx->repo->index, &ctx->revs->prune_data,
310+
path, strlen(path), 0,
311+
NULL, 0))
312+
return 0;
313+
305314
/* Evaluate function pointer on this data, if requested. */
306315
if ((list->type == OBJ_TREE && ctx->info->trees) ||
307316
(list->type == OBJ_BLOB && ctx->info->blobs) ||
@@ -510,14 +519,9 @@ int walk_objects_by_path(struct path_walk_info *info)
510519
info->revs->tag_objects = 1;
511520

512521
if (ctx.revs->prune_data.nr) {
513-
/*
514-
* Only exact prefix pathspecs are currently supported.
515-
* Clear any wildcard or magic pathspecs to avoid
516-
* incorrect prefix matching.
517-
*/
518-
if (ctx.revs->prune_data.has_wildcard ||
519-
ctx.revs->prune_data.magic)
520-
clear_pathspec(&ctx.revs->prune_data);
522+
if (!ctx.revs->prune_data.has_wildcard &&
523+
!ctx.revs->prune_data.magic)
524+
ctx.exact_pathspecs = 1;
521525
}
522526

523527
/* Insert a single list for the root tree into the paths. */

t/t5620-backfill.sh

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -307,12 +307,11 @@ test_expect_success 'backfill with wildcard pathspec' '
307307
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
308308
test_line_count = 48 missing &&
309309
310-
# TODO: The wildcard pathspec should limit downloaded blobs,
311-
# but currently all blobs are downloaded.
312-
git -C backfill-path backfill HEAD -- "d/file.*.txt" &&
310+
git -C backfill-path backfill HEAD -- "d/file.*.txt" 2>err &&
311+
test_must_be_empty err &&
313312
314313
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
315-
test_line_count = 0 missing
314+
test_line_count = 40 missing
316315
'
317316

318317
test_expect_success 'backfill with --all' '

0 commit comments

Comments
 (0)