Skip to content

Commit 7be1820

Browse files
derrickstolee, gitster
authored and committed
backfill: work with prefix pathspecs
The previous change allowed specifying revision arguments on the 'git backfill' command line. This created the opportunity for restricting the initial commit set by filtering the revision walk through a pathspec. Other than filtering the commit set (and thereby the root trees), this did not restrict the path-walk implementation of 'git backfill' and did not restrict the blobs that were downloaded to only those matching the pathspec. Update the path-walk API to accept certain kinds of pathspecs and to silently ignore anything too complex, for now. We will update this in the next change to properly restrict the walk even for complex pathspecs. The current behavior focuses on pathspecs that match paths exactly. This covers exact filenames as well as directory names used as prefixes. Pathspecs containing wildcards or magic are cleared so the path walk downloads all blobs, as before. The reason for this restriction is to allow faster execution by pruning the path walk to only trees that could contribute towards one of those paths as a parent directory. The test directory 'd/f/' (next to 'd/file*.txt') was prepared in a previous commit to exercise the subtlety in prefix matching. Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 302aff0 commit 7be1820

4 files changed

Lines changed: 52 additions & 11 deletions

File tree

path-walk.c

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "list-objects.h"
1212
#include "object.h"
1313
#include "oid-array.h"
14+
#include "path.h"
1415
#include "prio-queue.h"
1516
#include "repository.h"
1617
#include "revision.h"
@@ -206,6 +207,33 @@ static int add_tree_entries(struct path_walk_context *ctx,
206207
match != MATCHED)
207208
continue;
208209
}
210+
if (ctx->revs->prune_data.nr) {
211+
struct pathspec *pd = &ctx->revs->prune_data;
212+
bool found = false;
213+
int did_strip_suffix = strbuf_strip_suffix(&path, "/");
214+
215+
216+
for (int i = 0; i < pd->nr; i++) {
217+
struct pathspec_item *item = &pd->items[i];
218+
219+
/*
220+
* Keep this path if either is a directory prefix
221+
* of the other.
222+
*/
223+
if (dir_prefix(path.buf, item->match) ||
224+
dir_prefix(item->match, path.buf)) {
225+
found = true;
226+
break;
227+
}
228+
}
229+
230+
if (did_strip_suffix)
231+
strbuf_addch(&path, '/');
232+
233+
/* Skip paths that do not match the prefix. */
234+
if (!found)
235+
continue;
236+
}
209237

210238
add_path_to_list(ctx, path.buf, type, &entry.oid,
211239
!(o->flags & UNINTERESTING));
@@ -481,6 +509,17 @@ int walk_objects_by_path(struct path_walk_info *info)
481509
if (info->tags)
482510
info->revs->tag_objects = 1;
483511

512+
if (ctx.revs->prune_data.nr) {
513+
/*
514+
* Only exact prefix pathspecs are currently supported.
515+
* Clear any wildcard or magic pathspecs to avoid
516+
* incorrect prefix matching.
517+
*/
518+
if (ctx.revs->prune_data.has_wildcard ||
519+
ctx.revs->prune_data.magic)
520+
clear_pathspec(&ctx.revs->prune_data);
521+
}
522+
484523
/* Insert a single list for the root tree into the paths. */
485524
CALLOC_ARRAY(root_tree_list, 1);
486525
root_tree_list->type = OBJ_TREE;

path.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ static void strbuf_cleanup_path(struct strbuf *sb)
5656
strbuf_remove(sb, 0, path - sb->buf);
5757
}
5858

59-
static int dir_prefix(const char *buf, const char *dir)
59+
int dir_prefix(const char *buf, const char *dir)
6060
{
6161
size_t len = strlen(dir);
6262
return !strncmp(buf, dir, len) &&

path.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,12 @@ const char *repo_submodule_path_replace(struct repository *repo,
112112
const char *fmt, ...)
113113
__attribute__((format (printf, 4, 5)));
114114

115+
/*
116+
* Given a directory name 'dir' (not ending with a trailing '/'),
117+
* determine if 'buf' is equal to 'dir' or has prefix 'dir'+'/'.
118+
*/
119+
int dir_prefix(const char *buf, const char *dir);
120+
115121
void report_linked_checkout_garbage(struct repository *r);
116122

117123
/*

t/t5620-backfill.sh

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -273,13 +273,11 @@ test_expect_success 'backfill with prefix pathspec' '
273273
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
274274
test_line_count = 48 missing &&
275275
276-
# TODO: The pathspec should limit the downloaded blobs to
277-
# only those matching the prefix "d/f", but currently all
278-
# blobs are downloaded.
279-
git -C backfill-path backfill HEAD -- d/f &&
276+
git -C backfill-path backfill HEAD -- d/f 2>err &&
277+
test_must_be_empty err &&
280278
281279
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
282-
test_line_count = 0 missing
280+
test_line_count = 40 missing
283281
'
284282

285283
test_expect_success 'backfill with multiple pathspecs' '
@@ -292,13 +290,11 @@ test_expect_success 'backfill with multiple pathspecs' '
292290
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
293291
test_line_count = 48 missing &&
294292
295-
# TODO: The pathspecs should limit the downloaded blobs to
296-
# only those matching "d/f" or "a", but currently all blobs
297-
# are downloaded.
298-
git -C backfill-path backfill HEAD -- d/f a &&
293+
git -C backfill-path backfill HEAD -- d/f a 2>err &&
294+
test_must_be_empty err &&
299295
300296
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
301-
test_line_count = 0 missing
297+
test_line_count = 16 missing
302298
'
303299

304300
test_expect_success 'backfill with wildcard pathspec' '

0 commit comments

Comments
 (0)