Skip to content

Commit 6ce9d55

Browse files
pks-tgitster
authored andcommitted
midx-write: skip rewriting MIDX with --stdin-packs unless needed
In `write_midx_internal()` we know to skip rewriting the multi-pack index in case the existing one already covers all packs. This logic does not know to handle `git multi-pack-index write --stdin-packs` though, so we end up always rewriting the MIDX in this case even if the MIDX would not change. With our default maintenance strategy this isn't really much of a problem, as git-gc(1) does not use the "--stdin-packs" option. But that is changing with geometric repacking, where "--stdin-packs" is used to explicitly select the packfiles part of the geometric sequence. This issue can be demonstrated trivially with a benchmark in the Git repository: executing `git repack --geometric=2 --write-midx -d` in the Git repository takes more than 3 seconds only to end up with the same multi-pack index as we already had before. The logic that decides if we need to rewrite the MIDX only checks whether the number of packfiles covered will change. That check is of course too lenient for "--stdin-packs", as it could happen that we want to cover a different-but-same-size set of packfiles. But there is no inherent reason why we cannot handle "--stdin-packs". Improve the logic to not only check for the number of packs, but to also verify that we are asked to generate a MIDX for the _same_ packs. This allows us to also skip no-op rewrites for "--stdin-packs". Helped-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent b3bab9d commit 6ce9d55

3 files changed

Lines changed: 156 additions & 30 deletions

File tree

midx-write.c

Lines changed: 70 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,9 +1015,10 @@ static void clear_midx_files(struct odb_source *source,
10151015
strbuf_release(&buf);
10161016
}
10171017

1018-
static bool midx_needs_update(struct write_midx_context *ctx)
1018+
static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_context *ctx)
10191019
{
1020-
struct multi_pack_index *midx = ctx->m;
1020+
struct strset packs = STRSET_INIT;
1021+
struct strbuf buf = STRBUF_INIT;
10211022
bool needed = true;
10221023

10231024
/*
@@ -1028,25 +1029,48 @@ static bool midx_needs_update(struct write_midx_context *ctx)
10281029
if (ctx->incremental)
10291030
goto out;
10301031

1031-
/*
1032-
* If there is no MIDX then either it doesn't exist, or we're doing a
1033-
* geometric repack. We cannot (yet) determine whether we need to
1034-
* update the multi-pack index in the second case.
1035-
*/
1036-
if (!midx)
1037-
goto out;
1038-
10391032
/*
10401033
* Otherwise, we need to verify that the packs covered by the existing
1041-
* MIDX match the packs that we already have. This test is somewhat
1042-
* lenient and will be fixed.
1034+
* MIDX match the packs that we already have. The logic to do so is way
1035+
* more complicated than it has any right to be. This is because:
1036+
*
1037+
* - We cannot assume any ordering.
1038+
*
1039+
* - The MIDX packs may not be loaded at all, and loading them would
1040+
* be wasteful. So we need to use the pack names tracked by the
1041+
* MIDX itself.
1042+
*
1043+
* - The MIDX pack names are tracking the ".idx" files, whereas the
1044+
* packs themselves are tracking the ".pack" files. So we need to
1045+
* strip suffixes.
10431046
*/
10441047
if (ctx->nr != midx->num_packs + midx->num_packs_in_base)
10451048
goto out;
10461049

1050+
for (uint32_t i = 0; i < ctx->nr; i++) {
1051+
strbuf_reset(&buf);
1052+
strbuf_addstr(&buf, pack_basename(ctx->info[i].p));
1053+
strbuf_strip_suffix(&buf, ".pack");
1054+
1055+
if (!strset_add(&packs, buf.buf))
1056+
BUG("same pack added twice?");
1057+
}
1058+
1059+
for (uint32_t i = 0; i < ctx->nr; i++) {
1060+
strbuf_reset(&buf);
1061+
strbuf_addstr(&buf, midx->pack_names[i]);
1062+
strbuf_strip_suffix(&buf, ".idx");
1063+
1064+
if (!strset_contains(&packs, buf.buf))
1065+
goto out;
1066+
strset_remove(&packs, buf.buf);
1067+
}
1068+
10471069
needed = false;
10481070

10491071
out:
1072+
strbuf_release(&buf);
1073+
strset_clear(&packs);
10501074
return needed;
10511075
}
10521076

@@ -1067,6 +1091,7 @@ static int write_midx_internal(struct odb_source *source,
10671091
struct write_midx_context ctx = {
10681092
.preferred_pack_idx = NO_PREFERRED_PACK,
10691093
};
1094+
struct multi_pack_index *midx_to_free = NULL;
10701095
int bitmapped_packs_concat_len = 0;
10711096
int pack_name_concat_len = 0;
10721097
int dropped_packs = 0;
@@ -1147,25 +1172,39 @@ static int write_midx_internal(struct odb_source *source,
11471172
for_each_file_in_pack_dir(source->path, add_pack_to_midx, &ctx);
11481173
stop_progress(&ctx.progress);
11491174

1150-
if (!packs_to_include && !packs_to_drop && !midx_needs_update(&ctx)) {
1151-
struct bitmap_index *bitmap_git;
1152-
int bitmap_exists;
1153-
int want_bitmap = flags & MIDX_WRITE_BITMAP;
1154-
1155-
bitmap_git = prepare_midx_bitmap_git(ctx.m);
1156-
bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git);
1157-
free_bitmap_index(bitmap_git);
1158-
1159-
if (bitmap_exists || !want_bitmap) {
1160-
/*
1161-
* The correct MIDX already exists, and so does a
1162-
* corresponding bitmap (or one wasn't requested).
1163-
*/
1164-
if (!want_bitmap)
1165-
clear_midx_files_ext(source, "bitmap", NULL);
1166-
result = 0;
1167-
goto cleanup;
1175+
if (!packs_to_drop) {
1176+
/*
1177+
* If there is no MIDX then either it doesn't exist, or we're
1178+
* doing a geometric repack. Try to load it from the source to
1179+
* tell these two cases apart.
1180+
*/
1181+
struct multi_pack_index *midx = ctx.m;
1182+
if (!midx)
1183+
midx = midx_to_free = load_multi_pack_index(ctx.source);
1184+
1185+
if (midx && !midx_needs_update(midx, &ctx)) {
1186+
struct bitmap_index *bitmap_git;
1187+
int bitmap_exists;
1188+
int want_bitmap = flags & MIDX_WRITE_BITMAP;
1189+
1190+
bitmap_git = prepare_midx_bitmap_git(midx);
1191+
bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git);
1192+
free_bitmap_index(bitmap_git);
1193+
1194+
if (bitmap_exists || !want_bitmap) {
1195+
/*
1196+
* The correct MIDX already exists, and so does a
1197+
* corresponding bitmap (or one wasn't requested).
1198+
*/
1199+
if (!want_bitmap)
1200+
clear_midx_files_ext(source, "bitmap", NULL);
1201+
result = 0;
1202+
goto cleanup;
1203+
}
11681204
}
1205+
1206+
close_midx(midx_to_free);
1207+
midx_to_free = NULL;
11691208
}
11701209

11711210
if (ctx.incremental && !ctx.nr) {
@@ -1521,6 +1560,7 @@ static int write_midx_internal(struct odb_source *source,
15211560
free(keep_hashes);
15221561
}
15231562
strbuf_release(&midx_name);
1563+
close_midx(midx_to_free);
15241564

15251565
trace2_region_leave("midx", "write_midx_internal", r);
15261566

t/t5319-multi-pack-index.sh

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,57 @@ test_expect_success 'preferred pack cannot be determined without bitmap' '
366366
)
367367
'
368368

369+
test_midx_is_retained () {
370+
test-tool chmtime =0 .git/objects/pack/multi-pack-index &&
371+
ls -l .git/objects/pack/multi-pack-index >expect &&
372+
git multi-pack-index write "$@" &&
373+
ls -l .git/objects/pack/multi-pack-index >actual &&
374+
test_cmp expect actual
375+
}
376+
377+
test_midx_is_rewritten () {
378+
test-tool chmtime =0 .git/objects/pack/multi-pack-index &&
379+
ls -l .git/objects/pack/multi-pack-index >expect &&
380+
git multi-pack-index write "$@" &&
381+
ls -l .git/objects/pack/multi-pack-index >actual &&
382+
! test_cmp expect actual
383+
}
384+
385+
test_expect_success 'up-to-date multi-pack-index is retained' '
386+
test_when_finished "rm -fr midx-up-to-date" &&
387+
git init midx-up-to-date &&
388+
(
389+
cd midx-up-to-date &&
390+
391+
# Write the initial pack that contains the most objects.
392+
test_commit first &&
393+
test_commit second &&
394+
git repack -Ad --write-midx &&
395+
test_midx_is_retained &&
396+
397+
# Writing a new bitmap index should cause us to regenerate the MIDX.
398+
test_midx_is_rewritten --bitmap &&
399+
test_midx_is_retained --bitmap &&
400+
401+
# Ensure that writing a new packfile causes us to rewrite the index.
402+
test_commit incremental &&
403+
git repack -d &&
404+
test_midx_is_rewritten &&
405+
test_midx_is_retained &&
406+
407+
for pack in .git/objects/pack/*.idx
408+
do
409+
basename "$pack" || exit 1
410+
done >stdin &&
411+
test_line_count = 2 stdin &&
412+
test_midx_is_retained --stdin-packs <stdin &&
413+
head -n1 stdin >stdin.trimmed &&
414+
test_midx_is_rewritten --stdin-packs <stdin.trimmed
415+
)
416+
'
417+
418+
test_done
419+
369420
test_expect_success 'verify multi-pack-index success' '
370421
git multi-pack-index verify --object-dir=$objdir
371422
'

t/t7703-repack-geometric.sh

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,41 @@ test_expect_success '--geometric with pack.packSizeLimit' '
287287
)
288288
'
289289

290+
test_expect_success '--geometric --write-midx retains up-to-date MIDX without bitmap index' '
291+
test_when_finished "rm -fr repo" &&
292+
git init repo &&
293+
(
294+
cd repo &&
295+
test_commit initial &&
296+
297+
test_path_is_missing .git/objects/pack/multi-pack-index &&
298+
git repack --geometric=2 --write-midx --no-write-bitmap-index &&
299+
test_path_is_file .git/objects/pack/multi-pack-index &&
300+
test-tool chmtime =0 .git/objects/pack/multi-pack-index &&
301+
302+
ls -l .git/objects/pack/ >expect &&
303+
git repack --geometric=2 --write-midx --no-write-bitmap-index &&
304+
ls -l .git/objects/pack/ >actual &&
305+
test_cmp expect actual
306+
)
307+
'
308+
309+
test_expect_success '--geometric --write-midx retains up-to-date MIDX with bitmap index' '
310+
test_when_finished "rm -fr repo" &&
311+
git init repo &&
312+
test_commit -C repo initial &&
313+
314+
test_path_is_missing repo/.git/objects/pack/multi-pack-index &&
315+
git -C repo repack --geometric=2 --write-midx --write-bitmap-index &&
316+
test_path_is_file repo/.git/objects/pack/multi-pack-index &&
317+
test-tool chmtime =0 repo/.git/objects/pack/multi-pack-index &&
318+
319+
ls -l repo/.git/objects/pack/ >expect &&
320+
git -C repo repack --geometric=2 --write-midx --write-bitmap-index &&
321+
ls -l repo/.git/objects/pack/ >actual &&
322+
test_cmp expect actual
323+
'
324+
290325
test_expect_success '--geometric --write-midx with packfiles in main and alternate ODB' '
291326
test_when_finished "rm -fr shared member" &&
292327

0 commit comments

Comments
 (0)