Skip to content

Commit 9bc1518

Browse files
pks-t authored and gitster committed
builtin/maintenance: introduce "geometric-repack" task
Introduce a new "geometric-repack" task. This task uses our geometric repack infrastructure as provided by git-repack(1) itself, which is a strategy that especially hosting providers tend to use to amortize the costs of repacking objects.

There is one issue though with geometric repacks, namely that they unconditionally pack all loose objects, regardless of whether or not they are reachable. This is done because it means that we can completely skip the reachability step, which significantly speeds up the operation. But it has the big downside that we are unable to expire objects over time.

To address this issue we thus use a split strategy in this new task: whenever a geometric repack would merge together all packs, we instead do an all-into-one repack. By default, these all-into-one repacks have cruft packs enabled, so unreachable objects would now be written into their own pack. Consequently, they won't be soaked up during geometric repacking anymore and can be expired with the next full repack, assuming that their expiry date has passed.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Acked-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 60c0af8 commit 9bc1518

3 files changed

Lines changed: 251 additions & 0 deletions

File tree

Documentation/config/maintenance.adoc

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,17 @@ maintenance.incremental-repack.auto::
7575
number of pack-files not in the multi-pack-index is at least the value
7676
of `maintenance.incremental-repack.auto`. The default value is 10.
7777

78+
maintenance.geometric-repack.auto::
79+
This integer config option controls how often the `geometric-repack`
80+
task should be run as part of `git maintenance run --auto`. If zero,
81+
then the `geometric-repack` task will not run with the `--auto`
82+
option. A negative value will force the task to run every time.
83+
Otherwise, a positive value implies the command should run either when
84+
there are packfiles that need to be merged together to retain the
85+
geometric progression, or when there are at least this many loose
86+
objects that would be written into a new packfile. The default value is
87+
100.
88+
7889
maintenance.reflog-expire.auto::
7990
This integer config option controls how often the `reflog-expire` task
8091
should be run as part of `git maintenance run --auto`. If zero, then

builtin/gc.c

Lines changed: 102 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
#include "pack-objects.h"
3535
#include "path.h"
3636
#include "reflog.h"
37+
#include "repack.h"
3738
#include "rerere.h"
3839
#include "blob.h"
3940
#include "tree.h"
@@ -254,6 +255,7 @@ enum maintenance_task_label {
254255
TASK_PREFETCH,
255256
TASK_LOOSE_OBJECTS,
256257
TASK_INCREMENTAL_REPACK,
258+
TASK_GEOMETRIC_REPACK,
257259
TASK_GC,
258260
TASK_COMMIT_GRAPH,
259261
TASK_PACK_REFS,
@@ -1566,6 +1568,101 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts
15661568
return 0;
15671569
}
15681570

1571+
static int maintenance_task_geometric_repack(struct maintenance_run_opts *opts,
1572+
struct gc_config *cfg)
1573+
{
1574+
struct pack_geometry geometry = {
1575+
.split_factor = 2,
1576+
};
1577+
struct pack_objects_args po_args = {
1578+
.local = 1,
1579+
};
1580+
struct existing_packs existing_packs = EXISTING_PACKS_INIT;
1581+
struct string_list kept_packs = STRING_LIST_INIT_DUP;
1582+
struct child_process child = CHILD_PROCESS_INIT;
1583+
int ret;
1584+
1585+
existing_packs.repo = the_repository;
1586+
existing_packs_collect(&existing_packs, &kept_packs);
1587+
pack_geometry_init(&geometry, &existing_packs, &po_args);
1588+
pack_geometry_split(&geometry);
1589+
1590+
child.git_cmd = 1;
1591+
1592+
strvec_pushl(&child.args, "repack", "-d", "-l", NULL);
1593+
if (geometry.split < geometry.pack_nr)
1594+
strvec_push(&child.args, "--geometric=2");
1595+
else
1596+
add_repack_all_option(cfg, NULL, &child.args);
1597+
if (opts->quiet)
1598+
strvec_push(&child.args, "--quiet");
1599+
if (the_repository->settings.core_multi_pack_index)
1600+
strvec_push(&child.args, "--write-midx");
1601+
1602+
if (run_command(&child)) {
1603+
ret = error(_("failed to perform geometric repack"));
1604+
goto out;
1605+
}
1606+
1607+
ret = 0;
1608+
1609+
out:
1610+
existing_packs_release(&existing_packs);
1611+
pack_geometry_release(&geometry);
1612+
return ret;
1613+
}
1614+
1615+
/*
 * Decide whether the "geometric-repack" task should run as part of
 * `git maintenance run --auto`.
 *
 * The threshold is read from "maintenance.geometric-repack.auto" and
 * defaults to 100:
 *
 *   - zero: never run;
 *   - negative: always run;
 *   - positive: run when the geometric split is non-zero, or when
 *     too_many_loose_objects() reports that the threshold is exceeded.
 *
 * Returns 1 if the task should run and 0 otherwise.
 */
static int geometric_repack_auto_condition(struct gc_config *cfg UNUSED)
{
	struct existing_packs packs = EXISTING_PACKS_INIT;
	struct string_list extra_kept = STRING_LIST_INIT_DUP;
	struct pack_objects_args pack_args = { .local = 1 };
	struct pack_geometry geom = { .split_factor = 2 };
	int threshold = 100;
	int needed;

	repo_config_get_int(the_repository, "maintenance.geometric-repack.auto",
			    &threshold);
	/* Zero disables the task, a negative value forces it. */
	if (threshold <= 0)
		return threshold < 0;

	packs.repo = the_repository;
	existing_packs_collect(&packs, &extra_kept);
	pack_geometry_init(&geom, &packs, &pack_args);
	pack_geometry_split(&geom);

	/*
	 * A non-zero split means packs would be merged with one another,
	 * in which case we always repack. Only when that is not the case
	 * do we fall back to estimating the number of loose objects.
	 */
	needed = geom.split || too_many_loose_objects(threshold);

	existing_packs_release(&packs);
	pack_geometry_release(&geom);
	return needed;
}
1665+
15691666
typedef int (*maintenance_task_fn)(struct maintenance_run_opts *opts,
15701667
struct gc_config *cfg);
15711668
typedef int (*maintenance_auto_fn)(struct gc_config *cfg);
@@ -1608,6 +1705,11 @@ static const struct maintenance_task tasks[] = {
16081705
.background = maintenance_task_incremental_repack,
16091706
.auto_condition = incremental_repack_auto_condition,
16101707
},
1708+
[TASK_GEOMETRIC_REPACK] = {
1709+
.name = "geometric-repack",
1710+
.background = maintenance_task_geometric_repack,
1711+
.auto_condition = geometric_repack_auto_condition,
1712+
},
16111713
[TASK_GC] = {
16121714
.name = "gc",
16131715
.foreground = maintenance_task_gc_foreground,

t/t7900-maintenance.sh

Lines changed: 138 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -465,6 +465,144 @@ test_expect_success 'maintenance.incremental-repack.auto (when config is unset)'
465465
)
466466
'
467467

468+
# Run the geometric-repack maintenance task and verify its outcome.
#
# $1: number of packfiles expected in .git/objects/pack afterwards.
#
# Checks via the trace2 event log that a geometric git-repack(1) was
# spawned, that the expected number of packfiles exists and that
# git-count-objects(1) reports no loose objects.
run_and_verify_geometric_pack () {
	EXPECTED_PACKS="$1" &&

	# Start from a fresh trace so that only this run is inspected.
	rm -f trace2.txt &&
	GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
		git maintenance run --task=geometric-repack 2>/dev/null &&
	test_subcommand git repack -d -l --geometric=2 --quiet --write-midx \
		<trace2.txt &&

	# The listing has one line per packfile.
	ls -l .git/objects/pack/*.pack >packfiles &&
	test_line_count = "$EXPECTED_PACKS" packfiles &&

	# Nothing may be left behind as a loose object.
	git count-objects -v >count &&
	test_grep '^count: 0$' count
}
486+
487+
# End-to-end check of the geometric-repack maintenance task: the task is run
# repeatedly in a fresh repository, and the trace2 event log is used to verify
# which git-repack(1) invocation was spawned (geometric or all-into-one with
# cruft options) and how many packfiles exist afterwards.
test_expect_success 'geometric repacking task' '
488+
test_when_finished "rm -rf repo" &&
489+
git init repo &&
490+
(
491+
cd repo &&
492+
git config set maintenance.auto false &&
493+
test_commit initial &&
494+
495+
# The initial repack causes an all-into-one repack.
496+
GIT_TRACE2_EVENT="$(pwd)/initial-repack.txt" \
497+
git maintenance run --task=geometric-repack 2>/dev/null &&
498+
test_subcommand git repack -d -l --cruft --cruft-expiration=2.weeks.ago \
499+
--quiet --write-midx <initial-repack.txt &&
500+
501+
# Repacking should now cause a no-op geometric repack because
502+
# no packfiles need to be combined.
503+
ls -l .git/objects/pack >before &&
504+
run_and_verify_geometric_pack 1 &&
505+
ls -l .git/objects/pack >after &&
506+
test_cmp before after &&
507+
508+
# This incremental change creates a new packfile that only
509+
# soaks up loose objects. The packfiles are not getting merged
510+
# at this point.
511+
test_commit loose &&
512+
run_and_verify_geometric_pack 2 &&
513+
514+
# Both packfiles have 3 objects, so the next run would cause us
515+
# to merge all packfiles together. This should be turned into
516+
# an all-into-one-repack.
517+
GIT_TRACE2_EVENT="$(pwd)/all-into-one-repack.txt" \
518+
git maintenance run --task=geometric-repack 2>/dev/null &&
519+
test_subcommand git repack -d -l --cruft --cruft-expiration=2.weeks.ago \
520+
--quiet --write-midx <all-into-one-repack.txt &&
521+
522+
# The geometric repack soaks up unreachable objects.
523+
echo blob-1 | git hash-object -w --stdin -t blob &&
524+
run_and_verify_geometric_pack 2 &&
525+
526+
# A second unreachable object should be written into another packfile.
527+
echo blob-2 | git hash-object -w --stdin -t blob &&
528+
run_and_verify_geometric_pack 3 &&
529+
530+
# And these two small packs should now be merged via the
531+
# geometric repack. The large packfile should remain intact.
532+
run_and_verify_geometric_pack 2 &&
533+
534+
# If we now add two more objects and repack twice we should
535+
# then see another all-into-one repack. This time around
536+
# though, as we have unreachable objects, we should also see a
537+
# cruft pack.
538+
echo blob-3 | git hash-object -w --stdin -t blob &&
539+
echo blob-4 | git hash-object -w --stdin -t blob &&
540+
run_and_verify_geometric_pack 3 &&
541+
GIT_TRACE2_EVENT="$(pwd)/cruft-repack.txt" \
542+
git maintenance run --task=geometric-repack 2>/dev/null &&
543+
test_subcommand git repack -d -l --cruft --cruft-expiration=2.weeks.ago \
544+
--quiet --write-midx <cruft-repack.txt &&
545+
ls .git/objects/pack/*.pack >packs &&
546+
test_line_count = 2 packs &&
547+
ls .git/objects/pack/*.mtimes >cruft &&
548+
test_line_count = 1 cruft
549+
)
550+
'
551+
552+
# Run `git maintenance run --auto --task=geometric-repack` and check via the
# trace2 event log whether git-repack(1) was spawned.
#
# $1: "true" if a repack is expected, "false" if it is not; any other value
#     is reported as a BUG.
# $2: optional "<key>=<value>" suffix that is passed to git as
#     `-c maintenance.geometric-repack.<key>=<value>`, e.g. "auto=0".
test_geometric_repack_needed () {
	NEEDED="$1" &&
	GEOMETRIC_CONFIG="$2" &&
	rm -f trace2.txt &&
	GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
	git ${GEOMETRIC_CONFIG:+-c maintenance.geometric-repack.$GEOMETRIC_CONFIG} \
		maintenance run --auto --task=geometric-repack 2>/dev/null &&
	case "$NEEDED" in
	true)
		test_grep "\[\"git\",\"repack\"," trace2.txt;;
	false)
		# Use the negated form of test_grep rather than "! test_grep"
		# so that the file gets printed when the check fails.
		test_grep ! "\[\"git\",\"repack\"," trace2.txt;;
	*)
		BUG "invalid parameter: $NEEDED";;
	esac
}
568+
569+
# Checks when `git maintenance run --auto` triggers the geometric-repack task
# for different values of maintenance.geometric-repack.auto: in an empty
# repository, with loose objects around the configured limit, and with
# packfiles that would be merged together.
test_expect_success 'geometric repacking with --auto' '
570+
test_when_finished "rm -rf repo" &&
571+
git init repo &&
572+
(
573+
cd repo &&
574+
575+
# An empty repository does not need repacking, except when
576+
# explicitly told to do it.
577+
test_geometric_repack_needed false &&
578+
test_geometric_repack_needed false auto=0 &&
579+
test_geometric_repack_needed false auto=1 &&
580+
test_geometric_repack_needed true auto=-1 &&
581+
582+
test_oid_init &&
583+
584+
# Loose objects cause a repack when crossing the limit. Note
585+
# that the number of objects gets extrapolated by having a look
586+
# at the "objects/17/" shard.
587+
test_commit "$(test_oid blob17_1)" &&
588+
test_geometric_repack_needed false &&
589+
test_commit "$(test_oid blob17_2)" &&
590+
test_geometric_repack_needed false auto=257 &&
591+
test_geometric_repack_needed true auto=256 &&
592+
593+
# Force another repack.
594+
test_commit first &&
595+
test_commit second &&
596+
test_geometric_repack_needed true auto=-1 &&
597+
598+
# We now have two packfiles that would be merged together. As
599+
# such, the repack should always happen unless the user has
600+
# disabled the auto task.
601+
test_geometric_repack_needed false auto=0 &&
602+
test_geometric_repack_needed true auto=9000
603+
)
604+
'
605+
468606
test_expect_success 'pack-refs task' '
469607
for n in $(test_seq 1 5)
470608
do

0 commit comments

Comments
 (0)