Skip to content

Commit d31d1f2

Browse files
ttaylorrgitster
authored andcommitted
pack-objects: refactor read_packs_list_from_stdin() to use strmap
The '--stdin-packs' mode of pack-objects maintains two separate string_lists: one for included packs, and one for excluded packs. Each list stores the pack basename as a string and the corresponding `packed_git` pointer in its `->util` field. This works, but makes it awkward to extend the set of pack "kinds" that pack-objects can accept via stdin, since each new kind would need its own string_list and duplicated handling. A future commit will want to do just this, so prepare for that change by handling the various "kinds" of packs specified over stdin in a more generic fashion. Namely, replace the two `string_list`s with a single `strmap` keyed on the pack basename, with values pointing to a new `struct stdin_pack_info`. This struct tracks both the `packed_git` pointer and a `kind` bitfield indicating whether the pack was specified as included or excluded. Extract the logic for sorting packs by mtime and adding their objects into a separate `stdin_packs_add_pack_entries()` helper. While we could have used a `string_list`, we must handle the case where the same pack is specified more than once. With a `string_list` only, we would have to pay a quadratic cost to either (a) insert elements into their sorted positions, or (b) a repeated linear search, which is accidentally quadratic. For that reason, use a strmap instead. This patch does not include any functional changes. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 81e2906 commit d31d1f2

1 file changed

Lines changed: 112 additions & 67 deletions

File tree

builtin/pack-objects.c

Lines changed: 112 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "reachable.h"
2929
#include "oid-array.h"
3030
#include "strvec.h"
31+
#include "strmap.h"
3132
#include "list.h"
3233
#include "packfile.h"
3334
#include "object-file.h"
@@ -3820,107 +3821,151 @@ static void show_commit_pack_hint(struct commit *commit, void *data)
38203821

38213822
}
38223823

3824+
/*
3825+
* stdin_pack_info_kind specifies how a pack specified over stdin
3826+
* should be treated when pack-objects is invoked with --stdin-packs.
3827+
*
3828+
* - STDIN_PACK_INCLUDE: objects in any packs with this flag bit set
3829+
* should be included in the output pack, unless they appear in an
3830+
* excluded pack.
3831+
*
3832+
* - STDIN_PACK_EXCLUDE_CLOSED: objects in any packs with this flag
3833+
* bit set should be excluded from the output pack.
3834+
*
3835+
* Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE are
3836+
* used as traversal tips when invoked with --stdin-packs=follow.
3837+
*/
3838+
enum stdin_pack_info_kind {
3839+
STDIN_PACK_INCLUDE = (1<<0),
3840+
STDIN_PACK_EXCLUDE_CLOSED = (1<<1),
3841+
};
3842+
3843+
struct stdin_pack_info {
3844+
struct packed_git *p;
3845+
enum stdin_pack_info_kind kind;
3846+
};
3847+
38233848
static int pack_mtime_cmp(const void *_a, const void *_b)
38243849
{
3825-
struct packed_git *a = ((const struct string_list_item*)_a)->util;
3826-
struct packed_git *b = ((const struct string_list_item*)_b)->util;
3850+
struct stdin_pack_info *a = ((const struct string_list_item*)_a)->util;
3851+
struct stdin_pack_info *b = ((const struct string_list_item*)_b)->util;
38273852

38283853
/*
38293854
* order packs by descending mtime so that objects are laid out
38303855
* roughly as newest-to-oldest
38313856
*/
3832-
if (a->mtime < b->mtime)
3857+
if (a->p->mtime < b->p->mtime)
38333858
return 1;
3834-
else if (b->mtime < a->mtime)
3859+
else if (b->p->mtime < a->p->mtime)
38353860
return -1;
38363861
else
38373862
return 0;
38383863
}
38393864

3840-
static void read_packs_list_from_stdin(struct rev_info *revs)
3865+
static void stdin_packs_add_pack_entries(struct strmap *packs,
3866+
struct rev_info *revs)
3867+
{
3868+
struct string_list keys = STRING_LIST_INIT_NODUP;
3869+
struct string_list_item *item;
3870+
struct hashmap_iter iter;
3871+
struct strmap_entry *entry;
3872+
3873+
strmap_for_each_entry(packs, &iter, entry) {
3874+
struct stdin_pack_info *info = entry->value;
3875+
if (!info->p)
3876+
die(_("could not find pack '%s'"), entry->key);
3877+
3878+
string_list_append(&keys, entry->key)->util = info;
3879+
}
3880+
3881+
/*
3882+
* Order packs by ascending mtime; use QSORT directly to access the
3883+
* string_list_item's ->util pointer, which string_list_sort() does not
3884+
* provide.
3885+
*/
3886+
QSORT(keys.items, keys.nr, pack_mtime_cmp);
3887+
3888+
for_each_string_list_item(item, &keys) {
3889+
struct stdin_pack_info *info = item->util;
3890+
3891+
if (info->kind & STDIN_PACK_INCLUDE)
3892+
for_each_object_in_pack(info->p,
3893+
add_object_entry_from_pack,
3894+
revs,
3895+
ODB_FOR_EACH_OBJECT_PACK_ORDER);
3896+
}
3897+
3898+
string_list_clear(&keys, 0);
3899+
}
3900+
3901+
static void stdin_packs_read_input(struct rev_info *revs)
38413902
{
38423903
struct strbuf buf = STRBUF_INIT;
3843-
struct string_list include_packs = STRING_LIST_INIT_DUP;
3844-
struct string_list exclude_packs = STRING_LIST_INIT_DUP;
3845-
struct string_list_item *item = NULL;
3904+
struct strmap packs = STRMAP_INIT;
38463905
struct packed_git *p;
38473906

38483907
while (strbuf_getline(&buf, stdin) != EOF) {
3849-
if (!buf.len)
3908+
struct stdin_pack_info *info;
3909+
enum stdin_pack_info_kind kind = STDIN_PACK_INCLUDE;
3910+
const char *key = buf.buf;
3911+
3912+
if (!*key)
38503913
continue;
3914+
else if (*key == '^')
3915+
kind = STDIN_PACK_EXCLUDE_CLOSED;
38513916

3852-
if (*buf.buf == '^')
3853-
string_list_append(&exclude_packs, buf.buf + 1);
3854-
else
3855-
string_list_append(&include_packs, buf.buf);
3917+
if (kind != STDIN_PACK_INCLUDE)
3918+
key++;
3919+
3920+
info = strmap_get(&packs, key);
3921+
if (!info) {
3922+
CALLOC_ARRAY(info, 1);
3923+
strmap_put(&packs, key, info);
3924+
}
3925+
3926+
info->kind |= kind;
38563927

38573928
strbuf_reset(&buf);
38583929
}
38593930

3860-
string_list_sort_u(&include_packs, 0);
3861-
string_list_sort_u(&exclude_packs, 0);
3862-
38633931
repo_for_each_pack(the_repository, p) {
3864-
const char *pack_name = pack_basename(p);
3932+
struct stdin_pack_info *info;
3933+
3934+
info = strmap_get(&packs, pack_basename(p));
3935+
if (!info)
3936+
continue;
38653937

3866-
if ((item = string_list_lookup(&include_packs, pack_name))) {
3938+
if (info->kind & STDIN_PACK_INCLUDE) {
38673939
if (exclude_promisor_objects && p->pack_promisor)
38683940
die(_("packfile %s is a promisor but --exclude-promisor-objects was given"), p->pack_name);
3869-
item->util = p;
3941+
3942+
/*
3943+
* Arguments we got on stdin may not even be
3944+
* packs. First check that to avoid segfaulting
3945+
* later on in e.g. pack_mtime_cmp(), excluded
3946+
* packs are handled below.
3947+
*/
3948+
if (!is_pack_valid(p))
3949+
die(_("packfile %s cannot be accessed"), p->pack_name);
38703950
}
3871-
if ((item = string_list_lookup(&exclude_packs, pack_name)))
3872-
item->util = p;
3873-
}
38743951

3875-
/*
3876-
* Arguments we got on stdin may not even be packs. First
3877-
* check that to avoid segfaulting later on in
3878-
* e.g. pack_mtime_cmp(), excluded packs are handled below.
3879-
*
3880-
* Since we first parsed our STDIN and then sorted the input
3881-
* lines the pack we error on will be whatever line happens to
3882-
* sort first. This is lazy, it's enough that we report one
3883-
* bad case here, we don't need to report the first/last one,
3884-
* or all of them.
3885-
*/
3886-
for_each_string_list_item(item, &include_packs) {
3887-
struct packed_git *p = item->util;
3888-
if (!p)
3889-
die(_("could not find pack '%s'"), item->string);
3890-
if (!is_pack_valid(p))
3891-
die(_("packfile %s cannot be accessed"), p->pack_name);
3892-
}
3952+
if (info->kind & STDIN_PACK_EXCLUDE_CLOSED) {
3953+
/*
3954+
* Marking excluded packs as kept in-core so
3955+
* that later calls to add_object_entry()
3956+
* discards any objects that are also found in
3957+
* excluded packs.
3958+
*/
3959+
p->pack_keep_in_core = 1;
3960+
}
38933961

3894-
/*
3895-
* Then, handle all of the excluded packs, marking them as
3896-
* kept in-core so that later calls to add_object_entry()
3897-
* discards any objects that are also found in excluded packs.
3898-
*/
3899-
for_each_string_list_item(item, &exclude_packs) {
3900-
struct packed_git *p = item->util;
3901-
if (!p)
3902-
die(_("could not find pack '%s'"), item->string);
3903-
p->pack_keep_in_core = 1;
3962+
info->p = p;
39043963
}
39053964

3906-
/*
3907-
* Order packs by ascending mtime; use QSORT directly to access the
3908-
* string_list_item's ->util pointer, which string_list_sort() does not
3909-
* provide.
3910-
*/
3911-
QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);
3912-
3913-
for_each_string_list_item(item, &include_packs) {
3914-
struct packed_git *p = item->util;
3915-
for_each_object_in_pack(p,
3916-
add_object_entry_from_pack,
3917-
revs,
3918-
ODB_FOR_EACH_OBJECT_PACK_ORDER);
3919-
}
3965+
stdin_packs_add_pack_entries(&packs, revs);
39203966

39213967
strbuf_release(&buf);
3922-
string_list_clear(&include_packs, 0);
3923-
string_list_clear(&exclude_packs, 0);
3968+
strmap_clear(&packs, 1);
39243969
}
39253970

39263971
static void add_unreachable_loose_objects(struct rev_info *revs);
@@ -3957,7 +4002,7 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
39574002

39584003
/* avoids adding objects in excluded packs */
39594004
ignore_packed_keep_in_core = 1;
3960-
read_packs_list_from_stdin(&revs);
4005+
stdin_packs_read_input(&revs);
39614006
if (rev_list_unpacked)
39624007
add_unreachable_loose_objects(&revs);
39634008

0 commit comments

Comments
 (0)