Skip to content

Commit d93be9c

Browse files
committed
Merge branch 'ps/fsck-stream-from-the-right-object-instance'
"fsck" iterates over packfiles and its access to pack data caused the list to be permuted, which caused it to loop forever; the code to access pack data by "fsck" has been updated to avoid this. * ps/fsck-stream-from-the-right-object-instance: pack-check: fix verification of large objects packfile: expose function to read object stream for an offset object-file: adapt `stream_object_signature()` to take a stream t/helper: improve "genrandom" test helper
2 parents db227bc + 13eb65d commit d93be9c

14 files changed

Lines changed: 114 additions & 43 deletions

object-file.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -129,18 +129,15 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
129129
return !oideq(oid, &real_oid) ? -1 : 0;
130130
}
131131

132-
int stream_object_signature(struct repository *r, const struct object_id *oid)
132+
int stream_object_signature(struct repository *r,
133+
struct odb_read_stream *st,
134+
const struct object_id *oid)
133135
{
134136
struct object_id real_oid;
135-
struct odb_read_stream *st;
136137
struct git_hash_ctx c;
137138
char hdr[MAX_HEADER_LEN];
138139
int hdrlen;
139140

140-
st = odb_read_stream_open(r->objects, oid, NULL);
141-
if (!st)
142-
return -1;
143-
144141
/* Generate the header */
145142
hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size);
146143

@@ -160,7 +157,6 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
160157
git_hash_update(&c, buf, readlen);
161158
}
162159
git_hash_final_oid(&real_oid, &c);
163-
odb_read_stream_close(st);
164160
return !oideq(oid, &real_oid) ? -1 : 0;
165161
}
166162

object-file.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,9 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
166166
* Try reading the object named with "oid" using
167167
* the streaming interface and rehash it to do the same.
168168
*/
169-
int stream_object_signature(struct repository *r, const struct object_id *oid);
169+
int stream_object_signature(struct repository *r,
170+
struct odb_read_stream *stream,
171+
const struct object_id *oid);
170172

171173
enum finalize_object_file_flags {
172174
FOF_SKIP_COLLISION_CHECK = 1,

object.c

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "object.h"
77
#include "replace-object.h"
88
#include "object-file.h"
9+
#include "odb/streaming.h"
910
#include "blob.h"
1011
#include "statinfo.h"
1112
#include "tree.h"
@@ -343,9 +344,21 @@ struct object *parse_object_with_flags(struct repository *r,
343344

344345
if ((!obj || obj->type == OBJ_NONE || obj->type == OBJ_BLOB) &&
345346
odb_read_object_info(r->objects, oid, NULL) == OBJ_BLOB) {
346-
if (!skip_hash && stream_object_signature(r, repl) < 0) {
347-
error(_("hash mismatch %s"), oid_to_hex(oid));
348-
return NULL;
347+
if (!skip_hash) {
348+
struct odb_read_stream *stream = odb_read_stream_open(r->objects, oid, NULL);
349+
350+
if (!stream) {
351+
error(_("unable to open object stream for %s"), oid_to_hex(oid));
352+
return NULL;
353+
}
354+
355+
if (stream_object_signature(r, stream, repl) < 0) {
356+
error(_("hash mismatch %s"), oid_to_hex(oid));
357+
odb_read_stream_close(stream);
358+
return NULL;
359+
}
360+
361+
odb_read_stream_close(stream);
349362
}
350363
parse_blob_buffer(lookup_blob(r, oid));
351364
return lookup_object(r, oid);

pack-check.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "packfile.h"
1010
#include "object-file.h"
1111
#include "odb.h"
12+
#include "odb/streaming.h"
1213

1314
struct idx_entry {
1415
off_t offset;
@@ -104,6 +105,7 @@ static int verify_packfile(struct repository *r,
104105
QSORT(entries, nr_objects, compare_entries);
105106

106107
for (i = 0; i < nr_objects; i++) {
108+
struct odb_read_stream *stream = NULL;
107109
void *data;
108110
struct object_id oid;
109111
enum object_type type;
@@ -152,7 +154,9 @@ static int verify_packfile(struct repository *r,
152154
type) < 0)
153155
err = error("packed %s from %s is corrupt",
154156
oid_to_hex(&oid), p->pack_name);
155-
else if (!data && stream_object_signature(r, &oid) < 0)
157+
else if (!data &&
158+
(packfile_read_object_stream(&stream, &oid, p, entries[i].offset) < 0 ||
159+
stream_object_signature(r, stream, &oid) < 0))
156160
err = error("packed %s from %s is corrupt",
157161
oid_to_hex(&oid), p->pack_name);
158162
else if (fn) {
@@ -163,12 +167,14 @@ static int verify_packfile(struct repository *r,
163167
}
164168
if (((base_count + i) & 1023) == 0)
165169
display_progress(progress, base_count + i);
166-
free(data);
167170

171+
if (stream)
172+
odb_read_stream_close(stream);
173+
free(data);
168174
}
175+
169176
display_progress(progress, base_count + i);
170177
free(entries);
171-
172178
return err;
173179
}
174180

packfile.c

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2621,32 +2621,28 @@ static int close_istream_pack_non_delta(struct odb_read_stream *_st)
26212621
return 0;
26222622
}
26232623

2624-
int packfile_store_read_object_stream(struct odb_read_stream **out,
2625-
struct packfile_store *store,
2626-
const struct object_id *oid)
2624+
int packfile_read_object_stream(struct odb_read_stream **out,
2625+
const struct object_id *oid,
2626+
struct packed_git *pack,
2627+
off_t offset)
26272628
{
26282629
struct odb_packed_read_stream *stream;
26292630
struct pack_window *window = NULL;
2630-
struct object_info oi = OBJECT_INFO_INIT;
26312631
enum object_type in_pack_type;
26322632
unsigned long size;
26332633

2634-
oi.sizep = &size;
2634+
in_pack_type = unpack_object_header(pack, &window, &offset, &size);
2635+
unuse_pack(&window);
26352636

2636-
if (packfile_store_read_object_info(store, oid, &oi, 0) ||
2637-
oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA ||
2638-
oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA ||
2639-
repo_settings_get_big_file_threshold(store->source->odb->repo) >= size)
2637+
if (repo_settings_get_big_file_threshold(pack->repo) >= size)
26402638
return -1;
26412639

2642-
in_pack_type = unpack_object_header(oi.u.packed.pack,
2643-
&window,
2644-
&oi.u.packed.offset,
2645-
&size);
2646-
unuse_pack(&window);
26472640
switch (in_pack_type) {
26482641
default:
26492642
return -1; /* we do not do deltas for now */
2643+
case OBJ_BAD:
2644+
mark_bad_packed_object(pack, oid);
2645+
return -1;
26502646
case OBJ_COMMIT:
26512647
case OBJ_TREE:
26522648
case OBJ_BLOB:
@@ -2660,10 +2656,22 @@ int packfile_store_read_object_stream(struct odb_read_stream **out,
26602656
stream->base.type = in_pack_type;
26612657
stream->base.size = size;
26622658
stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED;
2663-
stream->pack = oi.u.packed.pack;
2664-
stream->pos = oi.u.packed.offset;
2659+
stream->pack = pack;
2660+
stream->pos = offset;
26652661

26662662
*out = &stream->base;
26672663

26682664
return 0;
26692665
}
2666+
2667+
int packfile_store_read_object_stream(struct odb_read_stream **out,
2668+
struct packfile_store *store,
2669+
const struct object_id *oid)
2670+
{
2671+
struct pack_entry e;
2672+
2673+
if (!find_pack_entry(store, oid, &e))
2674+
return -1;
2675+
2676+
return packfile_read_object_stream(out, oid, e.p, e.offset);
2677+
}

packfile.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,11 @@ off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
449449
off_t *curpos, enum object_type type,
450450
off_t delta_obj_offset);
451451

452+
int packfile_read_object_stream(struct odb_read_stream **out,
453+
const struct object_id *oid,
454+
struct packed_git *pack,
455+
off_t offset);
456+
452457
void release_pack_memory(size_t);
453458

454459
/* global flag to enable extra checks when accessing packed objects */

t/helper/test-genrandom.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
#include "test-tool.h"
88
#include "git-compat-util.h"
9+
#include "parse.h"
910

1011
int cmd__genrandom(int argc, const char **argv)
1112
{
@@ -22,7 +23,9 @@ int cmd__genrandom(int argc, const char **argv)
2223
next = next * 11 + *c;
2324
} while (*c++);
2425

25-
count = (argc == 3) ? strtoul(argv[2], NULL, 0) : ULONG_MAX;
26+
count = ULONG_MAX;
27+
if (argc == 3 && !git_parse_ulong(argv[2], &count))
28+
return error_errno("cannot parse argument '%s'", argv[2]);
2629

2730
while (count--) {
2831
next = next * 1103515245 + 12345;

t/t1006-cat-file.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -643,7 +643,7 @@ test_expect_success 'object reference via commit text search' '
643643
'
644644

645645
test_expect_success 'setup blobs which are likely to delta' '
646-
test-tool genrandom foo 10240 >foo &&
646+
test-tool genrandom foo 10k >foo &&
647647
{ cat foo && echo plus; } >foo-plus &&
648648
git add foo foo-plus &&
649649
git commit -m foo &&

t/t1050-large.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,9 @@ test_expect_success 'packsize limit' '
104104
# mid1 and mid2 will fit within 256k limit but
105105
# appending mid3 will bust the limit and will
106106
# result in a separate packfile.
107-
test-tool genrandom "a" $(( 66 * 1024 )) >mid1 &&
108-
test-tool genrandom "b" $(( 80 * 1024 )) >mid2 &&
109-
test-tool genrandom "c" $(( 128 * 1024 )) >mid3 &&
107+
test-tool genrandom "a" 66k >mid1 &&
108+
test-tool genrandom "b" 80k >mid2 &&
109+
test-tool genrandom "c" 128k >mid3 &&
110110
git add mid1 mid2 mid3 &&
111111
112112
count=0 &&

t/t1450-fsck.sh

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -852,6 +852,44 @@ test_expect_success 'fsck errors in packed objects' '
852852
! grep corrupt out
853853
'
854854

855+
test_expect_success 'fsck handles multiple packfiles with big blobs' '
856+
test_when_finished "rm -rf repo" &&
857+
git init repo &&
858+
(
859+
cd repo &&
860+
861+
# We construct two packfiles with two objects in common and one
862+
# object not in common. The objects in common can then be
863+
# corrupted in one of the packfiles, respectively. The other
864+
# objects that are unique to the packs are merely used to not
865+
# have both packs contain the same data.
866+
blob_one=$(test-tool genrandom one 200k | git hash-object -t blob -w --stdin) &&
867+
blob_two=$(test-tool genrandom two 200k | git hash-object -t blob -w --stdin) &&
868+
blob_three=$(test-tool genrandom three 200k | git hash-object -t blob -w --stdin) &&
869+
blob_four=$(test-tool genrandom four 200k | git hash-object -t blob -w --stdin) &&
870+
pack_one=$(printf "%s\n" "$blob_one" "$blob_two" "$blob_three" | git pack-objects .git/objects/pack/pack) &&
871+
pack_two=$(printf "%s\n" "$blob_two" "$blob_three" "$blob_four" | git pack-objects .git/objects/pack/pack) &&
872+
chmod a+w .git/objects/pack/pack-*.pack &&
873+
874+
# Corrupt blob two in the first pack.
875+
git verify-pack -v .git/objects/pack/pack-$pack_one >objects &&
876+
offset_one=$(sed <objects -n "s/^$blob_two .* \(.*\)$/\1/p") &&
877+
printf "\0" | dd of=.git/objects/pack/pack-$pack_one.pack bs=1 conv=notrunc seek=$offset_one &&
878+
879+
# Corrupt blob three in the second pack.
880+
git verify-pack -v .git/objects/pack/pack-$pack_two >objects &&
881+
offset_two=$(sed <objects -n "s/^$blob_three .* \(.*\)$/\1/p") &&
882+
printf "\0" | dd of=.git/objects/pack/pack-$pack_two.pack bs=1 conv=notrunc seek=$offset_two &&
883+
884+
# We now expect to see two failures for the corrupted objects,
885+
# even though they exist in a non-corrupted form in the
886+
# respective other pack.
887+
test_must_fail git -c core.bigFileThreshold=100k fsck 2>err &&
888+
test_grep "unknown object type 0 at offset $offset_one in .git/objects/pack/pack-$pack_one.pack" err &&
889+
test_grep "unknown object type 0 at offset $offset_two in .git/objects/pack/pack-$pack_two.pack" err
890+
)
891+
'
892+
855893
test_expect_success 'fsck fails on corrupt packfile' '
856894
hsh=$(git commit-tree -m mycommit HEAD^{tree}) &&
857895
pack=$(echo $hsh | git pack-objects .git/objects/pack/pack) &&
@@ -918,7 +956,7 @@ test_expect_success 'fsck detects trailing loose garbage (large blob)' '
918956
test_expect_success 'fsck detects truncated loose object' '
919957
# make it big enough that we know we will truncate in the data
920958
# portion, not the header
921-
test-tool genrandom truncate 4096 >file &&
959+
test-tool genrandom truncate 4k >file &&
922960
blob=$(git hash-object -w file) &&
923961
file=$(sha1_file $blob) &&
924962
test_when_finished "remove_object $blob" &&

0 commit comments

Comments
 (0)