Skip to content

Commit 7b7cbae

Browse files
pks-t authored and gitster committed
odb: introduce mtime fields for object info requests
There are some use cases where we need to figure out the mtime for objects. Most importantly, this is the case when we want to prune unreachable objects. But getting at that data requires users to manually derive the info either via the loose object's mtime, the packfile's mtime or via the ".mtimes" file.

Introduce a new `struct object_info::mtimep` pointer that allows callers to request an object's mtime. This new field will be used in a subsequent commit.

Note that the concept of "mtime" is ambiguous: a given object may be stored multiple times in the object database, and each of these instances may have a different mtime. Disambiguating these mtimes is not something that can happen at the generic ODB layer: the caller may be searching for the oldest object, the newest object, or even the relation of object mtimes depending on the specific source they are located in. As such, it is the responsibility of the caller to disambiguate mtimes.

A consequence of this is that it's most likely incorrect to look up the mtime via `odb_read_object_info()`, as this interface does not give us enough information to disambiguate the mtime. Document this accordingly and tell users to use `odb_for_each_object()` instead.

Even with this gotcha, though, it's sensible to have this request as part of the object info, as the mtime is a property of the object storage format. If we, for example, had a "black-box" storage backend, we'd still need to be able to query it for the mtime info in a generic way.

We could introduce a safety mechanism that, for example, calls `BUG()` in case we look up the mtime outside of `odb_for_each_object()`. But that feels somewhat heavy-handed.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 317ea9a commit 7b7cbae

4 files changed

Lines changed: 74 additions & 11 deletions

File tree

object-file.c

Lines changed: 25 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -409,6 +409,7 @@ static int read_object_info_from_path(struct odb_source *source,
409409
char hdr[MAX_HEADER_LEN];
410410
unsigned long size_scratch;
411411
enum object_type type_scratch;
412+
struct stat st;
412413

413414
/*
414415
* If we don't care about type or size, then we don't
@@ -421,7 +422,7 @@ static int read_object_info_from_path(struct odb_source *source,
421422
if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) {
422423
struct stat st;
423424

424-
if ((!oi || !oi->disk_sizep) && (flags & OBJECT_INFO_QUICK)) {
425+
if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) {
425426
ret = quick_has_loose(source->loose, oid) ? 0 : -1;
426427
goto out;
427428
}
@@ -431,8 +432,12 @@ static int read_object_info_from_path(struct odb_source *source,
431432
goto out;
432433
}
433434

434-
if (oi && oi->disk_sizep)
435-
*oi->disk_sizep = st.st_size;
435+
if (oi) {
436+
if (oi->disk_sizep)
437+
*oi->disk_sizep = st.st_size;
438+
if (oi->mtimep)
439+
*oi->mtimep = st.st_mtime;
440+
}
436441

437442
ret = 0;
438443
goto out;
@@ -446,14 +451,30 @@ static int read_object_info_from_path(struct odb_source *source,
446451
goto out;
447452
}
448453

449-
map = map_fd(fd, path, &mapsize);
454+
if (fstat(fd, &st)) {
455+
close(fd);
456+
ret = -1;
457+
goto out;
458+
}
459+
460+
mapsize = xsize_t(st.st_size);
461+
if (!mapsize) {
462+
close(fd);
463+
ret = error(_("object file %s is empty"), path);
464+
goto out;
465+
}
466+
467+
map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0);
468+
close(fd);
450469
if (!map) {
451470
ret = -1;
452471
goto out;
453472
}
454473

455474
if (oi->disk_sizep)
456475
*oi->disk_sizep = mapsize;
476+
if (oi->mtimep)
477+
*oi->mtimep = st.st_mtime;
457478

458479
stream_to_end = &stream;
459480

odb.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -702,6 +702,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
702702
oidclr(oi->delta_base_oid, odb->repo->hash_algo);
703703
if (oi->contentp)
704704
*oi->contentp = xmemdupz(co->buf, co->size);
705+
if (oi->mtimep)
706+
*oi->mtimep = 0;
705707
oi->whence = OI_CACHED;
706708
}
707709
return 0;

odb.h

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -318,6 +318,19 @@ struct object_info {
318318
struct object_id *delta_base_oid;
319319
void **contentp;
320320

321+
/*
322+
* The time the given looked-up object has been last modified.
323+
*
324+
* Note: the mtime may be ambiguous in case the object exists multiple
325+
* times in the object database. It is thus _not_ recommended to use
326+
* this field outside of contexts where you would read every instance
327+
* of the object, like for example with `odb_for_each_object()`. As it
328+
* is impossible to say at the ODB level what the intent of the caller
329+
* is (e.g. whether to find the oldest or newest object), it is the
330+
* responsibility of the caller to disambiguate the mtimes.
331+
*/
332+
time_t *mtimep;
333+
321334
/* Response */
322335
enum {
323336
OI_CACHED,

packfile.c

Lines changed: 34 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1578,13 +1578,14 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
15781578
hashmap_add(&delta_base_cache, &ent->ent);
15791579
}
15801580

1581-
int packed_object_info(struct packed_git *p,
1582-
off_t obj_offset, struct object_info *oi)
1581+
static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_offset,
1582+
uint32_t *maybe_index_pos, struct object_info *oi)
15831583
{
15841584
struct pack_window *w_curs = NULL;
15851585
unsigned long size;
15861586
off_t curpos = obj_offset;
15871587
enum object_type type = OBJ_NONE;
1588+
uint32_t pack_pos;
15881589
int ret;
15891590

15901591
/*
@@ -1619,16 +1620,35 @@ int packed_object_info(struct packed_git *p,
16191620
}
16201621
}
16211622

1622-
if (oi->disk_sizep) {
1623-
uint32_t pos;
1624-
if (offset_to_pack_pos(p, obj_offset, &pos) < 0) {
1623+
if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) {
1624+
if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) {
16251625
error("could not find object at offset %"PRIuMAX" "
16261626
"in pack %s", (uintmax_t)obj_offset, p->pack_name);
16271627
ret = -1;
16281628
goto out;
16291629
}
1630+
}
1631+
1632+
if (oi->disk_sizep)
1633+
*oi->disk_sizep = pack_pos_to_offset(p, pack_pos + 1) - obj_offset;
1634+
1635+
if (oi->mtimep) {
1636+
if (p->is_cruft) {
1637+
uint32_t index_pos;
1638+
1639+
if (load_pack_mtimes(p) < 0)
1640+
die(_("could not load .mtimes for cruft pack '%s'"),
1641+
pack_basename(p));
1642+
1643+
if (maybe_index_pos)
1644+
index_pos = *maybe_index_pos;
1645+
else
1646+
index_pos = pack_pos_to_index(p, pack_pos);
16301647

1631-
*oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset;
1648+
*oi->mtimep = nth_packed_mtime(p, index_pos);
1649+
} else {
1650+
*oi->mtimep = p->mtime;
1651+
}
16321652
}
16331653

16341654
if (oi->typep) {
@@ -1681,6 +1701,12 @@ int packed_object_info(struct packed_git *p,
16811701
return ret;
16821702
}
16831703

1704+
int packed_object_info(struct packed_git *p, off_t obj_offset,
1705+
struct object_info *oi)
1706+
{
1707+
return packed_object_info_with_index_pos(p, obj_offset, NULL, oi);
1708+
}
1709+
16841710
static void *unpack_compressed_entry(struct packed_git *p,
16851711
struct pack_window **w_curs,
16861712
off_t curpos,
@@ -2378,7 +2404,8 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
23782404
off_t offset = nth_packed_object_offset(pack, index_pos);
23792405
struct object_info oi = *data->request;
23802406

2381-
if (packed_object_info(pack, offset, &oi) < 0) {
2407+
if (packed_object_info_with_index_pos(pack, offset,
2408+
&index_pos, &oi) < 0) {
23822409
mark_bad_packed_object(pack, oid);
23832410
return -1;
23842411
}

0 commit comments

Comments
 (0)