Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions sei-db/db_engine/litt/disktable/segment/segment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,70 @@ func reloadSegmentExpectingRecovery(t *testing.T, segmentPath *SegmentPath, inde
return keys, seg
}

// TestSealLoadedSegmentSingleShardPrefix pins the single-shard durability invariant. With a single
// shard every value is appended to one value file and every key to one key file, both in write
// order, so the Put groups that survive a crash must be a contiguous PREFIX of the write order and
// never a subset with holes in it.
//
// Every scenario writes and seals a segment, marks it unsealed again, chops bytes off the tail of
// one file to imitate a torn write, reloads the segment, and checks that the recovered keys are
// exactly key000..key004.
func TestSealLoadedSegmentSingleShardPrefix(t *testing.T) {
	t.Parallel()

	const (
		n             = 8
		valueLen      = 10 // "val" plus a 7-digit zero-padded index: exactly 10 bytes per value
		truncatedSize = 55 // value-file size left behind by the value-file scenario
		wantSurvivors = 5  // both scenarios are expected to recover key000..key004
	)
	keyAt := func(i int) []byte { return []byte(fmt.Sprintf("key%03d", i)) }
	valueAt := func(i int) []byte { return []byte(fmt.Sprintf("val%07d", i)) }

	// writeRun issues n standalone Puts against a fresh single-shard segment, seals it, and then
	// resets the metadata to unsealed so the next load behaves as if the crash happened before the
	// seal completed.
	writeRun := func(t *testing.T) (*SegmentPath, uint32) {
		seg, segPath, idx := newSingleShardSegment(t)
		for i := 0; i < n; i++ {
			req := &types.PutRequest{Key: keyAt(i), Value: valueAt(i)}
			writeNoErr(t, seg, req)
		}
		_, sealErr := seg.Seal(time.Now())
		require.NoError(t, sealErr)
		markSegmentUnsealed(t, segPath, idx)
		return segPath, idx
	}

	// assertPrefix checks that the recovered keys are precisely key000..key{survivors-1}, in write
	// order and without gaps, and that each survivor's value range ends at or before valueFileSize
	// (the size of the possibly truncated value file).
	assertPrefix := func(t *testing.T, keys []*types.ScopedKey, survivors int, valueFileSize int) {
		require.Len(t, keys, survivors)
		// require.Len aborts the test on mismatch, so from here on len(keys) == survivors.
		for i, k := range keys {
			require.Equal(t, string(keyAt(i)), string(k.Key), "record %d", i)
			valueEnd := int(k.Address.Offset()) + int(k.Address.ValueSize())
			require.LessOrEqual(t, valueEnd, valueFileSize, "survivor %d value must fit in the value file", i)
		}
	}

	scenarios := []struct {
		name string
		// tear damages the on-disk segment to simulate the torn tail.
		tear func(t *testing.T, segPath *SegmentPath, idx uint32)
		// valueFileSize is the value-file size the survivors must fit within after tearing.
		valueFileSize int
	}{
		{
			// Values occupy [i*10,(i+1)*10), 80 bytes in total. Shrinking the file to 55 bytes
			// lands inside value 5 ([50,60)), so only the values ending at or before 55 survive:
			// key000..key004.
			name: "value_file_torn_mid_value",
			tear: func(t *testing.T, segPath *SegmentPath, idx uint32) {
				truncateValueFileBy(t, segPath, idx, 0, n*valueLen-truncatedSize)
			},
			valueFileSize: truncatedSize,
		},
		{
			// All keys have the same length, so every key record has the same size r. Removing
			// 3*r-1 bytes from the tail drops the records for key006 and key007 entirely and
			// leaves only a 1-byte fragment of key005's record, which is torn and discarded.
			// The value file is untouched, so the key-file prefix bounds the survivors:
			// key000..key004.
			name: "key_file_torn_mid_record",
			tear: func(t *testing.T, segPath *SegmentPath, idx uint32) {
				recordSize := int(keyRecordSize(keyAt(0)))
				truncateKeyFileBy(t, segPath, idx, 3*recordSize-1)
			},
			valueFileSize: n * valueLen,
		},
	}

	for _, sc := range scenarios {
		sc := sc // per-iteration copy: the parallel closure must not share the loop variable on Go < 1.22
		t.Run(sc.name, func(t *testing.T) {
			t.Parallel()
			segPath, idx := writeRun(t)
			sc.tear(t, segPath, idx)
			keys, _ := reloadSegmentExpectingRecovery(t, segPath, idx)
			assertPrefix(t, keys, wantSurvivors, sc.valueFileSize)
		})
	}
}

// TestSealLoadedSegmentGroupAtomicity covers all of the torn-write scenarios that
// sealLoadedSegment must handle. Each subtest builds a sealed segment, manually corrupts it on
// disk to simulate a crash mid-write, flips the metadata's sealed bit back to false, then reloads
Expand Down
11 changes: 11 additions & 0 deletions sei-db/db_engine/litt/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ type Table interface {
// of the value is 2^32 bytes. This database has been optimized under the assumption that values
// are generally much larger than keys. This affects performance, but not correctness.
//
// Although writes are individually atomic, the DB makes no guarantees about the atomicity or durability ordering
// of multiple writes in aggregate. That is to say, if a caller writes A and then B and the DB crashes before
// flushing, it may be the case that B is persisted but A is not. The exception to this rule is if the sharding
// factor for this table is 1, in which case the database guarantees that writes become crash-durable in the
// order they were issued.
//
// It is not safe to modify the byte slices passed to this function after the call
// (the key bytes, the value bytes, and every secondary key's bytes).
Put(key []byte, value []byte, secondaryKeys ...*types.SecondaryKey) error
Expand All @@ -46,6 +51,12 @@ type Table interface {
// of a value is 2^32 bytes. This database has been optimized under the assumption that values
// are generally much larger than keys. This affects performance, but not correctness.
//
// Although writes in a batch are individually atomic, the DB makes no guarantees about the atomicity or
// durability ordering of multiple writes in aggregate. That is to say, if a caller writes A and then B in a
// batch and the DB crashes before flushing, it may be the case that B is persisted but A is not. The exception
// to this rule is if the sharding factor for this table is 1, in which case the database guarantees that writes
// become crash-durable in the order they were issued.
//
// It is not safe to modify the byte slices passed to this function after the call
// (including the key byte slices, the value byte slices, and every secondary key's bytes).
PutBatch(batch []*types.PutRequest) error
Expand Down
4 changes: 4 additions & 0 deletions sei-db/db_engine/litt/table_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ type TableConfig struct {
// The default is 8. Must be in the range [1, MaxShardingFactor]. Storing this as a uint8 makes it structurally
// impossible to configure more shards than the on-disk format can address. May be changed at runtime via
// Table.SetShardingFactor().
//
// Normally, writes to a table are individually atomic, but are neither atomic nor ordered in aggregate. That is
// to say, if a caller writes A and then B and the DB crashes before flushing, it may be the case that B is
// persisted but A is not. However, if the sharding factor is 1, then all writes are made crash-durable in the
// order they were issued.
ShardingFactor uint8

// The size of the write cache, in bytes, for the table. A write cache stores recently written values for fast
Expand Down
78 changes: 0 additions & 78 deletions sei-db/ledger_db/block/block_db.go

This file was deleted.

Loading
Loading