-
Notifications
You must be signed in to change notification settings - Fork 881
integrate hashvault #3602
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
integrate hashvault #3602
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| package hashvault | ||
|
|
||
| import "context" | ||
|
|
||
| // Compile-time assertion that *NoopHashVault satisfies the HashVault interface. | ||
| var _ HashVault = (*NoopHashVault)(nil) | ||
|
|
||
| // NoopHashVault is a HashVault implementation that does nothing: every method ignores its | ||
| // arguments and returns nil. It provides no equivocation protection whatsoever. It exists for | ||
| // two purposes: | ||
| // - tests that construct a BlockExecutor but do not exercise the vault, and | ||
| // - the explicit, operator-opted-in "hash-vault-disabled-unsafe" escape hatch. | ||
| // | ||
| // Production code must never substitute this for a real vault without a deliberate human decision. | ||
| type NoopHashVault struct{} | ||
|
|
||
| // NewNoopHashVault returns a pointer to a new NoopHashVault. All of its methods are no-ops that | ||
| // return nil, so it records and checks nothing. | ||
| func NewNoopHashVault() *NoopHashVault { | ||
| return &NoopHashVault{} | ||
| } | ||
|
|
||
| // CommitToHash ignores the context, height and hash and always returns nil; nothing is recorded. | ||
| func (n *NoopHashVault) CommitToHash(_ context.Context, _ uint64, _ []byte) error { | ||
| return nil | ||
| } | ||
|
|
||
| // Prune ignores the context and height and always returns nil; there is nothing to prune. | ||
| func (n *NoopHashVault) Prune(_ context.Context, _ uint64) error { | ||
| return nil | ||
| } | ||
|
|
||
| // Close ignores the context and always returns nil; the empty struct holds nothing to release. | ||
| func (n *NoopHashVault) Close(_ context.Context) error { | ||
| return nil | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,7 @@ import ( | |
| "sort" | ||
|
|
||
| "github.com/gogo/protobuf/proto" | ||
| "github.com/sei-protocol/sei-chain/sei-db/state_db/sc/hashvault" | ||
| abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" | ||
| "github.com/sei-protocol/sei-chain/sei-tendermint/config" | ||
| "github.com/sei-protocol/sei-chain/sei-tendermint/crypto/merkle" | ||
|
|
@@ -116,6 +117,7 @@ type Handshaker struct { | |
| eventBus *eventbus.EventBus | ||
| genDoc *types.GenesisDoc | ||
| consensusPolicy types.ConsensusPolicy | ||
| hashVault hashvault.HashVault | ||
|
|
||
| nBlocks int // number of blocks applied to the state | ||
| } | ||
|
|
@@ -127,6 +129,7 @@ func NewHandshaker( | |
| eventBus *eventbus.EventBus, | ||
| genDoc *types.GenesisDoc, | ||
| consensusPolicy types.ConsensusPolicy, | ||
| hashVault hashvault.HashVault, | ||
| ) *Handshaker { | ||
| return &Handshaker{ | ||
| stateStore: stateStore, | ||
|
|
@@ -135,6 +138,7 @@ func NewHandshaker( | |
| eventBus: eventBus, | ||
| genDoc: genDoc, | ||
| consensusPolicy: consensusPolicy, | ||
| hashVault: hashVault, | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -401,15 +405,24 @@ func (h *Handshaker) replayBlocks( | |
| if i == finalBlock && !mutateState { | ||
| // We emit events for the index services at the final block due to the sync issue when | ||
| // the node shutdown during the block committing status. | ||
| blockExec := sm.NewBlockExecutor(h.stateStore, app, newReplayTxMempool(app), sm.EmptyEvidencePool{}, h.store, h.eventBus, sm.NopMetrics(), h.consensusPolicy) | ||
| blockExec := sm.NewBlockExecutor( | ||
| h.stateStore, | ||
| app, | ||
| newReplayTxMempool(app), | ||
| sm.EmptyEvidencePool{}, | ||
| h.store, | ||
| h.eventBus, | ||
| sm.NopMetrics(), | ||
| h.consensusPolicy, | ||
| h.hashVault) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This only covers the non-Autobahn part. Is the intent to do the Autobahn part later? I think you would need to:
|
||
| appHash, err = sm.ExecCommitBlock(ctx, | ||
| blockExec, app, block, h.stateStore, h.genDoc.InitialHeight, state) | ||
| blockExec, app, block, h.stateStore, h.genDoc.InitialHeight, state, h.hashVault) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| } else { | ||
| appHash, err = sm.ExecCommitBlock(ctx, | ||
| nil, app, block, h.stateStore, h.genDoc.InitialHeight, state) | ||
| nil, app, block, h.stateStore, h.genDoc.InitialHeight, state, h.hashVault) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
@@ -446,7 +459,16 @@ func (h *Handshaker) replayBlock( | |
|
|
||
| // Use stubs for both mempool and evidence pool since no transactions nor | ||
| // evidence are needed here - block already exists. | ||
| blockExec := sm.NewBlockExecutor(h.stateStore, app, newReplayTxMempool(app), sm.EmptyEvidencePool{}, h.store, h.eventBus, sm.NopMetrics(), h.consensusPolicy) | ||
| blockExec := sm.NewBlockExecutor( | ||
| h.stateStore, | ||
| app, | ||
| newReplayTxMempool(app), | ||
| sm.EmptyEvidencePool{}, | ||
| h.store, | ||
| h.eventBus, | ||
| sm.NopMetrics(), | ||
| h.consensusPolicy, | ||
| h.hashVault) | ||
|
|
||
| var err error | ||
| state, err = blockExec.ApplyBlock(ctx, state, meta.BlockID, block, nil) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,8 @@ import ( | |
| "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" | ||
| tmtypes "github.com/sei-protocol/sei-chain/sei-tendermint/proto/tendermint/types" | ||
| "github.com/sei-protocol/sei-chain/sei-tendermint/types" | ||
|
|
||
| "github.com/sei-protocol/sei-chain/sei-db/state_db/sc/hashvault" | ||
| "github.com/sei-protocol/seilog" | ||
| otrace "go.opentelemetry.io/otel/trace" | ||
| ) | ||
|
|
@@ -60,6 +62,11 @@ type BlockExecutor struct { | |
| // below, which is a runtime atomic.Bool flipped on for the Giga executor. | ||
| consensusPolicy types.ConsensusPolicy | ||
|
|
||
| // hashVault is the block-hash equivocation guard. Every applied block's hash is committed to | ||
| // it; if it ever rejects a hash the node halts. Never nil (a no-op implementation is used when | ||
| // the operator disables the vault or in tests). | ||
| hashVault hashvault.HashVault | ||
|
|
||
| // cache the verification results over a single height | ||
| cache map[string]struct{} | ||
| } | ||
|
|
@@ -74,6 +81,7 @@ func NewBlockExecutor( | |
| eventBus *eventbus.EventBus, | ||
| metrics *Metrics, | ||
| consensusPolicy types.ConsensusPolicy, | ||
| hashVault hashvault.HashVault, | ||
| ) *BlockExecutor { | ||
| return &BlockExecutor{ | ||
| eventBus: eventBus, | ||
|
|
@@ -85,6 +93,7 @@ func NewBlockExecutor( | |
| cache: make(map[string]struct{}), | ||
| blockStore: blockStore, | ||
| consensusPolicy: consensusPolicy, | ||
| hashVault: hashVault, | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -400,6 +409,13 @@ func (blockExec *BlockExecutor) ApplyBlock(ctx context.Context, state State, blo | |
| } | ||
| saveBlockTime := time.Now() | ||
| state.AppHash = fBlockRes.AppHash | ||
|
|
||
| // Commit this block's hash to the equivocation guard before saving state. See commitHashToVault. | ||
| // A returned error is a benign shutdown cancellation; genuine faults panic inside the call. | ||
| if err := commitHashToVault(ctx, blockExec.hashVault, block.Height, block.Hash()); err != nil { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think the intent is to vault the AppHash? block.Hash() is the Tendermint block header hash. I think you should pass the AppHash (e.g. fBlockRes.AppHash) instead. |
||
| return state, err | ||
| } | ||
|
|
||
| if err := blockExec.store.Save(state); err != nil { | ||
| return state, err | ||
| } | ||
|
|
@@ -421,6 +437,16 @@ func (blockExec *BlockExecutor) ApplyBlock(ctx context.Context, state State, blo | |
| } else { | ||
| logger.Debug("pruned blocks", "pruned", pruned, "retain_height", retainHeight) | ||
| } | ||
| // Align the vault's retention with what is actually still on disk, not the requested | ||
| // retainHeight: prune it to the blockstore's current base. Block/state pruning is best-effort | ||
| // and non-atomic, so a (partial) failure can leave blocks below retainHeight on disk. Pruning | ||
| // to the real base guarantees the vault boundary never advances past a block that can still be | ||
| // replayed — which would otherwise make a later CommitToHash fatally reject an on-disk height | ||
| // (ErrBelowPruneBoundary). A prune failure is GC, not equivocation: log and continue. | ||
| base := blockExec.blockStore.Base() | ||
| if err := blockExec.hashVault.Prune(ctx, uint64(base)); err != nil { //nolint:gosec // base is non-negative | ||
| logger.Error("failed to prune hashvault", "base", base, "err", err) | ||
| } | ||
|
cursor[bot] marked this conversation as resolved.
|
||
| } | ||
| blockExec.metrics.PruneBlockLatency.Observe(float64(time.Since(pruneBlockTime).Milliseconds())) | ||
| if pruneBlockSpan != nil { | ||
|
|
@@ -697,6 +723,41 @@ func FireEvents( | |
| } | ||
| } | ||
|
|
||
| // commitHashToVault records the block hash for the given height in the equivocation guard. It is | ||
| // shared by ApplyBlock (live blocks) and ExecCommitBlock (blocks caught up during the ABCI | ||
| // handshake) so every applied height is guarded identically. | ||
| // | ||
| // Outcomes: | ||
| //   - success: returns nil. | ||
| //   - context cancellation/deadline: logs at Info and returns the wrapped error (no panic). | ||
| //   - any other error: logs at Error and panics, halting the node. | ||
| // | ||
| // A genuine failure halts the node — we cannot prove the node is still committed to a single hash for | ||
| // this height, so failing closed is the only safe option. We distinguish a confirmed equivocation | ||
| // (ErrHashMismatch: never restart without human investigation) from an operational failure (I/O, | ||
| // corruption), which must not cry equivocation. The distinction only changes the log message; both | ||
| // cases panic. | ||
| // | ||
| // Context cancellation/deadline is NOT treated as a failure to record the hash. The rationale is that | ||
| // CommitToHash is expected to return before attempting any write when the context is already done | ||
| // (NOTE(review): that behavior lives in the vault implementation, not here — confirm), and that | ||
| // cancellation only happens because the node is shutting down. So we unwind cleanly by returning the | ||
| // error to the caller (which aborts the apply) instead of panicking. | ||
| func commitHashToVault(ctx context.Context, vault hashvault.HashVault, height int64, hash []byte) error { | ||
| err := vault.CommitToHash(ctx, uint64(height), hash) //nolint:gosec // block height is non-negative | ||
| if err == nil { | ||
| return nil | ||
| } | ||
| // Shutdown path: surface the context error to the caller rather than halting the node. | ||
| if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { | ||
| logger.Info("HashVault commit aborted by context cancellation during shutdown; not recording hash", | ||
| "height", height, "err", err) | ||
| return fmt.Errorf("hashvault CommitToHash aborted at height %d: %w", height, err) | ||
| } | ||
| if errors.Is(err, hashvault.ErrHashMismatch) { | ||
| logger.Error("FATAL: HashVault detected a block-hash mismatch — the node has equivocated. "+ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Chatted with Greg today, since re-execution of a block should only happen during restart, it's fine to just panic here. I think you can print previous and current hash, then hint if the human is really really sure, he can remove directory to proceed, but warn that this may lead to slashing etc |
||
| "Halting. DO NOT RESTART WITHOUT HUMAN INTERVENTION.", | ||
| "height", height, "hash", fmt.Sprintf("%X", hash), "err", err) | ||
| } else { | ||
| logger.Error("FATAL: HashVault could not commit the block hash (operational error, not a "+ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could a transient I/O error land here? What's the behavior if the on-disk file is corrupted? If the on-disk file is corrupted maybe we should just ask human to remove or just proceed? I just don't want one corrupted bit on disk to take down the whole validator. What do you feel, log an error and proceed, but don't overwrite the corrupted entry? |
||
| "confirmed equivocation). Halting.", | ||
| "height", height, "hash", fmt.Sprintf("%X", hash), "err", err) | ||
| } | ||
| // Both non-context failures (mismatch and operational) reach this point and halt the node. | ||
| panic(fmt.Sprintf("hashvault CommitToHash failed at height %d: %v", height, err)) | ||
| } | ||
|
|
||
| //---------------------------------------------------------------------------------------------------- | ||
| // Execute block without state. TODO: eliminate | ||
|
|
||
|
|
@@ -710,6 +771,7 @@ func ExecCommitBlock( | |
| store Store, | ||
| initialHeight int64, | ||
| s State, | ||
| hashVault hashvault.HashVault, | ||
| ) ([]byte, error) { | ||
| finalizeBlockResponse, err := appConn.FinalizeBlock( | ||
| ctx, | ||
|
|
@@ -757,6 +819,13 @@ func ExecCommitBlock( | |
| return nil, err | ||
| } | ||
|
|
||
| // Guard the replayed height exactly as ApplyBlock guards live blocks, so heights caught up during | ||
| // the ABCI handshake are recorded in (and checked against) the equivocation guard. | ||
| // A returned error is a benign shutdown cancellation; genuine faults panic inside the call. | ||
| if err := commitHashToVault(ctx, hashVault, block.Height, block.Hash()); err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
| // ResponseCommit has no error or log | ||
| return finalizeBlockResponse.AppHash, nil | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.