Skip to content

Commit 13e2e6d

Browse files
authored
feat: add forkchoice timing metrics (#57)
* feat: add forkchoice timing metrics
* chore: remove fork choice reorg depth metric
* feat: add ethlambda-metrics crate
* refactor: use ethlambda-metrics instead of prometheus
* refactor: move gather_default_metrics to metrics crate
* chore: update lockfile
1 parent 75e536d commit 13e2e6d

15 files changed

Lines changed: 175 additions & 114 deletions

File tree

Cargo.lock

Lines changed: 12 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ members = [
77
"crates/blockchain/fork_choice",
88
"crates/blockchain/state_transition",
99
"crates/common/crypto",
10+
"crates/common/metrics",
1011
"crates/common/types",
1112
"crates/net/p2p",
1213
"crates/net/rpc",
@@ -28,6 +29,7 @@ ethlambda-blockchain = { path = "crates/blockchain" }
2829
ethlambda-fork-choice = { path = "crates/blockchain/fork_choice" }
2930
ethlambda-state-transition = { path = "crates/blockchain/state_transition" }
3031
ethlambda-crypto = { path = "crates/common/crypto" }
32+
ethlambda-metrics = { path = "crates/common/metrics" }
3133
ethlambda-types = { path = "crates/common/types" }
3234
ethlambda-p2p = { path = "crates/net/p2p" }
3335
ethlambda-rpc = { path = "crates/net/rpc" }

crates/blockchain/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ ethlambda-storage.workspace = true
1818
ethlambda-state-transition.workspace = true
1919
ethlambda-fork-choice.workspace = true
2020
ethlambda-crypto.workspace = true
21+
ethlambda-metrics.workspace = true
2122
ethlambda-types.workspace = true
2223

2324
spawned-concurrency.workspace = true
@@ -27,7 +28,6 @@ tokio.workspace = true
2728
thiserror.workspace = true
2829
tracing.workspace = true
2930

30-
prometheus.workspace = true
3131
hex.workspace = true
3232

3333
[dev-dependencies]

crates/blockchain/src/metrics.rs

Lines changed: 66 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,71 @@
11
//! Prometheus metrics for the blockchain module.
22
3+
use ethlambda_metrics::*;
4+
35
pub fn update_head_slot(slot: u64) {
4-
static LEAN_HEAD_SLOT: std::sync::LazyLock<prometheus::IntGauge> =
5-
std::sync::LazyLock::new(|| {
6-
prometheus::register_int_gauge!("lean_head_slot", "Latest slot of the lean chain")
7-
.unwrap()
8-
});
6+
static LEAN_HEAD_SLOT: std::sync::LazyLock<IntGauge> = std::sync::LazyLock::new(|| {
7+
register_int_gauge!("lean_head_slot", "Latest slot of the lean chain").unwrap()
8+
});
99
LEAN_HEAD_SLOT.set(slot.try_into().unwrap());
1010
}
1111

1212
pub fn update_latest_justified_slot(slot: u64) {
13-
static LEAN_LATEST_JUSTIFIED_SLOT: std::sync::LazyLock<prometheus::IntGauge> =
13+
static LEAN_LATEST_JUSTIFIED_SLOT: std::sync::LazyLock<IntGauge> =
1414
std::sync::LazyLock::new(|| {
15-
prometheus::register_int_gauge!("lean_latest_justified_slot", "Latest justified slot")
16-
.unwrap()
15+
register_int_gauge!("lean_latest_justified_slot", "Latest justified slot").unwrap()
1716
});
1817
LEAN_LATEST_JUSTIFIED_SLOT.set(slot.try_into().unwrap());
1918
}
2019

2120
pub fn update_latest_finalized_slot(slot: u64) {
22-
static LEAN_LATEST_FINALIZED_SLOT: std::sync::LazyLock<prometheus::IntGauge> =
21+
static LEAN_LATEST_FINALIZED_SLOT: std::sync::LazyLock<IntGauge> =
2322
std::sync::LazyLock::new(|| {
24-
prometheus::register_int_gauge!("lean_latest_finalized_slot", "Latest finalized slot")
25-
.unwrap()
23+
register_int_gauge!("lean_latest_finalized_slot", "Latest finalized slot").unwrap()
2624
});
2725
LEAN_LATEST_FINALIZED_SLOT.set(slot.try_into().unwrap());
2826
}
2927

3028
pub fn update_current_slot(slot: u64) {
31-
static LEAN_CURRENT_SLOT: std::sync::LazyLock<prometheus::IntGauge> =
32-
std::sync::LazyLock::new(|| {
33-
prometheus::register_int_gauge!("lean_current_slot", "Current slot of the lean chain")
34-
.unwrap()
35-
});
29+
static LEAN_CURRENT_SLOT: std::sync::LazyLock<IntGauge> = std::sync::LazyLock::new(|| {
30+
register_int_gauge!("lean_current_slot", "Current slot of the lean chain").unwrap()
31+
});
3632
LEAN_CURRENT_SLOT.set(slot.try_into().unwrap());
3733
}
3834

3935
pub fn update_validators_count(count: u64) {
40-
static LEAN_VALIDATORS_COUNT: std::sync::LazyLock<prometheus::IntGauge> =
41-
std::sync::LazyLock::new(|| {
42-
prometheus::register_int_gauge!(
43-
"lean_validators_count",
44-
"Number of validators managed by a node"
45-
)
46-
.unwrap()
47-
});
36+
static LEAN_VALIDATORS_COUNT: std::sync::LazyLock<IntGauge> = std::sync::LazyLock::new(|| {
37+
register_int_gauge!(
38+
"lean_validators_count",
39+
"Number of validators managed by a node"
40+
)
41+
.unwrap()
42+
});
4843
LEAN_VALIDATORS_COUNT.set(count.try_into().unwrap());
4944
}
5045

5146
pub fn update_safe_target_slot(slot: u64) {
52-
static LEAN_SAFE_TARGET_SLOT: std::sync::LazyLock<prometheus::IntGauge> =
53-
std::sync::LazyLock::new(|| {
54-
prometheus::register_int_gauge!("lean_safe_target_slot", "Safe target slot").unwrap()
55-
});
47+
static LEAN_SAFE_TARGET_SLOT: std::sync::LazyLock<IntGauge> = std::sync::LazyLock::new(|| {
48+
register_int_gauge!("lean_safe_target_slot", "Safe target slot").unwrap()
49+
});
5650
LEAN_SAFE_TARGET_SLOT.set(slot.try_into().unwrap());
5751
}
5852

5953
pub fn set_node_info(name: &str, version: &str) {
60-
static LEAN_NODE_INFO: std::sync::LazyLock<prometheus::IntGaugeVec> =
61-
std::sync::LazyLock::new(|| {
62-
prometheus::register_int_gauge_vec!(
63-
"lean_node_info",
64-
"Node information (always 1)",
65-
&["name", "version"]
66-
)
67-
.unwrap()
68-
});
54+
static LEAN_NODE_INFO: std::sync::LazyLock<IntGaugeVec> = std::sync::LazyLock::new(|| {
55+
register_int_gauge_vec!(
56+
"lean_node_info",
57+
"Node information (always 1)",
58+
&["name", "version"]
59+
)
60+
.unwrap()
61+
});
6962
LEAN_NODE_INFO.with_label_values(&[name, version]).set(1);
7063
}
7164

7265
pub fn set_node_start_time() {
73-
static LEAN_NODE_START_TIME_SECONDS: std::sync::LazyLock<prometheus::IntGauge> =
66+
static LEAN_NODE_START_TIME_SECONDS: std::sync::LazyLock<IntGauge> =
7467
std::sync::LazyLock::new(|| {
75-
prometheus::register_int_gauge!(
68+
register_int_gauge!(
7669
"lean_node_start_time_seconds",
7770
"Timestamp when node started"
7871
)
@@ -87,9 +80,9 @@ pub fn set_node_start_time() {
8780

8881
/// Increment the valid attestations counter.
8982
pub fn inc_attestations_valid(source: &str) {
90-
static LEAN_ATTESTATIONS_VALID_TOTAL: std::sync::LazyLock<prometheus::IntCounterVec> =
83+
static LEAN_ATTESTATIONS_VALID_TOTAL: std::sync::LazyLock<IntCounterVec> =
9184
std::sync::LazyLock::new(|| {
92-
prometheus::register_int_counter_vec!(
85+
register_int_counter_vec!(
9386
"lean_attestations_valid_total",
9487
"Count of valid attestations",
9588
&["source"]
@@ -103,9 +96,9 @@ pub fn inc_attestations_valid(source: &str) {
10396

10497
/// Increment the invalid attestations counter.
10598
pub fn inc_attestations_invalid(source: &str) {
106-
static LEAN_ATTESTATIONS_INVALID_TOTAL: std::sync::LazyLock<prometheus::IntCounterVec> =
99+
static LEAN_ATTESTATIONS_INVALID_TOTAL: std::sync::LazyLock<IntCounterVec> =
107100
std::sync::LazyLock::new(|| {
108-
prometheus::register_int_counter_vec!(
101+
register_int_counter_vec!(
109102
"lean_attestations_invalid_total",
110103
"Count of invalid attestations",
111104
&["source"]
@@ -119,13 +112,41 @@ pub fn inc_attestations_invalid(source: &str) {
119112

120113
/// Increment the fork choice reorgs counter.
121114
pub fn inc_fork_choice_reorgs() {
122-
static LEAN_FORK_CHOICE_REORGS_TOTAL: std::sync::LazyLock<prometheus::IntCounter> =
115+
static LEAN_FORK_CHOICE_REORGS_TOTAL: std::sync::LazyLock<IntCounter> =
123116
std::sync::LazyLock::new(|| {
124-
prometheus::register_int_counter!(
117+
register_int_counter!(
125118
"lean_fork_choice_reorgs_total",
126119
"Count of fork choice reorganizations"
127120
)
128121
.unwrap()
129122
});
130123
LEAN_FORK_CHOICE_REORGS_TOTAL.inc();
131124
}
125+
126+
/// Start timing fork choice block processing. Records duration when the guard is dropped.
127+
pub fn time_fork_choice_block_processing() -> TimingGuard {
128+
static LEAN_FORK_CHOICE_BLOCK_PROCESSING_TIME_SECONDS: std::sync::LazyLock<Histogram> =
129+
std::sync::LazyLock::new(|| {
130+
register_histogram!(
131+
"lean_fork_choice_block_processing_time_seconds",
132+
"Duration to process a block",
133+
vec![0.005, 0.01, 0.025, 0.05, 0.1, 1.0]
134+
)
135+
.unwrap()
136+
});
137+
TimingGuard::new(&LEAN_FORK_CHOICE_BLOCK_PROCESSING_TIME_SECONDS)
138+
}
139+
140+
/// Start timing attestation validation. Records duration when the guard is dropped.
141+
pub fn time_attestation_validation() -> TimingGuard {
142+
static LEAN_ATTESTATION_VALIDATION_TIME_SECONDS: std::sync::LazyLock<Histogram> =
143+
std::sync::LazyLock::new(|| {
144+
register_histogram!(
145+
"lean_attestation_validation_time_seconds",
146+
"Duration to validate an attestation",
147+
vec![0.005, 0.01, 0.025, 0.05, 0.1, 1.0]
148+
)
149+
.unwrap()
150+
});
151+
TimingGuard::new(&LEAN_ATTESTATION_VALIDATION_TIME_SECONDS)
152+
}

crates/blockchain/src/store.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ fn update_safe_target(store: &mut Store) {
7070
/// 2. A vote cannot span backwards in time (source > target).
7171
/// 3. A vote cannot be for a future slot.
7272
fn validate_attestation(store: &Store, attestation: &Attestation) -> Result<(), StoreError> {
73+
let _timing = metrics::time_attestation_validation();
7374
let data = &attestation.data;
7475

7576
// Availability Check - We cannot count a vote if we haven't seen the blocks involved.
@@ -285,6 +286,8 @@ pub fn on_block(
285286
store: &mut Store,
286287
signed_block: SignedBlockWithAttestation,
287288
) -> Result<(), StoreError> {
289+
let _timing = metrics::time_fork_choice_block_processing();
290+
288291
// Unpack block components
289292
let block = signed_block.message.block.clone();
290293
let proposer_attestation = signed_block.message.proposer_attestation.clone();
@@ -1027,6 +1030,6 @@ fn is_reorg(old_head: H256, new_head: H256, store: &Store) -> bool {
10271030
}
10281031

10291032
// Couldn't walk back far enough (missing blocks in chain)
1030-
// Conservative: assume no reorg if we can't determine
1033+
// Assume the ancestor is behind the latest finalized block
10311034
false
10321035
}

crates/blockchain/state_transition/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ version.workspace = true
1111

1212
[dependencies]
1313
ethlambda-types.workspace = true
14+
ethlambda-metrics.workspace = true
1415

1516
thiserror.workspace = true
16-
prometheus.workspace = true
1717

1818
[dev-dependencies]
1919
serde.workspace = true

crates/blockchain/state_transition/src/metrics.rs

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
11
//! Prometheus metrics for state transition.
22
33
use std::sync::LazyLock;
4-
use std::time::Instant;
54

6-
use prometheus::{
7-
Histogram, IntCounter, IntCounterVec, register_int_counter, register_int_counter_vec,
8-
};
5+
use ethlambda_metrics::*;
96

107
static LEAN_STATE_TRANSITION_SLOTS_PROCESSED_TOTAL: LazyLock<IntCounter> = LazyLock::new(|| {
118
register_int_counter!(
@@ -49,7 +46,7 @@ pub fn inc_finalizations(result: &str) {
4946
}
5047

5148
static LEAN_STATE_TRANSITION_TIME_SECONDS: LazyLock<Histogram> = LazyLock::new(|| {
52-
prometheus::register_histogram!(
49+
register_histogram!(
5350
"lean_state_transition_time_seconds",
5451
"Duration of the entire state transition",
5552
vec![0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 2.0, 2.5, 3.0, 4.0]
@@ -59,7 +56,7 @@ static LEAN_STATE_TRANSITION_TIME_SECONDS: LazyLock<Histogram> = LazyLock::new(|
5956

6057
static LEAN_STATE_TRANSITION_SLOTS_PROCESSING_TIME_SECONDS: LazyLock<Histogram> =
6158
LazyLock::new(|| {
62-
prometheus::register_histogram!(
59+
register_histogram!(
6360
"lean_state_transition_slots_processing_time_seconds",
6461
"Duration to process slots",
6562
vec![0.005, 0.01, 0.025, 0.05, 0.1, 1.0]
@@ -69,7 +66,7 @@ static LEAN_STATE_TRANSITION_SLOTS_PROCESSING_TIME_SECONDS: LazyLock<Histogram>
6966

7067
static LEAN_STATE_TRANSITION_BLOCK_PROCESSING_TIME_SECONDS: LazyLock<Histogram> =
7168
LazyLock::new(|| {
72-
prometheus::register_histogram!(
69+
register_histogram!(
7370
"lean_state_transition_block_processing_time_seconds",
7471
"Duration to process a block in state transition",
7572
vec![0.005, 0.01, 0.025, 0.05, 0.1, 1.0]
@@ -79,35 +76,14 @@ static LEAN_STATE_TRANSITION_BLOCK_PROCESSING_TIME_SECONDS: LazyLock<Histogram>
7976

8077
static LEAN_STATE_TRANSITION_ATTESTATIONS_PROCESSING_TIME_SECONDS: LazyLock<Histogram> =
8178
LazyLock::new(|| {
82-
prometheus::register_histogram!(
79+
register_histogram!(
8380
"lean_state_transition_attestations_processing_time_seconds",
8481
"Duration to process attestations",
8582
vec![0.005, 0.01, 0.025, 0.05, 0.1, 1.0]
8683
)
8784
.unwrap()
8885
});
8986

90-
/// A guard that records elapsed time to a histogram when dropped.
91-
pub struct TimingGuard {
92-
histogram: &'static Histogram,
93-
start: Instant,
94-
}
95-
96-
impl TimingGuard {
97-
fn new(histogram: &'static Histogram) -> Self {
98-
Self {
99-
histogram,
100-
start: Instant::now(),
101-
}
102-
}
103-
}
104-
105-
impl Drop for TimingGuard {
106-
fn drop(&mut self) {
107-
self.histogram.observe(self.start.elapsed().as_secs_f64());
108-
}
109-
}
110-
11187
/// Start timing state transition. Records duration when the guard is dropped.
11288
pub fn time_state_transition() -> TimingGuard {
11389
TimingGuard::new(&LEAN_STATE_TRANSITION_TIME_SECONDS)

0 commit comments

Comments
 (0)