Skip to content

Commit b647c7e

Browse files
committed
Implement Devnet-4 metrics from leanMetrics PR #29
Add block production metrics (building time, payload aggregation time, aggregated payload count, success/failure counters), gossip message size histograms (block, attestation, aggregation), node sync status gauge (idle/syncing/synced), and update committee signature aggregation histogram buckets to wider range.
1 parent 12709ae commit b647c7e

5 files changed

Lines changed: 199 additions & 11 deletions

File tree

crates/blockchain/src/lib.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ impl BlockChain {
4747
is_aggregator: bool,
4848
) -> BlockChain {
4949
metrics::set_is_aggregator(is_aggregator);
50+
metrics::set_node_sync_status("idle");
5051
let genesis_time = store.config().genesis_time;
5152
let key_manager = key_manager::KeyManager::new(validator_keys);
5253
let handle = BlockChainServer {
@@ -217,11 +218,14 @@ impl BlockChainServer {
217218
fn propose_block(&mut self, slot: u64, validator_id: u64) {
218219
info!(%slot, %validator_id, "We are the proposer for this slot");
219220

221+
let _timing = metrics::time_block_building();
222+
220223
// Build the block with attestation signatures
221224
let Ok((block, attestation_signatures, _post_checkpoints)) =
222225
store::produce_block_with_signatures(&mut self.store, slot, validator_id)
223226
.inspect_err(|err| error!(%slot, %validator_id, %err, "Failed to build block"))
224227
else {
228+
metrics::inc_block_building_failures();
225229
return;
226230
};
227231

@@ -232,6 +236,7 @@ impl BlockChainServer {
232236
.sign_block_root(validator_id, slot as u32, &block_root)
233237
.inspect_err(|err| error!(%slot, %validator_id, %err, "Failed to sign block root"))
234238
else {
239+
metrics::inc_block_building_failures();
235240
return;
236241
};
237242

@@ -249,9 +254,12 @@ impl BlockChainServer {
249254
// Process the block locally before publishing
250255
if let Err(err) = self.process_block(signed_block.clone()) {
251256
error!(%slot, %validator_id, %err, "Failed to process built block");
257+
metrics::inc_block_building_failures();
252258
return;
253259
};
254260

261+
metrics::inc_block_building_success();
262+
255263
// Publish to gossip network
256264
if let Some(ref p2p) = self.p2p {
257265
let _ = p2p
@@ -264,10 +272,26 @@ impl BlockChainServer {
264272

265273
fn process_block(&mut self, signed_block: SignedBlock) -> Result<(), StoreError> {
266274
store::on_block(&mut self.store, signed_block)?;
267-
metrics::update_head_slot(self.store.head_slot());
275+
let head_slot = self.store.head_slot();
276+
metrics::update_head_slot(head_slot);
268277
metrics::update_latest_justified_slot(self.store.latest_justified().slot);
269278
metrics::update_latest_finalized_slot(self.store.latest_finalized().slot);
270279
metrics::update_validators_count(self.key_manager.validator_ids().len() as u64);
280+
281+
// Update sync status based on head slot vs wall clock slot
282+
let genesis_time_ms = self.store.config().genesis_time * 1000;
283+
let now_ms = std::time::SystemTime::now()
284+
.duration_since(std::time::UNIX_EPOCH)
285+
.unwrap()
286+
.as_millis() as u64;
287+
let current_slot = now_ms.saturating_sub(genesis_time_ms) / MILLISECONDS_PER_SLOT;
288+
let status = if head_slot >= current_slot {
289+
"synced"
290+
} else {
291+
"syncing"
292+
};
293+
metrics::set_node_sync_status(status);
294+
271295
for table in ALL_TABLES {
272296
metrics::update_table_bytes(table.name(), self.store.estimate_table_bytes(table));
273297
}

crates/blockchain/src/metrics.rs

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ static LEAN_COMMITTEE_SIGNATURES_AGGREGATION_TIME_SECONDS: std::sync::LazyLock<H
261261
register_histogram!(
262262
"lean_committee_signatures_aggregation_time_seconds",
263263
"Time taken to aggregate committee signatures",
264-
vec![0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 1.0]
264+
vec![0.05, 0.1, 0.25, 0.5, 0.75, 1.0, 2.0, 3.0, 4.0]
265265
)
266266
.unwrap()
267267
});
@@ -276,6 +276,58 @@ static LEAN_FORK_CHOICE_REORG_DEPTH: std::sync::LazyLock<Histogram> =
276276
.unwrap()
277277
});
278278

279+
// --- Block Production ---
280+
281+
static LEAN_BLOCK_AGGREGATED_PAYLOADS: std::sync::LazyLock<Histogram> =
282+
std::sync::LazyLock::new(|| {
283+
register_histogram!(
284+
"lean_block_aggregated_payloads",
285+
"Number of aggregated_payloads in a block",
286+
vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0]
287+
)
288+
.unwrap()
289+
});
290+
291+
static LEAN_BLOCK_BUILDING_PAYLOAD_AGGREGATION_TIME_SECONDS: std::sync::LazyLock<Histogram> =
292+
std::sync::LazyLock::new(|| {
293+
register_histogram!(
294+
"lean_block_building_payload_aggregation_time_seconds",
295+
"Time taken to build aggregated_payloads during block building",
296+
vec![0.1, 0.25, 0.5, 0.75, 1.0, 2.0, 3.0, 4.0]
297+
)
298+
.unwrap()
299+
});
300+
301+
static LEAN_BLOCK_BUILDING_TIME_SECONDS: std::sync::LazyLock<Histogram> =
302+
std::sync::LazyLock::new(|| {
303+
register_histogram!(
304+
"lean_block_building_time_seconds",
305+
"Time taken to build a block",
306+
vec![0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 1.0]
307+
)
308+
.unwrap()
309+
});
310+
311+
static LEAN_BLOCK_BUILDING_SUCCESS_TOTAL: std::sync::LazyLock<IntCounter> =
312+
std::sync::LazyLock::new(|| {
313+
register_int_counter!(
314+
"lean_block_building_success_total",
315+
"Successful block builds"
316+
)
317+
.unwrap()
318+
});
319+
320+
static LEAN_BLOCK_BUILDING_FAILURES_TOTAL: std::sync::LazyLock<IntCounter> =
321+
std::sync::LazyLock::new(|| {
322+
register_int_counter!("lean_block_building_failures_total", "Failed block builds").unwrap()
323+
});
324+
325+
// --- Sync Status ---
326+
327+
static LEAN_NODE_SYNC_STATUS: std::sync::LazyLock<IntGaugeVec> = std::sync::LazyLock::new(|| {
328+
register_int_gauge_vec!("lean_node_sync_status", "Node sync status", &["status"]).unwrap()
329+
});
330+
279331
// --- Initialization ---
280332

281333
/// Register all metrics with the Prometheus registry so they appear in `/metrics` from startup.
@@ -315,6 +367,14 @@ pub fn init() {
315367
std::sync::LazyLock::force(&LEAN_PQ_SIG_AGGREGATED_SIGNATURES_VERIFICATION_TIME_SECONDS);
316368
std::sync::LazyLock::force(&LEAN_COMMITTEE_SIGNATURES_AGGREGATION_TIME_SECONDS);
317369
std::sync::LazyLock::force(&LEAN_FORK_CHOICE_REORG_DEPTH);
370+
// Block production
371+
std::sync::LazyLock::force(&LEAN_BLOCK_AGGREGATED_PAYLOADS);
372+
std::sync::LazyLock::force(&LEAN_BLOCK_BUILDING_PAYLOAD_AGGREGATION_TIME_SECONDS);
373+
std::sync::LazyLock::force(&LEAN_BLOCK_BUILDING_TIME_SECONDS);
374+
std::sync::LazyLock::force(&LEAN_BLOCK_BUILDING_SUCCESS_TOTAL);
375+
std::sync::LazyLock::force(&LEAN_BLOCK_BUILDING_FAILURES_TOTAL);
376+
// Sync status
377+
std::sync::LazyLock::force(&LEAN_NODE_SYNC_STATUS);
318378
}
319379

320380
// --- Public API ---
@@ -476,3 +536,37 @@ pub fn set_attestation_committee_count(count: u64) {
476536
pub fn observe_fork_choice_reorg_depth(depth: u64) {
477537
LEAN_FORK_CHOICE_REORG_DEPTH.observe(depth as f64);
478538
}
539+
540+
/// Observe the number of aggregated payloads in a built block.
541+
pub fn observe_block_aggregated_payloads(count: usize) {
542+
LEAN_BLOCK_AGGREGATED_PAYLOADS.observe(count as f64);
543+
}
544+
545+
/// Start timing payload aggregation during block building. Records duration when the guard is dropped.
546+
pub fn time_block_building_payload_aggregation() -> TimingGuard {
547+
TimingGuard::new(&LEAN_BLOCK_BUILDING_PAYLOAD_AGGREGATION_TIME_SECONDS)
548+
}
549+
550+
/// Start timing block building. Records duration when the guard is dropped.
551+
pub fn time_block_building() -> TimingGuard {
552+
TimingGuard::new(&LEAN_BLOCK_BUILDING_TIME_SECONDS)
553+
}
554+
555+
/// Increment the successful block builds counter.
556+
pub fn inc_block_building_success() {
557+
LEAN_BLOCK_BUILDING_SUCCESS_TOTAL.inc();
558+
}
559+
560+
/// Increment the failed block builds counter.
561+
pub fn inc_block_building_failures() {
562+
LEAN_BLOCK_BUILDING_FAILURES_TOTAL.inc();
563+
}
564+
565+
/// Set the node sync status. Sets the given status label to 1 and all others to 0.
566+
pub fn set_node_sync_status(status: &str) {
567+
for label in &["idle", "syncing", "synced"] {
568+
LEAN_NODE_SYNC_STATUS
569+
.with_label_values(&[label])
570+
.set(i64::from(*label == status));
571+
}
572+
}

crates/blockchain/src/store.rs

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,14 +1015,19 @@ pub fn produce_block_with_signatures(
10151015

10161016
let known_block_roots = store.get_block_roots();
10171017

1018-
let (block, signatures, post_checkpoints) = build_block(
1019-
&head_state,
1020-
slot,
1021-
validator_index,
1022-
head_root,
1023-
&known_block_roots,
1024-
&aggregated_payloads,
1025-
)?;
1018+
let (block, signatures, post_checkpoints) = {
1019+
let _timing = metrics::time_block_building_payload_aggregation();
1020+
build_block(
1021+
&head_state,
1022+
slot,
1023+
validator_index,
1024+
head_root,
1025+
&known_block_roots,
1026+
&aggregated_payloads,
1027+
)?
1028+
};
1029+
1030+
metrics::observe_block_aggregated_payloads(signatures.len());
10261031

10271032
Ok((block, signatures, post_checkpoints))
10281033
}

crates/net/p2p/src/gossipsub/handler.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use super::{
1515
attestation_subnet_topic,
1616
},
1717
};
18-
use crate::P2PServer;
18+
use crate::{P2PServer, metrics};
1919

2020
pub async fn handle_gossipsub_message(server: &mut P2PServer, event: Event) {
2121
let Event::Message {
@@ -34,6 +34,7 @@ pub async fn handle_gossipsub_message(server: &mut P2PServer, event: Event) {
3434
else {
3535
return;
3636
};
37+
metrics::observe_gossip_block_size(uncompressed_data.len());
3738

3839
let Ok(signed_block) = SignedBlock::from_ssz_bytes(&uncompressed_data)
3940
.inspect_err(|err| error!(?err, "Failed to decode gossipped block"))
@@ -65,6 +66,7 @@ pub async fn handle_gossipsub_message(server: &mut P2PServer, event: Event) {
6566
else {
6667
return;
6768
};
69+
metrics::observe_gossip_aggregation_size(uncompressed_data.len());
6870

6971
let Ok(aggregation) = SignedAggregatedAttestation::from_ssz_bytes(&uncompressed_data)
7072
.inspect_err(|err| error!(?err, "Failed to decode gossipped aggregation"))
@@ -94,6 +96,7 @@ pub async fn handle_gossipsub_message(server: &mut P2PServer, event: Event) {
9496
else {
9597
return;
9698
};
99+
metrics::observe_gossip_attestation_size(uncompressed_data.len());
97100

98101
let Ok(signed_attestation) = SignedAttestation::from_ssz_bytes(&uncompressed_data)
99102
.inspect_err(|err| error!(?err, "Failed to decode gossipped attestation"))

crates/net/p2p/src/metrics.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,68 @@ static LEAN_PEER_DISCONNECTION_EVENTS_TOTAL: LazyLock<IntCounterVec> = LazyLock:
6868
.unwrap()
6969
});
7070

71+
// --- Gossip Message Size Histograms ---
72+
73+
static LEAN_GOSSIP_BLOCK_SIZE_BYTES: LazyLock<Histogram> = LazyLock::new(|| {
74+
register_histogram!(
75+
"lean_gossip_block_size_bytes",
76+
"Bytes size of a gossip block message",
77+
vec![
78+
10_000.0,
79+
50_000.0,
80+
100_000.0,
81+
250_000.0,
82+
500_000.0,
83+
1_000_000.0,
84+
2_000_000.0,
85+
5_000_000.0
86+
]
87+
)
88+
.unwrap()
89+
});
90+
91+
static LEAN_GOSSIP_ATTESTATION_SIZE_BYTES: LazyLock<Histogram> = LazyLock::new(|| {
92+
register_histogram!(
93+
"lean_gossip_attestation_size_bytes",
94+
"Bytes size of a gossip attestation message",
95+
vec![512.0, 1024.0, 2048.0, 4096.0, 8192.0, 16384.0]
96+
)
97+
.unwrap()
98+
});
99+
100+
static LEAN_GOSSIP_AGGREGATION_SIZE_BYTES: LazyLock<Histogram> = LazyLock::new(|| {
101+
register_histogram!(
102+
"lean_gossip_aggregation_size_bytes",
103+
"Bytes size of a gossip aggregated attestation message",
104+
vec![
105+
1024.0,
106+
4096.0,
107+
16384.0,
108+
65536.0,
109+
131_072.0,
110+
262_144.0,
111+
524_288.0,
112+
1_048_576.0
113+
]
114+
)
115+
.unwrap()
116+
});
117+
118+
/// Observe the size of a gossip block message.
119+
pub fn observe_gossip_block_size(bytes: usize) {
120+
LEAN_GOSSIP_BLOCK_SIZE_BYTES.observe(bytes as f64);
121+
}
122+
123+
/// Observe the size of a gossip attestation message.
124+
pub fn observe_gossip_attestation_size(bytes: usize) {
125+
LEAN_GOSSIP_ATTESTATION_SIZE_BYTES.observe(bytes as f64);
126+
}
127+
128+
/// Observe the size of a gossip aggregated attestation message.
129+
pub fn observe_gossip_aggregation_size(bytes: usize) {
130+
LEAN_GOSSIP_AGGREGATION_SIZE_BYTES.observe(bytes as f64);
131+
}
132+
71133
/// Set the attestation committee subnet gauge.
72134
pub fn set_attestation_committee_subnet(subnet_id: u64) {
73135
static LEAN_ATTESTATION_COMMITTEE_SUBNET: LazyLock<IntGauge> = LazyLock::new(|| {

0 commit comments

Comments
 (0)