mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-06 18:26:51 +00:00
feat(telemetry): make consensus panels show real consensus timing and rounds
The consensus duration panels plotted span wall-clock (traces_span_metrics_duration_milliseconds), which is ~3-8 ms of instrumentation overhead, not the real consensus time (~3000 ms). And the close-time value panels plotted an ever-rising absolute epoch line. Rework them to answer the actual operational questions, all from attributes that already exist on the consensus spans: - Time to Reach Consensus (p50/p95) and Average Time to Reach Consensus: round_time_ms on consensus.accept — the wall-clock to agree a ledger. - Consensus Rounds per Ledger (Establish Count): avg and max of establish_count on consensus.establish — how many proposal rounds it took to converge (1 = first proposal). - Previous Round Time per Ledger: previous_round_time_ms on consensus.round. Reorder the dashboard into an investigation flow: health/throughput -> time-to-consensus and rounds -> ledger close/apply timing -> close-time detail -> failures/mode/mismatch. Assign stable sequential panel ids. Verified each query returns data via the Grafana datasource proxy (p95 ~4096 ms, avg ~2825 ms, rounds ~2). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -8,123 +8,6 @@
|
||||
"id": null,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"title": "Consensus Round Duration",
|
||||
"description": "p95 duration of consensus accept rounds. The consensus.accept span (RCLConsensus.cpp) measures the time to process an accepted ledger including transaction application and state finalization. The span carries proposers and round_time_ms attributes. Normal range is 3-6 seconds on mainnet.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.accept\"}[5m])))",
|
||||
"legendFormat": "P95 Round Duration [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"custom": {
|
||||
"axisLabel": "Duration (ms)",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Consensus Proposals Sent Rate",
|
||||
"description": "Rate at which this node sends consensus proposals to the network. Sourced from the consensus.proposal.send span (RCLConsensus.cpp) which fires each time the node proposes a transaction set. The span carries xrpl.consensus.round identifying the consensus round number. A healthy proposing node should show steady proposal output.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.proposal.send\"}[5m]))",
|
||||
"legendFormat": "Proposals / Sec [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ops",
|
||||
"custom": {
|
||||
"axisLabel": "Proposals / Sec",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Ledger Close Duration",
|
||||
"description": "p95 duration of the ledger close event. The consensus.ledger_close span (RCLConsensus.cpp) measures the time from when consensus triggers a ledger close to completion. Carries ledger_seq and consensus_mode attributes. Compare with Consensus Round Duration to understand how close timing relates to overall round time.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.ledger_close\"}[5m])))",
|
||||
"legendFormat": "P95 Close Duration [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"custom": {
|
||||
"axisLabel": "Duration (ms)",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Validation Send Rate",
|
||||
"description": "Rate at which this node sends ledger validations to the network. Sourced from the consensus.validation.send span (RCLConsensus.cpp). Each validation confirms the node has fully validated a ledger. The span carries ledger_seq and proposing. Should closely track the ledger close rate when the node is healthy.",
|
||||
@@ -132,8 +15,8 @@
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
@@ -155,73 +38,8 @@
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Ledger Apply Duration (doAccept)",
|
||||
"description": "Time spent applying the consensus result to build a new ledger. Measured by the consensus.accept.apply span in doAccept().",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.accept.apply\"}[5m])))",
|
||||
"legendFormat": "P95 Apply Duration [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"custom": {
|
||||
"axisLabel": "Duration (ms)",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Close Time Agreement",
|
||||
"description": "Rate of close time agreement vs disagreement across consensus rounds. Based on close_time_correct attribute (true = validators agreed, false = agreed to disagree per avCT_CONSENSUS_PCT).",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 16
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (close_time_correct, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"consensus.accept.apply\", consensus_mode=~\"$consensus_mode\", exported_instance=~\"$node\"}[$__rate_interval]))",
|
||||
"legendFormat": "Close Time Correct={{close_time_correct}} [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ops",
|
||||
"custom": {
|
||||
"axisLabel": "Rounds / Sec",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
"id": 1
|
||||
},
|
||||
{
|
||||
"title": "Consensus Mode Over Time",
|
||||
@@ -230,8 +48,8 @@
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 24
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
@@ -260,7 +78,48 @@
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
"id": 2
|
||||
},
|
||||
{
|
||||
"title": "Consensus Proposals Sent Rate",
|
||||
"description": "Rate at which this node sends consensus proposals to the network. Sourced from the consensus.proposal.send span (RCLConsensus.cpp) which fires each time the node proposes a transaction set. The span carries xrpl.consensus.round identifying the consensus round number. A healthy proposing node should show steady proposal output.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.proposal.send\"}[5m]))",
|
||||
"legendFormat": "Proposals / Sec [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ops",
|
||||
"custom": {
|
||||
"axisLabel": "Proposals / Sec",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"id": 3
|
||||
},
|
||||
{
|
||||
"title": "Accept vs Close Rate",
|
||||
@@ -270,7 +129,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 24
|
||||
"y": 8
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
@@ -306,7 +165,8 @@
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
"id": 4
|
||||
},
|
||||
{
|
||||
"title": "Validation vs Close Rate",
|
||||
@@ -316,7 +176,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 32
|
||||
"y": 16
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
@@ -352,7 +212,320 @@
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
"id": 5
|
||||
},
|
||||
{
|
||||
"title": "Time to Reach Consensus (p50/p95)",
|
||||
"description": "Wall-clock time for the network to agree a ledger, from the round_time_ms attribute on consensus.accept. This is the real consensus latency, not span overhead.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 16
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "tempo",
|
||||
"uid": "tempo"
|
||||
},
|
||||
"queryType": "traceql",
|
||||
"query": "{name=\"consensus.accept\" && resource.service.instance.id=~\"$node\"} | quantile_over_time(span.round_time_ms, .5)",
|
||||
"legendFormat": "P50 Time to Consensus [{{resource.service.instance.id}}]",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "tempo",
|
||||
"uid": "tempo"
|
||||
},
|
||||
"queryType": "traceql",
|
||||
"refId": "B",
|
||||
"query": "{name=\"consensus.accept\" && resource.service.instance.id=~\"$node\"} | quantile_over_time(span.round_time_ms, .95)",
|
||||
"legendFormat": "P95 Time to Consensus [{{resource.service.instance.id}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"custom": {
|
||||
"axisLabel": "Time to Consensus (ms)",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"id": 6
|
||||
},
|
||||
{
|
||||
"title": "Average Time to Reach Consensus",
|
||||
"description": "Mean time to agree a ledger (round_time_ms on consensus.accept), averaged over the window. Tracks the typical consensus latency trend.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 24
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "tempo",
|
||||
"uid": "tempo"
|
||||
},
|
||||
"queryType": "traceql",
|
||||
"query": "{name=\"consensus.accept\" && resource.service.instance.id=~\"$node\"} | avg_over_time(span.round_time_ms)",
|
||||
"legendFormat": "Avg Time to Consensus [{{resource.service.instance.id}}]",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"custom": {
|
||||
"axisLabel": "Time to Consensus (ms)",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"id": 7
|
||||
},
|
||||
{
|
||||
"title": "Consensus Rounds per Ledger (Establish Count)",
|
||||
"description": "Number of establish-phase iterations (proposal rounds) before agreement, from establish_count on consensus.establish. 1 = agreed on first proposal; higher = more rounds of re-proposing to converge.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 24
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc",
|
||||
"maxHeight": 500
|
||||
},
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"calcs": ["mean", "max", "lastNotNull"]
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "tempo",
|
||||
"uid": "tempo"
|
||||
},
|
||||
"queryType": "traceql",
|
||||
"query": "{name=\"consensus.establish\" && resource.service.instance.id=~\"$node\"} | avg_over_time(span.establish_count)",
|
||||
"legendFormat": "Avg Rounds [{{resource.service.instance.id}}]",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "tempo",
|
||||
"uid": "tempo"
|
||||
},
|
||||
"queryType": "traceql",
|
||||
"query": "{name=\"consensus.establish\" && resource.service.instance.id=~\"$node\"} | max_over_time(span.establish_count)",
|
||||
"legendFormat": "Max Rounds [{{resource.service.instance.id}}]",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"axisLabel": "Rounds per Ledger",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"id": 8
|
||||
},
|
||||
{
|
||||
"title": "Previous Round Time per Ledger",
|
||||
"description": "Duration of the previous consensus round (previous_round_time_ms on consensus.round). Complements time-to-consensus with the prior round's cost.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 32
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "tempo",
|
||||
"uid": "tempo"
|
||||
},
|
||||
"queryType": "traceql",
|
||||
"query": "{name=\"consensus.round\" && resource.service.instance.id=~\"$node\"} | quantile_over_time(span.previous_round_time_ms, .95)",
|
||||
"legendFormat": "P95 Previous Round [{{resource.service.instance.id}}]",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"custom": {
|
||||
"axisLabel": "Round Time (ms)",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"id": 9
|
||||
},
|
||||
{
|
||||
"title": "Position Update Duration",
|
||||
"description": "p95 duration of the consensus.update_positions span, which tallies disputes and updates this node's position each round. Long durations indicate heavy dispute resolution or slow convergence on close time.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 32
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"consensus.update_positions\"}[5m])))",
|
||||
"legendFormat": "P95 Update [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"custom": {
|
||||
"axisLabel": "Duration (ms)",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"id": 10
|
||||
},
|
||||
{
|
||||
"title": "Ledger Close Duration",
|
||||
"description": "p95 duration of the ledger close event. The consensus.ledger_close span (RCLConsensus.cpp) measures the time from when consensus triggers a ledger close to completion. Carries ledger_seq and consensus_mode attributes. Compare with Consensus Round Duration to understand how close timing relates to overall round time.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 40
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.ledger_close\"}[5m])))",
|
||||
"legendFormat": "P95 Close Duration [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"custom": {
|
||||
"axisLabel": "Duration (ms)",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"id": 11
|
||||
},
|
||||
{
|
||||
"title": "Ledger Apply Duration (doAccept)",
|
||||
"description": "Time spent applying the consensus result to build a new ledger. Measured by the consensus.accept.apply span in doAccept().",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 40
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.accept.apply\"}[5m])))",
|
||||
"legendFormat": "P95 Apply Duration [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"custom": {
|
||||
"axisLabel": "Duration (ms)",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"id": 12
|
||||
},
|
||||
{
|
||||
"title": "Consensus Accept Duration Heatmap",
|
||||
@@ -361,8 +534,8 @@
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 32
|
||||
"x": 0,
|
||||
"y": 48
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
@@ -388,7 +561,42 @@
|
||||
"defaults": {
|
||||
"unit": "ms"
|
||||
}
|
||||
}
|
||||
},
|
||||
"id": 13
|
||||
},
|
||||
{
|
||||
"title": "Close Time Agreement",
|
||||
"description": "Rate of close time agreement vs disagreement across consensus rounds. Based on close_time_correct attribute (true = validators agreed, false = agreed to disagree per avCT_CONSENSUS_PCT).",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 48
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (close_time_correct, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"consensus.accept.apply\", consensus_mode=~\"$consensus_mode\", exported_instance=~\"$node\"}[$__rate_interval]))",
|
||||
"legendFormat": "Close Time Correct={{close_time_correct}} [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ops",
|
||||
"custom": {
|
||||
"axisLabel": "Rounds / Sec",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"id": 14
|
||||
},
|
||||
{
|
||||
"title": "Close Time: Raw Proposals (Per Node)",
|
||||
@@ -398,7 +606,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 40
|
||||
"y": 56
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
@@ -434,7 +642,8 @@
|
||||
"legendFormat": "Raw Close Time [{{resource.service.instance.id}}]",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
],
|
||||
"id": 15
|
||||
},
|
||||
{
|
||||
"title": "Close Time: Effective / Quantized",
|
||||
@@ -444,7 +653,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 40
|
||||
"y": 56
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
@@ -480,7 +689,8 @@
|
||||
"legendFormat": "Effective Close Time [{{resource.service.instance.id}}]",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
],
|
||||
"id": 16
|
||||
},
|
||||
{
|
||||
"title": "Close Time Vote Bins & Resolution",
|
||||
@@ -490,7 +700,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 48
|
||||
"y": 64
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
@@ -570,7 +780,8 @@
|
||||
"legendFormat": "Avg Resolution (ms)",
|
||||
"refId": "B"
|
||||
}
|
||||
]
|
||||
],
|
||||
"id": 17
|
||||
},
|
||||
{
|
||||
"title": "Close Time Resolution Direction",
|
||||
@@ -580,7 +791,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 48
|
||||
"y": 64
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
@@ -617,7 +828,8 @@
|
||||
"legendFormat": "Resolution Direction [{{span.resolution_direction}}]",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
],
|
||||
"id": 18
|
||||
},
|
||||
{
|
||||
"title": "Close Time Bin Distribution",
|
||||
@@ -625,9 +837,9 @@
|
||||
"type": "barchart",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 56
|
||||
"y": 72
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
@@ -664,7 +876,8 @@
|
||||
"legendFormat": "{{span.close_time_vote_bins}} Vote Bins",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
],
|
||||
"id": 19
|
||||
},
|
||||
{
|
||||
"title": "Consensus Outcome Distribution",
|
||||
@@ -672,9 +885,9 @@
|
||||
"type": "piechart",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 64
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 72
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
@@ -700,7 +913,8 @@
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
"id": 20
|
||||
},
|
||||
{
|
||||
"title": "Consensus Failures Over Time",
|
||||
@@ -708,9 +922,9 @@
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 16,
|
||||
"x": 8,
|
||||
"y": 64
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 80
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
@@ -746,131 +960,8 @@
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Consensus Round Duration (Full Round)",
|
||||
"description": "p95 duration of the full consensus round. The consensus.round span (RCLConsensus.cpp startRound) wraps an entire round end-to-end. Filterable by consensus mode. This is the single most important consensus-health signal; rising round time precedes ledger-age alarms.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 72
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.round\"}[5m])))",
|
||||
"legendFormat": "P95 Round [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"custom": {
|
||||
"axisLabel": "Duration (ms)",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Consensus Phase Duration (Open vs Establish)",
|
||||
"description": "p95 duration of the open phase (transaction collection) vs the establish phase (proposal convergence). The consensus.phase.open and consensus.establish spans decompose round latency, so an operator can tell whether slowness is in collecting transactions or reaching agreement.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 72
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"consensus.phase.open\"}[5m])))",
|
||||
"legendFormat": "P95 Open Phase [{{exported_instance}}]"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"consensus.establish\"}[5m])))",
|
||||
"legendFormat": "P95 Establish Phase [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"custom": {
|
||||
"axisLabel": "Duration (ms)",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Position Update Duration",
|
||||
"description": "p95 duration of the consensus.update_positions span, which tallies disputes and updates this node's position each round. Long durations indicate heavy dispute resolution or slow convergence on close time.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 80
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"consensus.update_positions\"}[5m])))",
|
||||
"legendFormat": "P95 Update [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"custom": {
|
||||
"axisLabel": "Duration (ms)",
|
||||
"spanNulls": true,
|
||||
"insertNulls": false,
|
||||
"showPoints": "auto",
|
||||
"pointSize": 3
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
"id": 21
|
||||
},
|
||||
{
|
||||
"title": "Consensus Stall Rate",
|
||||
@@ -916,7 +1007,8 @@
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
"id": 22
|
||||
},
|
||||
{
|
||||
"title": "Consensus Mode-Change Rate by Target Mode",
|
||||
@@ -924,7 +1016,7 @@
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 88
|
||||
},
|
||||
@@ -955,7 +1047,8 @@
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
"id": 23
|
||||
},
|
||||
{
|
||||
"title": "Ledger History Mismatch Rate by Reason",
|
||||
@@ -963,9 +1056,9 @@
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 96
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 88
|
||||
},
|
||||
"options": {
|
||||
"tooltip": {
|
||||
@@ -994,7 +1087,8 @@
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
}
|
||||
},
|
||||
"id": 24
|
||||
}
|
||||
],
|
||||
"schemaVersion": 39,
|
||||
|
||||
Reference in New Issue
Block a user