mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-05 17:56:49 +00:00
fix(telemetry): align consensus_mode spanmetrics label with emitted attribute
The spanmetrics connector dimension was `xrpl.consensus.mode`, but the code
emits the span attribute under the bare key `consensus_mode` (matching every
other dimension after the Phase 6 rename). The mismatch left the
`xrpl_consensus_mode` Prometheus label empty, so the Consensus Health
"Consensus Mode Over Time" panel and the `$consensus_mode` template variable
(which filters every panel) matched no live series.
- otel-collector-config.yaml: dimension `xrpl.consensus.mode` -> `consensus_mode`
- consensus-health.json: 11 label refs `xrpl_consensus_mode` -> `consensus_mode`
(the `$consensus_mode` Grafana variable name is unchanged)
- telemetry-runbook.md: refresh the stale spanmetrics label table to the bare
names actually emitted (command/rpc_status/consensus_mode/local/
proposal_trusted/validation_trusted), fix dotted->bare attribute names in
span tables and TraceQL examples (tx_hash, ledger_seq, consensus_round_id,
consensus_ledger_id, consensus_round, tx_id event attr), correct the
consensus_round_id query to int (not quoted string), and fix the
load_type value query ("exception_rpc" -> "exceptioned RPC").
Verified against the live stack: Tempo span tags confirm bare attribute keys
(consensus_mode, ledger_seq, tx_hash, ...); the populated xrpl_consensus_mode
series in Prometheus is stale retained data from an older build.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -29,14 +29,14 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", xrpl_consensus_mode=~\"$consensus_mode\", span_name=\"consensus.accept\"}[5m])))",
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.accept\"}[5m])))",
|
||||
"legendFormat": "P95 Round Duration [{{exported_instance}}]"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", xrpl_consensus_mode=~\"$consensus_mode\", span_name=\"consensus.accept\"}[5m])))",
|
||||
"expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.accept\"}[5m])))",
|
||||
"legendFormat": "P50 Round Duration [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
@@ -75,7 +75,7 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_consensus_mode=~\"$consensus_mode\", span_name=\"consensus.proposal.send\"}[5m]))",
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.proposal.send\"}[5m]))",
|
||||
"legendFormat": "Proposals / Sec [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
@@ -114,7 +114,7 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", xrpl_consensus_mode=~\"$consensus_mode\", span_name=\"consensus.ledger_close\"}[5m])))",
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.ledger_close\"}[5m])))",
|
||||
"legendFormat": "P95 Close Duration [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
@@ -153,7 +153,7 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_consensus_mode=~\"$consensus_mode\", span_name=\"consensus.validation.send\"}[5m]))",
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.validation.send\"}[5m]))",
|
||||
"legendFormat": "Validations / Sec [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
@@ -179,14 +179,14 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", xrpl_consensus_mode=~\"$consensus_mode\", span_name=\"consensus.accept.apply\"}[5m])))",
|
||||
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.accept.apply\"}[5m])))",
|
||||
"legendFormat": "P95 Apply Duration [{{exported_instance}}]"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", xrpl_consensus_mode=~\"$consensus_mode\", span_name=\"consensus.accept.apply\"}[5m])))",
|
||||
"expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.accept.apply\"}[5m])))",
|
||||
"legendFormat": "P50 Apply Duration [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
@@ -219,7 +219,7 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (close_time_correct, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"consensus.accept.apply\", xrpl_consensus_mode=~\"$consensus_mode\", exported_instance=~\"$node\"}[$__rate_interval]))",
|
||||
"expr": "sum by (close_time_correct, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"consensus.accept.apply\", consensus_mode=~\"$consensus_mode\", exported_instance=~\"$node\"}[$__rate_interval]))",
|
||||
"legendFormat": "Close Time Correct={{close_time_correct}} [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
@@ -258,8 +258,8 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (xrpl_consensus_mode, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_consensus_mode=~\"$consensus_mode\", span_name=\"consensus.ledger_close\"}[5m]))",
|
||||
"legendFormat": "{{xrpl_consensus_mode}} [{{exported_instance}}]"
|
||||
"expr": "sum by (consensus_mode, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", consensus_mode=~\"$consensus_mode\", span_name=\"consensus.ledger_close\"}[5m]))",
|
||||
"legendFormat": "{{consensus_mode}} [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@@ -773,7 +773,7 @@
|
||||
"label": "Consensus Mode",
|
||||
"description": "Filter by consensus mode (Proposing, Observing, Wrong Ledger, Switched Ledger)",
|
||||
"type": "query",
|
||||
"query": "label_values(traces_span_metrics_calls_total{span_name=\"consensus.ledger_close\"}, xrpl_consensus_mode)",
|
||||
"query": "label_values(traces_span_metrics_calls_total{span_name=\"consensus.ledger_close\"}, consensus_mode)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
|
||||
@@ -51,7 +51,7 @@ connectors:
|
||||
dimensions:
|
||||
- name: command
|
||||
- name: rpc_status
|
||||
- name: xrpl.consensus.mode
|
||||
- name: consensus_mode
|
||||
- name: close_time_correct
|
||||
- name: local
|
||||
- name: suppressed
|
||||
|
||||
Reference in New Issue
Block a user