From 28befc672cafa299ece666f7aba87294bc1562de Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Fri, 15 May 2026 20:11:54 +0100 Subject: [PATCH] minor corrections Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> --- .../grafana/dashboards/consensus-health.json | 122 ++++++++++++++++++ .../provisioning/datasources/loki.yaml | 15 +++ 2 files changed, 137 insertions(+) diff --git a/docker/telemetry/grafana/dashboards/consensus-health.json b/docker/telemetry/grafana/dashboards/consensus-health.json index 06e7249107..98e236ceb2 100644 --- a/docker/telemetry/grafana/dashboards/consensus-health.json +++ b/docker/telemetry/grafana/dashboards/consensus-health.json @@ -654,6 +654,128 @@ "refId": "A" } ] + }, + { + "title": "Ledger Total Processing Time (Round Open -> Next Round Start)", + "description": "Per-node duration of the consensus.round span — covers the full local round (open phase, establish/proposal exchange, accept request) and ends only when the next round's startRoundTracing() resets it. Spans multiple consensus retries when MovedOn or Expired forces another round before a ledger is fully accepted. Series: Round Total = every consensus.round span; Accepted (apply) = consensus.accept.apply with consensus_state=finished (apply-phase duration only, plotted as a marker for accepted ledgers); Rejected (apply) = consensus.accept.apply with any consensus_state other than finished (moved_on or expired). 
NOTE: accept.apply and consensus.round spans currently land in separate Tempo traces (a trace-context propagation gap on the jtACCEPT thread), so descendant joins between them return empty — each query therefore matches a single span name.", + "type": "timeseries", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 64 + }, + "fieldConfig": { + "defaults": { + "unit": "ms", + "custom": { + "drawStyle": "points", + "pointSize": 6, + "showPoints": "always", + "axisLabel": "Span Duration (ms)", + "spanNulls": false + } + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "displayName", + "value": "Round Total" + }, + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "blue" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "displayName", + "value": "Accepted (apply)" + }, + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "green" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "displayName", + "value": "Rejected (apply)" + }, + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "red" + } + } + ] + } + ] + }, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + }, + "legend": { + "displayMode": "table", + "placement": "bottom", + "calcs": ["mean", "max", "count"] + } + }, + "targets": [ + { + "datasource": { + "type": "tempo", + "uid": "tempo" + }, + "queryType": "traceql", + "query": "{ resource.service.instance.id=~\"$node\" && name=\"consensus.round\" } | select(span:duration)", + "refId": "A" + }, + { + "datasource": { + "type": "tempo", + "uid": "tempo" + }, + "queryType": "traceql", + "query": "{ resource.service.instance.id=~\"$node\" && name=\"consensus.accept.apply\" && span.consensus_state=\"finished\" } | select(span:duration)", + "refId": "B" + }, + { + "datasource": { + "type": "tempo", + "uid": "tempo" + }, + "queryType": "traceql", + "query": "{ 
resource.service.instance.id=~\"$node\" && name=\"consensus.accept.apply\" && span.consensus_state!=\"finished\" } | select(span:duration)", + "refId": "C" + } + ] } ], "schemaVersion": 39, diff --git a/docker/telemetry/grafana/provisioning/datasources/loki.yaml b/docker/telemetry/grafana/provisioning/datasources/loki.yaml index f5cd051715..78035a9dde 100644 --- a/docker/telemetry/grafana/provisioning/datasources/loki.yaml +++ b/docker/telemetry/grafana/provisioning/datasources/loki.yaml @@ -18,7 +18,22 @@ datasources: uid: loki jsonData: derivedFields: + # Trace ID is an OTel-issued 32-hex value emitted by Logs::format() + # as `trace_id=...`. Grafana treats the captured value as a Tempo + # trace ID, opening the trace directly via Tempo's trace-by-id API. - datasourceUid: tempo matcherRegex: "trace_id=(\\w+)" name: TraceID url: "$${__value.raw}" + # 64-char uppercase hex tokens in log bodies are XRPL ledger + # hashes (or tx hashes). They are NOT OTel trace IDs and cannot + # be resolved via the trace-by-id endpoint. Build a Grafana + # Explore deep-link that runs TraceQL on the span attribute + # `xrpl.consensus.ledger_id` — set on `consensus.round` spans to + # the full prev_ledger hash. This finds the round span whose + # ledger hash had its first 16 bytes folded into the OTel + # trace_id by SpanGuard::hashSpan(). + - matcherRegex: "\\b([A-F0-9]{64})\\b" + name: ConsensusLedgerHash + urlDisplayLabel: "Search Tempo" + url: '/explore?schemaVersion=1&orgId=1&panes={"tempo":{"datasource":"tempo","queries":[{"refId":"A","queryType":"traceql","query":"{.xrpl.consensus.ledger_id=\"$${__value.raw}\"}"}],"range":{"from":"now-1h","to":"now"}}}'